# Solution to the olympiad task "Спасти парашютиста" ("Save the Parachutist") ***with torch modules***

### Useful links:
* [**How does `yield` work?**](https://habr.com/ru/post/132554/)
* [Kaggle "Titanic" getting-started competition](https://www.kaggle.com/c/titanic/overview)
* [torch.utils.data.Dataset](https://github.com/pytorch/pytorch/blob/master/torch/utils/data/dataset.py)
* [torch.optim](https://pytorch.org/docs/stable/optim.html)

### Import libraries

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

### Create original dataset

In [2]:
class SynthDataset(Dataset):
    '''
    Synthetic noisy trajectory sampled at the time steps of `t_range`:

        x(t) = k2 * cos(k1 * t) + noise
        y(t) = k1 * sin(k2 * t) + noise

    where the noise is standard normal scaled by `noise_intensity`.
    '''

    def __init__(self, k1: float, k2: float, noise_intensity: float, t_range=range(0, 1000)):
        '''
        Build the time axis `self.t` and the `(len(t_range), 2)` tensor
        `self.coords` whose columns are the noisy `x` and `y` values
        '''

        super().__init__()
        timestamps = torch.tensor(list(t_range))
        n_samples = len(timestamps)

        # The x noise is drawn before the y noise; keeping this order keeps
        # seeded runs reproducible.
        x_clean = k2 * torch.cos(k1 * timestamps)
        x_noisy = x_clean + noise_intensity * torch.randn(n_samples)
        y_clean = k1 * torch.sin(k2 * timestamps)
        y_noisy = y_clean + noise_intensity * torch.randn(n_samples)

        self.t = timestamps
        self.coords = torch.stack((x_noisy, y_noisy), dim=1)

    def __len__(self):
        '''
        Number of samples (rows of `coords`)
        '''

        return self.coords.shape[0]

    def __getitem__(self, i):
        '''
        Return the i-th time step as a length-1 slice of `t` together with
        the i-th `(x, y)` row of `coords`
        '''

        time_slice = self.t[i:i + 1]
        point = self.coords[i]
        return time_slice, point

# Noisy synthetic trajectory generated with k1=3 and k2=5.
ds = SynthDataset(k1=3., k2=5., noise_intensity=0.01)
# dl = DataLoader(ds, batch_size=7, shuffle=False)

In [3]:
def dump_dataset(ds, fname):
    '''
    Write every sample of `ds` to `fname` as one `t,x,y` line.

    Each sample is expected to be a pair of a one-element time value and a
    two-element coordinate value.
    '''

    def _csv_line(idx):
        # Unpacking enforces the 1-element / 2-element layout of a sample.
        time_slice, point = ds[idx]
        (t,) = time_slice
        x, y = point
        return f'{t},{x},{y}\n'

    with open(fname, 'w') as out:
        out.writelines(_csv_line(idx) for idx in range(len(ds)))

# Persist the synthetic samples so they can be re-read as "real" data.
dump_dataset(ds=ds, fname='data.csv')

In [4]:
! head data.csv

0,5.010119915008545,0.003452320583164692
1,-4.941121578216553,-2.852698564529419
2,4.803633213043213,-1.6175178289413452
3,-4.5691237449646,1.9425715208053589
4,4.232048511505127,2.767404794692993
5,-3.799129009246826,-0.3912357985973358
6,3.2893497943878174,-2.964826822280884
7,-2.7297542095184326,-1.297222375869751
8,2.1268696784973145,2.2155773639678955
9,-1.4521633386611938,2.551121234893799


In [6]:
class RealDataset(Dataset):
    '''
    Dataset of 2-D coordinates, one sample per row. The row number,
    stored as a float, serves as the time stamp of the sample.
    '''

    def __init__(self, y):
        '''
        y: array-like of shape (N, 2) - numpy array, tensor or nested list
        '''

        super().__init__()
        # Convert before validating so that plain nested lists, which have
        # no `.shape` attribute, are accepted as well.
        coords = torch.tensor(y, dtype=torch.float)
        self.test(coords)
        self.coords = coords
        self.t = torch.arange(len(coords), dtype=torch.float)

    @classmethod
    def from_csv(cls, fname):
        '''
        Build the dataset from a header-less csv file. The first column is
        dropped (the file written by `dump_dataset` stores the time step
        there); the remaining columns are taken as the coordinates.
        '''

        table = pd.read_csv(fname, header=None)
        return cls(table.values[:, 1:])

    @staticmethod
    def test(y):
        '''
        Check that `y` has shape (N, 2); raise AssertionError otherwise
        '''

        shape = tuple(y.shape)
        # The rank must be checked first: indexing `shape[1]` of a 0-D or
        # 1-D input would raise IndexError instead of failing the check.
        assert len(shape) == 2, f'expected a 2-D array, got shape {shape}'
        assert shape[1] == 2, f'expected 2 columns (x, y), got shape {shape}'

    def __len__(self):
        '''
        Total count of samples
        '''

        return len(self.coords)  # self.coords.shape[0]

    def __getitem__(self, i):
        '''
        Return the i-th time step as a length-1 slice of `t` together with
        the i-th `(x, y)` row of `coords`
        '''

        return self.t[i:i + 1], self.coords[i]

In [7]:
# `test` is a static method of RealDataset; `ds` is still a SynthDataset at
# this point, so the check has to be called on the class itself.
RealDataset.test(torch.ones((2, 2)))

In [None]:
# Re-read the dumped samples through the csv-backed dataset.
ds = RealDataset.from_csv(fname='data.csv')

In [None]:
# Inspect the shape of the coordinate tensor held by the dataset.
ds.coords.shape

In [None]:
class DummyDataLoader:
    '''
    Minimal re-implementation of `torch.utils.data.DataLoader`: yields
    `(t, coords)` batches obtained by stacking consecutive samples of `ds`.

    ds: indexable dataset whose items are `(t, coords)` pairs of tensors
    batch_size: number of samples per batch (the last batch may be smaller)
    shuffle: if True, a new random sample order is drawn for every pass
    '''

    def __init__(self, ds, batch_size, shuffle=False):
        self.ds = ds
        self.batch_size = batch_size
        self.shuffle = shuffle
        self._reindex()

    def _reindex(self):
        '''
        (Re)build the sample order `idxs` and its per-batch chunks `split`
        '''

        n_samples = len(self.ds)
        if self.shuffle:
            self.idxs = torch.randperm(n_samples)
        else:
            self.idxs = torch.arange(n_samples)
        # Drop empty chunks so an empty dataset yields no batches instead of
        # failing inside `torch.stack`.
        self.split = tuple(
            chunk for chunk in torch.split(self.idxs, self.batch_size)
            if chunk.numel() > 0
        )

    def __len__(self):
        '''
        Number of batches per pass
        '''

        return len(self.split)

    def __iter__(self):
        if self.shuffle:
            # Draw a fresh permutation for every epoch.
            self._reindex()
        for chunk in self.split:
            # Fetch every sample exactly once, then regroup the
            # (t, coords) pairs into two parallel tuples.
            samples = [self.ds[idx] for idx in chunk.tolist()]
            times, points = zip(*samples)
            yield torch.stack(times, dim=0), torch.stack(points, dim=0)

In [None]:
# Hand-written loader over the dataset, seven samples per batch.
ddl = DummyDataLoader(ds, batch_size=7)

In [None]:
# Print the first six batches of the hand-written loader `ddl` (no `dl`
# exists at this point: its creation is commented out above).
for i, batch in enumerate(ddl):
    print(batch)
    print('=' * 20)
    if i == 5:
        break

In [None]:
class ParaModel(nn.Module):
    '''
    Parametric curve with two trainable scalars `k1` and `k2`, both
    initialised to 1:

        x(t) = k2 * cos(k1 * t)
        y(t) = k1 * sin(k2 * t)
    '''

    def __init__(self):
        super().__init__()
        # `nn.Parameter` enables gradients by default; `k1` is registered
        # before `k2`, which fixes the order returned by `parameters()`.
        self.k1 = nn.Parameter(torch.ones(1, dtype=torch.float))
        self.k2 = nn.Parameter(torch.ones(1, dtype=torch.float))

    def forward(self, t):
        '''
        t: (B, 1) time steps; returns the (B, 2) tensor of `(x, y)` points
        '''
        phase_x = self.k1 * t
        phase_y = self.k2 * t
        x_col = self.k2 * torch.cos(phase_x)  # (B, 1)
        y_col = self.k1 * torch.sin(phase_y)  # (B, 1)
        return torch.cat((x_col, y_col), dim=1)  # (B, 2)

In [None]:
# Instantiate the model and list its registered parameters.
m = ParaModel()
list(m.parameters())

In [None]:
class DummyMSELoss:
    '''
    Hand-written mean squared error with the same `reduction` options as
    `torch.nn.MSELoss`:

        'mean' - average of the squared errors (default)
        'sum'  - sum of the squared errors
        'none' - element-wise squared errors, same shape as the input
    '''

    _REDUCTIONS = ('mean', 'sum', 'none')

    def __init__(self, reduction='mean'):
        # Fail early instead of silently returning None from `__call__`.
        if reduction not in self._REDUCTIONS:
            raise ValueError(
                f'{reduction} is not a valid value for reduction; '
                f'expected one of {self._REDUCTIONS}'
            )
        self.reduction = reduction

    def __call__(self, pred, gt):
        '''
        pred, gt: broadcastable tensors of predictions and ground truth
        '''

        squared_error = (pred - gt) ** 2
        if self.reduction == 'mean':
            return squared_error.mean()
        if self.reduction == 'sum':
            return squared_error.sum()
        if self.reduction == 'none':
            return squared_error
        # Reached only if `reduction` was reassigned after construction.
        raise ValueError(f'{self.reduction} is not a valid value for reduction')

In [None]:
# Fit ParaModel to a synthetic trajectory generated with k1=3 and k2=5.
# The dataset class defined in this notebook is `SynthDataset`.
ds = SynthDataset(3., 5., 0.01)

m = ParaModel()

criterion = nn.MSELoss(reduction='mean')
# criterion2 = DummyMSELoss(reduction='mean')

optimizer = optim.SGD(m.parameters(), lr=0.001)

# With shuffle=True the loader draws a new sample order every time it is
# iterated, so a single instance serves all epochs.
dl = DataLoader(ds, batch_size=7, shuffle=True)

epochs = 3
for epoch in range(epochs):
    for batch in dl:
        optimizer.zero_grad()
        t, coords = batch
        pred = m(t)
        gt = coords   # gt = ground truth
        loss = criterion(pred, gt)
        loss.backward()
        optimizer.step()
        # To trace convergence, print `loss`, `m.k1` and `m.k2` here.