In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

In [3]:
# Each split lives in its own comma-separated file next to the notebook.
data_train = pd.read_csv('./train.csv')
data_test = pd.read_csv('./test.csv')
data_validation = pd.read_csv('./validation.csv')

In [4]:
def dist_ortho(lon1, lat1, lon2, lat2, radius=6377726):
    """Great-circle distance between points, using the haversine formula.

    Parameters
    ----------
    lon1, lat1 : float or array-like
        Longitude and latitude of the first point(s), in degrees.
    lon2, lat2 : float or array-like
        Longitude and latitude of the second point(s), in degrees.
        The four arguments are combined element-wise with NumPy
        broadcasting, so e.g. a row vector against a column vector
        yields the full pairwise matrix.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to the value that used to be hard-coded here
        (presumably metres -- TODO confirm the intended Earth radius).

    Returns
    -------
    float or numpy.ndarray
        ``radius`` multiplied by the central angle between the points.
    """
    half_dlat = np.radians((lat1 - lat2) / 2)
    half_dlon = np.radians((lon1 - lon2) / 2)

    a = np.sin(half_dlat) ** 2
    b = np.cos(np.radians(lat1)) * np.cos(np.radians(lat2))
    c = np.sin(half_dlon) ** 2

    # Mathematically the haversine term lies in [0, 1]; rounding can push it
    # marginally outside, which would make sqrt/arcsin return NaN.
    haversine = np.clip(a + b * c, 0.0, 1.0)

    return radius * 2 * np.arcsin(np.sqrt(haversine))


class TrajDataSet(Dataset):
    """Sliding-window dataset of pairwise-distance matrices along trajectories.

    A window of ``window`` consecutive rows is a valid sample when its first
    and last rows carry the same ``trip`` value. For each valid window the
    item is the ``(window, window)`` matrix of ``dist_ortho`` distances
    between every pair of points in the window, returned as the pair
    ``(dd, dd)`` (presumably input and target of an autoencoder).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``trip``, ``lon`` and ``lat``. Rows of one
        trip are assumed to be contiguous -- TODO confirm for the input files.
    window : int
        Number of consecutive rows per sample (must be >= 1).
    transform : callable, optional
        Applied to the ``(dd, dd)`` tuple before it is returned.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """

    def __init__(self, df, window, transform=None):
        if window < 1:
            raise ValueError("window must be >= 1")

        # Re-label rows 0..len(df)-1 so window starts double as .loc labels.
        self.df = df.reset_index(drop=True)
        self.window = window
        self.transform = transform

        # Vectorised "first row and last row of the window share a trip".
        trip = self.df['trip'].to_numpy()
        n_starts = len(trip) - window + 1
        if n_starts > 0:
            same_trip = trip[:n_starts] == trip[window - 1:]
            self.start_idx = np.where(same_trip)[0]
        else:
            # Fewer rows than one window: no valid sample.
            self.start_idx = np.array([], dtype=np.intp)

    def __len__(self):
        return len(self.start_idx)

    def __getitem__(self, idx):
        first = self.start_idx[idx]
        last = first + self.window - 1  # .loc label slicing is inclusive

        coord = self.df.loc[first:last, ['lon', 'lat']].to_numpy().T
        lon, lat = coord[0], coord[1]

        # Row vector against column vector broadcasts to all point pairs.
        dd = dist_ortho(lon[np.newaxis, :], lat[np.newaxis, :],
                        lon[:, np.newaxis], lat[:, np.newaxis])

        sample = (dd, dd)

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

class ToTensor(object):
    """Convert both ndarrays of a sample to float32 tensors.

    Called with a 2-tuple of arrays, returns a 2-tuple of tensors, each with
    a new leading axis of size 1 (added via ``unsqueeze(0)``).
    """

    def __call__(self, sample):
        # Distinct names: unpacking both elements into one name would
        # silently drop the first element and return the second one twice.
        first, second = sample
        first = torch.as_tensor(first, dtype=torch.float32)
        second = torch.as_tensor(second, dtype=torch.float32)
        return (first.unsqueeze(0), second.unsqueeze(0))

class Divide(object):
    """Scale both arrays of a sample by dividing them by a constant.

    Parameters
    ----------
    ratio : number
        Divisor applied to each element of the sample.
    """

    def __init__(self, ratio):
        self.ratio = ratio

    def __call__(self, sample):
        # Distinct names: unpacking both elements into one name would
        # silently drop the first element and return the second one twice.
        first, second = sample
        return (first / self.ratio, second / self.ratio)


In [88]:
# hyperparameters
window = 128
batch_size = 64

# Training and validation splits get the same preprocessing:
# divide the distance matrices by 12000, then convert them to tensors.
train_set = TrajDataSet(
    data_train,
    window,
    transform=transforms.Compose([Divide(12000), ToTensor()]),
)
validation_set = TrajDataSet(
    data_validation,
    window,
    transform=transforms.Compose([Divide(12000), ToTensor()]),
)

# Both loaders shuffle and drop the final incomplete batch.
_loader_options = dict(batch_size=batch_size, num_workers=0, shuffle=True, drop_last=True)
train_loader = DataLoader(train_set, **_loader_options)
validation_loader = DataLoader(validation_set, **_loader_options)

In [89]:
class Encoder(nn.Module):
    """Convolutional encoder for single-channel 2-D inputs.

    Three stages (``cnn_k`` followed by ``pooling_k``) are registered, but
    ``forward`` only applies the first two, so the output has 32 channels.
    The third stage is kept so its parameters remain part of the module.
    """

    @staticmethod
    def _double_conv(c_in, c_mid, c_out, batch_norm):
        """Two size-preserving 5x5 conv + ReLU layers, optionally preceded by BatchNorm."""
        layers = [nn.BatchNorm2d(c_in)] if batch_norm else []
        layers += [
            nn.Conv2d(c_in, c_mid, kernel_size=5, stride=1, padding=2, dilation=1),
            nn.ReLU(),
            nn.Conv2d(c_mid, c_out, kernel_size=5, stride=1, padding=2, dilation=1),
            nn.ReLU(),
        ]
        return nn.Sequential(*layers)

    @staticmethod
    def _max_pool(kernel_size):
        """Stride-2 max pooling wrapped in a Sequential."""
        return nn.Sequential(
            nn.MaxPool2d(kernel_size=kernel_size, stride=2, padding=0, dilation=1)
        )

    def __init__(self):
        super().__init__()

        self.cnn_1 = self._double_conv(1, 4, 8, batch_norm=False)
        self.pooling_1 = self._max_pool(5)

        self.cnn_2 = self._double_conv(8, 16, 32, batch_norm=True)
        self.pooling_2 = self._max_pool(5)

        # Registered but currently not applied in forward().
        self.cnn_3 = self._double_conv(32, 64, 128, batch_norm=True)
        self.pooling_3 = self._max_pool(8)

    def forward(self, x):
        stage_1 = self.pooling_1(self.cnn_1(x))
        stage_2 = self.pooling_2(self.cnn_2(stage_1))
        # Stage 3 (cnn_3 / pooling_3) is intentionally left out here.
        return stage_2

    
    
class Decoder(nn.Module):
    """Convolutional decoder: three (transposed-conv upsampling, conv block) stages.

    Expects an input with 128 channels and produces a single-channel output.
    Every 5x5 convolution uses ``padding=4`` and therefore enlarges each
    spatial dimension by 4.

    NOTE(review): ``Encoder.forward`` currently returns 32 channels, while
    ``upsampling_3`` expects 128, so the two modules cannot be chained as
    they stand. The output size also does not match the encoder input size.
    Confirm the intended architecture.
    """

    def __init__(self):
        super(Decoder, self).__init__()

        self.upsampling_3 = nn.Sequential(
            nn.ConvTranspose2d(128, 128, kernel_size=8, stride=2, padding=0, dilation=1)
        )

        self.cnn_3 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 64, kernel_size=5, stride=1, padding=4, dilation=1),
            nn.ReLU(),
            nn.Conv2d(64, 32, kernel_size=5, stride=1, padding=4, dilation=1),
            nn.ReLU()
        )

        self.upsampling_2 = nn.Sequential(
            nn.ConvTranspose2d(32, 32, kernel_size=5, stride=2, padding=0, dilation=1)
        )

        self.cnn_2 = nn.Sequential(
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 16, kernel_size=5, stride=1, padding=4, dilation=1),
            nn.ReLU(),
            nn.Conv2d(16, 8, kernel_size=5, stride=1, padding=4, dilation=1),
            nn.ReLU()
        )

        self.upsampling_1 = nn.Sequential(
            nn.ConvTranspose2d(8, 8, kernel_size=5, stride=2, padding=0, dilation=1)
        )

        self.cnn_1 = nn.Sequential(
            nn.BatchNorm2d(8),
            nn.Conv2d(8, 4, kernel_size=5, stride=1, padding=4, dilation=1),
            nn.ReLU(),
            # Input channels must equal the 4 channels produced just above
            # (this was 2, which made every forward pass raise).
            nn.Conv2d(4, 1, kernel_size=5, stride=1, padding=4, dilation=1),
            nn.ReLU()
        )

    def forward(self, x):
        out = self.upsampling_3(x)
        out = self.cnn_3(out)
        out = self.upsampling_2(out)
        out = self.cnn_2(out)
        out = self.upsampling_1(out)
        out = self.cnn_1(out)

        return out

      

In [90]:
# Draw one batch from the shuffled training loader. The dataset yields the
# pair (dd, dd), so x and y hold the same scaled distance matrices.
x, y = next(iter(train_loader)) 

# Instantiate both halves of the model (freshly initialised, untrained).
encod = Encoder()
decod = Decoder()

# Shape check of the encoder only; the decoder call below is left disabled.
out = encod(x)

out.shape
# decod(out).shape
# NOTE(review): the encoder output has 32 channels whereas
# Decoder.upsampling_3 expects 128, so enabling the line above would fail.

torch.Size([64, 32, 29, 29])

In [None]:




# NOTE(review): `model` is not defined in any cell visible above, so this cell
# raises NameError on a fresh kernel. It looks like scratch code that steps
# manually through the layers of a different model (an encoder/decoder with
# skip connections, judging by the torch.cat calls) -- confirm or remove.

# Threshold y, then sum over dimension 2.
out = model.threshold(y)
out = torch.sum(out, 2)

# Append x (with its size-1 dimension 1 removed) along dimension 1.
out = torch.cat((out, x.squeeze(1)), 1)

# Downward path; out_1 and out_2 are kept for the concatenations below.
out_1 = model.cnn_input_1(out)
out = model.pooling_1(out_1)
out_2 = model.cnn_input_2(out)
out = model.pooling_2(out_2)
out = model.cnn_input_3(out)

# Upward path, stage 2: upsample and concatenate with out_2 along dimension 1.
out = model.upconv_2(out)
out = torch.cat((out, out_2), 1)
out = model.cnn_output_2(out)

# Upward path, stage 1: upsample and concatenate with out_1 along dimension 1.
out = model.upconv_1(out)
out = torch.cat((out, out_1), 1)
out = model.cnn_output_1(out)

out.size()
