In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

from utils.trip import * 

In [2]:
# Read the three pre-split CSV files (train / validation / test) in one go;
# the tuple order matches the names on the left-hand side.
data_train, data_validation, data_test = map(
    pd.read_csv,
    ('./data/SV_train.csv', './data/SV_validation.csv', './data/SV_test.csv'),
)

In [3]:
# Value forwarded to `change_resolution` for every split
# (presumably the sampling step of the trajectories -- confirm in utils.trip).
resolution = 5

# All three splits first go through `change_resolution`, and only then through
# `standardize_data`, each time in train -> validation -> test order.
# The inner list is built eagerly, so the helper call order is unchanged.
data_train_new, data_validation_new, data_test_new = map(
    standardize_data,
    [change_resolution(split, resolution)
     for split in (data_train, data_validation, data_test)],
)

# Quick visual check of the processed training frame.
data_train_new.head()

Unnamed: 0,trip,datetime,lon,lat,gaps,dive,step_speed,step_direction,lon_std,lat_std,step_speed_std,step_direction_cos,step_direction_sin
2,P1108_46_SV_T4,2008-12-06 16:06:25,-77.265897,-11.774297,0.0,0,15.301035,26.025648,2.309584,0.788287,0.826924,0.898598,0.438773
3,P1108_46_SV_T4,2008-12-06 16:06:30,-77.266435,-11.774462,0.0,0,12.287117,-18.22855,2.293957,0.776022,0.663327,0.949816,-0.312808
4,P1108_46_SV_T4,2008-12-06 16:06:35,-77.266843,-11.774868,0.0,0,12.679214,-28.073536,2.282105,0.745841,0.68461,0.882344,-0.470604
5,P1108_46_SV_T4,2008-12-06 16:06:40,-77.2672,-11.775312,0.0,0,12.579325,-6.324066,2.271735,0.712835,0.679188,0.993915,-0.110152
6,P1108_46_SV_T4,2008-12-06 16:06:45,-77.26739,-11.775845,0.0,0,12.567659,-18.969971,2.266216,0.673214,0.678555,0.945689,-0.325073


In [4]:
# Hyperparameters shared by every dataset built in the next cell.
# `window` is passed to TrajDataSet as its window argument
# (presumably the number of consecutive positions per sample -- confirm in utils.trip).
window = 20
# Columns of the processed frames handed to TrajDataSet as input variables.
variable = ('lon_std', 'lat_std', 'gaps')

# NOTE: the datasets themselves are built (with their ToTensor transform) in the
# following cell; building an un-transformed `train_set` here was immediately
# overwritten there, so that redundant construction has been dropped.

In [5]:
# hyperparameters
batch_size = 64

# Windowed datasets; ToTensor() converts every sample to torch tensors.
train_set = TrajDataSet(data_train_new, window, variable, transform = ToTensor())
validation_set = TrajDataSet(data_validation_new, window, variable, transform = ToTensor())

# Training batches are reshuffled on every pass; the last incomplete batch is dropped
# so that every batch has exactly `batch_size` samples.
train_loader = DataLoader(train_set, batch_size=batch_size, num_workers = 0, shuffle = True, drop_last=True)

# The validation loader is only used for evaluation, so it must not be shuffled:
# combined with drop_last=True, shuffling would discard a *different* random subset of
# validation samples on every pass and make the reported metrics non-reproducible.
# drop_last is kept so that batch sizes stay identical to the training batches.
validation_loader = DataLoader(validation_set, batch_size=batch_size, num_workers = 0, shuffle = False, drop_last=True)

In [6]:
class UNet(nn.Module):
    """Small 1-D U-Net with a learned thresholding front-end.

    The network takes two inputs (see `forward`):

    * ``y`` -- a single-channel 2-D map.  It goes through a 1x1 convolution + ReLU
      (``self.threshold``) producing ``nb_features`` channels, which are then summed
      over dimension 2, leaving ``nb_features`` 1-D channels.
    * ``x`` -- ``nb_inputs`` 1-D channels (after ``squeeze(1)``), concatenated to the
      channels derived from ``y``.

    The concatenated signal is processed by a two-level encoder/decoder with skip
    connections and reduced to a single output channel of raw scores (no final
    activation is applied).

    Args:
        nb_features: number of output channels of the 1x1 thresholding convolution.
        nb_inputs: number of channels contributed by ``x``.  Defaults to 3, the value
            that used to be hard-coded.

    Note:
        The two stride-2 poolings and the two stride-2 transposed convolutions only
        produce matching lengths for the skip concatenations when the sequence length
        is a multiple of 4 (e.g. 20 -> 10 -> 5 -> 10 -> 20).
    """

    def __init__(self, nb_features, nb_inputs=3):
        super().__init__()

        self.feature = nb_features
        self.nb_inputs = nb_inputs

        # Channels entering the encoder: thresholded features + raw input channels.
        in_channels = self.feature + self.nb_inputs

        # Per-pixel affine map + ReLU on the single-channel 2-D input.
        self.threshold = nn.Sequential(
            nn.Conv2d(1, self.feature, kernel_size = 1, stride = 1, padding = 0, dilation = 1, bias = True),
            nn.ReLU()
        )

        # ---- encoder, level 1 (full resolution, 8 channels) ----
        self.cnn_input_1 = nn.Sequential(
            nn.BatchNorm1d(in_channels),
            nn.Conv1d(in_channels, 8, kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU(),
            nn.Conv1d(8, 8, kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU()
        )

        # Halves the sequence length.
        self.pooling_1 = nn.Sequential(
            nn.MaxPool1d(kernel_size = 5, stride = 2, padding = 2, dilation = 1)
        )

        # ---- encoder, level 2 (1/2 resolution, 16 channels) ----
        self.cnn_input_2 = nn.Sequential(
            nn.BatchNorm1d(8),
            nn.Conv1d(8, 16, kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU(),
            nn.Conv1d(16, 16, kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU()
        )

        # Halves the sequence length again.
        self.pooling_2 = nn.Sequential(
            nn.MaxPool1d(kernel_size = 5, stride = 2, padding = 2, dilation = 1)
        )

        # ---- bottleneck (1/4 resolution, 32 channels) ----
        self.cnn_input_3 = nn.Sequential(
            nn.BatchNorm1d(16),
            nn.Conv1d(16, 32,  kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU(),
            nn.Conv1d(32, 32,  kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU()
        )

        # ---- decoder, level 2: upsample x2, then fuse with the level-2 skip ----
        self.upconv_2 = nn.Sequential(
             nn.ConvTranspose1d(32, 16, kernel_size = 6, stride = 2, padding = 2, dilation = 1)
         )

        # Input has 16 upsampled + 16 skip channels.
        self.cnn_output_2 = nn.Sequential(
            nn.BatchNorm1d(16*2),
            nn.Conv1d(16*2, 16,  kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU(),
            nn.Conv1d(16, 16,  kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU()
        )

        # ---- decoder, level 1: upsample x2, then fuse with the level-1 skip ----
        self.upconv_1 = nn.Sequential(
             nn.ConvTranspose1d(16, 8, kernel_size = 6, stride = 2, padding = 2, dilation = 1)
         )

        # Input has 8 upsampled + 8 skip channels; channels shrink 8 -> 4 -> 2 -> 1.
        # No activation after the last convolution: the output is a raw score.
        self.cnn_output_1 = nn.Sequential(
            nn.BatchNorm1d(8*2),
            nn.Conv1d(8*2, 8,  kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU(),
            nn.Conv1d(8, 4, kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU(),
            nn.Conv1d(4, 2, kernel_size = 5, stride = 1, padding = 2, dilation = 1),
            nn.ReLU(),
            nn.Conv1d(2, 1,  kernel_size = 5, stride = 1, padding = 2, dilation = 1)
        )

    def forward(self, x, y):
        """Run the network.

        Args:
            x: tensor that becomes ``(batch, nb_inputs, length)`` after ``squeeze(1)``
               (i.e. typically ``(batch, 1, nb_inputs, length)``).
            y: tensor of shape ``(batch, 1, height, length)``; it must have a single
               channel because the thresholding convolution has one input channel.

        Returns:
            Tensor of shape ``(batch, 1, length)`` holding one raw score per position.
        """
        # (batch, 1, H, L) -> (batch, feature, H, L) -> sum over H -> (batch, feature, L)
        out = self.threshold(y)
        out = torch.sum(out, 2)

        # Stack the thresholded features with the raw input channels.
        out = torch.cat((out, x.squeeze(1)), 1)

        # Encoder: keep out_1 / out_2 for the skip connections.
        out_1 = self.cnn_input_1(out)
        out = self.pooling_1(out_1)
        out_2 = self.cnn_input_2(out)
        out = self.pooling_2(out_2)
        out = self.cnn_input_3(out)

        # Decoder level 2.
        out = self.upconv_2(out)
        out = torch.cat((out, out_2), 1)
        out = self.cnn_output_2(out)

        # Decoder level 1.
        out = self.upconv_1(out)
        out = torch.cat((out, out_1), 1)
        out = self.cnn_output_1(out)

        return out

def get_score(out, y):
    """Return the (true-positive %, true-negative %) of thresholded scores.

    A position is predicted positive when its raw score in `out` is strictly
    greater than 0.  Labels equal to 1 (True) count as positives, labels equal to
    0 (False) as negatives.

    Args:
        out: tensor of raw scores.
        y: tensor of labels with the same shape as `out`.

    Returns:
        Tuple ``(true_positive, true_negative)`` of percentages rounded to the
        nearest integer.  If `y` contains no positive (resp. no negative) label the
        corresponding rate is undefined and ``float('nan')`` is returned for it
        instead of raising.
    """
    out, y = out.detach().cpu(), y.detach().cpu()
    predicted_positive = out > 0

    def _predicted_fraction(mask):
        # Fraction of the positions selected by `mask` that are predicted positive,
        # or None when the mask selects nothing (rate undefined).
        selected = predicted_positive[mask]
        if selected.numel() == 0:
            return None
        return int(selected.sum()) / selected.numel()

    hit_rate = _predicted_fraction(y == True)
    false_alarm_rate = _predicted_fraction(y == False)

    nan = float('nan')
    true_positive = nan if hit_rate is None else round(hit_rate * 100)
    true_negative = nan if false_alarm_rate is None else round((1 - false_alarm_rate) * 100)

    return (true_positive, true_negative)

In [15]:
# Sweep over the number of thresholding features of the UNet.  For every value a fresh
# model is trained for one pass over `train_loader`, evaluated on the training and
# validation loaders, and the learned activation thresholds are collected in `parameters`.

# Settings shared by every run of the sweep (none of them depends on `feature`).
learning_rate = 0.01
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Weight applied to the positive class by BCEWithLogitsLoss (pos_weight).
weight = torch.FloatTensor([30]).to(device)


def _evaluate(model, loader, criterion, device):
    """Return (mean loss, mean true-positive %, mean true-negative %) over `loader`.

    The model is switched to eval mode and gradients are disabled.  All three values
    are plain averages of the per-batch values.
    """
    batch_losses = []
    batch_scores = []
    model.eval()
    with torch.no_grad():
        for x, y, z in loader:
            # send to GPU
            x, y, z = x.to(device), y.to(device), z.to(device)
            out = model(x, y)
            batch_losses.append(criterion(out, z).item())
            batch_scores.append(get_score(out, z))

    mean_loss = np.mean(batch_losses)
    mean_true_positive = np.mean([tp for (tp, tn) in batch_scores])
    mean_true_negative = np.mean([tn for (tp, tn) in batch_scores])
    return mean_loss, mean_true_positive, mean_true_negative


parameters = []
for feature in [1, 2, 4, 8]:

    # Fresh model / optimiser / loss for this configuration.
    model = UNet(feature).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCEWithLogitsLoss(pos_weight = weight)

    epoch_loss_train = []
    epoch_loss_validation = []

    ### Training: a single pass over the training loader
    model.train()
    for x, y, z in train_loader:
        # send to GPU
        x, y, z = x.to(device), y.to(device), z.to(device)
        # Run the forward pass (z is the target of the loss)
        out = model(x, y)
        loss = criterion(out, z)

        # Backprop and perform optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    ### Evaluation + Validation
    train_loss, train_trueP, train_trueN = _evaluate(model, train_loader, criterion, device)
    validation_loss, validation_trueP, validation_trueN = _evaluate(model, validation_loader, criterion, device)

    epoch_loss_train.append(train_loss)
    epoch_loss_validation.append(validation_loss)

    # The thresholding layer computes ReLU(w * y + b) per feature, which switches at
    # y = -b / w: store that switching value for every feature.
    # .cpu() is required before .numpy() whenever the model lives on the GPU.
    threshold_conv = model.threshold[0]
    param = - threshold_conv.bias.squeeze().detach().cpu().numpy() / threshold_conv.weight.squeeze().detach().cpu().numpy()
    parameters.append(param)

    print('Feature [{}] -------------------------------------------------------------------------------------'
          .format(feature))
    print('Train Loss: {}, Train True Positive : {} %, Train True Negative : {} %'
            .format(round(train_loss, 2), round(train_trueP, 2), round(train_trueN, 2)))
    print('Validation Loss: {}, Validation True Positive : {} %, Validation True Negative : {} %'
            .format(round(validation_loss, 2), round(validation_trueP, 2), round(validation_trueN, 2)))

KeyboardInterrupt: 