In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Any results you write to the current directory are saved as output.
from numpy import array
import torch
import gc
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader

In [7]:
tides = pd.read_csv('2016_2022.csv')
tides = tides[lambda x : x.Source == 3]
tides

Unnamed: 0,Date,Valeur,Source
1,01/01/2016 00:00:00,3.607,3
13,01/01/2016 00:10:00,3.496,3
25,01/01/2016 00:20:00,3.446,3
37,01/01/2016 00:30:00,3.298,3
49,01/01/2016 00:40:00,3.229,3
...,...,...,...
4034625,30/06/2022 23:10:00,1.931,3
4034637,30/06/2022 23:20:00,1.915,3
4034649,30/06/2022 23:30:00,1.918,3
4034661,30/06/2022 23:40:00,1.942,3


In [36]:
n = len(tides)
end = 0.1
train_set = tides[:round(0.05*n)]
valid_set = tides[round(0.05*n):round(end*n)]
print('Proportion of train_set : {:.2f}%'.format(len(train_set)/len(tides)))
print("Train set")
print("=========")
print(train_set)
print('\n')
print('Proportion of valid_set : {:.2f}%'.format(len(valid_set)/len(tides)))
print("Validation set")
print("==============")
print(valid_set)

Proportion of train_set : 0.05%
Train set
                       Date  Valeur  Source
1       01/01/2016 00:00:00   3.607       3
13      01/01/2016 00:10:00   3.496       3
25      01/01/2016 00:20:00   3.446       3
37      01/01/2016 00:30:00   3.298       3
49      01/01/2016 00:40:00   3.229       3
...                     ...     ...     ...
203528  27/04/2016 10:30:00   3.551       3
203540  27/04/2016 10:40:00   3.426       3
203552  27/04/2016 10:50:00   3.293       3
203565  27/04/2016 11:00:00   3.173       3
203577  27/04/2016 11:10:00   3.054       3

[16913 rows x 3 columns]


Proportion of valid_set : 0.05%
Validation set
                       Date  Valeur  Source
203589  27/04/2016 11:20:00   2.954       3
203601  27/04/2016 11:30:00   2.845       3
203613  27/04/2016 11:40:00   2.752       3
203625  27/04/2016 11:50:00   2.647       3
203638  27/04/2016 12:00:00   2.558       3
...                     ...     ...     ...
402996  24/08/2016 01:50:00   1.526       3
403

In [37]:
def split_sequence(sequence, n_steps):
    x, y = list(), list()
    for i in range(len(sequence)):
        
        end_ix = i + n_steps
        
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        x.append(seq_x)
        y.append(seq_y)
    return array(x), array(y)

raw_seq = [10,20,30,40,50,60,70,80,90]
n_steps = 3
train_x,train_y = split_sequence(train_set.Valeur.values,n_steps)
valid_x,valid_y = split_sequence(valid_set.Valeur.values,n_steps)

In [38]:
class TidesDataset():
    def __init__(self,feature,target):
        self.feature = feature
        self.target = target
    
    def __len__(self):
        return len(self.feature)
    
    def __getitem__(self,idx):
        item = self.feature[idx]
        label = self.target[idx]
        
        return item,label

In [39]:
class CNN_ForecastNet(nn.Module):
    def __init__(self):
        super(CNN_ForecastNet,self).__init__()
        self.conv1d = nn.Conv1d(3,64,kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64*2,50)
        self.fc2 = nn.Linear(50,1)
        
    def forward(self,x):
        x = self.conv1d(x)
        x = self.relu(x)
        x = x.view(-1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        return x

In [47]:
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
model = CNN_ForecastNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.MSELoss()

In [48]:
train = TidesDataset(train_x.reshape(train_x.shape[0],train_x.shape[1],1),train_y)
valid = TidesDataset(valid_x.reshape(valid_x.shape[0],valid_x.shape[1],1),valid_y)
train_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False)
valid_loader = torch.utils.data.DataLoader(train,batch_size=2,shuffle=False)

In [49]:
train_losses = []
valid_losses = []
def Train():
    
    running_loss = .0
    
    model.train()
    
    for idx, (inputs,labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs.float())
        loss = criterion(preds,labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss
        
    train_loss = running_loss/len(train_loader)
    train_losses.append(train_loss.detach().numpy())
    
    print(f'train_loss {train_loss}')
    
def Valid():
    running_loss = .0
    
    model.eval()
    
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(valid_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            preds = model(inputs.float())
            loss = criterion(preds,labels)
            running_loss += loss
            
        valid_loss = running_loss/len(valid_loader)
        valid_losses.append(valid_loss.detach().numpy())
        print(f'valid_loss {valid_loss}')

In [50]:
epochs = 10
for epoch in range(epochs):
    print('epochs {}/{}'.format(epoch+1,epochs))
    Train()
    Valid()
    gc.collect()

epochs 1/10
train_loss 0.9139229655265808
valid_loss 0.046612241989759995
epochs 2/10
train_loss 0.028898589313030243
valid_loss 0.011228391148505362
epochs 3/10
train_loss 0.006905952002853155
valid_loss 0.005580909767806383
epochs 4/10
train_loss 0.005547999404370785
valid_loss 0.0053910056296970765
epochs 5/10
train_loss 0.005391974467784166
valid_loss 0.005291639392261365
epochs 6/10
train_loss 0.005302421748638153
valid_loss 0.005235789751715926
epochs 7/10
train_loss 0.005245508160442114
valid_loss 0.005196563863530286
epochs 8/10
train_loss 0.005213175434619188
valid_loss 0.005197334874071859
epochs 9/10
train_loss 0.005201965570449829
valid_loss 0.005213788038440041
epochs 10/10
train_loss 0.005196485202759504
valid_loss 0.00521175663664073
