In [1]:
import torch
import ipywidgets as widgets
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal
import numpy as np
from tqdm.notebook import tqdm_notebook
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
import random

import magnav

from torch.utils.data import Dataset, DataLoader

In [2]:
# Device on which the model and the batches are placed further down
# (`.to(device)`). It is chosen by the project helper `magnav.get_device`.
# NOTE(review): presumably CUDA when available, CPU otherwise - confirm in magnav.
device = magnav.get_device()

In [3]:
def create_timeframe(X,y):
    """Stack every sample with its two previous and its two next samples.

    For each row ``i`` of ``X`` that has two neighbours on both sides, the rows
    ``i-2, i-1, i, i+1, i+2`` are laid side by side (earliest first). The
    result therefore has ``len(X) - 4`` rows and ``5 * X.shape[1]`` columns,
    is indexed by the label of the centre row, and repeats the column labels
    of ``X`` once per time step.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, one row per sample (rows are assumed to be in
        chronological order).
    y : pandas.DataFrame or pandas.Series
        Target values; must contain at least one row.

    Returns
    -------
    tuple
        ``(X_windows, y_trimmed)`` where ``y_trimmed`` is ``y`` without its
        first row. When ``X`` has fewer than five rows, ``X_windows`` is empty.

    Notes
    -----
    NOTE(review): ``y`` loses only its first row while ``X`` loses two rows at
    each end, so the two returned objects differ in length and index. That
    behaviour is kept unchanged here because the intended alignment cannot be
    determined from this cell; callers probably want ``y.loc[X_windows.index]``.
    """
    window = 5                # number of consecutive samples per output row
    half = window // 2        # neighbours taken on each side of the centre row
    n_windows = max(len(X) - (window - 1), 0)

    y = y.drop(index=[y.index[0]])

    # Labels of the centre rows; every shifted copy is re-labelled with them so
    # that the column-wise concat lines the copies up row by row.
    centre_index = X.index[half:half + n_windows]

    # Slice the function's own argument positionally. The previous version
    # read a global `df` here, which either raised NameError or silently built
    # the windows from whatever frame happened to be left in the kernel.
    shifted = [
        X.iloc[offset:offset + n_windows].set_index(centre_index)
        for offset in range(window)
    ]
    X = pd.concat(shifted, axis=1)

    return X,y

In [5]:
# add transformations

class ChallDataset(Dataset):
    """Dataset over flight ``Flt1003`` of the solution HDF5 file (unfinished).

    Only the constructor exists so far: it loads the flight table and selects
    the rows of line 1003.02, but the selection is kept in a local variable
    and nothing is stored on the instance.

    NOTE(review): ``__len__`` and ``__getitem__`` are missing and the ``train``
    flag is not read yet, so the class cannot be fed to a DataLoader as is.
    """
    # train : if true, return train data. If false, return validation data
    def __init__(self,train=True):
        flight = pd.read_hdf('../data/interim/Sol_dataset.h5', key=f'Flt1003')

        # Rows recorded on line 1003.02, without the line-number column itself.
        on_line_100302 = flight['LINE'] == 1003.02
        X_100302 = flight.loc[on_line_100302].drop(columns=['LINE'])

In [6]:
# Build the feature table X and the target y from the flight data, after
# removing the 'LINE' column.
# NOTE(review): neither `create_Xy` nor a module-level `df` is defined in any
# cell visible in this notebook, so this cell only runs on leftover kernel
# state - confirm where both come from before relying on Restart & Run All.
X,y = create_Xy(df.drop(columns='LINE'))

In [7]:
# Wrap X and y in a dataset and sanity-check it: number of samples, then one
# sample (the recorded output shows a pair of a 55-value tensor and a 1-value
# tensor).
# NOTE(review): `MyDataset` is not defined in any visible cell - confirm its
# origin; despite the `_test` suffix this dataset is what the training loader
# below iterates over.
dataset_test = MyDataset(X,y)
print(len(dataset_test))
print(dataset_test[100])

160026
(tensor([ 5.2645e+04,  3.5635e+04,  5.6030e+04, -2.6191e+04, -2.4432e+04,
         3.1635e+03, -9.4792e+02,  5.2181e+04, -1.8693e+03, -1.2193e+02,
         2.6032e+01,  5.2647e+04,  3.5581e+04,  5.6023e+04, -2.6226e+04,
        -2.4470e+04,  3.1031e+03, -9.5066e+02,  5.2171e+04, -1.8679e+03,
        -1.2082e+02,  2.6085e+01,  5.2648e+04,  3.5519e+04,  5.6016e+04,
        -2.6259e+04, -2.4506e+04,  3.0327e+03, -9.4386e+02,  5.2169e+04,
        -1.8673e+03, -1.2163e+02,  2.6304e+01,  5.2648e+04,  3.5451e+04,
         5.6009e+04, -2.6291e+04, -2.4541e+04,  2.9548e+03, -9.3135e+02,
         5.2174e+04, -1.8673e+03, -1.2389e+02,  2.6282e+01,  5.2646e+04,
         3.5382e+04,  5.6002e+04, -2.6324e+04, -2.4576e+04,  2.8733e+03,
        -9.1887e+02,  5.2179e+04, -1.8680e+03, -1.2689e+02,  2.6158e+01]), tensor([-195.8120]))


In [8]:
# Mini-batches of 64 samples, served in dataset order by 4 worker processes,
# without pinned host memory.
# NOTE(review): shuffle=False keeps the samples in their original order for
# every epoch - confirm this is intended for a training loader.
train_loader = DataLoader(
    dataset=dataset_test,
    batch_size=64,
    shuffle=False,
    num_workers=4,
    pin_memory=False,
)

In [9]:


def init_weights(m):
    """Initialise a ``torch.nn.Linear`` layer in place; ignore other modules.

    Intended to be passed to ``Module.apply``, which calls it on every
    sub-module. The weight matrix gets a Xavier (Glorot) uniform
    initialisation and the bias, when the layer has one, is set to 0.01.

    Parameters
    ----------
    m : torch.nn.Module
        Module visited by ``apply``. Anything that is not a ``Linear`` layer
        is left untouched.
    """
    if not isinstance(m, torch.nn.Linear):
        return

    torch.nn.init.xavier_uniform_(m.weight)

    # Layers created with bias=False expose `bias` as None; the former
    # `m.bias.data.fill_` call crashed on them with AttributeError.
    if m.bias is not None:
        torch.nn.init.constant_(m.bias, 0.01)
        
class Chall_MLP(torch.nn.Module):
    """Fully connected regressor: 55 inputs -> 50 -> 30 -> 10 -> 1 output.

    A Tanh activation follows every hidden layer; the output layer is linear.
    All Linear layers are initialised through ``init_weights``.
    """

    def __init__(self):
        super().__init__()

        # Consecutive pairs of this tuple are the (in, out) sizes of the layers.
        widths = (55, 50, 30, 10, 1)

        stack = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            stack.append(torch.nn.Linear(n_in, n_out))
            stack.append(torch.nn.Tanh())
        stack.pop()  # no activation after the output layer

        self.architecture = torch.nn.Sequential(*stack)
        self.architecture.apply(init_weights)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        return self.architecture(x)

# Instantiate the network and move its parameters to the selected device;
# the training loop moves every batch to the same device.
model = Chall_MLP().to(device)

class RMSELoss(torch.nn.Module):
    """Root-mean-square-error loss: ``sqrt(mean((yhat - y)**2) + eps)``.

    Parameters
    ----------
    eps : float, optional
        Non-negative constant added under the square root (default ``1e-6``,
        the value that used to be hard-coded). The derivative of ``sqrt`` is
        unbounded at zero, so a small positive ``eps`` keeps the gradient
        finite when the prediction matches the target exactly.

    Raises
    ------
    ValueError
        If ``eps`` is negative.
    """

    def __init__(self, eps=1e-6):
        super(RMSELoss,self).__init__()
        if eps < 0:
            raise ValueError(f"eps must be non-negative, got {eps}")
        self.eps = eps

    def forward(self,yhat,y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        # Functional form with the default 'mean' reduction: same value as
        # MSELoss(), without building a new module on every call.
        mse = torch.nn.functional.mse_loss(yhat, y)
        return torch.sqrt(mse + self.eps)
    
# Loss: the RMSE module defined above in this cell.
criterion = RMSELoss()
# Adam over all model parameters, initial learning rate 1e-3.
optimizer = torch.optim.Adam(model.parameters(),lr=1e-3) 
# Plateau scheduler: multiplies the learning rate by 0.9 once the monitored
# value has not decreased for 5 epochs, never going below 1e-5.
# NOTE(review): the only `scheduler.step(...)` call in this cell is commented
# out together with the validation pass, so as written the learning rate
# never changes.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',factor=0.9,patience=5,min_lr=1e-5)

# One TensorBoard run directory per execution, named after the start time.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter(f'runs/Chall_MLP_{timestamp}')

# Number of passes over the training data.
EPOCHS = 500

# Progress bar advanced once per epoch by the training loop.
pbar = tqdm_notebook(total=EPOCHS,unit="epoch",desc='Training')

# Training loop: one optimisation pass over `train_loader` per epoch, with the
# epoch loss written to TensorBoard and shown on the progress bar.
# The loop runs inside try/finally so that the progress bar and the
# TensorBoard writer are closed (and pending events flushed to disk) even when
# the run is stopped by hand with a KeyboardInterrupt or fails with an error.
try:
    for epoch in range(EPOCHS):

        train_running_loss = 0.

        # Training mode (matters for layers such as dropout / batch-norm).
        model.train()

        for inputs, labels in train_loader:

            inputs, labels = inputs.to(device), labels.to(device)

            # Zero gradients for every batch
            optimizer.zero_grad()

            # Make prediction for this batch
            predictions = model(inputs)

            # Compute the loss and its gradients
            loss = criterion(predictions, labels)
            loss.backward()

            # Adjust learning weights
            optimizer.step()

            # Weight the batch loss by the batch size so that a smaller last
            # batch does not bias the epoch average. `.item()` already returns
            # a plain Python float detached from the graph.
            train_running_loss += loss.item() * inputs.size(0)

        # Sample-weighted mean of the per-batch RMSE values (this is not the
        # RMSE computed over the whole epoch at once).
        avg_loss = train_running_loss / len(train_loader.dataset)
        writer.add_scalar('training_loss',avg_loss,epoch)

        # TODO: validation pass, currently disabled. `valconcat_loader` is not
        # defined in the visible cells, and the batches below would also have
        # to be moved to `device`. Until `scheduler.step` is re-enabled the
        # learning rate reported on the progress bar stays constant.
        #
        # # Deactivate layers such as dropout or batch-normalization
        # model.eval()
        #
        # val_running_loss = 0.0
        #
        # with torch.set_grad_enabled(False):
        #     for i, (vinputs, vlabels) in enumerate(valconcat_loader):
        #
        #         voutputs = model(vinputs)
        #         vloss = criterion(voutputs, vlabels)
        #         val_running_loss += vloss.detach().item()* vinputs.size(0)
        #
        # avg_vloss = val_running_loss/len(valconcat_loader.dataset)
        #
        # scheduler.step(avg_vloss)
        #
        # writer.add_scalar('validation_loss',avg_vloss,epoch)

        pbar.set_postfix(train_loss=avg_loss,lr=optimizer.param_groups[0]['lr'])
        pbar.update()
finally:
    pbar.close()
    writer.close()

Training:   0%|          | 0/500 [00:00<?, ?epoch/s]

KeyboardInterrupt: 