In [1]:
import torch
import ipywidgets as widgets
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal
import numpy as np
from tqdm.notebook import tqdm_notebook
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
import random

import magnav

from torch.utils.data import Dataset, DataLoader

In [2]:
# Ask the `magnav` helper which torch device to run on. The output recorded
# for this cell shows it reported CUDA on the original run.
device = magnav.get_device()
# Uncomment the next line to force CPU execution instead.
#device = 'cpu'

Currently using cuda


In [3]:
def create_timeframe(X, y, window=5):
    """Stack every row of ``X`` with its neighbours into one wide row.

    For each position ``i`` that has ``window // 2`` rows on both sides, rows
    ``i - window // 2`` .. ``i + window // 2`` of ``X`` are concatenated
    column-wise (earliest row first) and labelled with the index of row ``i``.
    ``y`` loses ``window // 2`` rows at each end so that only the targets of
    those centre rows remain.

    Rows are selected by position, not by index label, so an index containing
    repeated labels is handled correctly. Inputs shorter than ``window`` give
    empty outputs rather than raising ``IndexError``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows, already in the order in which they should be windowed.
    y : pandas.DataFrame or pandas.Series
        Targets; trimmed using its own length.
        NOTE(review): callers are expected to pass one target row per row of
        ``X`` -- this is not checked here.
    window : int, default 5
        Odd, positive number of consecutive rows merged into one output row.

    Returns
    -------
    tuple
        ``(X_windowed, y_trimmed)``. ``X_windowed`` has
        ``max(len(X) - window + 1, 0)`` rows and ``window`` times the columns
        of ``X`` (column names repeat once per shift).

    Raises
    ------
    ValueError
        If ``window`` is not a positive odd integer.
    """
    if window < 1 or window % 2 == 0:
        raise ValueError(f"window must be a positive odd integer, got {window!r}")

    half = window // 2
    n_out = max(len(X) - (window - 1), 0)

    # Labels of the centre rows; every shifted copy is re-labelled with them so
    # the column-wise concat lines rows up without any re-indexing.
    idx = X.index[half:half + n_out]
    shifted = [X.iloc[shift:shift + n_out].set_index(idx) for shift in range(window)]
    X = pd.concat(shifted, axis=1)

    # ``max`` keeps the stop from wrapping around to the front on very short input.
    y = y.iloc[half:max(len(y) - half, half)]

    return X, y

In [4]:
class ChallDatatset(Dataset):
    """Windowed samples from flight lines 1003.02 and 1003.08 of ``Flt1003``.

    On construction the flight table is read from ``DATA_PATH``. For each line
    in ``LINES`` the ``TARGET`` column becomes the label and every other column
    except ``LINE`` becomes a feature. Each line is windowed on its own with
    ``create_timeframe`` (so no window mixes rows from two lines) and then
    split in order, without shuffling: the final ``VAL_FRACTION`` of each line
    is the validation part. The per-line parts are concatenated and stored as
    float32 tensors.

    Both splits are always kept on the instance (``X_train``, ``y_train``,
    ``X_val``, ``y_val``); the ``train`` attribute only selects which one
    ``len()`` and indexing expose.

    Parameters
    ----------
    train : bool, default True
        Truthy -> the instance serves the training split; falsy -> the
        validation split.
    """

    # Where the flight data lives and which parts of it are used.
    DATA_PATH = '../data/interim/Sol_dataset.h5'
    FLIGHT_KEY = 'Flt1003'
    LINES = (1003.02, 1003.08)
    TARGET = 'IGRFMAG1'
    # Share of every line, taken from its end, that is held out for validation.
    VAL_FRACTION = 1/5

    def __init__(self, train=True):
        df = pd.read_hdf(self.DATA_PATH, key=self.FLIGHT_KEY)

        X_train, y_train, X_val, y_val = [], [], [], []
        for line in self.LINES:
            line_df = df[df['LINE'] == line]
            X_line = line_df.drop(columns=['LINE', self.TARGET])
            y_line = line_df.loc[:, [self.TARGET]]

            # Build the multi-row windows for this line only.
            X_line, y_line = create_timeframe(X_line, y_line)

            # shuffle=False keeps row order: the tail of the line is validation.
            X_tr, X_va, y_tr, y_va = train_test_split(
                X_line, y_line, test_size=self.VAL_FRACTION, shuffle=False)

            X_train.append(X_tr)
            y_train.append(y_tr)
            X_val.append(X_va)
            y_val.append(y_va)

        self.X_train = self._as_tensor(X_train)
        self.y_train = self._as_tensor(y_train)
        self.X_val   = self._as_tensor(X_val)
        self.y_val   = self._as_tensor(y_val)

        self.train = train

    @staticmethod
    def _as_tensor(frames):
        """Concatenate per-line frames row-wise into a single float32 tensor."""
        return torch.tensor(pd.concat(frames, axis=0).to_numpy(), dtype=torch.float32)

    def _active_split(self):
        """Return the ``(inputs, targets)`` pair selected by ``self.train``."""
        if self.train:
            return self.X_train, self.y_train
        return self.X_val, self.y_val

    def __len__(self):
        """Number of samples in the selected split."""
        return len(self._active_split()[0])

    def __getitem__(self, idx):
        """Return ``(inputs, target)`` for position ``idx`` of the selected split."""
        inputs, targets = self._active_split()
        return inputs[idx], targets[idx]

In [5]:
# Build one dataset view per split, then report the sample count of each
# (training first, validation second, one number per line).
train = ChallDatatset(train=True)
val = ChallDatatset(train=False)
print(len(train), len(val), sep='\n')

65010
16254


In [6]:
# Mini-batch loaders for the two splits.
BATCH_SIZE = 64
# Page-locked host memory only pays off when batches are copied to a CUDA
# device, so both loaders follow CUDA availability instead of disagreeing.
PIN_MEMORY = torch.cuda.is_available()

# Training batches are reshuffled every epoch: the samples are consecutive
# windows of a flight line, so unshuffled batches would be highly correlated
# and always presented in the same order. The seeded generator keeps the
# shuffling reproducible across runs.
train_loader  = DataLoader(train,
                           batch_size=BATCH_SIZE,
                           shuffle=True,
                           generator=torch.Generator().manual_seed(0),
                           num_workers=8,
                           pin_memory=PIN_MEMORY)

# Validation only evaluates the model, so a fixed order is fine.
val_loader    = DataLoader(val,
                           batch_size=BATCH_SIZE,
                           shuffle=False,
                           num_workers=2,
                           pin_memory=PIN_MEMORY)

In [7]:
def init_weights(m):
    """Initialise a ``torch.nn.Linear`` layer in place; leave other modules alone.

    The weight matrix is filled with Xavier (Glorot) uniform values and the
    bias, when the layer has one, is set to the constant 0.01. The function is
    meant to be handed to ``Module.apply``, which calls it once per sub-module.

    Parameters
    ----------
    m : torch.nn.Module
        Module to initialise. Anything that is not a ``Linear`` is ignored.
    """
    if not isinstance(m, torch.nn.Linear):
        return

    torch.nn.init.xavier_uniform_(m.weight)
    # Layers created with ``bias=False`` carry ``bias = None``.
    if m.bias is not None:
        torch.nn.init.constant_(m.bias, 0.01)
        
class Chall_MLP(torch.nn.Module):
    """Fully connected regressor: Linear + Tanh hidden layers, one linear output.

    With the default arguments the layout is 55 -> 50 -> 30 -> 10 -> 1, and the
    layers sit at the same positions inside ``self.architecture`` as before
    (Linear at 0, 2, 4, 6; Tanh at 1, 3, 5), so existing ``state_dict`` keys
    still match. Every ``Linear`` layer is initialised through ``init_weights``.

    Parameters
    ----------
    in_features : int, default 55
        Width of one input row.
        NOTE(review): 55 presumably equals features-per-sample times the
        window length used when building the dataset -- confirm against the
        data pipeline.
    hidden_sizes : sequence of int, default (50, 30, 10)
        Width of each hidden layer, in order.
    """

    def __init__(self, in_features=55, hidden_sizes=(50, 30, 10)):
        super().__init__()

        layers = []
        width = in_features
        for hidden in hidden_sizes:
            layers += [torch.nn.Linear(width, hidden), torch.nn.Tanh()]
            width = hidden
        # Single un-activated output unit: the network predicts one scalar.
        layers.append(torch.nn.Linear(width, 1))

        self.architecture = torch.nn.Sequential(*layers)
        self.architecture.apply(init_weights)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw output."""
        return self.architecture(x)

# Build the network with its default layout and move it to `device`.
model = Chall_MLP().to(device)

class RMSELoss(torch.nn.Module):
    """Root-mean-square error between predictions and targets.

    Computes ``sqrt(mean((yhat - y) ** 2) + 1e-6)`` and returns it as a scalar
    tensor.
    """

    def forward(self, yhat, y):
        mean_squared_error = torch.nn.functional.mse_loss(yhat, y)
        # The small offset keeps the square-root argument strictly positive,
        # so the gradient stays finite even when the prediction is exact.
        return torch.sqrt(mean_squared_error + 1e-6)
    
# --- Loss, optimiser and learning-rate schedule ---------------------------
criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Multiply the learning rate by 0.9 once the monitored loss has not improved
# for more than 5 epochs, never going below 1e-5. The schedule only acts when
# `scheduler.step(metric)` is called, which happens after each validation pass.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.9, patience=5, min_lr=1e-5)

# --- Logging --------------------------------------------------------------
# One TensorBoard run directory per launch, named after the start time.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter(f'runs/Chall_MLP_{timestamp}')

EPOCHS = 500

pbar = tqdm_notebook(total=EPOCHS, unit="epoch", desc='Training')

# try/finally so that interrupting the cell still flushes the logged scalars
# and releases the progress bar.
try:
    for epoch in range(EPOCHS):

        # ---- Training pass ----
        # Put layers such as dropout / batch-norm into training mode.
        model.train()
        train_running_loss = 0.

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Gradients accumulate by default, so clear them for every batch.
            optimizer.zero_grad()

            predictions = model(inputs)
            loss = criterion(predictions, labels)
            loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller final batch does not skew the mean.
            train_running_loss += loss.item() * inputs.size(0)

        # Sample-weighted mean of the per-batch RMSE values.
        avg_loss = train_running_loss / len(train_loader.dataset)
        writer.add_scalar('training_loss', avg_loss, epoch)

        # ---- Validation pass ----
        # Deactivate layers such as dropout or batch-normalization.
        model.eval()
        val_running_loss = 0.

        with torch.no_grad():
            for vinputs, vlabels in val_loader:
                vinputs, vlabels = vinputs.to(device), vlabels.to(device)

                voutputs = model(vinputs)
                vloss = criterion(voutputs, vlabels)
                val_running_loss += vloss.item() * vinputs.size(0)

        avg_vloss = val_running_loss / len(val_loader.dataset)
        writer.add_scalar('validation_loss', avg_vloss, epoch)

        # Let the plateau scheduler react to the validation loss.
        scheduler.step(avg_vloss)

        pbar.set_postfix(train_loss=avg_loss, val_loss=avg_vloss, lr=optimizer.param_groups[0]['lr'])
        pbar.update()
finally:
    writer.flush()
    pbar.close()

Training:   0%|          | 0/500 [00:00<?, ?epoch/s]

KeyboardInterrupt: 