In [1]:
import glob
import os
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image

import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl  
import torchmetrics
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Experiment configuration ------------------------------------------------
# CSV file name (resolved against ../input/sdn-metrics/ below) and the model
# tag; both end up in the run name.
ds = "star_10000_1_5.csv"
mdl = 'gru'
# Logging switch tested by `if wandb:` in later cells. NOTE: the W&B setup
# cell does `import wandb` under this same name, so after that cell runs the
# name refers to the module (still truthy) instead of this bool.
wandb = True
# Number of per-id frames stacked into one input window (stored as cfg['seq']).
sq = 5

cfg = dict(
    num_classes = 1,
    wandb = 'sdn',
    epochs = 25,
    lr = 1e-4,
    mdl = mdl,
    rnnh = 512,   # GRU hidden size
    rnnl = 2,     # number of stacked GRU layers
    drp = 0.5,    # dropout probability
    seq = sq,
    name = mdl + "_" + ds.split('.')[0] + "_" + str(sq),
    ds = ds
)

dataset = "../input/sdn-metrics/" + cfg["ds"]

# Fixed seeds so weight initialisation and any sampling are repeatable.
torch.manual_seed(10)
np.random.seed(10)

# New floating-point tensors default to float32. `set_default_dtype` replaces
# the deprecated `set_default_tensor_type(torch.FloatTensor)`; pass
# torch.float64 here to run the experiment in double precision instead.
torch.set_default_dtype(torch.float32)

# First CUDA device when one is visible, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [None]:
# Optional Weights & Biases setup; the whole cell is a no-op when the `wandb`
# flag is falsy.
if wandb:
    # Imported only when logging is requested. This rebinds the `wandb` flag
    # to the module object, which is still truthy for later `if wandb:` checks.
    import wandb
    from kaggle_secrets import UserSecretsClient

    os.environ.update({"WANDB_SILENT": "true"})

    # The API key is read from the Kaggle secret named "wandb_key".
    user_secrets = UserSecretsClient()
    key = user_secrets.get_secret("wandb_key")
    wandb.login(key=key)

    # Start the run; the full cfg dict is attached as the run configuration.
    run = wandb.init(
        project="sdn",
        entity="manoj312",
        name=cfg['name'],
        config=cfg,
    )

In [None]:
# Read the metrics CSV found at the `dataset` path into a DataFrame.
df = pd.read_csv(dataset)
# Show the first rows as the cell output.
df.head()

In [None]:
# Rows belonging to id 0, ordered by "Sno", as an array without the first two columns.
df.loc[df['id'] == 0].sort_values("Sno").to_numpy()[:, 2:]

In [None]:
# Number of distinct values in the "id" column.
len(df["id"].unique())

In [None]:
class metrics(Dataset):
    """Sliding-window dataset over per-id frames of a metrics table.

    The rows of ``df`` are grouped by ``id``. Inside each group the rows are
    ordered by ``Sno`` and the first two columns are dropped, which leaves one
    ``(rows, features)`` array per id. Ids are taken in ascending order.

    Sample ``i`` pairs the ``cfg['seq']`` consecutive frames starting at
    position ``i`` (the input window, oldest first) with the frame that comes
    right after them (the target). The target frame is never part of its own
    input window.

    Every id must contribute the same number of rows, otherwise the frames of
    a window cannot be stacked and ``__getitem__`` raises ``ValueError``.

    Args:
        df: DataFrame with ``id`` and ``Sno`` columns; everything after the
            first two columns is treated as a feature.
    """

    def __init__(self , df ):
        self.df = df
        # Split the frame once instead of filtering it on every lookup.
        ordered = df.sort_values(by=["id", "Sno"])
        self._frames = [
            np.ascontiguousarray(group.to_numpy()[:, 2:])
            for _, group in ordered.groupby("id", sort=True)
        ]

    def __len__( self ):
        # Each sample consumes cfg['seq'] input frames plus one target frame.
        return max(len(self._frames) - cfg['seq'], 0)

    def __getitem__(self, i):
        """Return ``(x, y)`` for window ``i``.

        Returns:
            x: float32 tensor of shape ``(rows, cfg['seq'], features)``.
            y: float32 tensor of shape ``(rows, features)``.

        Raises:
            IndexError: if ``i`` is outside ``range(len(self))``.
        """
        size = len(self)
        if i < 0:
            i += size
        if not 0 <= i < size:
            raise IndexError("window index out of range")

        seq = cfg['seq']
        # Stacking on axis 1 puts the time steps between rows and features.
        window = np.stack(self._frames[i:i + seq], axis=1)
        x = torch.from_numpy(window).float()
        y = torch.from_numpy(self._frames[i + seq]).float()

        # Tensors stay on the CPU; moving them to an accelerator is left to
        # the consumer so the dataset also works without CUDA.
        return x , y

In [None]:
# Wrap the full frame in the dataset and display the shapes of the first
# (input, target) pair.
metrics_data = metrics(df)
tuple(part.shape for part in metrics_data[0])

In [3]:
class RNNModel(pl.LightningModule):
    """GRU regressor: reads a window of feature vectors and predicts one vector.

    Layer sizes, the learning rate and the dropout probability are read from
    the module-level ``cfg`` dict. Per-epoch loss and cosine similarity are
    sent to Weights & Biases when the module-level ``wandb`` name is truthy.
    """

    def __init__(self ):
        super().__init__()

        num_features = 3
        self.rnn = nn.GRU(num_features, cfg['rnnh'], cfg['rnnl'], batch_first=True)
        # NOTE(review): built from cfg['drp'] but never applied in forward();
        # left inactive so the trained behaviour is unchanged. Confirm whether
        # dropout was meant to be applied.
        self.dropout = nn.Dropout(cfg['drp'])
        self.fc = nn.Linear(cfg['rnnh'], num_features)

        self.lr = cfg['lr']
        self.ls = nn.MSELoss()
        # ModuleDict (not a plain dict) registers the metrics as submodules,
        # so they move with the model on .to(...). A mean-absolute-error
        # metric was previously listed here and disabled.
        self.metrics = nn.ModuleDict({
            "cos_sim": torchmetrics.CosineSimilarity(),
        })
        self.vmetrics = nn.ModuleDict({
            "cos_sim": torchmetrics.CosineSimilarity(),
        })

    def loss( self, x , y ):
        """Mean squared error between prediction ``x`` and target ``y``."""
        return self.ls(x, y)

    def forward(self, x ):
        """Map ``(batch, seq, features)`` to ``(batch, features)``.

        Only the GRU output of the last time step is passed to the linear head.
        """
        out, _ = self.rnn(x)
        last_step = out[:, -1, :]
        return self.fc(last_step)

    def predict_step(self, batch, batch_idx: int , dataloader_idx: int = None):
        """Run the model on a batch.

        Accepts either a ready ``(batch, seq, features)`` tensor or an
        ``(x, target)`` pair as produced by the training loaders; in the
        latter case the loader dimension of ``x`` is folded into the batch.
        """
        if isinstance(batch, (list, tuple)):
            batch = batch[0].flatten(0, 1)
        return self(batch)

    def _shared_step(self, dt, bank):
        """Forward pass, loss and metric update shared by train and validation.

        ``dt`` is an ``(x, target)`` pair with a leading loader dimension.
        That dimension is merged into the next one, which equals the former
        ``x[0]`` / ``target[0]`` when the loader batch size is 1 and keeps
        every sample when it is larger.
        """
        x, target = dt
        x = x.to(self.device)
        target = target.to(self.device)

        flat_target = target.flatten(0, 1)
        out = self.forward(x.flatten(0, 1))
        loss = self.loss(out, flat_target)

        # Metrics and the returned prediction use a detached copy so that no
        # autograd graph is kept alive after the optimisation step.
        detached = out.detach()
        for name in bank:
            bank[name].update(detached, flat_target)

        return {"loss": loss, "out": detached, "target": target}

    def _log_epoch(self, prefix, outputs, bank):
        """Average the step losses, read out the metrics and log them.

        Metrics are reset afterwards so each epoch reports only its own data.
        """
        dt = {}
        if outputs:
            dt[prefix + "/loss"] = sum(step['loss'] for step in outputs) / len(outputs)
        for name in bank:
            dt[prefix + "/" + name] = bank[name].compute()
            bank[name].reset()

        if wandb : wandb.log(dt, step = self.current_epoch)

    def training_step(self, dt , bid ):
        return self._shared_step(dt, self.metrics)

    def training_epoch_end(self ,x):
        self._log_epoch("train", x, self.metrics)

    def validation_step(self, dt, bid ):
        return self._shared_step(dt, self.vmetrics)

    def validation_epoch_end(self , x):
        self._log_epoch("val", x, self.vmetrics)

    def configure_optimizers(self):
        """AdamW over all parameters with the configured learning rate."""
        return torch.optim.AdamW(self.parameters(), lr=self.lr)

md = RNNModel()

In [None]:
# Chronological 80/20 split over window indices.
#
# Sampling individual rows of `df` would scatter the rows of one id across
# both frames, while every window is assembled from complete per-id row
# groups. The dataset is therefore built once over the whole frame and split
# by window index: the first 80% of the windows train the model, the
# remaining ones validate it. Early validation windows still share input
# frames with late training windows, but no target frame is shared.
full_data = metrics(df)
n_train = int(0.8 * len(full_data))

# `train` / `test` are dataset subsets (not DataFrames).
train = torch.utils.data.Subset(full_data, range(n_train))
test = torch.utils.data.Subset(full_data, range(n_train, len(full_data)))

train_loader = DataLoader(train, batch_size=1)
val_loader = DataLoader(test, batch_size=1)

In [None]:
# The loaders are left as they are; only the model is moved to the device
# selected in the configuration cell.
md = md.to(device)

In [None]:
# Train on every visible GPU, or on the CPU when none is available
# (`gpus=-1` alone raises on a machine without CUDA, although the
# configuration cell explicitly falls back to the CPU).
trainer = pl.Trainer(
    max_epochs=cfg['epochs'],
    gpus=-1 if torch.cuda.is_available() else None,
)
trainer.fit(md, train_loader, val_loader)

# Pickle the whole module under the run name; loading it back requires the
# RNNModel class to be importable.
torch.save(md, cfg['name'] + ".pt")

In [None]:
# Example input for the ONNX export: the input tensor of the first window, on the CPU.
x = metrics_data[0][0].to("cpu")
# Move the model to the CPU as well so it matches the example input.
md = md.to("cpu")
# Display the input/target shapes of the first window next to the example input's shape.
metrics_data[0][0].shape , metrics_data[0][1].shape, x.shape

In [None]:
# Export the model to ONNX, using `x` as the example input.
torch.onnx.export(md,                        # model being exported
                  x,                         # example input
                  "gru_model.onnx",          # output file
                  export_params=True,        # include the parameter values in the file
                  opset_version=10,          # ONNX operator-set version to target
                  do_constant_folding=True,  # enable the exporter's constant-folding pass
                  input_names = ['input'],   # name given to the graph input
                  output_names = ['output'], # name given to the graph output
                  # Axis 0 of both input and output is declared dynamic
                  # under the name 'batch_size'.
                  dynamic_axes={'input' : {0 : 'batch_size'},
                                'output' : {0 : 'batch_size'}})