<a href="https://colab.research.google.com/github/Marouen07/Dreem-Deep-Sleep-Net/blob/master/ColanNotebooks/Dreem_deepSleepNet_NN_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import files
upload=files.upload()
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!kaggle competitions download -c dreem-sleep-stages-2020 -p /content
!unzip \X_train.h5.zip

In [0]:
import h5py
import pandas as pd 
import numpy as np 
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt


In [0]:
def Create_dataset(measures,f,labels_path='y_train.csv'):
  """Read the requested sensors from an HDF5 container and standardize them.

  Parameters
  ----------
  measures : list of str
      Names of the datasets (sensors) to read from ``f``.
  f : h5py.File or mapping
      Container such that ``f[name][:]`` returns a 2-D array with one row per
      sample. Every sensor is assumed to hold the same number of rows in the
      same sample order -- TODO confirm against the data files.
  labels_path : str, optional
      CSV file containing a ``sleep_stage`` column (default ``'y_train.csv'``).

  Returns
  -------
  df : pandas.DataFrame
      Row-wise standardized signals, indexed by ``(id, channel)`` and sorted,
      so the rows of one sample (one per sensor) are contiguous.
  y : numpy.ndarray
      Values of the ``sleep_stage`` column.
  n_measures : int
      Number of sensors read (``len(measures)``).
  n_samples : int
      Number of rows per sensor.
  """
  # Stack the sensors vertically: all rows of the first sensor, then the second, ...
  df=pd.concat([pd.DataFrame(f[name][:]) for name in measures],ignore_index=True)
  y=pd.read_csv(labels_path)['sleep_stage'].values

  # Standardize each row (zero mean, unit std). A constant row has std == 0 and
  # would turn into NaN/inf; dividing by 1 instead leaves it as all zeros.
  row_std=df.std(axis=1)
  row_std=row_std.where(row_std!=0,1.0)
  df=df.sub(df.mean(axis=1),axis=0).div(row_std,axis=0)

  # Re-key the stacked rows by (sample id, sensor name) so that sorting groups
  # the sensors of a sample together, ready to be reshaped for the network.
  n_measures=len(measures)
  n_samples=df.shape[0]//n_measures
  df['id']=df.index%n_samples
  df['channel']=[measures[i] for i in df.index//n_samples]
  df=df.set_index(['id','channel']).sort_index()
  return df,y,n_measures,n_samples

In [0]:
def create_loader(X,y,batch_size,shuffle=True):
  """Wrap NumPy features and labels in a PyTorch DataLoader.

  Parameters
  ----------
  X : numpy.ndarray
      Features; converted to float32 tensors.
  y : numpy.ndarray
      Integer class labels; converted to int64 tensors, the dtype
      ``nn.CrossEntropyLoss`` requires for class-index targets.
  batch_size : int
      Number of samples per batch.
  shuffle : bool, optional
      Whether the loader reshuffles the data every epoch (default True).

  Returns
  -------
  torch.utils.data.DataLoader
      Yields ``(inputs, labels)`` batches.
  """
  features=torch.from_numpy(X).float()
  # Force int64 so labels stored as int32/uint8 do not break the loss.
  targets=torch.from_numpy(y).long()
  return DataLoader(TensorDataset(features,targets),shuffle=shuffle,batch_size=batch_size)

In [0]:
def Oversample(df,y,shape,test_size=.2,random_state=None):
  """Split the data into train/validation and oversample the training part.

  SMOTE is fitted on the training split only, so the validation split keeps
  the original samples and class proportions.

  Parameters
  ----------
  df : pandas.DataFrame
      Dataset; its values are flattened to one row per sample.
  y : array-like
      Labels, one per sample.
  shape : sequence of int
      Target array shape.
      for 1d Conv (n_samples, N_channels(=N_measures), Length(Sampling_Frequency*Duration))
      for 2d Conv (n_samples, N_channels(=1), N_measures, Length(Sampling_Frequency*Duration))
  test_size : float, optional
      Fraction of the samples held out for validation (default 0.2).
  random_state : int or None, optional
      Seed forwarded to both the split and SMOTE; pass an int to make the
      result reproducible. ``None`` (default) keeps the unseeded behaviour.

  Returns
  -------
  X_resampled, y_resampled : numpy.ndarray
      Oversampled training data, shaped ``(n, *shape[1:])``, and its labels.
  X_val, y_val : numpy.ndarray
      Untouched validation data, shaped ``(n, *shape[1:])``, and its labels.
  """
  # SMOTE and train_test_split work on 2-D input: one flat row per sample.
  X=df.values.reshape(shape[0],-1)
  X_train, X_val, y_train, y_val = train_test_split(
      X, y, test_size=test_size, random_state=random_state)
  # `n_jobs` is intentionally not passed: it is deprecated on SMOTE in recent
  # imbalanced-learn releases.
  X_resampled, y_resampled = SMOTE(random_state=random_state).fit_resample(X_train, y_train)
  # Restore the per-sample layout expected by the network.
  sample_shape=tuple(shape[1:])
  X_resampled=X_resampled.reshape((X_resampled.shape[0],)+sample_shape)
  X_val=X_val.reshape((X_val.shape[0],)+sample_shape)
  return X_resampled,y_resampled,X_val,y_val

In [0]:
def Train_model(model,criterion,optimizer,loader,scheduler,mode='train'):
  """Run one epoch of training or validation and print loss and accuracy.

  Parameters
  ----------
  model : torch.nn.Module
      Network to run. If it exposes ``init_hidden(batch_size)`` the method is
      called before every batch.
  criterion : callable
      Loss ``criterion(output, labels)`` returning a scalar tensor.
  optimizer : torch.optim.Optimizer
      Used only when ``mode == 'train'``.
  loader : iterable
      Yields ``(inputs, labels)`` batches.
  scheduler : learning-rate scheduler or None
      Stepped once at the end of a training epoch; ignored in validation.
  mode : {'train', 'val'}
      'train' updates the weights; 'val' only evaluates (no gradients).

  Returns
  -------
  float
      Mean of the per-batch losses over the epoch.

  Raises
  ------
  ValueError
      If ``mode`` is not 'train' or 'val', or if ``loader`` yields no samples.
  """
  if mode not in ('train','val'):
    # Fail loudly: silently returning None only surfaces later as a confusing error.
    raise ValueError("mode should be 'train' or 'val', got {!r}".format(mode))
  training=mode=='train'
  model.train(training)

  # Run on the device the model lives on, rather than relying on a notebook
  # global that is only defined in a later cell.
  first_param=next(model.parameters(),None)
  run_device=first_param.device if first_param is not None else torch.device('cpu')

  losses=[]
  correct=0
  total=0
  for inputs, labels in loader:
    inputs, labels = inputs.to(run_device), labels.to(run_device)
    if hasattr(model,'init_hidden'):
      # Reset the recurrent state for the size of the current batch.
      model.init_hidden(inputs.size(0))
    if training:
      optimizer.zero_grad()
    with torch.set_grad_enabled(training):
      output = model(inputs)
      loss = criterion(output, labels)
      _,pred=torch.max(output, 1)
      if training:
        loss.backward()
        optimizer.step()
    losses.append(loss.item())
    correct+=(pred==labels).sum().item()
    total+=inputs.shape[0]

  if total==0:
    raise ValueError('loader yielded no samples')
  if training and scheduler is not None:
    # One scheduler step per epoch, after the optimizer updates.
    scheduler.step()
  mean_loss=float(np.mean(losses))
  print('{} Loss {:.4f}  Acc : {:.2%}  '.format(mode,mean_loss,correct/total))
  return mean_loss

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
from torch.optim import lr_scheduler
class Classifier(nn.Module):
    """Two-branch 1-D CNN classifier with an optional bidirectional LSTM head.

    The input goes through two parallel convolutional stacks whose first layers
    use a short kernel (``fs//2``, branch ``temporal``) and a long kernel
    (``fs*4``, branch ``freq``). Their outputs are either flattened and fed to
    a linear layer, or (``lstm=True``) concatenated along the last axis and
    passed through a bidirectional LSTM followed by a linear layer. Both heads
    produce 5 logits.

    Parameters
    ----------
    n_measures : int
        Number of input channels (sensors).
    fs : int
        Sampling frequency; sets the first-layer kernel sizes and strides.
    drop : float, optional
        Dropout probability used in the convolutional stacks (default 0.5).
    lstm : bool, optional
        Use the LSTM head instead of the plain linear head (default False).

    Notes
    -----
    The sizes 768, 6 and 2560*2 are hard-coded and match inputs of length 1500
    with ``fs=50`` (the values used in this notebook); other input lengths or
    sampling rates need these numbers adjusted.
    """

    def __init__(self, n_measures,fs, drop=.5,lstm=False):
        super().__init__()
        self.measures=n_measures
        self.lstm_bool= lstm
        self.n_layers=1
        self.temporal = nn.Sequential(
            nn.Conv1d(self.measures,128, fs//2,fs//10),nn.ReLU(inplace=True),nn.BatchNorm1d(128),nn.MaxPool1d(8,stride=8),nn.Dropout(drop),
            nn.Conv1d(128,128, 8,stride=1),nn.ReLU(inplace=True),nn.BatchNorm1d(128),
            nn.Conv1d(128,128, 8,stride=1),nn.ReLU(inplace=True),nn.BatchNorm1d(128),
            nn.Conv1d(128,128, 8,stride=1),nn.ReLU(inplace=True),nn.BatchNorm1d(128),nn.MaxPool1d(4,stride=4),
            nn.Dropout(drop))

        self.freq = nn.Sequential(
            nn.Conv1d(self.measures,128, fs*4,stride=fs//3),nn.ReLU(inplace=True),nn.BatchNorm1d(128),nn.MaxPool1d(4,stride=4),nn.Dropout(drop),
            nn.Conv1d(128,128, 6,stride=1),nn.ReLU(inplace=True),nn.BatchNorm1d(128),
            nn.Conv1d(128,128, 6,stride=1),nn.ReLU(inplace=True),nn.BatchNorm1d(128),
            nn.Conv1d(128,128, 6,stride=1),nn.ReLU(inplace=True),nn.BatchNorm1d(128),nn.MaxPool1d(2,stride=2),
            nn.Dropout(drop))

        self.hidden_size=10*2
        # nn.LSTM applies dropout only between stacked layers; with a single
        # layer a non-zero value has no effect and only triggers a UserWarning.
        lstm_dropout=drop if self.n_layers>1 else 0
        self.lstm=nn.LSTM(input_size=6,hidden_size=self.hidden_size,
                    num_layers=self.n_layers,dropout=lstm_dropout,batch_first=True,
                    bidirectional =True)
        self.post_lstm=nn.Sequential(nn.Linear(2560*2,5))
        self.lin=nn.Sequential(nn.Linear(768,5))
        # (h, c) state of the LSTM; created by init_hidden() or lazily in forward().
        self.hidden=None

    def init_hidden(self, batch_size):
        """Reset the LSTM state to zeros for a batch of ``batch_size`` samples."""
        # The state layout is (num_layers * num_directions, batch, hidden_size)
        # even with batch_first=True.
        reference=next(self.parameters())
        state_shape=(self.n_layers*2,batch_size,self.hidden_size)
        # Allocate on the module's own device/dtype instead of a notebook global.
        self.hidden=(
            torch.zeros(state_shape,device=reference.device,dtype=reference.dtype),
            torch.zeros(state_shape,device=reference.device,dtype=reference.dtype),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, 5)`` for input ``x``."""
        x_t = self.temporal(x)
        x_f = self.freq(x)
        if self.lstm_bool:
            # Concatenate the branches along the last axis; with batch_first the
            # LSTM then treats the 128 conv channels as the sequence dimension.
            x=torch.cat([x_f, x_t], dim=2)
            state=self.hidden
            if state is None or state[0].size(1)!=x.size(0) or state[0].device!=x.device:
                # No usable state (never initialised, or batch size/device changed).
                self.init_hidden(x.size(0))
            x,new_state=self.lstm(x,self.hidden)
            # Keep the state as before, but detached so the autograd graph of
            # this batch is not retained through self.hidden.
            self.hidden=tuple(s.detach() for s in new_state)
            x=x.contiguous().view(x.size(0), -1)
            return self.post_lstm(x)
        x_t=x_t.view(x_t.size(0), -1)
        x_f=x_f.view(x_f.size(0), -1)
        x=torch.cat([x_f, x_t], dim=1)
        return self.lin(x)

In [64]:
# Choose the compute device once: CUDA when torch reports a usable GPU, CPU
# otherwise. `device` is the handle the rest of the notebook refers to.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")

GPU is available


In [0]:
F_s=50 #Sampling Frequency
# Open the file in a context manager so the HDF5 handle is released as soon as
# the signals have been copied into memory by Create_dataset.
with h5py.File('X_train.h5', 'r') as f:
  L=list(f.keys())
  # Only the first dataset of the file is used here.
  df_eeg,y,n_measures,n_samples=Create_dataset(L[:1],f)
# (n_samples, channels, length); 30 is presumably the window duration in
# seconds -- TODO confirm against the dataset description.
shape=(n_samples,n_measures,F_s*30)
X_train,y_train,X_val,y_val=Oversample(df_eeg,y,shape)

In [0]:
batch_size=256
train_loader=create_loader(X_train,y_train,batch_size)
# Validation metrics do not depend on batch order, so skip shuffling: it keeps
# evaluation deterministic and does not consume the global RNG.
val_loader=create_loader(X_val,y_val,batch_size,shuffle=False)

In [125]:
# Build the network with the LSTM head disabled and move it to the chosen device.
classifier = Classifier(n_measures, F_s, lstm=False).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=0.1)#,weight_decay=1e-4)
# Multiply the learning rate by 0.1 after every 5 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

  "num_layers={}".format(dropout, num_layers))


In [130]:
epochs=10
# Per-epoch mean losses, one entry per epoch.
Losses_v=[]
Losses_t=[]
for epoch in range(epochs):
  print('epoch: ',epoch)
  # Train_model returns a single scalar, so it must be appended;
  # `list += scalar` raises TypeError because a scalar is not iterable.
  Losses_t.append(Train_model(classifier,criterion,optimizer,train_loader,scheduler,mode='train'))
  Losses_v.append(Train_model(classifier,criterion,optimizer,val_loader,scheduler,mode='val'))

epoch:  0
train Loss 0.9344  Acc : 62.79%  
val Loss 1.0551  Acc : 59.01%  
epoch:  1
train Loss 0.9293  Acc : 63.06%  
val Loss 1.0547  Acc : 59.15%  
epoch:  2
train Loss 0.9311  Acc : 62.96%  
val Loss 1.0556  Acc : 59.28%  
epoch:  3
train Loss 0.9313  Acc : 63.06%  
val Loss 1.0540  Acc : 59.23%  
epoch:  4
train Loss 0.9318  Acc : 63.14%  
val Loss 1.0608  Acc : 59.09%  
epoch:  5
train Loss 0.9305  Acc : 62.85%  
val Loss 1.0506  Acc : 59.25%  
epoch:  6
train Loss 0.9338  Acc : 62.87%  
val Loss 1.0591  Acc : 59.30%  
epoch:  7
train Loss 0.9294  Acc : 62.72%  
val Loss 1.0476  Acc : 59.21%  
epoch:  8
train Loss 0.9297  Acc : 63.24%  
val Loss 1.0606  Acc : 58.85%  
epoch:  9
train Loss 0.9294  Acc : 62.97%  
val Loss 1.0732  Acc : 58.95%  


In [0]:
!unzip \X_test.h5.zip

Archive:  X_test.h5.zip
  inflating: X_test.h5               


In [0]:
import h5py
import pandas as pd 
f= h5py.File('X_test.h5', 'r')