In [18]:
import re 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.model_selection import train_test_split
import random 
import math
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Dataset
from tqdm import tqdm_notebook
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import torch.nn.functional as F
import d2l
import time
import traceback
import fastprogress
from torchmetrics.classification import BinaryAccuracy, Accuracy 
import torch.nn.init as init
import torch.optim.lr_scheduler as lr_scheduler
from itertools import repeat


In [29]:
# Custom Dataset class for loading data
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        # Implement data retrieval for each index
        input_data = self.X[idx]
        target_data = self.y[idx]
        input_data = input_data.unsqueeze(0)
        
        # Convert data to torch tensors if required
        input_tensor = torch.Tensor(input_data)
        target_tensor = torch.Tensor(target_data)
        
        return input_tensor, target_tensor

In [30]:
#with scaling
df = pd.read_csv("C:/Users/kacpe/Desktop/study/research lab/data_model_v2.csv")
# List of column names to drop
columns_to_drop = ['lKnee_x','lKnee_y','lKnee_z','lAnkle_x','lAnkle_y','lAnkle_z','rKnee_x','rKnee_y','rKnee_z','rAnkle_x','rAnkle_y','rAnkle_z']
#columns_to_keep =  ['id', 'trial','lShoulder_x', 'lShoulder_y', 'lShoulder_z', 'lElbow_x', 'lElbow_y', 'lElbow_z', 'lWrist_x', 'lWrist_y', 'lWrist_z']
#df = df.drop(columns=columns_to_drop)
#df = df[columns_to_keep]
# Step 1: Separate 'id' and 'trial' columns from the rest of the data
data_to_scale = df.drop(columns=['id', 'trial'])

# Step 2: Apply MinMaxScaler to the remaining columns
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_to_scale)

# Convert the scaled data back to a DataFrame
scaled_df = pd.DataFrame(scaled_data, columns=data_to_scale.columns)

# Step 3: Merge 'id' and 'trial' columns with the scaled data
scaled_df[['id', 'trial']] = df[['id', 'trial']]

# Step 4: Split the data into training and test sets based on the 'trial' column
train_set = scaled_df[scaled_df['trial'].isin(range(1, 15))].drop(columns=['id', 'trial'])
test_set = scaled_df[scaled_df['trial']==15].drop(columns=['id', 'trial'])
val_set = scaled_df[scaled_df['trial']==16].drop(columns=['id', 'trial'])
full_set = scaled_df.drop(columns=['id','trial'])

# split data into x and y 
X_train, y_train = train_set.iloc[:,:-4], train_set.iloc[:,-4:]
X_test, y_test = test_set.iloc[:,:-4], test_set.iloc[:,-4:]
X_val, y_val = val_set.iloc[:,:-4], val_set.iloc[:,-4:]
X, y = full_set.iloc[:,:-4], full_set.iloc[:,-4:]

# Create custom datasets for training, validation, and testing
full_dataset = MyDataset(torch.tensor(X.values), torch.tensor(y.values))
train_dataset = MyDataset(torch.tensor(X_train.values), torch.tensor(y_train.values))
val_dataset = MyDataset(torch.tensor(X_val.values), torch.tensor(y_val.values))
test_dataset = MyDataset(torch.tensor(X_test.values), torch.tensor(y_test.values))

# Create a DataLoader
#batch_size = 5561#67  # Set your desired batch size
#shuffle = False  # Set to False to preserve the order of your data
#fullset_b_size = X.shape[0]/2
fullset_dataloader = DataLoader(full_dataset, batch_size=X.shape[0], shuffle=True)
train_dataloader = DataLoader(train_dataset, batch_size=X_train.shape[0], shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=X_test.shape[0], shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=X_val.shape[0], shuffle=True)





In [4]:
#without scaling

df = pd.read_csv("C:/Users/kacpe/Desktop/study/research lab/data_model_v2.csv")
# List of column names to drop
columns_to_drop = ['id','lKnee_x','lKnee_y','lKnee_z','lAnkle_x','lAnkle_y','lAnkle_z','rKnee_x','rKnee_y','rKnee_z','rAnkle_x','rAnkle_y','rAnkle_z']
df = df.drop(columns=columns_to_drop)


# Step 4: Split the data into training and test sets based on the 'trial' column
train_set = df[df['trial'].isin(range(1, 15))].drop(columns='trial')
test_set = df[df['trial']==15].drop(columns='trial')
val_set = df[df['trial']==16].drop(columns='trial')
full_set = df.drop(columns='trial')

# split data into x and y 
X_train, y_train = train_set.iloc[:,:-4], train_set.iloc[:,-4:]
X_test, y_test = test_set.iloc[:,:-4], test_set.iloc[:,-4:]
X_val, y_val = val_set.iloc[:,:-4], val_set.iloc[:,-4:]
X, y = full_set.iloc[:,:-4], full_set.iloc[:,-4:]

In [119]:
#with scaling
df = pd.read_csv("C:/Users/kacpe/Desktop/study/research lab/data_model_v2.csv")
# List of column names to drop
#columns_to_drop = ['lKnee_x','lKnee_y','lKnee_z','lAnkle_x','lAnkle_y','lAnkle_z','rKnee_x','rKnee_y','rKnee_z','rAnkle_x','rAnkle_y','rAnkle_z']
#df = df.drop(columns=columns_to_drop)

# List of column names to keep
columns_to_keep = ['id', 'trial','lShoulder_x', 'lShoulder_y', 'lShoulder_z', 'lElbow_x', 'lElbow_y', 'lElbow_z', 'lWrist_x', 'lWrist_y', 'lWrist_z']

# Select the columns you want to keep
df = df[columns_to_keep]
# Step 1: Separate 'id' and 'trial' columns from the rest of the data
data_to_scale = df.drop(columns=['id', 'trial'])

# Step 2: Apply MinMaxScaler to the remaining columns
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data_to_scale)

# Convert the scaled data back to a DataFrame
scaled_df = pd.DataFrame(scaled_data, columns=data_to_scale.columns)

# Step 3: Merge 'id' and 'trial' columns with the scaled data
scaled_df[['id', 'trial']] = df[['id', 'trial']]

# Step 4: Split the data into training and test sets based on the 'trial' column
train_set = scaled_df[scaled_df['trial'].isin(range(1, 15))].drop(columns=['id', 'trial'])
test_set = scaled_df[scaled_df['trial']==15].drop(columns=['id', 'trial'])
val_set = scaled_df[scaled_df['trial']==16].drop(columns=['id', 'trial'])
full_set = scaled_df.drop(columns=['id','trial'])

# split data into x and y 
X_train, y_train = train_set.iloc[:,:-4], train_set.iloc[:,-4:]
X_test, y_test = test_set.iloc[:,:-4], test_set.iloc[:,-4:]
X_val, y_val = val_set.iloc[:,:-4], val_set.iloc[:,-4:]
X, y = full_set.iloc[:,:-4], full_set.iloc[:,-4:]

In [9]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [6]:
class GRUCell_net(nn.Module):
    def __init__(self, input_dim, hidden_dim, bias=True):
        super(GRUCell_net, self).__init__()
        self.input_dim = input_dim 
        self.hidden_dim = hidden_dim
        self.gru = nn.GRUCell(input_dim, hidden_dim, bias=True)
        #Xavier initialization for GRU weights
        for name, param in self.gru.named_parameters():
            if 'weight' in name:
                init.xavier_uniform_(param.data)
            elif 'bias' in name:
                init.constant_(param.data, 0.0)
                
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sotfplus = nn.Softplus()
        self.relu = nn.ReLU()
    
    def init_state(self, batch_size):
        return nn.Parameter(torch.zeros([batch_size, self.gru.hidden_size]))

        
    def forward(self, x, hidden_state=None, reverse=True):    
        hidden_states = []
        hidden_state = self.init_state(x.shape[0]) if hidden_state is None else hidden_state
        
        rng = range(x.shape[1])
        rng = reversed(rng) if reverse else rng
        
        for t in rng:
            input_ = x[:,t:t+1]
            hidden_state = self.gru(input_, hidden_state)
            hidden_states.append(hidden_state)
            
        hidden_states = torch.stack(hidden_states, 1)
        return hidden_states

In [10]:
class GRUNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2, bidirectional=False):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.gru = nn.GRUCell(input_dim, hidden_dim, bias=True)
        #Xavier initialization for GRU weights
        for name, param in self.gru.named_parameters():
            if 'weight' in name:
                init.xavier_uniform_(param.data)
            elif 'bias' in name:
                init.constant_(param.data, 0.0)
                
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sotfplus = nn.Softplus()
        self.relu = nn.ReLU()

        
        
    def forward(self, x, h):
        out, h = self.gru(x, h)
        out = self.fc(self.sotfplus(out[:,-1]))
        #out = self.fc(self.relu(out[:,-1]))
        out = F.softmax(out, dim=1)
        return out, h
    
    #def init_hidden(self, batch_size):
        #weight = next(self.parameters()).data
        #hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        #return hidden
    def init_hidden(self, batch_size):
        if batch_size > 1:
            weight = next(self.parameters()).data
            hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        else:
            weight = next(self.parameters()).data
            hidden = weight.new(self.n_layers, self.hidden_dim).zero_().to(device)
        return hidden

In [11]:
class LSTMNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.1):
        super(LSTMNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob, bidirectional=False)
            # Xavier initialization for LSTM weights
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                init.xavier_uniform_(param.data)
            elif 'bias' in name:
                init.constant_(param.data, 0.0)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.softplus = nn.Softplus()
        self.relu = nn.ReLU()
        
    def forward(self, x, h):
        out, (h, c) = self.lstm(x, h)  # Unpack both hidden state h and cell state c
        out = self.fc(self.softplus(out[:,-1]))
        #out = self.fc(self.relu(out[:, -1]))
        out = torch.sigmoid(out)
        return out, h, c  # Return both the output, hidden state, and cell state
    
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
        return hidden

In [305]:
input_shape = next(iter(train_dataloader))[0].shape
print("Input shape:", input_shape)

Input shape: torch.Size([6107, 1, 42])


In [28]:
def train_gru(train_loader, learn_rate, hidden_dim=32, epochs=5, model_type="GRU", threshold=0.5, criterion=nn.BCEWithLogitsLoss(),    input_dim = 42, n_layers = 10):
    
    # Setting common hyperparameters
    
    output_dim = 4
    
    # Instantiating the models
    if model_type == "GRU":
        model = GRUNet(input_dim, hidden_dim, output_dim, n_layers)
    else:
        model = LSTMNet(input_dim, hidden_dim, output_dim, n_layers)
    model.to(device)
    
    # Defining loss function and optimizer
    #criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
    
    model.train()
    print("Starting Training of {} model".format(model_type))
    epoch_times = []
    train_losses = []
    
    # Start training loop
    for epoch in range(1,epochs+1):
        start_time = time.perf_counter()
        h = model.init_hidden(batch_size)
        avg_loss = 0.
        counter = 0
        total_correct = 0
        total_samples = 0  
        for x, label in train_loader:
            counter += 1
            if model_type == "GRU":
                h = h.data
            else:
                h = tuple([e.data for e in h])
            model.zero_grad()
            
            out, h = model(x.to(device).float(), h)
            predicted_labels = (out > threshold).float()
            #accuracy = Accuracy(predicted_labels, label.to(device).float())
            loss = criterion(out, label.to(device).float())
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
            #avg_acc = accuracy.mean()
            # Compute accuracy
            #predicted_labels = (out > threshold).long()
            total_correct += (predicted_labels == label.to(device)).sum().item()
            total_samples += label.size(0)
            train_losses.append(avg_loss)
            if counter%200 == 0:
                print("Epoch {}......Step: {}/{}....... Average Loss for Epoch: {}".format(epoch, counter, len(train_loader), avg_loss/counter))
        current_time = time.perf_counter()
            # Calculate accuracy metric
        
    
    
        current_time = time.perf_counter()
        #print("Epoch {}/{} Done, avg Loss: {}, Accuracy: {}".format(epoch, epochs, avg_loss, avg_acc.item()))
        train_outputs, train_targets, train_accuracy, train_predictions = evaluate(model, train_loader, threshold)
        print("Epoch {}/{} Done, avg Loss: {}".format(epoch, epochs, avg_loss),"Train Accuracy: {:.2f}%".format(train_accuracy * 100))
        #print("Total Time Elapsed: {} seconds".format(str(current_time - start_time)))
        epoch_times.append(current_time - start_time)
        
        #print("Epoch {}/{} Done, Total Loss: {}".format(epoch, epochs, avg_loss/len(train_loader)))
        #print("Total Time Elapsed: {} seconds".format(str(current_time-start_time)))
        #epoch_times.append(current_time-start_time)
    print("Total Training Time: {} seconds".format(str(sum(epoch_times))),)
    return model, train_losses 

In [50]:
# training function with added gates for LSTM and GRU 
def train_lstm(train_loader, learn_rate, hidden_dim=32, epochs=5, model_type="GRU", threshold=0.5, criterion=nn.BCEWithLogitsLoss(), input_dim = 42, n_layers = 5, lr_scheduler=None):
    
    # Setting common hyperparameters
    output_dim = 4
    # Instantiating the models
    if model_type == "GRU":
        model = GRUNet(input_dim, hidden_dim, output_dim, n_layers)
    else:
        model = LSTMNet(input_dim, hidden_dim, output_dim, n_layers)
    model.to(device)
    
    # Defining loss function and optimizer
    #criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

    if lr_scheduler is not None:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)
    
    model.train()
    print("Starting Training of {} model".format(model_type))
    epoch_times = []
    train_losses = []
    # Inside your training loop:
    gate_values_list = []
    hidden_state_list = []
    predictions_list = []
    outputs_list = []
    
    # Start training loop
    for epoch in range(1,epochs+1):
        start_time = time.perf_counter()
        h = model.init_hidden(batch_size)
        avg_loss = 0.
        counter = 0
        total_correct = 0
        total_samples = 0  
        for x, label in train_loader:
            counter += 1
            if model_type == "GRU":
                h = h.data
            else:
                h = tuple([e.data for e in h])
            model.zero_grad()
            
            out, h, c = model(x.to(device).float(), h)
            predicted_labels = (out > threshold).float()
            #accuracy = Accuracy(predicted_labels, label.to(device).float())
            loss = criterion(out, label.to(device).float())
            loss.backward()
            optimizer.step()

        # Save the gate values and hidden state values in the lists
            lstm_gates = model.lstm.named_parameters()
            for name, param in lstm_gates:
                if 'weight_ih' in name or 'weight_hh' in name:
                    gate_values = param.detach().cpu().numpy()  # Convert to numpy array
                    gate_values_list.append(gate_values)

            hidden_state_values = h.detach().cpu().numpy()  # Convert to numpy array
            hidden_state_list.append(hidden_state_values)


            avg_loss += loss.item()
            #avg_acc = accuracy.mean()
            # Compute accuracy
            #predicted_labels = (out > threshold).long()
            total_correct += (predicted_labels == label.to(device)).sum().item()
            total_samples += label.size(0)
            train_losses.append(avg_loss)
            train_outputs, train_targets, train_accuracy, train_predictions = evaluate(model, train_loader)
            if counter%200 == 0:
                print("Epoch {}......Step: {}/{}....... Average Loss for Epoch: {}".format(epoch, counter, len(train_loader), avg_loss/counter),"Train Accuracy: {:.2f}%".format(train_accuracy * 100))
        current_time = time.perf_counter()
        
            # Calculate accuracy metric
        
    
    
        current_time = time.perf_counter()
        #print("Epoch {}/{} Done, avg Loss: {}, Accuracy: {}".format(epoch, epochs, avg_loss, avg_acc.item()))
        print("Epoch {}/{} Done, avg Loss: {}".format(epoch, epochs, avg_loss))
        #print("Total Time Elapsed: {} seconds".format(str(current_time - start_time)))
        epoch_times.append(current_time - start_time)
        
        #print("Epoch {}/{} Done, Total Loss: {}".format(epoch, epochs, avg_loss/len(train_loader)))
        #print("Total Time Elapsed: {} seconds".format(str(current_time-start_time)))
        #epoch_times.append(current_time-start_time)
    print("Total Training Time: {} seconds".format(str(sum(epoch_times))))
    return model, train_losses, gate_values_list, hidden_state_list

In [51]:
def evaluate(model, test_loader, threshold=0.5):
    model.eval()
    outputs = []
    targets = []
    predictions = []
    
    with torch.no_grad():
        for x, label in test_loader:
            h = model.init_hidden(x.shape[0])
            out, h, c = model(x.to(device).float(), h)
            outputs.append(torch.sigmoid(out).cpu().detach().numpy())  # Apply sigmoid
            targets.append(label.cpu().detach().numpy())
            threshold = 0.5  # You can adjust this threshold as needed
            sigmoid_out = torch.sigmoid(out)
            binary_predictions = (sigmoid_out > threshold).long()
            predictions.append(binary_predictions.cpu().detach().numpy().reshape(-1))
    
    accuracy = calculate_accuracy(outputs, targets, threshold)
    print("Accuracy: {:.2f}%".format(accuracy * 100))
    return outputs, targets, accuracy, predictions 

def calculate_accuracy(outputs, targets, threshold=0.5):
    total_samples = 0
    total_correct = 0
    
    for output, target in zip(outputs, targets):
        predicted_labels = (output > threshold).astype(int)
        total_correct += (predicted_labels == target).sum()
        total_samples += target.size
    
    accuracy = total_correct / total_samples
    return accuracy

In [308]:
X_train.shape

(6107, 42)

In [309]:
threshold = 0.5
type(threshold)

float

In [140]:
batch_size=X.shape[0]
pos_weight = torch.tensor([1.5])
criterion = nn.BCEWithLogitsLoss()#weight=pos_weight)
#criterion = nn.BCELoss(weight=pos_weight)
lr = 0.1
LSTM_model, losses, gate_values_list, hidden_state_list= train_gru(fullset_dataloader, lr, model_type="GRU",hidden_dim=24, epochs=1000, criterion = criterion, input_dim=54
                                                                    ,n_layers = 1)

Starting Training of GRU model
Accuracy: 62.48%
Epoch 1/1000 Done, avg Loss: 0.6934271454811096 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 2/1000 Done, avg Loss: 0.6581339836120605 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 3/1000 Done, avg Loss: 0.639103889465332 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 4/1000 Done, avg Loss: 0.6322697401046753 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 5/1000 Done, avg Loss: 0.6320079565048218 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 6/1000 Done, avg Loss: 0.6320298314094543 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 7/1000 Done, avg Loss: 0.6320427060127258 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 8/1000 Done, avg Loss: 0.6320475339889526 Train Accuracy: 62.48%
Accuracy: 62.48%
Epoch 9/1000 Done, avg Loss: 0.6320494413375854 Train Accuracy: 62.48%
Accuracy: 66.26%
Epoch 10/1000 Done, avg Loss: 0.6320502161979675 Train Accuracy: 66.26%
Accuracy: 68.09%
Epoch 11/1000 Done, avg Loss: 0.6320505738258362 Train Accuracy: 68.09%


ValueError: not enough values to unpack (expected 4, got 2)

In [141]:
outputs, targets, accuracy, predictions = evaluate(LSTM_model, fullset_dataloader)

ValueError: too many values to unpack (expected 2)

In [136]:
batch_size=X.shape[0]
pos_weight = torch.tensor([1.5])
criterion = nn.BCEWithLogitsLoss()#weight=pos_weight)
#criterion = nn.BCELoss(weight=pos_weight)
lr = 0.1
LSTM_model, losses, gate_values_list, hidden_state_list= train_lstm(fullset_dataloader, lr, model_type="LSTM",hidden_dim=24, epochs=1000, criterion = criterion, input_dim=54
                                                                    ,n_layers = 1)



Starting Training of LSTM model
Accuracy: 62.48%
Epoch 1/1000 Done, avg Loss: 0.6706575751304626
Accuracy: 62.48%
Epoch 2/1000 Done, avg Loss: 0.6284540295600891
Accuracy: 62.48%
Epoch 3/1000 Done, avg Loss: 0.6169496774673462
Accuracy: 62.48%
Epoch 4/1000 Done, avg Loss: 0.6150909662246704
Accuracy: 62.48%
Epoch 5/1000 Done, avg Loss: 0.6140050888061523
Accuracy: 62.48%
Epoch 6/1000 Done, avg Loss: 0.6132603287696838
Accuracy: 62.48%
Epoch 7/1000 Done, avg Loss: 0.6124299764633179
Accuracy: 62.48%
Epoch 8/1000 Done, avg Loss: 0.6094207167625427
Accuracy: 62.48%
Epoch 9/1000 Done, avg Loss: 0.6085997223854065
Accuracy: 62.48%
Epoch 10/1000 Done, avg Loss: 0.6062278151512146
Accuracy: 62.48%
Epoch 11/1000 Done, avg Loss: 0.6046302914619446
Accuracy: 62.48%
Epoch 12/1000 Done, avg Loss: 0.6030977368354797
Accuracy: 62.48%
Epoch 13/1000 Done, avg Loss: 0.6018347144126892
Accuracy: 62.48%
Epoch 14/1000 Done, avg Loss: 0.5992864966392517
Accuracy: 62.48%
Epoch 15/1000 Done, avg Loss: 0.5975

In [57]:
outputs, targets, accuracy, predictions = evaluate(LSTM_model, val_dataloader)

Accuracy: 67.46%


In [62]:
predictions

[array([1, 1, 0, ..., 1, 0, 0], dtype=int64)]

In [69]:
for i in np.arange(len(predictions[0])):
    print(predictions[0][i])

1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
0


In [311]:
hidden_states[0][0].shape

(6952, 12)

In [102]:
batch_size = 32
#batch_size=X.shape[0]
pos_weight = torch.tensor([1.3])
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
GRU_model, losses_gru, gates_gru, hidden_states_gru = train_lstm(train_dataloader, lr, model_type="LSTM",hidden_dim=24, epochs=2500, criterion = criterion, input_dim=56)

Starting Training of LSTM model


RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors

In [None]:
def accuracy(correct, total): 
    """Compute accuracy as percentage.

    Args:
        correct (int): Number of samples correctly predicted.
        total (int): Total number of samples

    Returns:
        float: Accuracy
    """
    return float(correct)/total

In [None]:
def evaluate(model, test_x, test_y):
    model.eval()
    outputs = []
    targets = []
    predictions = []  # Add this list to store the binary predictions

    start_time = time.perf_counter()
    for i in test_x.keys():
        inp = torch.from_numpy(np.array(test_x[i]))
        labs = torch.from_numpy(np.array(test_y[i]))
        h = model.init_hidden(inp.shape[0])
        out, h = model(inp.to(device).float(), h)
        #outputs.append(label_scalers[i].inverse_transform(out.cpu().detach().numpy()).reshape(-1))
        #targets.append(label_scalers[i].inverse_transform(labs.numpy()).reshape(-1))
        
        # Apply sigmoid and threshold to get binary predictions
        sigmoid_out = torch.sigmoid(out)
        threshold = 0.5  # You can adjust this threshold as needed
        binary_predictions = (sigmoid_out > threshold).long()
        predictions.append(binary_predictions.cpu().detach().numpy().reshape(-1))
        
    print("Evaluation Time: {}".format(str(time.perf_counter()-start_time)))
    sMAPE = 0
    for i in range(len(outputs)):
        sMAPE += np.mean(abs(outputs[i]-targets[i])/(targets[i]+outputs[i])/2)/len(outputs)
    print("sMAPE: {}%".format(sMAPE*100))
    return outputs, targets, sMAPE, predictions  # Return the binary predictions as well

In [None]:
X_train_dict = X_train.to_dict(orient='dict')
y_train_dict = y_train.to_dict(orient='dict')

In [None]:
def evaluate(model, test_loader, threshold=0.5):
    model.eval()
    outputs = []
    targets = []
    predictions = []
    
    with torch.no_grad():
        for x, label in test_loader:
            h = model.init_hidden(x.shape[0])
            out, h, c= model(x.to(device).float(), h)
            outputs.append(F.sigmoid(out).cpu().detach().numpy())  # Apply sigmoid
            targets.append(label.cpu().detach().numpy())
            threshold = 0.5  # You can adjust this threshold as needed
            sigmoid_out = torch.sigmoid(out)
            binary_predictions = (sigmoid_out > threshold).long()
            predictions.append(binary_predictions.cpu().detach().numpy().reshape(-1))
    
    accuracy = calculate_accuracy(outputs, targets, threshold)
    print("Accuracy: {:.2f}%".format(accuracy * 100))
    return outputs, targets, accuracy, predictions 

def calculate_accuracy(outputs, targets, threshold=0.5):
    total_samples = 0
    total_correct = 0
    
    for output, target in zip(outputs, targets):
        predicted_labels = (output > threshold).astype(int)
        total_correct += (predicted_labels == target).sum()
        total_samples += target.size
    
    accuracy = total_correct / total_samples
    return accuracy

In [None]:
outputs, targets, accuracy, predictions = evaluate(LSTM_model, train_dataloader)

In [None]:
predictions

In [None]:
sum(predictions[0]==0)


In [None]:
np.mean(outputs[0])

In [None]:
targets

In [None]:
batch_size = X.shape[0]
criterion = nn.BCEWithLogitsLoss()
lr = 0.001
#LSTM_model, losses_LSTM_1000 = train(fullset_dataloader, lr, model_type="LSTM",hidden_dim=24, epochs=1500, criterion = criterion)

In [None]:
outputs, targets, accuracy = evaluate(LSTM_model, fullset_dataloader)

In [None]:
outputs

In [None]:
targets

In [36]:
def train(train_loader, learn_rate, hidden_dim=256, EPOCHS=5, model_type="GRU", input_dim=42, ):
    
    # Setting common hyperparameters
    
    output_dim = 4
    n_layers = 2
    # Instantiating the models
    if model_type == "GRU":
        model = GRUNet(input_dim, hidden_dim, output_dim, n_layers)
    else:
        model = LSTMNet(input_dim, hidden_dim, output_dim, n_layers)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)
    # Defining loss function and optimizer
    pos_weight = torch.tensor([1.3])
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    
    model.train()
    print("Starting Training of {} model".format(model_type))
    epoch_times = []
    # Start training loop
    for epoch in range(1,EPOCHS+1):
        start_time = time.perf_counter()
        h = model.init_hidden(batch_size)
        avg_loss = 0.
        counter = 0
        for x, label in train_loader:
            counter += 1
            if model_type == "GRU":
                h = h.data
            else:
                h = tuple([e.data for e in h])
            model.zero_grad()
            
            out, h = model(x.to(device).float(), h)
            loss = criterion(out, label.to(device).float())
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
            if counter%200 == 0:
                print("Epoch {}......Step: {}/{}....... Average Loss for Epoch: {}".format(epoch, counter, len(train_loader), avg_loss/counter))
        current_time = time.perf_counter()
        print("Epoch {}/{} Done, Total Loss: {}".format(epoch, EPOCHS, avg_loss/len(train_loader)))
        print("Total Time Elapsed: {} seconds".format(str(current_time-start_time)))
        epoch_times.append(current_time-start_time)
    print("Total Training Time: {} seconds".format(str(sum(epoch_times))))
    return model

In [139]:
def evaluate(model, test_loader, threshold=0.5):
    model.eval()
    outputs = []
    targets = []
    predictions = []
    
    with torch.no_grad():
        for x, label in test_loader:
            h = model.init_hidden(x.shape[0])
            out, h= model(x.to(device).float(), h)
            outputs.append(torch.sigmoid(out).cpu().detach().numpy())  # Apply sigmoid
            targets.append(label.cpu().detach().numpy())
            threshold = 0.5  # You can adjust this threshold as needed
            sigmoid_out = torch.sigmoid(out)
            binary_predictions = (sigmoid_out > threshold).long()
            predictions.append(binary_predictions.cpu().detach().numpy().reshape(-1))
    
    accuracy = calculate_accuracy(outputs, targets, threshold)
    print("Accuracy: {:.2f}%".format(accuracy * 100))
    return outputs, targets, accuracy, predictions 

def calculate_accuracy(outputs, targets, threshold=0.5):
    total_samples = 0
    total_correct = 0
    
    for output, target in zip(outputs, targets):
        predicted_labels = (output > threshold).astype(int)
        total_correct += (predicted_labels == target).sum()
        total_samples += target.size
    
    accuracy = total_correct / total_samples
    return accuracy

In [39]:
pos_weight = torch.tensor([100])
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
lr = 0.001
LSTM_model= train_lstm(fullset_dataloader, lr, model_type="LSTM",hidden_dim=1000, epochs=1000, input_dim=42, criterion=criterion,lr_scheduler=True)

TypeError: train_lstm() got an unexpected keyword argument 'lr_scheduler'

In [53]:
outputs, targets, accuracy, predictions = evaluate(LSTM_model, val_dataloader)
predictions

Accuracy: 82.54%


[array([1, 1, 1, ..., 1, 1, 1], dtype=int64)]

Training Functions

In [None]:
class CoreAndReadout(nn.Module):
    def __init__(self, core, readout, nonlinearity=None):
        super().__init__()
        self.core = core
        self.readout = readout
        self.nonlinearity = elu1 if nonlinearity is None else nonlinearity
        
    def forward(self, x):
        latents = self.core(x)
        output = self.readout(latents)
        return self.nonlinearity(output), latents

In [21]:
class GRUCell_net(nn.Module):
    def __init__(self, input_dim, hidden_dim, bias=True):
        super(GRUCell_net, self).__init__()
        self.input_dim = input_dim 
        self.hidden_dim = hidden_dim
        self.gru = nn.GRUCell(input_dim, hidden_dim, bias=True)
        #Xavier initialization for GRU weights
                
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sotfplus = nn.Softplus()
        self.relu = nn.ReLU()
    
    def init_state(self, batch_size):
        return nn.Parameter(torch.zeros([batch_size, self.gru.hidden_size]))

        
    def forward(self, x, hidden_state=None, reverse=True):    
        hidden_states = []
        hidden_state = self.init_state(x.shape[0]) if hidden_state is None else hidden_state
        
        rng = range(x.shape[1])
        rng = reversed(rng) if reverse else rng
        
        for t in rng:
            input_ = x[:,t:t+1]
            hidden_state = self.gru(input_, hidden_state)
            hidden_states.append(hidden_state)
            
        hidden_states = torch.stack(hidden_states, 1)
        return hidden_states

In [None]:
def train(dataloader, model, optimizer, loss_fn, scheduler=None, device=None):
    epoch_loss = []
    epoch_correct, epoch_total = 0, 0
    
    model.train()
    for x, y in dataloader:
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
        
        y_pred, _ = model(x)
        
        loss = loss_fn(y_pred, y)
        epoch_loss.append(loss.item())
        
        y_pred = torch.round(y_pred)
        epoch_correct += sum((y == y_pred).flatten()).item()
        epoch_total += y.numel()
        
        loss.backward()
        optimizer.step()
        if scheduler:
            scheduler.step()
    
    return np.mean(epoch_loss), accuracy(epoch_correct, epoch_total)

In [None]:
def validate(dataloader, model, loss_fn, device=None):
    epoch_loss = []
    epoch_correct, epoch_total = 0, 0
    
    model.eval()
    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            y = y.to(device)
            
            y_pred, _ = model(x)
            
            loss = loss_fn(y_pred, y)
            epoch_loss.append(loss.item())

            y_pred = torch.round(y_pred)
            epoch_correct += sum((y == y_pred).flatten())
            epoch_total += y.numel()
    
    return np.mean(epoch_loss), accuracy(epoch_correct, epoch_total)

In [9]:
def run_training(train_dataloader, val_dataloader, model, optimizer, loss_fn, num_epochs, scheduler=None, device=None, schedule_on_train=True, verbose=True):
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    
    for epoch in range(num_epochs):
        epoch_train_loss, epoch_train_acc = train(train_dataloader, model, optimizer, loss_fn, scheduler, device)
        
        train_losses.append(epoch_train_loss)
        train_accs.append(epoch_train_acc)
        
        if val_dataloader is not None:
            epoch_val_loss, epoch_val_acc = validate(val_dataloader, model, loss_fn, device)
        
            val_losses.append(epoch_val_loss)
            val_accs.append(epoch_val_acc)
        
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(epoch_train_acc if schedule_on_train or val_dataloader is None else epoch_val_acc)
            
        if epoch % 1000 == 0:
            val_str = f", val loss: {epoch_val_loss}, val acc: {epoch_val_acc}" if val_dataloader is not None else ""
            print(f"Epoch {epoch}, train loss: {epoch_train_loss}, train acc: {epoch_train_acc}{val_str}")
            
    return train_losses, train_accs, val_losses, val_accs

In [27]:
class GRUCell_net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob=0.2, bidirectional=False):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        # Create a list of GRUCell layers
        self.gru_cells = nn.ModuleList()
        for _ in range(n_layers):
            self.gru_cells.append(nn.GRUCell(input_dim, hidden_dim))
            input_dim = hidden_dim  # Update input_dim for subsequent layers
        
        # Other layers and initialization code
        
    def forward(self, x, h):
        h_states = []  # To store hidden states for each layer
        
        for layer_idx in range(self.n_layers):
            h = self.gru_cells[layer_idx](x, h)
            h_states.append(h)
            x = h  # Update x with the new hidden state
        
        out = self.fc(self.sotfplus(h))  # Use the last hidden state for the output
        out = F.softmax(out, dim=1)
        return out, h_states
    
    def init_hidden(self, batch_size):
        if batch_size > 1:
            weight = next(self.parameters()).data
            hidden = [weight.new(batch_size, self.hidden_dim).zero_().to(device) for _ in range(self.n_layers)]
        else:
            weight = next(self.parameters()).data
            hidden = [weight.new(self.hidden_dim).zero_().to(device) for _ in range(self.n_layers)]
        return hidden

In [31]:
batch_size=X.shape[0]
pos_weight = torch.tensor([1.5])
criterion = nn.BCEWithLogitsLoss()#weight=pos_weight)
#criterion = nn.BCELoss(weight=pos_weight)
lr = 0.1
model, losses, gate_values_list, hidden_state_list= train_gru(fullset_dataloader, lr, model_type="GRU",hidden_dim=24, epochs=1000, criterion = criterion, input_dim=54
                                                                    ,n_layers = 1)

NameError: name 'GRUNet' is not defined