In [24]:
import sys
import setproctitle
sys.path.append('../src')

import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from torch.optim import SGD

from Models import MLP
from dataset import load_data, get_dataset
from Trainer import MLPTrainer
from sklearn import metrics
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt


In [2]:
# Load the saved model state
model = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model.load_state_dict(torch.load('../trained_models/base_mlp_model_49_64_64_7.pth'))
model.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [3]:
_, _, test_loader = load_data(batch_size=64)

In [4]:
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

y_preds_tensor = torch.tensor(y_preds)
y_trues_tensor = torch.tensor(y_trues)
y_trues_tensor = torch.flatten(y_trues_tensor)

In [9]:
cr = metrics.classification_report(y_trues, y_preds, output_dict=True, zero_division=0.0)
cr

{'0': {'precision': 0.9988852549181374,
  'recall': 0.9992606511673606,
  'f1-score': 0.9990729177794723,
  'support': 407115.0},
 '1': {'precision': 1.0,
  'recall': 0.31155778894472363,
  'f1-score': 0.47509578544061304,
  'support': 398.0},
 '2': {'precision': 0.9934328358208955,
  'recall': 0.9952153110047847,
  'f1-score': 0.9943232745742456,
  'support': 1672.0},
 '3': {'precision': 0.9985804317983277,
  'recall': 0.9979888372673131,
  'f1-score': 0.9982845468864526,
  'support': 64142.0},
 '4': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 8.0},
 '5': {'precision': 0.9824029126213593,
  'recall': 0.9988542217521593,
  'f1-score': 0.9905602657110393,
  'support': 11346.0},
 '6': {'precision': 0.9771689497716894,
  'recall': 0.9224137931034483,
  'f1-score': 0.9490022172949002,
  'support': 464.0},
 'accuracy': 0.9984149068835091,
 'macro avg': {'precision': 0.8500671978472013,
  'recall': 0.7464700861771129,
  'f1-score': 0.7723341439552461,
  'support': 485145.0}

In [11]:
cr.keys()

dict_keys(['0', '1', '2', '3', '4', '5', '6', 'accuracy', 'macro avg', 'weighted avg'])

In [12]:
cr['0']

{'precision': 0.9988852549181374,
 'recall': 0.9992606511673606,
 'f1-score': 0.9990729177794723,
 'support': 407115.0}

In [13]:
cr['1']

{'precision': 1.0,
 'recall': 0.31155778894472363,
 'f1-score': 0.47509578544061304,
 'support': 398.0}

In [14]:
cr['2']

{'precision': 0.9934328358208955,
 'recall': 0.9952153110047847,
 'f1-score': 0.9943232745742456,
 'support': 1672.0}

In [15]:
cr['3']

{'precision': 0.9985804317983277,
 'recall': 0.9979888372673131,
 'f1-score': 0.9982845468864526,
 'support': 64142.0}

In [16]:
cr['4'] # little support

{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 8.0}

In [17]:
cr['5'] 

{'precision': 0.9824029126213593,
 'recall': 0.9988542217521593,
 'f1-score': 0.9905602657110393,
 'support': 11346.0}

In [18]:
cr['6'] 

{'precision': 0.9771689497716894,
 'recall': 0.9224137931034483,
 'f1-score': 0.9490022172949002,
 'support': 464.0}

## Ensemble model

### Model 1

In [21]:
model_1 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model_1.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split1.pth'))
model_1.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [23]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model_1(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

In [28]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [29]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99897572, 0.32160804, 0.98863636, 0.99767703, 0.        ,
       0.9983254 , 0.85560345])

### Model 2

In [34]:
model_2 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model_2.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split2.pth'))
model_2.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [35]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model_2(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

In [36]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [37]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99917222, 0.27889447, 0.99342105, 0.99773939, 0.        ,
       0.99779658, 0.86853448])

### Model 3

In [38]:
model_3 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
model_3.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
model_3.eval()

MLP(
  (fc1): Linear(in_features=49, out_features=64, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=7, bias=True)
)

In [39]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = model_3(inputs)
    
    # get metrics
    y_preds += outputs.argmax(axis=1).tolist()
    y_trues += labels.tolist()

In [40]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [41]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99900274, 0.56532663, 0.99521531, 0.99767703, 0.        ,
       0.99753217, 0.86637931])

## Ensemble

In [47]:
def Emodel(inputs):
  model_1 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
  model_1.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
  model_1.eval()

  model_2 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
  model_2.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
  model_2.eval()

  model_3 = MLP(num_features=49, hidden1_size=64, hidden2_size=64, num_classes=7)
  model_3.load_state_dict(torch.load('../trained_models/ensemble_model/mlp_model_split3.pth'))
  model_3.eval()

  with torch.no_grad():
    y1 = torch.argmax(model_1(inputs), dim=1)
    y2 = torch.argmax(model_2(inputs), dim=1)
    y3 = torch.argmax(model_3(inputs), dim=1)

  stacked_tensors = torch.stack((y1, y2, y3))
  preds, _ = torch.mode(stacked_tensors, dim=0)
  
  return preds
    

In [49]:
# Check preformace 
y_preds = []
y_trues = []

with torch.no_grad():
  for inputs, labels in test_loader:
    outputs = Emodel(inputs)
    
    # get metrics
    y_preds += outputs.tolist()
    y_trues += labels.tolist()

In [50]:
val_recall_per_class = recall_score(y_trues, y_preds, average=None) 

In [51]:
val_recall_per_class # Class 1 & 4 had little support

array([0.99900274, 0.56532663, 0.99521531, 0.99767703, 0.        ,
       0.99753217, 0.86637931])