# A Demo for C-Net and D-Model
Before you run the Jupyter Notebook, please ensure to add shortcuts for these folders in MyDrive:
* [model_mesh](https://drive.google.com/drive/u/0/folders/1BiORybaqPGFMQW5dG9YM97lQSH5l4E2a)
* [mesh](https://drive.google.com/drive/u/0/folders/1rwYZBcR_--kweIByGDeSh2IEJiBN-A5i)

In [1]:
# @title
from google.colab import drive
drive.mount('/content/drive')
#root folder with the cloned github repo
DATA_DIR='/content/drive/MyDrive/mesh'
DATA_DIR1='/content/drive/MyDrive/BIR_Workshop/model_mesh'
from google.colab import drive
import os
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, f1_score, accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
import seaborn as sns
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
import torchtext
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.optim as optim
import torch.nn.functional as F


device = torch.device('cuda' if True and torch.cuda.is_available() else 'cpu')

#https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html
from sklearn.svm import LinearSVC


NUM_CLASSES=195
BATCH_SIZE=32

#change to working dir where the repo was cloned
os.chdir(DATA_DIR)
os.listdir()

#load labels
labels = np.load('output/grouped_train_labels.npy')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Choose and run the model

In [3]:
# @title
import ipywidgets as widgets
from IPython.display import display

dropdown = widgets.Dropdown(
    options=['C-Net', 'D-Model'],
    value='D-Model',
    description='Model:',
    disabled=False,
)

display(dropdown)

Dropdown(description='Model:', index=1, options=('C-Net', 'D-Model'), value='D-Model')

In [7]:
# @title
#Choosing the test model
test_model = dropdown.value
print(test_model)
#Defining dataset
class MESHDataset(Dataset):
    def __init__(self,numpy_file,label_file):
      try:
        self.data= np.load(numpy_file)
        self.labels = np.load(label_file)
      except Exception as err:
        raise Exception(f'ERROR OPENING FILES: {numpy_file} | {label_file}. See Error below. \n {err}')

    def __len__(self):
        return self.data.shape[0]
    def __getitem__(self, idx):
        self.test_model = test_model
        #Get the element with `idx`
        #Output an 89*89 matrix
        if (self.test_model=="C-Net"):
          return np.expand_dims(self.data[idx], axis=0), self.labels[idx]
          print(test_model)
        if (self.test_model=="D-Model"):
          return self.data[idx].flatten(), self.labels[idx]
          print(test_model)
test_dataset = MESHDataset(os.path.join(DATA_DIR,'output/test.npy'),os.path.join(DATA_DIR,'output/test_labels.npy'))
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

#Dense Model
class BaselineModel(nn.Module):

    def __init__(self, matrix_size=89):
        super(BaselineModel, self).__init__()
        self.linear1 = nn.Linear(matrix_size**2, (matrix_size**2)//2)
        self.linear2 = nn.Linear((matrix_size**2)//2, matrix_size**2//4)
        self.linear3 = nn.Linear((matrix_size**2)//4, NUM_CLASSES)

    def forward(self, x):

        x = self.linear1(x)
        x = F.relu(x)

        x = self.linear2(x)
        x = F.relu(x)

        x = self.linear3(x)
        return x

#Convolutional Neural Network
class InceptionModel(nn.Module):

    def __init__(self, matrix_size=89):
      #Idea
      """
      ConvNet -> ConvNet -> Flatten ->  Dense => Class
      """
      super(InceptionModel, self).__init__()
      self.layer1 = nn.Sequential(
          nn.Conv2d(1, 128, kernel_size=3, stride=1),
          nn.BatchNorm2d(128),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2),
          nn.Conv2d(128,64, kernel_size=3, stride=1),
          nn.BatchNorm2d(64),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2),
          nn.Conv2d(64,32, kernel_size=3, stride=1),
          nn.BatchNorm2d(32),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2)
          )
      self.layer2 = nn.Sequential(
          nn.Conv2d(32, 16, kernel_size=3, stride=1),
          nn.BatchNorm2d(16),
          nn.ReLU(),
          nn.MaxPool2d(kernel_size=2, stride=2)
        )

      self.drop_out = nn.Dropout()
      self.fc = nn.Sequential(
          #nn.Linear(12800, 5000),
          #nn.ReLU(),
          #nn.Linear(5000, 1000),
          #nn.ReLU(),
          nn.Linear(2592,1024),
          nn.ReLU(),
          nn.Linear(1024,500),
          nn.ReLU(),
          nn.Linear(500,NUM_CLASSES)
      )
    def forward(self, x):

        #Expecting `x` to be an 89*89 matrix.
        #print(f'X original shape: {x.shape}')
        x=self.layer1(x)
        #x = self.layer2(x)
        x = x.reshape(x.size(0), -1)
        #print(f'Flattened X has shape: {x.shape}')
        x=self.drop_out(x)
        x=self.fc(x)
        #out = F.log_softmax(x) #Don't use softmax here since we use torch.nn.CrossEntropyLoss.
        return x

#Loading model
if test_model=="D-Model":
  PATH=os.path.join(DATA_DIR1,'best_model_1_old')
  model = BaselineModel().to(device)
if test_model=="C-Net":
  PATH=os.path.join(DATA_DIR,'output/best_model_cnn_best_128')
  model = InceptionModel().to(device)
model.load_state_dict(torch.load(PATH, map_location='cpu'))
print(model)

D-Model
BaselineModel(
  (linear1): Linear(in_features=7921, out_features=3960, bias=True)
  (linear2): Linear(in_features=3960, out_features=1980, bias=True)
  (linear3): Linear(in_features=1980, out_features=195, bias=True)
)


Getting Accuracy and F1-Score

In [8]:
# @title
from sklearn.metrics import accuracy_score
def compute_accuracy(pred, target):
  return target.detach().numpy(), pred.argmax(-1).detach().numpy()
from sklearn.metrics import f1_score
model.eval()

labels, preds = [], []
for test_features, test_label in test_dataloader:
  # print(test_features.size())
  test_features, test_label = test_features.to(device), test_label.to(device)
  test_pred = model(test_features.float())
  true, predictions = compute_accuracy(test_pred.cpu(),test_label.cpu())
  labels += true.tolist()
  preds += predictions.tolist()
print(f'Best Accuracy with 195 classes:{accuracy_score(labels, preds)}')
print("Best Weighted F1-Score with 195 classes: {}".format(f1_score(labels, preds, average='weighted')))

Best Accuracy with 195 classes:0.7077684527652249
Best Weighted F1-Score with 195 classes: 0.6689272084790814


Getting the recall for the classification of the most common relation types

In [9]:
# @title
#Getting the confusion matrix
confusion= confusion_matrix(labels, preds)
#Getting recall for all the classes
accurate = []
sum_list = []
for i in range(len(confusion)):
  accurate.append(confusion[i][i]/np.sum(confusion[i]))
  sum_list.append(np.sum(confusion[i]))
class_names = pd.read_csv("https://raw.githubusercontent.com/SisonkeBiotik-Africa/MeSH2Matrix/main/output/label_encoded.csv")
class_names = class_names.iloc[list(set(labels + preds))]
class_names["precision"] = accurate
class_names["sum"] = sum_list
class_names = class_names.sort_values(by=["sum"], ascending=False)
class_names.head(20)



  accurate.append(confusion[i][i]/np.sum(confusion[i]))


Unnamed: 0,label,encoding,precision,sum
129,P279,130,0.818119,1457
150,P530,151,0.994749,1333
41,P2868,42,0.829563,663
62,P31,63,0.453202,609
19,P2175,20,0.981707,492
190,P527,191,0.547131,488
39,P681,40,0.869955,446
175,P2176,176,0.982051,390
114,P1995,115,0.907514,346
144,P47,145,0.0,329
