In [None]:
# Make Google Drive available inside the Colab runtime
from google.colab import drive

drive.mount('/content/drive')

# Folder on Drive that holds the MeSH data files
DATA_DIR = '/content/drive/MyDrive/mesh'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
#https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html
from sklearn.svm import LinearSVC
from torch.utils.data import Dataset


# Size of the label set in the full (ungrouped) classification task
NUM_CLASSES = 195
# Mini-batch size; not referenced by the cells visible in this notebook
BATCH_SIZE = 32

MeSH dataset class: loads a sample array and its matching label array from `.npy` files.

In [None]:
class MESHDataset(Dataset):
    """Samples and labels loaded eagerly from a pair of ``.npy`` files.

    Both arrays are read fully into memory at construction time. Items can be
    fetched one at a time through ``len()`` / indexing, or all at once with
    every sample flattened to a row via :meth:`get`.
    """

    def __init__(self, numpy_file, label_file):
        """Load the sample and label arrays.

        Args:
            numpy_file: Path of the ``.npy`` file holding the samples; the
                first axis indexes samples.
            label_file: Path of the ``.npy`` file holding one label per sample.

        Raises:
            Exception: If either file cannot be loaded. The underlying error
                is included in the message and chained as ``__cause__``.
            ValueError: If the two arrays hold a different number of entries.
        """
        try:
            self.data = np.load(numpy_file)
            self.labels = np.load(label_file)
        except Exception as err:
            raise Exception(f'ERROR OPENING FILES: {numpy_file} | {label_file}. See Error below. \n {err}') from err

        # Misaligned files would silently pair samples with the wrong labels,
        # so fail at load time instead of deep inside a later fit/score call.
        if self.data.shape[:1] != self.labels.shape[:1]:
            raise ValueError(
                f'Sample/label count mismatch: {numpy_file} has shape {self.data.shape}, '
                f'{label_file} has shape {self.labels.shape}'
            )

    def __len__(self):
        """Return the number of samples (length of the first axis of the data)."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(sample, label)`` for position ``idx``.

        A leading axis of length 1 is prepended to the sample, so e.g. an
        89x89 matrix comes back with shape ``(1, 89, 89)``.
        """
        return np.expand_dims(self.data[idx], axis=0), self.labels[idx]

    def get(self):
        """Return ``(X, y)`` with every sample flattened into one row.

        Returns:
            A tuple ``(X, y)`` where ``X`` has shape
            ``(n_samples, n_features)`` and ``y`` is the label array as loaded.
        """
        n_samples = self.data.shape[0]
        # Spell out the feature count instead of passing -1: NumPy cannot
        # infer a -1 dimension when the array is empty (n_samples == 0).
        n_features = int(np.prod(self.data.shape[1:]))
        return self.data.reshape(n_samples, n_features), self.labels
      

In [None]:
# Load each split; .get() flattens every sample into a single feature row
X_train, y_train = MESHDataset(
    os.path.join(DATA_DIR, 'output/train.npy'),
    os.path.join(DATA_DIR, 'output/train_labels.npy'),
).get()
X_valid, y_valid = MESHDataset(
    os.path.join(DATA_DIR, 'output/dev.npy'),
    os.path.join(DATA_DIR, 'output/dev_labels.npy'),
).get()
X_test, y_test = MESHDataset(
    os.path.join(DATA_DIR, 'output/test.npy'),
    os.path.join(DATA_DIR, 'output/test_labels.npy'),
).get()

# Train on train + dev together; only the test split stays held out
X_train = np.concatenate((X_train, X_valid), axis=0)
y_train = np.concatenate((y_train, y_valid), axis=0)

In [None]:
# Sanity check: feature-matrix and label-vector shapes of the training data
print(X_train.shape,y_train.shape)

(46469, 7921) (46469,)


LinearSVC: scikit-learn's liblinear-based linear SVM, which scales better to large datasets than the kernel-based `SVC`.

In [None]:
# Single-step pipeline: a linear SVM fitted directly on the unscaled features
clf = make_pipeline(
    LinearSVC(
        random_state=0,
        tol=1e-5,
        max_iter=10_000,
    )
)
clf.fit(X_train, y_train)

Pipeline(steps=[('linearsvc',
                 LinearSVC(max_iter=10000, random_state=0, tol=1e-05))])

Test-set accuracy without a `StandardScaler` step (the pipeline contains only the `LinearSVC`).

In [None]:
# Mean accuracy of the fitted pipeline on the test split
clf.score(X_test,y_test)

0.6642995480955455

In [None]:
# Predicted label for every test sample
y_pred=clf.predict(X_test)

In [None]:
# Display the predictions (NumPy abbreviates long arrays)
y_pred

array([134, 129, 114, ...,  62, 129, 148])

In [None]:
# Accuracy over the full label set
accuracy_195 = accuracy_score(y_test.tolist(), y_pred.tolist())
print(f'Accuracy with 195 classes:{accuracy_195}')

# F1 averaged over classes, weighted by each class's support
weighted_f1_195 = f1_score(y_test.tolist(), y_pred.tolist(), average='weighted')
print("Weighted F1-Score with 195 classes: {}".format(weighted_f1_195))

Accuracy with 195 classes:0.6642995480955455
Weighted F1-Score with 195 classes: 0.6126346908099434


# Working with 5 classes

In [None]:
# Same feature files as before, paired with the grouped label files
X_train5, y_train5 = MESHDataset(
    os.path.join(DATA_DIR, 'output/train.npy'),
    os.path.join(DATA_DIR, 'output/grouped_train_labels.npy'),
).get()
X_valid5, y_valid5 = MESHDataset(
    os.path.join(DATA_DIR, 'output/dev.npy'),
    os.path.join(DATA_DIR, 'output/grouped_dev_labels.npy'),
).get()
X_test5, y_test5 = MESHDataset(
    os.path.join(DATA_DIR, 'output/test.npy'),
    os.path.join(DATA_DIR, 'output/grouped_test_labels.npy'),
).get()

# Train on train + dev together; only the test split stays held out
X_train5 = np.concatenate((X_train5, X_valid5), axis=0)
y_train5 = np.concatenate((y_train5, y_valid5), axis=0)

In [None]:
# Distinct label values present in the grouped test labels
np.unique(y_test5)

array([0, 1, 2, 3, 4])

In [None]:
# Same single-step linear SVM pipeline, fitted on the grouped labels
clf5 = make_pipeline(
    LinearSVC(
        random_state=0,
        tol=1e-5,
        max_iter=10_000,
    )
)
clf5.fit(X_train5, y_train5)

Pipeline(steps=[('linearsvc',
                 LinearSVC(max_iter=10000, random_state=0, tol=1e-05))])

Score the 5-class model on the test set.

In [None]:
# Predicted grouped label for every test sample
y_pred5=clf5.predict(X_test5)

In [None]:
# Display the predictions (NumPy abbreviates long arrays)
y_pred5

array([3, 0, 3, ..., 0, 0, 0])

In [None]:

# Accuracy over the grouped label set
accuracy_5 = accuracy_score(y_test5.tolist(), y_pred5.tolist())
print(f'Accuracy with 5 classes:{accuracy_5}')

# F1 averaged over classes, weighted by each class's support
weighted_f1_5 = f1_score(y_test5.tolist(), y_pred5.tolist(), average='weighted')
print("Weighted F1-Score with 5 classes: {}".format(weighted_f1_5))

Accuracy with 5 classes:0.7873897137938455
Weighted F1-Score with 5 classes: 0.7862985198931911


In [None]:
# Mean accuracy of the fitted 5-class pipeline on the test split
clf5.score(X_test5,y_test5)

0.7873897137938455