## Task A: Binary Classification

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
! pip install medmnist

#### 0. Packages

In [1]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
import medmnist
from medmnist import INFO,PneumoniaMNIST
from medmnist.evaluator import Evaluator

In [2]:
data_flag = 'pneumoniamnist'

info = INFO[data_flag]
task = info['task']
n_channels = info['n_channels']
n_classes = len(info['label'])
print("Task: {},\nn_channels: {},\nn_classes: {}".format(task,n_channels,n_classes))

Task: binary-class,
n_channels: 1,
n_classes: 2


In [9]:
# preprocessing
data_transform = transforms.Compose([
    #transforms.Resize(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=(-20,20)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5])
])

In [10]:
root ='/content/drive/MyDrive/Datasets/'
train_dataset = PneumoniaMNIST(split='train', transform=data_transform, root=root)
val_dataset = PneumoniaMNIST(split='val', transform=data_transform, root=root)
test_dataset = PneumoniaMNIST(split='test', transform=data_transform, root=root)

In [None]:
from torchvision.models import resnet50

feature_extractor_1 = resnet50(pretrained=True)



In [11]:
from drive.MyDrive.A.classifier import get_data

In [12]:
X_train,Y_train = get_data(train_dataset)
X_test,Y_test = get_data(test_dataset)
X_val,Y_val = get_data(val_dataset)

4708it [00:03, 1325.16it/s]
624it [00:00, 1190.31it/s]
524it [00:00, 1894.55it/s]


In [13]:
X_train[0].shape

torch.Size([3, 28, 28])

In [11]:
from torchvision.models import resnet18

feature_extractor_0 = resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 100MB/s]


In [None]:
from torchvision.models import resnet50

feature_extractor_1 = resnet50(pretrained=True)

In [8]:
from torchvision.models import resnet152

feature_extractor_2 = resnet152(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:05<00:00, 47.6MB/s]


In [None]:
from torchvision.models import inception_v3

feature_extractor_3 = inception_v3(pretrained=True)
feature_extractor_3

In [13]:
from drive.MyDrive.A.classifier import Train_classifier

In [15]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [14]:
import pandas as pd
import numpy as np
import pickle as pkl
from tqdm import tqdm
import torch
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

# ==================== Pretrained Models + SVM/KNN/DecisionTree Classifier ====================
def Train_classifier(feature_extractor,clf,device,X_train,Y_train,X_test,Y_test):
    # Use Pretrained models to Extract features
    feature_extractor = feature_extractor.to(device)
    X_train = X_train.to(device)
    features = feature_extractor(X_train)

    # Define Classifier
    if clf == "SVM":
        classifier = svm.LinearSVC()
    elif clf == "KNN":
        classifier = KNeighborsClassifier(n_neighbors=3)
    elif clf == "Tree":
        classifier = DecisionTreeClassifier()

    features = features.detach().cpu().numpy()
    Y_train = Y_train.detach().cpu().numpy()

    # Training
    print("----------------- Training {} Classifier -----------------".format(clf))
    classifier.fit(features,Y_train)

    # Testing
    X_test = X_test.to(device)
    feature_test = feature_extractor(X_test)
    feature_test = feature_test.detach().cpu().numpy()
    result = classifier.predict(feature_test)
    Y_test = Y_test.detach().cpu().numpy()
    # Accuracy,Precision,Recall,F1_score
    confusion_m = metrics.confusion_matrix(Y_test,result)
    acc = metrics.accuracy_score(Y_test,result)
    print("Accuracy: ",acc)
    pre = metrics.precision_score(Y_test,result)
    print("Precision: ",pre)
    recall = metrics.recall_score(Y_test,result)
    print("Recall: ",recall)
    f1 = metrics.f1_score(Y_test,result)
    print("F1 score: ",f1)
    auc = metrics.accuracy_score(Y_test,result)
    print("AUC: ",auc)
    return classifier

In [14]:
# Use ResNet18 As feature_extractor
classifier_svm_res18=Train_classifier(feature_extractor_0,"SVM",device,X_train,Y_train,X_test,Y_test)
classifier_knn_res18=Train_classifier(feature_extractor_0,"KNN",device,X_train,Y_train,X_test,Y_test)
classifier_tree_res18=Train_classifier(feature_extractor_0,"Tree",device,X_train,Y_train,X_test,Y_test)

  y = column_or_1d(y, warn=True)


Accuracy:  0.7371794871794872
Precision:  0.7132075471698113
Recall:  0.9692307692307692
F1 score:  0.8217391304347825
AUC:  0.7371794871794872


  return self._fit(X, y)


Accuracy:  0.7307692307692307
Precision:  0.7126436781609196
Recall:  0.9538461538461539
F1 score:  0.8157894736842106
AUC:  0.7307692307692307
Accuracy:  0.6891025641025641
Precision:  0.723744292237443
Recall:  0.8128205128205128
F1 score:  0.7657004830917876
AUC:  0.6891025641025641


In [44]:
# Use ResNet50 As feature_extractor
classifier_svm2=Train_classifier(feature_extractor_1,"SVM",device,X_train,Y_train,X_test,Y_test)

----------------- Training SVM Classifier -----------------


  y = column_or_1d(y, warn=True)


Accuracy:  0.7339743589743589
Precision:  0.7424242424242424
Recall:  0.8794871794871795
F1 score:  0.8051643192488263
AUC:  0.7339743589743589




In [39]:
classifier_knn=Train_classifier(feature_extractor_1,"KNN",device,X_train,Y_train,X_test,Y_test)

----------------- Training KNN Classifier -----------------


  return self._fit(X, y)


Accuracy:  0.6778846153846154
Precision:  0.6753246753246753
Recall:  0.9333333333333333
F1 score:  0.7836383207750269
AUC:  0.6778846153846154


In [40]:
classifier_tree=Train_classifier(feature_extractor_1,"Tree",device,X_train,Y_train,X_test,Y_test)

----------------- Training Tree Classifier -----------------
Accuracy:  0.6137820512820513
Precision:  0.6674157303370787
Recall:  0.7615384615384615
F1 score:  0.711377245508982
AUC:  0.6137820512820513
