# Ensemble Classifier

The goal of this is to take the optimized models from all previous experiments and see if they can be combined into something that produces a higher accuracy (ROC)

In [92]:
import numpy as np

from sklearn.metrics import roc_auc_score

from joblib import load

## Utilities

In [2]:
def process_image_arr_sklearn(images):
    return images.reshape(len(images), -1)

In [3]:
def roc(model, X, y):
    y_score = model.predict_proba(X)[:, 1]
    return roc_auc_score(y, y_score)

## Load Data

In [36]:
breast = np.load("../../data/breastmnist.npz")
pneumonia = np.load("../../data/pneumoniamnist.npz")

In [39]:
breast_trainX = breast["train_images"]
breast_trainY = breast["train_labels"].flatten()

breast_valX = breast["val_images"]
breast_valY = breast["val_labels"].flatten()

breast_testX = breast["test_images"]
breast_testY = breast["test_labels"].flatten()

In [37]:
breast_trainX_sk = process_image_arr_sklearn(breast["train_images"])
breast_trainY_sk = breast["train_labels"].flatten()

breast_valX_sk = process_image_arr_sklearn(breast["val_images"])
breast_valY_sk = breast["val_labels"].flatten()

breast_testX_sk = process_image_arr_sklearn(breast["test_images"])
breast_testY_sk = breast["test_labels"].flatten()

In [40]:
pneumonia_trainX = pneumonia["train_images"]
pneumonia_trainY = pneumonia["train_labels"].flatten()

pneumonia_valX = pneumonia["val_images"]
pneumonia_valY = pneumonia["val_labels"].flatten()

pneumonia_testX = pneumonia["test_images"]
pneumonia_testY = pneumonia["test_labels"].flatten()

In [38]:
pneumonia_trainX_sk = process_image_arr_sklearn(pneumonia["train_images"])
pneumonia_trainY_sk = pneumonia["train_labels"].flatten()

pneumonia_valX_sk = process_image_arr_sklearn(pneumonia["val_images"])
pneumonia_valY_sk = pneumonia["val_labels"].flatten()

pneumonia_testX_sk = process_image_arr_sklearn(pneumonia["test_images"])
pneumonia_testY_sk = pneumonia["test_labels"].flatten()

## Load Models

Load the `sklearn` models

In [8]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [10]:
sklearn_models = load("Other Models/sklearn_models.joblib")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Load the Resnets

In [45]:
import torch
from torchvision import transforms

In [46]:
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5])
])

In [19]:
resnet_pneumonia = torch.load("Resnet/resnet_pneumonia.pt", map_location=torch.device('cpu'))
resnet_breast = torch.load("Resnet/resnet_breast.pt", map_location=torch.device('cpu'))

In [23]:
resnet_breast.eval()
resnet_pneumonia.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [53]:
input_tensor = data_transform(breast_trainX[0])
input_batch = input_tensor.unsqueeze(0)

with torch.no_grad():
    output = torch.sigmoid(resnet_breast(input_batch))
print(output)

tensor([[0.9863]])


## Combine Models Together

### Breast Data

In [77]:
sklearn_models

{'decision_tree': {'breast': DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=3),
  'pneumonia': DecisionTreeClassifier(min_samples_leaf=10, min_samples_split=3)},
 'knn': {'breast': KNeighborsClassifier(metric='correlation', n_neighbors=1, weights='distance'),
  'pneumonia': KNeighborsClassifier(metric='correlation', n_neighbors=1, weights='distance')},
 'logistic': {'breast': LogisticRegression(C=100000, penalty='l1', solver='liblinear'),
  'pneumonia': LogisticRegression(C=100000, penalty='l1', solver='liblinear')},
 'svc': {'breast': SVC(C=10, probability=True),
  'pneumonia': SVC(C=10, probability=True)}}

In [90]:
probs = []
for idx in range(len(breast_testX)):
    prob = []
    for model in sklearn_models:
        prob.append(sklearn_models[model]["breast"].predict_proba(np.array([breast_testX_sk[idx]]))[:, 1][0])
        
    input_tensor = data_transform(breast_trainX[0]).unsqueeze(0)
    
    with torch.no_grad():
        output = torch.sigmoid(resnet_breast(input_tensor))
    prob.append(float(output[0][0]))

    probs.append(sum(prob)/len(prob))

In [93]:
roc_auc_score(breast_testY_sk, probs)

0.8995405179615706

### Pneumonia Data

In [96]:
probs = []
for idx in range(len(pneumonia_testX)):
    prob = []
    for model in sklearn_models:
        prob.append(sklearn_models[model]["pneumonia"].predict_proba(np.array([pneumonia_testX_sk[idx]]))[:, 1][0])
        
    input_tensor = data_transform(pneumonia_trainX[0]).unsqueeze(0)
    
    with torch.no_grad():
        output = torch.sigmoid(resnet_pneumonia(input_tensor))
    prob.append(float(output[0][0]))

    probs.append(sum(prob)/len(prob))

In [97]:
roc_auc_score(pneumonia_testY_sk, probs)

0.9375410913872453