# Import Libraries

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, cv2,glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,fbeta_score,confusion_matrix,plot_confusion_matrix
from sklearn.dummy import DummyClassifier

# Baseline Model

### Load data

In [24]:
data = pd.read_csv('../data/data_info.csv')
data

Unnamed: 0,cbb,cbsd,cgm,cmd,healthy,total
0,466,1443,773,2658,316,5656


From our dataset the probability of getting CMD(3) is **46.99%**,that of CBB(0) **8.24%**, CBSD(1) **25.51%**, CGM(2) **13.67%** and a Healthy(4) one **5.59%**. Our baseline model is a probability function where its prediction is based on a label with the highest probability rate which is CMD. However, due to the imbalanced nature of our dataset accuracy can be a misleading metric in our modeling.

In the following estimation of the F_beta-score we assigned the "average" parameter to "macro" since our dataset is imbalanced. This will put more emphasis on the false negative or class II error. See below for the explanation of the values of the parameter:

**'binary':**
Only report results for the class specified by pos_label. This is applicable only if targets (y_{true,pred}) are binary.

**'micro':**
Calculate metrics globally by counting the total true positives, false negatives and false positives.

**'macro':**
Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.

**'weighted':**
Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall.

**'samples':**
Calculate metrics for each instance, and find their average (only meaningful for multi-label classification where this differs from accuracy_score).

In the case of weighted average the performance metrics are weighted accordingly:

$score_{weighted\text{-}avg} = 0.0824 \cdot score_{class\text{ }0} + 0.0.2551 \cdot score_{class\text{ }1} + 0.1367 \cdot score_{class\text{ }2} + ...$

Which will give us a higher F2 score due the class imbalance.

However, macro avg is not weighted and therefore:

$score_{macro\text{-}avg} = 0.5 \cdot score_{class\text{ }0} + 0.5 \cdot score_{class\text{ }1} + 0.5 \cdot score_{class\text{ }2} + ...$

### Create a dictionary of image labels and their relative path

In [14]:
path = '../data/train/'

In [15]:
images_dict = {
                0: [i for i in glob.glob(os.path.join(path,'cbb/*.jpg'))],
                1: [i for i in glob.glob(os.path.join(path,'cbsd/*.jpg'))],
                2: [i for i in glob.glob(os.path.join(path,'cgm/*.jpg'))],
                3: [i for i in glob.glob(os.path.join(path,'cmd/*.jpg'))],
                4: [i for i in glob.glob(os.path.join(path,'healthy/*.jpg'))]
                }

## Preprocess data: create vector from raw image file and resize it

In [16]:
# create a 3D tensor from images
X, y = [], []

for labels, images in images_dict.items():
    for image in images:
        img = cv2.imread(''.join(image))
        resized_img = cv2.resize(img, dsize=(224,224))
        X.append(resized_img)
        y.append(labels)

In [17]:
# create an numpy_array
X=np.array(X)
y=np.array(y)

In [18]:
# rescale X
X = X.astype(np.float32)/ 255.
y = y.astype(np.int32)

## Data segregation into train and test sets

In [None]:
# split into train and test sets

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In supervised learning, a simple sanity check consists of comparing one's estimator against simple rules of thumb. The target will be to beat the classifier that makes predictions using simple rules. DummyClassifier implements several such simple strategies for classification:

- **stratified** generates random predictions by respecting the training set class distribution.

- **most_frequent** always predicts the most frequent label in the training set.

- **prior** always predicts the class that maximizes the class prior (like most_frequent) and predict_proba returns the class prior.

- **uniform** generates predictions uniformly at random.


In [None]:
dummy_clf = DummyClassifier(strategy='most_frequent',random_state=42)
dummy_clf.fit(X_train,y_train)

DummyClassifier(random_state=42, strategy='most_frequent')

In [None]:
y_pred = dummy_clf.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
beta_score_recall = fbeta_score(y_test, y_pred, average='macro', beta=2)
print(f'baseline accuracy : {accuracy.round(2)}')
print(f'F_beta_score_recall: {beta_score_recall.round(2)}')


baseline accuracy : 0.46
F_beta_score_recall: 0.16


In [None]:
plot_confusion_matrix(dummy_clf,X_test,y_test)
plt.show()

In [None]:
cm = confusion_matrix(y_test,y_pred)
cm

array([[  0,   0,   0, 108,   0],
       [  0,   0,   0, 302,   0],
       [  0,   0,   0, 147,   0],
       [  0,   0,   0, 520,   0],
       [  0,   0,   0,  55,   0]])

### Alternative method to predict the probability of the majority class

In [None]:
y_pred = [3] * len(data.label)
score = accuracy_score(data.label, y_pred)
beta_score_recall = fbeta_score(data.label , y_pred, average='macro', beta=2)
print(f'baseline accuracy : {score.round(2)}')
print(f'F_beta_score_recall: {beta_score_recall.round(2)}')

baseline accuracy : 0.47
F_beta_score_recall: 0.16


## Binary classification

In a binary classification task, the terms "positive" and "negative" refer to the classifier's prediction, and the terms "true" and "false" refer to whether that prediction corresponds to the external judgment (sometimes known as the "observation").

In this context, we can define the notions of precision, recall and F-measure:

$\text{precision} = \frac{tp}{tp + fp}$,

$\text{recall} = \frac{tp}{tp + fn}$,

$F_\beta = (1 + \beta^2) \frac{\text{precision} \times \text{recall}}{\beta^2 \text{precision} + \text{recall}}$

##### For beta ($\beta$) = 2 and **stratified** strategy for the dummy classifier

In [None]:
dummy_clf = DummyClassifier(strategy='stratified',random_state=42)
dummy_clf.fit(X_train,y_train)

DummyClassifier(random_state=42, strategy='stratified')

In [None]:
y_pred = dummy_clf.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
beta_score_recall = fbeta_score(y_test, y_pred, average='macro', beta=2)
print(f'baseline accuracy : {accuracy.round(2)}')
print(f'F_beta_score_recall: {beta_score_recall.round(2)}')


baseline accuracy : 0.3
F_beta_score_recall: 0.19


In [None]:
plot_confusion_matrix(dummy_clf,X_test,y_test)
plt.show()

In [None]:
cm = confusion_matrix(y_test,y_pred)
cm

array([[  5,  25,  19,  51,   8],
       [ 28,  65,  35, 153,  21],
       [ 11,  37,  17,  74,   8],
       [ 43, 123,  73, 248,  33],
       [  4,  16,   9,  21,   5]])

In [None]:
cm[:4,:4]

array([[  5,  25,  19,  51],
       [ 28,  65,  35, 153],
       [ 11,  37,  17,  74],
       [ 43, 123,  73, 248]])

In [None]:
print(f' True Negative for the healthy class = {np.sum(cm[:4,:4])}')

 True Negative for the healthy class = 1007


In [None]:
cm[:4,4]

array([ 8, 21,  8, 33])

In [None]:
print(f' False positive for the healthy class = {np.sum(cm[:4,4])}')

 False positive for the healthy class = 70


In [None]:
cm[4,:4]

array([ 4, 16,  9, 21])

In [None]:
print(f' False negative for the healthy class = {np.sum(cm[4,:4])}')

 False negative for the healthy class = 50


In [None]:
print(f' True positive for the healthy class = {np.sum(cm[4,4])}')

 True positive for the healthy class = 5


In [None]:
precision = 5/(5+70)
print(f'Classification Precision: {round(precision,3)}')

Classification Precision: 0.067


In [None]:
recall = 5/(5+50)
print(f'Classification Sensitivity: {round(recall,3)}')


Classification Sensitivity: 0.091


In [None]:
F2 = (1+pow(2,2))*(precision*recall/(pow(2,2)*precision+recall))
print(f'F2-score for the healthy class: {round(F2,3)}')

F2-score for the healthy class: 0.085
