# **Importing and Extracting the dataset**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report
from keras.applications import InceptionResNetV2, VGG19, Xception

In [2]:
# Root folders of the resized/augmented APTOS images used for training and testing.
# Paths are relative to the notebook's working directory.
train_path, test_path = (
    r'aptos_augmented_images_resized/train',
    r'aptos_augmented_images_resized/test',
)

In [3]:
# 90 % of the images under `train_path` form the training split. The same
# seed and validation_split are used for both subsets of the training folder.
training_data = tf.keras.utils.image_dataset_from_directory(
    train_path,
    subset='training',
    validation_split=0.1,
    seed=42,
    image_size=(128, 128),
    interpolation='area',
    color_mode='rgb',
    batch_size=8,
)

# The remaining 10 % of `train_path` is the validation split.
validationData = tf.keras.utils.image_dataset_from_directory(
    train_path,
    subset='validation',
    validation_split=0.1,
    seed=42,
    image_size=(128, 128),
    interpolation='area',
    color_mode='rgb',
    batch_size=8,
)

# Test images are read without shuffling, in batches of 16.
testing_data = tf.keras.utils.image_dataset_from_directory(
    test_path,
    image_size=(128, 128),
    interpolation='area',
    batch_size=16,
    shuffle=False,
)

Found 8000 files belonging to 5 classes.
Using 7200 files for training.
Found 8000 files belonging to 5 classes.
Using 800 files for validation.
Found 2000 files belonging to 5 classes.


# Defining 3 Models


In [4]:
# ImageNet-pretrained convolutional backbones, loaded without their
# classification heads (include_top=False) for 128x128 RGB inputs.
# Each variable name now matches the architecture it holds: previously
# `xception` was bound to a VGG19 and `vgg` to an Xception.
inceptionResnet = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
xception = Xception(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
vgg = VGG19(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

# AdaBoost Implementation

### Compute Error Rate, Alpha and w

In [5]:
# Compute error rate, alpha and w
def compute_error(y, y_pred, w_i):
    '''
    Calculate the weighted error rate of a weak classifier m. Arguments:
    y: actual target value
    y_pred: predicted value by weak classifier
    w_i: individual weights for each observation

    Returns the sum of the weights of the misclassified observations divided
    by the sum of all weights (0.0 = every observation correct, 1.0 = every
    observation wrong).

    Raises ValueError if the three inputs do not have the same shape, so a
    mismatch cannot be hidden by NumPy broadcasting.
    '''
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    w_i = np.asarray(w_i, dtype=float)

    if not (y.shape == y_pred.shape == w_i.shape):
        raise ValueError(
            'y, y_pred and w_i must have the same shape, got '
            f'{y.shape}, {y_pred.shape} and {w_i.shape}'
        )

    misclassified = np.not_equal(y, y_pred)
    # Vectorised sums instead of the Python builtin sum() over array elements.
    return np.sum(w_i[misclassified]) / np.sum(w_i)

def compute_alpha(error, n_classes=2, eps=1e-10):
    '''
    Calculate the weight of a weak classifier m in the majority vote of the final classifier. This is called
    alpha in chapter 10.1 of The Elements of Statistical Learning. Arguments:
    error: error rate from weak classifier m
    n_classes: number of target classes. The default of 2 gives the binary
        formula log((1 - error) / error). For K > 2 the multi-class (SAMME)
        term log(K - 1) is added, so that a classifier that beats random
        guessing (error < 1 - 1/K) still receives a positive weight.
    eps: the error is clipped to [eps, 1 - eps] so that a perfect (error = 0)
        or always-wrong (error = 1) classifier yields a large finite alpha
        instead of a division by zero or log(0).

    Raises ValueError if n_classes is smaller than 2.
    '''
    if n_classes < 2:
        raise ValueError('n_classes must be at least 2')

    error = np.clip(error, eps, 1 - eps)
    return np.log((1 - error) / error) + np.log(n_classes - 1)

def update_weights(w_i, alpha, y, y_pred):
    '''
    Re-weight the observations after one boosting round. Arguments:
    w_i: current weight of each observation
    alpha: vote weight of the weak classifier that produced y_pred
    y: actual target value
    y_pred: predicted value by that weak classifier

    Observations the classifier got wrong have their weight multiplied by
    exp(alpha); correctly classified observations are multiplied by exp(0).
    '''
    # 1 where the prediction differs from the target, 0 where it matches
    missed = np.not_equal(y, y_pred).astype(int)
    boost = np.exp(alpha * missed)
    return w_i * boost

In [6]:
def print_history(model_history):
    '''
    Print the recorded metric names and plot accuracy per epoch. Arguments:
    model_history: object with a `history` dict mapping metric names to
        per-epoch value lists (as returned by Keras `Model.fit`)

    Only the series that are actually present are drawn, so a history
    recorded without validation data (no 'val_accuracy' key) no longer
    raises KeyError.
    '''
    history = model_history.history

    # list all data in history
    print(history.keys())

    # summarize history for accuracy
    fig, ax = plt.subplots()
    shown = []
    for key, label in (('accuracy', 'train'), ('val_accuracy', 'validation')):
        if key in history:
            # Markers keep a single-epoch run visible (a lone point has no line).
            ax.plot(history[key], marker='o')
            shown.append(label)

    ax.set_title('model accuracy')
    ax.set_ylabel('accuracy')
    ax.set_xlabel('epoch')
    if shown:
        ax.legend(shown, loc='upper left')
    plt.show()

In [13]:
# Define AdaBoost class
class AdaBoost:
    '''
    Multi-class AdaBoost (SAMME) whose weak classifiers are Keras CNNs built
    on the notebook-level pretrained backbones `inceptionResnet`, `xception`
    and `vgg`.

    Attributes set by `fit`:
    alphas: vote weight of each weak classifier
    G_M: the fitted weak classifiers, in boosting order
    M: number of boosting rounds requested
    n_classes: number of target classes seen during fitting
    training_errors: weighted training error of each weak classifier
    '''

    def __init__(self):
        self.alphas = []
        self.G_M = []
        self.M = None
        self.n_classes = None
        self.training_errors = []
        self.prediction_errors = []

    @staticmethod
    def _materialize(dataset):
        '''Read a batched (images, labels) dataset once and return aligned NumPy arrays.'''
        image_batches, label_batches = [], []
        # A single pass keeps every image paired with its own label even when
        # the dataset yields a different order on each iteration.
        for images, labels in dataset:
            image_batches.append(np.asarray(images))
            label_batches.append(np.asarray(labels))
        X = np.concatenate(image_batches, axis=0)
        y = np.concatenate(label_batches, axis=0).astype(int)
        return X, y

    @staticmethod
    def _build_classifier(backbone, n_classes):
        '''Stack a rescaling layer, the backbone and a dense softmax head, then compile.'''
        G_m = tf.keras.Sequential()
        G_m.add(tf.keras.layers.Rescaling(scale=1./255))
        G_m.add(backbone)
        G_m.add(tf.keras.layers.Flatten())
        G_m.add(tf.keras.layers.Dense(500, activation='relu'))
        G_m.add(tf.keras.layers.Dense(100, activation='relu'))
        G_m.add(tf.keras.layers.Dense(n_classes, activation='softmax'))
        # run_eagerly=True is kept from the original notebook; Keras recommends
        # it for debugging only, so it can be dropped for faster training.
        G_m.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
                    metrics=['accuracy'], run_eagerly=True)
        return G_m

    def fit(self, M = 3, dataset = None, validation_data = None, epochs = 1, batch_size = 8):
        '''
        Fit model. Arguments:
        M: number of boosting rounds. Default is 3 - integer
        dataset: batched dataset yielding (images, integer labels). When None,
            the training split of `train_path` is loaded with the same
            settings the notebook uses for `training_data`.
        validation_data: optional validation data forwarded to Keras `fit`;
            the accuracy curves are plotted only when it is given.
        epochs: training epochs per weak classifier. Default is 1 - integer
        batch_size: batch size for training and prediction. Default is 8

        The whole training set is held in memory as NumPy arrays so that
        labels, predictions and observation weights share one fixed order
        (roughly 1.4 GB of float32 for 7,200 images of 128x128x3).

        Raises ValueError if fewer than two classes are found.
        '''

        # Clear before calling (including the classifiers of a previous fit)
        self.alphas = []
        self.G_M = []
        self.training_errors = []
        self.M = M

        if dataset is None:
            dataset = tf.keras.utils.image_dataset_from_directory(
                train_path, validation_split=0.1, interpolation='area',
                image_size=(128, 128), batch_size=8, subset='training',
                seed=42, color_mode='rgb')

        X, y = self._materialize(dataset)

        class_names = getattr(dataset, 'class_names', None)
        n_classes = len(class_names) if class_names else int(y.max()) + 1
        if n_classes < 2:
            raise ValueError('AdaBoost needs at least 2 classes')
        self.n_classes = n_classes

        # NOTE: the backbones are shared notebook-level objects. A round that
        # reuses a backbone (M > 3, or a second call to fit) continues from
        # the weights it was fine-tuned to earlier.
        backbones = (inceptionResnet, xception, vgg)

        # At m = 0, weights are all the same and equal to 1 / N
        w_i = np.ones(len(y)) / len(y)
        eps = 1e-10

        # Iterate over M weak classifiers
        for m in range(0, M):

            # (a) Fit weak classifier and predict labels
            G_m = self._build_classifier(backbones[m % len(backbones)], n_classes)

            # Keras weights each sample's loss by sample_weight; rescaling to
            # mean 1 keeps the loss on the same scale in every round.
            fit_weights = w_i * (len(w_i) / np.sum(w_i))
            model_history = G_m.fit(X, y, sample_weight=fit_weights,
                                    batch_size=batch_size, epochs=epochs,
                                    validation_data=validation_data)
            if 'val_accuracy' in model_history.history:
                print_history(model_history)

            probabilities = G_m.predict(X, batch_size=batch_size)
            y_pred = np.argmax(probabilities, axis=1)

            self.G_M.append(G_m) # Save to list of weak classifiers

            # (b) Compute error
            error_m = compute_error(y, y_pred, w_i)
            self.training_errors.append(error_m)

            # (c) Compute alpha. The error is clipped so a perfect or
            # always-wrong classifier gives a finite alpha, and log(K - 1) is
            # the multi-class (SAMME) correction to the binary formula.
            alpha_m = compute_alpha(np.clip(error_m, eps, 1 - eps)) + np.log(n_classes - 1)
            self.alphas.append(alpha_m)

            # (d) Update w_i for the next round and renormalise to sum to 1
            w_i = update_weights(w_i, alpha_m, y, y_pred)
            w_i = w_i / np.sum(w_i)

        assert len(self.G_M) == len(self.alphas)


    def predict(self, X):
        '''
        Predict using fitted model. Arguments:
        X: inputs accepted by each weak classifier's `predict` - array-like
            or dataset. X must yield its samples in the same order on every
            pass (for a dataset, build it with shuffle=False), because each
            weak classifier reads it separately.

        Every weak classifier votes for its most probable class with weight
        alpha_m; the class with the largest total vote wins.

        Returns a pandas Series of integer class labels, one per sample.
        Raises RuntimeError if called before `fit`.
        '''
        if not self.G_M:
            raise RuntimeError('AdaBoost.predict called before fit')

        votes = None
        for G_m, alpha_m in zip(self.G_M, self.alphas):
            probabilities = np.asarray(G_m.predict(X))
            labels_m = np.argmax(probabilities, axis=1)

            if votes is None:
                # One row per sample, one column per class
                votes = np.zeros(probabilities.shape, dtype=float)
            votes[np.arange(len(labels_m)), labels_m] += alpha_m

        # Calculate final predictions
        y_pred = pd.Series(np.argmax(votes, axis=1))

        return y_pred

In [None]:
ab = AdaBoost()

# Collect the integer training labels in one pass and concatenate once
# (the previous loop re-copied the growing array on every batch, and the
# result of astype(int) was discarded).
# NOTE: the order is that of this single pass over `training_data`; a
# shuffled dataset may yield a different order the next time it is iterated.
train_labels = np.concatenate(
    [labels.numpy() for _, labels in training_data], axis=0
).astype(int)

ab.fit(M = 3)

# Predict on the validation split (left disabled, as in the original notebook)
# y_pred = ab.predict(validationData)