# In [1]:
#Q1 SVM
# importing libraries ------------------------------------------------------------
import matplotlib
import numpy as np

import matplotlib.pyplot as plt
# %matplotlib inline


# for test dataset - not using any library functions

from sklearn.datasets import make_blobs, make_circles, make_moons
from sklearn.preprocessing import StandardScaler



# imports done-----------------------------------------------------



# (DUAL) objective function

def objective_function(alphas, target, kernel, X_train, hyperparams=None):
    """Return the value of the SVM dual objective for the given multipliers.

    The value is ``sum(alphas) - 0.5 * sum_ij(a_i a_j y_i y_j K(x_i, x_j))``.

    `alphas`: vector of Lagrange multipliers
    `target`: vector of class labels (-1 or 1) for training data
    `kernel`: kernel function
    `X_train`: training data for model
    `hyperparams`: optional kernel hyperparameters. When given, they are
        forwarded as the third argument of `kernel`; when omitted, `kernel`
        is called with the two data arguments only (the original behaviour).
    """
    if hyperparams is None:
        gram = kernel(X_train, X_train)
    else:
        gram = kernel(X_train, X_train, hyperparams)

    # sum_ij (a_i y_i) K_ij (a_j y_j) written as a quadratic form; this avoids
    # materialising two extra m x m outer-product matrices.
    weighted = alphas * target
    return np.sum(alphas) - 0.5 * (weighted @ gram @ weighted)


# Decision function for test data

def decision_function(alphas, target, kernel, X_train, x_test, b, hyperparams=None):
    """Applies the SVM decision function to the input feature vectors in `x_test`.

    Returns ``(alphas * target) @ K(X_train, x_test) - b``.

    `hyperparams`: optional kernel hyperparameters. When given, they are
        forwarded as the third argument of `kernel`; when omitted, `kernel`
        is called with the two data arguments only (the original behaviour).
    """
    if hyperparams is None:
        similarities = kernel(X_train, x_test)
    else:
        similarities = kernel(X_train, x_test, hyperparams)
    return (alphas * target) @ similarities - b




#helper function for plotting the decision boundary
# from sklearn.svm._libsvm import decision_function


def plot_decision_boundary(model, ax, resolution=100, colors=('b', 'k', 'r'), levels=(-1, 0, 1)):
    """Plots the model's decision boundary on the input axes object.
    Range of decision boundary grid is determined by the training data.
    Returns decision boundary grid and axes object (`grid`, `ax`)."""

    # The kernels in this file take their hyperparameters as a third argument,
    # while decision_function() calls the kernel with the two data arguments
    # only. Bind the model's hyperparameters here so the boundary is drawn with
    # the same kernel settings the model was trained with.
    hyperparams = getattr(model, 'hyperparams', None)

    def bound_kernel(a, b):
        return model.kernel(a, b, hyperparams)

    # Generate coordinate grid of shape [resolution x resolution]
    # and evaluate the model over the entire space
    x_values = np.linspace(model.X[:, 0].min(), model.X[:, 0].max(), resolution)
    y_values = np.linspace(model.X[:, 1].min(), model.X[:, 1].max(), resolution)
    grid = [[decision_function(model.alphas, model.y,
                               bound_kernel, model.X,
                               np.array([xv, yv]), model.b) for xv in x_values] for yv in y_values]
    # Rows follow y and columns follow x, which is the layout contour() expects.
    grid = np.array(grid).reshape(len(y_values), len(x_values))

    # Plot decision contours using grid and
    # make a scatter plot of training data
    ax.contour(x_values, y_values, grid, levels=levels, linewidths=(1, 1, 1),
               linestyles=('--', '-', '--'), colors=colors)
    ax.scatter(model.X[:, 0], model.X[:, 1],
               c=model.y, cmap=plt.cm.viridis, lw=0, alpha=0.25)

    # Plot support vectors (non-zero alphas)
    # as circled points (linewidth > 0)
    mask = np.round(model.alphas, decimals=2) != 0.0
    ax.scatter(model.X[mask, 0], model.X[mask, 1],
               c=model.y[mask], cmap=plt.cm.viridis, lw=1, edgecolors='k')

    return grid, ax




#defining the kernel functions --------------------------------------------------

# the linear kernel function
def linear_kernel(x, y, hyperparameter=None):
    """Returns the linear kernel (plain inner product) of arrays `x` and `y`.

    `hyperparameter` is ignored: the linear kernel has none. It is accepted,
    and optional, only so that this function can be called with either two
    or three arguments like the other kernels in this file.
    """
    return x @ y.T  # Note the @ operator for matrix multiplication

#the gaussian kernel function
def gaussian_kernel(x, y, hyperparameter=(1.0,)):
    """Returns the gaussian similarity of arrays `x` and `y`.

    `hyperparameter`: sequence whose first element is the kernel width
        `sigma` (1.0 by default).

    Shapes handled:
      * two 1-D vectors             -> scalar
      * one 1-D vector, one matrix  -> one value per matrix row
      * two matrices                -> matrix of all row-to-row similarities

    Raises ValueError when either input is a scalar.
    """
    sigma = hyperparameter[0]
    x = np.asarray(x)
    y = np.asarray(y)
    if x.ndim == 0 or y.ndim == 0:
        raise ValueError("gaussian_kernel expects vectors or matrices, not scalars")

    if x.ndim > 1 and y.ndim > 1:
        # Pair every row of x with every row of y.
        diff = x[:, np.newaxis] - y[np.newaxis, :]
    else:
        # Plain broadcasting covers vector/vector and vector/matrix.
        diff = x - y

    # Squared Euclidean distance over the feature axis, computed directly
    # instead of squaring a norm.
    squared_distance = np.sum(diff ** 2, axis=-1)
    return np.exp(-squared_distance / (2 * sigma ** 2))


# sigmoid kernel function
def sigmoid_kernel(x, y, hyperparameter=(1.0,)):
    """Returns the sigmoid kernel ``tanh(gamma * <x, y> + 1)`` of `x` and `y`.

    `hyperparameter`: sequence whose first element is the slope `gamma`
        (1.0 by default). The offset is fixed at 1.
    """
    gamma = hyperparameter[0]
    return np.tanh(gamma * (x @ y.T) + 1)


# polynomial kernel
def polynomial_kernel(x, y, hyperparameter=(3, 1)):
    """Returns the polynomial kernel ``(gamma * <x, y> + 1) ** p`` of `x` and `y`.

    `hyperparameter`: sequence ``(p, gamma)`` holding the degree first and
        the inner-product scale second (degree 3 and gamma 1 by default).
    """
    degree = hyperparameter[0]
    gamma = hyperparameter[1]
    return (gamma * (x @ y.T) + 1) ** degree




# kernels defined ------------------------------------------------------------




#SVM MODEL Class

class SMOModel:
    """Container object for the model used for sequential minimal optimization.

    It only stores state; the training functions read and update its
    attributes directly.
    """

    def __init__(self, X, y, C, tol, eps, kernel, alphas, b, errors, hyperparams=None):
        """Store the training data, the solver settings and the solver state.

        `hyperparams` is optional (default None) so that kernels without
        hyperparameters do not force callers to pass a placeholder.
        """
        self.X = X  # training data vector
        self.y = y  # class label vector
        self.C = C  # regularization parameter
        self.tol = tol  # error tolerance for stopping criterion
        self.eps = eps  # tolerance for alpha update
        self.kernel = kernel  # kernel function
        self.alphas = alphas  # lagrange multiplier vector
        self.b = b  # scalar bias term
        self.errors = errors  # error cache
        self._obj = []  # record of objective function value
        self.m = len(self.X)  # store size of training set
        self.hyperparams = hyperparams  # kernel hyperparameters, passed to `kernel` as its third argument



## SMO algorithm implementation
# The code here is based on the pseudocode provided in Platt's paper, with one
# structural difference: the training functions pass the model object around
# explicitly to make the variable scoping clearer.
#
# Three functions are used to train the model: take_step(), examine_example(),
# and train(). They are structured to work as follows:
#
#   - train() implements the selection of the first α to optimize (the first
#     choice heuristic) and passes its index to examine_example().
#   - examine_example() implements the second choice heuristic to choose the
#     second α to optimize, and passes the indices of both α values to take_step().
#   - take_step() carries out the meat of the calculations: it computes the two
#     new α values and a new threshold b, and updates the error cache.
#
# The train() function uses a while loop to iterate through the α values in a
# few different ways until no more optimizations can be made, at which point it
# returns the optimized α vector (embedded in an SMOModel object).


#take_step()
def take_step(i1, i2, model):
    """Jointly optimize the multipliers at indices `i1` and `i2`.

    Updates `model.alphas`, `model.b` and `model.errors` in place when the
    pair can make progress.

    Returns `(1, model)` when the pair was updated and `(0, model)` when it
    was skipped (same index, empty feasible segment, or a change smaller
    than the `eps` tolerance). The model is left untouched when skipped.
    """
    # Skip if chosen alphas are the same
    if i1 == i2:
        return 0, model

    alph1 = model.alphas[i1]
    alph2 = model.alphas[i2]
    y1 = model.y[i1]
    y2 = model.y[i2]
    E1 = model.errors[i1]
    E2 = model.errors[i2]
    s = y1 * y2

    # Compute L & H, the bounds on new possible alpha values
    if y1 != y2:
        L = max(0, alph2 - alph1)
        H = min(model.C, model.C + alph2 - alph1)
    else:
        L = max(0, alph1 + alph2 - model.C)
        H = min(model.C, alph1 + alph2)
    if L == H:
        return 0, model

    # Every kernel evaluation in this step uses the model's hyperparameters.
    def kernel(a, b):
        return model.kernel(a, b, model.hyperparams)

    # Compute kernel & 2nd derivative eta
    k11 = kernel(model.X[i1], model.X[i1])
    k12 = kernel(model.X[i1], model.X[i2])
    k22 = kernel(model.X[i2], model.X[i2])
    eta = 2 * k12 - k11 - k22

    if eta < 0:
        # Unconstrained optimum along the constraint line, clipped to [L, H]
        a2 = alph2 - y2 * (E1 - E2) / eta
        a2 = min(max(a2, L), H)
    else:
        # If eta is non-negative, move new a2 to the bound with the greater
        # objective function value. alpha 1 has to move together with
        # alpha 2 (a1 = alph1 + s * (alph2 - a2)) so that both candidates
        # stay on the equality constraint being optimized along.
        alphas_adj = model.alphas.copy()
        alphas_adj[i2] = L
        alphas_adj[i1] = alph1 + s * (alph2 - L)
        Lobj = objective_function(alphas_adj, model.y, kernel, model.X)
        alphas_adj[i2] = H
        alphas_adj[i1] = alph1 + s * (alph2 - H)
        Hobj = objective_function(alphas_adj, model.y, kernel, model.X)
        if Lobj > (Hobj + model.eps):
            a2 = L
        elif Lobj < (Hobj - model.eps):
            a2 = H
        else:
            a2 = alph2

    # Push a2 to 0 or C if very close
    if a2 < 1e-8:
        a2 = 0.0
    elif a2 > (model.C - 1e-8):
        a2 = model.C

    # If examples can't be optimized within epsilon (eps), skip this pair
    if np.abs(a2 - alph2) < model.eps * (a2 + alph2 + model.eps):
        return 0, model

    # Calculate new alpha 1 (a1)
    a1 = alph1 + s * (alph2 - a2)

    # Update threshold b to reflect newly calculated alphas
    # Calculate both possible thresholds
    delta1 = y1 * (a1 - alph1)
    delta2 = y2 * (a2 - alph2)
    b1 = E1 + delta1 * k11 + delta2 * k12 + model.b
    b2 = E2 + delta1 * k12 + delta2 * k22 + model.b

    # Set new threshold based on if a1 or a2 is bound by L and/or H
    if 0 < a1 < model.C:
        b_new = b1
    elif 0 < a2 < model.C:
        b_new = b2
    # Average thresholds if both are bound
    else:
        b_new = (b1 + b2) * 0.5

    # Update model object with new alphas
    model.alphas[i1] = a1
    model.alphas[i2] = a2

    # Update error cache (equation 12.11 in Platt's book) for EVERY example,
    # including i1 and i2: an optimized multiplier that ends up on a bound
    # would otherwise keep the error it had before this step.
    model.errors[:] = (model.errors
                       + delta1 * kernel(model.X[i1], model.X)
                       + delta2 * kernel(model.X[i2], model.X)
                       + model.b - b_new)

    # The error of an optimized, unbound alpha is zero by construction of
    # b_new; store an exact zero instead of the rounding residue.
    for index, alph in ((i1, a1), (i2, a2)):
        if 0.0 < alph < model.C:
            model.errors[index] = 0.0

    # Update model threshold
    model.b = b_new

    return 1, model


#--

# examine_example()
def examine_example(i2, model):
    """Try to find a partner multiplier for example `i2` and optimize the pair.

    Nothing is attempted unless example `i2` violates the KKT conditions by
    more than `model.tol`. Partners are then tried in this order:
      1. the example with the extreme cached error (only when more than one
         multiplier lies strictly between 0 and C),
      2. every multiplier that is neither 0 nor C, from a random offset,
      3. every multiplier, from a random offset.

    Returns `(1, model)` as soon as one `take_step` succeeds, else `(0, model)`.
    """
    label = model.y[i2]
    alpha = model.alphas[i2]
    error = model.errors[i2]
    signed_error = error * label

    can_grow = signed_error < -model.tol and alpha < model.C
    can_shrink = signed_error > model.tol and alpha > 0
    if not (can_grow or can_shrink):
        # KKT conditions hold within tolerance: nothing to optimize here
        return 0, model

    def first_success(candidates, model):
        # Walk the candidates in order and stop at the first productive step.
        for i1 in candidates:
            step_result, model = take_step(i1, i2, model)
            if step_result:
                return 1, model
        return 0, model

    # A failed take_step leaves the alphas untouched, so this mask stays valid
    # for both uses below.
    non_bound = (model.alphas != 0) & (model.alphas != model.C)

    if np.count_nonzero(non_bound) > 1:
        # 2nd choice heuristic: pick the partner with the most different error
        partner = np.argmin(model.errors) if model.errors[i2] > 0 else np.argmax(model.errors)
        step_result, model = take_step(partner, i2, model)
        if step_result:
            return 1, model

    # Non-zero and non-C alphas, starting at a random point
    shift = np.random.choice(np.arange(model.m))
    found, model = first_success(np.roll(np.flatnonzero(non_bound), shift), model)
    if found:
        return 1, model

    # All alphas, starting at a random point
    shift = np.random.choice(np.arange(model.m))
    found, model = first_success(np.roll(np.arange(model.m), shift), model)
    if found:
        return 1, model

    return 0, model

#--

#train()

def train(model):
    """Run the SMO outer loop on `model` until no multiplier changes.

    Alternates between one sweep over all training examples and repeated
    sweeps over the examples whose alphas are neither 0 nor C. After every
    successful optimization step the dual objective value is appended to
    `model._obj`.

    Returns the same model object, trained in place.
    """
    # objective_function() calls the kernel with the data arguments only,
    # so bind the model's hyperparameters to it here.
    def kernel(a, b):
        return model.kernel(a, b, model.hyperparams)

    num_changed = 0
    examine_all = True

    while num_changed > 0 or examine_all:
        num_changed = 0
        if examine_all:
            # loop over all training examples
            candidates = range(model.alphas.shape[0])
        else:
            # loop over examples where alphas are not already at their limits
            candidates = np.where((model.alphas != 0) & (model.alphas != model.C))[0]

        for i in candidates:
            examine_result, model = examine_example(i, model)
            num_changed += examine_result
            if examine_result:
                model._obj.append(
                    objective_function(model.alphas, model.y, kernel, model.X))

        if examine_all:
            examine_all = False
        elif num_changed == 0:
            # The restricted sweep found nothing: fall back to a full sweep.
            examine_all = True

    return model





# SVM with grid search for kernel parameters and C
class Grid_Search():
    """Plain holder for the inputs of a grid search: the training split, the
    kernel to use and the validation split."""

    def __init__(self, X_train, y, name_of_kernal, X_validation, y_validation):
        # training data, its labels and the kernel
        self.X_train, self.y, self.name_of_kernal = X_train, y, name_of_kernal
        # held-out data and its labels
        self.X_validation, self.y_validation = X_validation, y_validation




class SVM():
    """SMO-based SVM classifier wrapper.

    Methods:
    __init__()                        scale the training data and relabel the classes to +1 / -1
    construct_hyperparameters_grid()  build the (C, kernel parameters...) grid for the chosen kernel
    get_optimal_hyperplane()          train one SMO model for one hyperparameter tuple
    train()                           train for one tuple, or for every tuple of the grid
    """

    # shared by all instances
    tolerance = 0.01
    epsilon = 0.01

    def __init__(self , X_train , y_train , X_validation , y_validation, X_test, y_test, name_of_kernal , positive_class,negative_class, grid_search_on=False):
        """Store the data splits, scale the training data and relabel `y_train`.

        `name_of_kernal` is the kernel FUNCTION itself (e.g. `linear_kernel`).
        `positive_class` / `negative_class` are the label values in `y_train`
        that are mapped to +1 / -1.
        """
        self.X_validation = X_validation
        self.y_validation = y_validation
        self.X_test = X_test
        self.y_test = y_test
        self.name_of_kernal = name_of_kernal

        self.grid_search_on = grid_search_on

        # SVM works on normalized data: scale the training data to be centered
        # at the origin with unit standard deviation.
        self.scaler = StandardScaler()
        self.X_train = self.scaler.fit_transform(X_train, y_train)

        # make y labels to be +1 or -1 (on a copy: the caller's array is not modified)
        self.y_train = self._relabel(y_train, positive_class, negative_class)

        # if grid search is on, construct the hyperparameter grid
        if self.grid_search_on:
            self.hyperparameters = self.construct_hyperparameters_grid()

    @staticmethod
    def _relabel(labels, positive_class, negative_class):
        """Return a copy of `labels` with positive_class -> 1 and negative_class -> -1.

        Both masks are computed before anything is written, so the mapping is
        correct even when a class value is itself 1 or -1 (e.g. positive
        class 0 and negative class 1).
        """
        relabelled = np.array(labels, copy=True)
        if relabelled.dtype.kind in 'ub':
            # unsigned and boolean arrays cannot hold -1
            relabelled = relabelled.astype(np.int64)
        is_positive = relabelled == positive_class
        is_negative = relabelled == negative_class
        relabelled[is_positive] = 1
        relabelled[is_negative] = -1
        return relabelled

    # function to construct hyper-parameters grid, for each kernel
    def construct_hyperparameters_grid(self):
        """Return the grid as a list of tuples whose first element is always C.

        linear:     (C,)
        polynomial: (C, degree, gamma)
        gaussian:   (C, gamma)
        sigmoid:    (C, gamma)

        Raises Exception('Kernel not supported') for any other kernel.
        """
        c_values = [0.1, 1, 10, 100, 1000]
        gamma_values = [0.1, 0.01, 0.001, 0.0001]
        degree_values = [2, 3, 4, 5]

        if self.name_of_kernal == linear_kernel:
            # one-element tuples, so that every grid entry can be indexed the same way
            return [(C,) for C in c_values]
        if self.name_of_kernal == polynomial_kernel:
            return [(C, degree, gamma)
                    for C in c_values for degree in degree_values for gamma in gamma_values]
        if self.name_of_kernal == gaussian_kernel or self.name_of_kernal == sigmoid_kernel:
            return [(C, gamma) for C in c_values for gamma in gamma_values]
        raise Exception('Kernel not supported')

    def get_optimal_hyperplane(self , kernal_parameters)  :
        """Train one SMO model and return it.

        `kernal_parameters`: sequence whose first element is C; the remaining
        elements are handed to the kernel as its hyperparameters.
        """
        # setting initial values
        C = kernal_parameters[0]
        m = len(self.X_train)
        initial_alphas = np.zeros(m)
        initial_b = 0.0

        # hyper-parameters of the kernel (everything after C)
        hyperparam = np.array(kernal_parameters[1:])

        # Some training helpers call the kernel with the two data arguments
        # only, others add the hyperparameters as a third argument. This
        # adapter serves both, always using this run's hyperparameters.
        base_kernel = self.name_of_kernal

        def kernel(a, b, hyperparameter=hyperparam):
            return base_kernel(a, b, hyperparameter)

        # instantiate the model
        model = SMOModel(self.X_train, self.y_train, C, self.tolerance, self.epsilon, kernel,
                         initial_alphas, initial_b, np.zeros(m), hyperparam)

        # Initialize error cache. With every alpha at zero and b = 0 the
        # decision function is 0 for every example, so error = 0 - y.
        model.errors = -np.asarray(model.y, dtype=float)

        np.random.seed(0)
        output = train(model)
        return output

    # function to train the classifier given the hyperparameters
    def train(self , kernal_parameters=None):
        """Train the classifier.

        With grid search on, `kernal_parameters` is ignored: one model is
        trained per grid entry and a list of `(parameters, model)` pairs is
        returned.
        Otherwise a single model is trained for `kernal_parameters` and
        returned; ValueError is raised when no parameters are given.
        """
        if self.grid_search_on:     # loop over the hyperparameters grid
            return [(parameters, self.get_optimal_hyperplane(parameters))
                    for parameters in self.hyperparameters]

        if kernal_parameters is None:
            raise ValueError("kernal_parameters is required when grid search is off")
        return self.get_optimal_hyperplane(kernal_parameters)













# SVM implementation over --------------------------------------------------------------------------------------------





# testing --------------------------------------------------------------------------------------------------------------
# toy data generated by scikit-learn's make_blobs.

#testing and debugging purposes functions
def testing_toy_data(C = 1000.0 , tol = 0.01 , eps = 0.01 , kernel = linear_kernel, hyperparams = None ):
    """Train an SMO model on a two-blob toy dataset and plot its decision boundary.

    `C`: regularization parameter
    `tol`: error tolerance
    `eps`: alpha tolerance
    `kernel`: kernel function, called as `kernel(a, b, hyperparams)`
    `hyperparams`: hyperparameters handed to `kernel`; must be supplied for
        kernels that index into it (the linear kernel ignores it)

    Returns the trained model.
    """
    X_train, y = make_blobs(n_samples=1000, centers=2,
                        n_features=2, random_state=1)

    # scale the data to be centered at the origin with a unit standard deviation
    X_train = StandardScaler().fit_transform(X_train, y)

    # change the class labels to be +1 and -1 instead of 0 and 1
    y[y == 0] = -1

    m = len(X_train)

    # Some training/plotting helpers call the kernel with the two data
    # arguments only, others add the hyperparameters as a third argument.
    # This adapter serves both.
    def bound_kernel(a, b, hyperparameter=hyperparams):
        return kernel(a, b, hyperparameter)

    # Instantiate model with all alphas, the threshold and the errors at zero
    model = SMOModel(X_train, y, C, tol, eps, bound_kernel,
                     np.zeros(m), 0.0, np.zeros(m), hyperparams)

    # Initialize error cache
    model.errors = decision_function(model.alphas, model.y, model.kernel,
                                     model.X, model.X, model.b) - model.y

    np.random.seed(0)
    output = train(model)

    # Let's see what the decision boundary looks like.
    fig, ax = plt.subplots()
    grid, ax = plot_decision_boundary(output, ax)

    return output



def test_driver():
    """Run the toy-data experiment with a linear kernel and a large C."""
    settings = {
        'C': 1000.0,  # the regularization parameter
        'tol': 0.01,
        'eps': 0.01,
        'kernel': linear_kernel,
    }
    testing_toy_data(**settings)

#testing kernals
def test_kernal():
    """Smoke-test `gaussian_kernel` on two fixed 3-element vectors and print the result."""
    x = np.array([1, 2, 3])
    y = np.array([4, 5, 6])
    # gaussian_kernel has no `sigma` keyword: the kernel width is the first
    # element of its `hyperparameter` sequence.
    result = gaussian_kernel(x, y, [1])
    print(result)




#defining main function
def main():
    """Entry point: announce start-up, then run the toy-data driver."""
    print("Entering main function")
    # test_kernal() can be called here instead to check the kernels
    test_driver()


# run only when executed as a script, not when imported
if __name__ == "__main__":
    main()



















# #gridsearch
#     # create a grid of points to evaluate the kernel function on
#     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
#     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
#     xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
#                          np.arange(y_min, y_max, 0.02))
#
#     # evaluate the kernel function on the grid
#     Z = kernel(X, X, sigma=sigma)
#     Z = Z @ y.T
#     Z = Z.reshape(xx.shape)
#
#     # plot the contour and training examples
#     plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
#     plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
#     plt.show()





# #gridsearch
#     def __init__(self, X, y, C_range, gamma_range, tol=1e-3):
#         self.X = X
#         self.y = y
#         self.C_range = C_range
#         self.gamma_range = gamma_range
#         self.tol = tol
#         self.best_params = None
#         self.best_score = None
#         self.best_model = None
#
#     def fit(self):
#         self.best_params = None
#         self.best_score = None
#         self.best_model = None
#
#         for C in self.C_range:
#             for gamma in self.gamma_range:
#                 model = SVM(self.X, self.y, C, gamma, self.tol)
#                 model = train(model)
#                 score = accuracy_score(model.predict(self.X), self.y)
#                 if self.best_score is None or score > self.best_score:
#                     self.best_params = (C, gamma)
#                     self.best_score = score
#                     self.best_model = model
#
#         return self.best_model



# #gridsearch
#         self.C_range = np.logspace(-2, 10, 13)
#         self.gamma_range = np.logspace(-9, 3, 13)
#         self.kernel_parameter_grid = dict(gamma=self.gamma_range, C=self.C_range)
#         self.grid_search_model = GridSearchCV(SVC(kernel=self.name_of_kernal), self.kernel_parameter_grid, cv=5)
#         self.grid_search_model.fit(self.X_train, self.y)
#         self.best_C = self.grid_search_model.best_estimator_.C
#         self.best_gamma = self.grid_search_model.best_estimator_.gamma
#         self.best_model = SVC(kernel=self.name_of_kernal, C=self.best_C, gamma=self.best_gamma)
#         self.best_model.fit(self.X_train, self.y)
#         self.best_score = self.best_model.score(self.X_validation, self.y_validation)
#         self.best_score_train = self.best_model.score(self.X_train, self.y)
#         self.best_score_test = self.best_model.score(self.X_validation, self.y_validation)
#         self.best_score_train_validation = self.best_model.score(self.X_train, self.y)
#         self.best_score_test_validation = self.best_model.score(self.X_validation, self.y_validation)
#         self.best_score_train

# In [None]:
#SVM Binary pneumonia mnist
# we are implementing a simple SVM classifier for the pneumonia MNIST dataset
# the dataset is at the folder: pneumoniamnist in the same directory as this file

#result folder is: binary_svm



#importing libraries
import csv
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_curve
from sklearn.preprocessing import StandardScaler
#importing svm

from sklearn.svm import SVC

import matplotlib.pyplot as plt
from matplotlib.colors import Normalize



class MidpointNormalize(Normalize):
    """Normalize that maps `vmin`, `midpoint` and `vmax` to 0, 0.5 and 1 and
    interpolates linearly in between."""

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        self.midpoint = midpoint
        super().__init__(vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # `clip` is accepted but not used
        anchors = [self.vmin, self.midpoint, self.vmax]
        targets = [0, 0.5, 1]
        return np.ma.masked_array(np.interp(value, anchors, targets))



def data_preprocessing(path):
    """Load `pneumoniamnist.npz` from the folder `path` and prepare it for an SVM.

    Every image is flattened to one feature row, features are standardized,
    and label 0 is replaced by -1.

    The scaler is fitted on the training images only and then applied to the
    test and validation images, so all three splits share one feature scale
    and no statistics of the held-out data are used.

    Returns `X_train, Y_train, X_test, Y_test, X_valid, Y_valid`.
    """
    data_path = os.path.join(path, 'pneumoniamnist.npz')

    def as_features(images):
        # one row per image, whatever the image size and the number of images
        return np.reshape(images, (len(images), -1)).astype(float)

    def as_labels(labels):
        # flat integer vector; astype() copies, so the edits below stay local
        flat = np.reshape(labels, -1).astype(int)
        flat[flat == 0] = -1
        return flat

    # the context manager closes the archive once the arrays are read
    with np.load(data_path) as data:
        train1_data = as_features(data['train_images'])
        Y_train = as_labels(data['train_labels'])
        valid1_data = as_features(data['val_images'])
        Y_valid = as_labels(data['val_labels'])
        test1_data = as_features(data['test_images'])
        Y_test = as_labels(data['test_labels'])

    # feature scaling: fit on train, reuse the fitted scaler elsewhere
    sc = StandardScaler()
    X_train = sc.fit_transform(train1_data, Y_train)
    X_test = sc.transform(test1_data)
    X_valid = sc.transform(valid1_data)

    return X_train, Y_train, X_test, Y_test , X_valid, Y_valid


# def validation_dataset(path):
#     data = np.load(path)
#     # print(data.files)
#     # Reshape images and save in numpy arrays




def grid_search_poly(X_train, Y_train,  X_valid, Y_valid ):
    """Grid-search C, degree and gamma for a polynomial-kernel SVC.

    Every grid point is trained on the training split and scored on the
    validation split; one row per grid point is written to
    `binary_svm/results_poly.csv`. A baseline (C=1, degree=2, gamma=0.1) is
    the initial optimum and is only replaced by a strictly better
    micro-averaged F1.

    Returns `(optimal_classifier, optimal_hyperparameters, optimal_metrics)`.
    """
    results_path = 'binary_svm/results_poly.csv'
    # the results folder may not exist yet
    os.makedirs(os.path.dirname(results_path), exist_ok=True)

    #create csv file for the results name: results_poly.csv
    with open(results_path, 'w') as f:
        f.write('C, degree, gamma, accuracy, precision, recall, f1\n')

    def evaluate(C, degree, gamma):
        # gamma is passed on to the classifier so the value logged in the CSV
        # is the value the model was actually trained with
        clf = SVC(kernel='poly', C=C, degree=degree, gamma=gamma)
        clf.fit(X_train, Y_train)
        Y_pred = clf.predict(X_valid)
        accuracy = clf.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, Y_pred, average='micro')
        recall = recall_score(Y_valid, Y_pred, average='micro')
        f1 = f1_score(Y_valid, Y_pred, average='micro')
        return clf, accuracy, precision, recall, f1

    #SVM with C = 1 and degree = 2 and gamma = 0.1
    optimal_C, optimal_degree, optimal_gamma = 1, 2, 0.1
    optimal_classifier, optimal_accuracy, _, _, optimal_f1 = evaluate(
        optimal_C, optimal_degree, optimal_gamma)

    c_range = [0.001,  1, 100]
    degree_range = [ 2, 10, 100]
    gamma_range = [0.1 ]

    for C in c_range:
        for degree in degree_range:
            for gamma in gamma_range:
                clf, accuracy, precision, recall, f1 = evaluate(C, degree, gamma)

                # append right away so finished rows survive an interrupted run
                with open(results_path, 'a') as f:
                    f.write('{}, {}, {}, {}, {}, {}, {}\n'.format(C, degree, gamma, accuracy, precision, recall, f1))

                if f1 > optimal_f1:
                    optimal_f1 = f1
                    optimal_C = C
                    optimal_degree = degree
                    optimal_gamma = gamma
                    optimal_classifier = clf
                    optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C, 'degree': optimal_degree, 'gamma': optimal_gamma}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics



def grid_search_sigmoid(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C and gamma for a sigmoid-kernel SVC.

    Every grid point is trained on the training split and scored on the
    validation split; one row per grid point is written to
    `binary_svm/results_sigmoid.csv`. A baseline (C=1, gamma=0.1) is the
    initial optimum and is only replaced by a strictly better micro-averaged
    F1.

    Returns `(optimal_classifier, optimal_hyperparameters, optimal_metrics)`.
    """
    results_path = 'binary_svm/results_sigmoid.csv'
    # the results folder may not exist yet
    os.makedirs(os.path.dirname(results_path), exist_ok=True)

    #create csv file for the results name: results_sigmoid.csv
    with open(results_path, 'w') as f:
        f.write('C, gamma, accuracy, precision, recall, f1\n')

    def evaluate(C, gamma):
        clf = SVC(kernel='sigmoid', C=C, gamma=gamma)
        clf.fit(X_train, Y_train)
        Y_pred = clf.predict(X_valid)
        accuracy = clf.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, Y_pred, average='micro')
        recall = recall_score(Y_valid, Y_pred, average='micro')
        f1 = f1_score(Y_valid, Y_pred, average='micro')
        return clf, accuracy, precision, recall, f1

    #SVM with C = 1 and gamma = 0.1
    optimal_C, optimal_gamma = 1, 0.1
    optimal_classifier, optimal_accuracy, _, _, optimal_f1 = evaluate(optimal_C, optimal_gamma)

    c_range = [0.001, 1, 100]
    gamma_range = [ 0.01, 10, 1000]

    for C in c_range:
        for gamma in gamma_range:
            clf, accuracy, precision, recall, f1 = evaluate(C, gamma)

            # append right away so finished rows survive an interrupted run
            with open(results_path, 'a') as f:
                f.write('{}, {}, {}, {}, {}, {}\n'.format(C, gamma, accuracy, precision, recall, f1))

            if f1 > optimal_f1:
                optimal_f1 = f1
                optimal_C = C
                optimal_gamma = gamma
                optimal_classifier = clf
                optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C, 'gamma': optimal_gamma}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics

def grid_search_gaussian(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C and gamma for an RBF-kernel SVC.

    Every grid point is trained on the training split and scored on the
    validation split; one row per grid point is written to
    `binary_svm/results_gaussian.csv`. A baseline (C=1, gamma=0.1) is the
    initial optimum and is only replaced by a strictly better micro-averaged
    F1.

    Returns `(optimal_classifier, optimal_hyperparameters, optimal_metrics)`.
    """
    results_path = 'binary_svm/results_gaussian.csv'
    # the results folder may not exist yet
    os.makedirs(os.path.dirname(results_path), exist_ok=True)

    #create csv file for the results name: results_gaussian.csv
    with open(results_path, 'w') as f:
        f.write('C, gamma, accuracy, precision, recall, f1\n')

    def evaluate(C, gamma):
        clf = SVC(kernel='rbf', C=C, gamma=gamma)
        clf.fit(X_train, Y_train)
        Y_pred = clf.predict(X_valid)
        accuracy = clf.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, Y_pred, average='micro')
        recall = recall_score(Y_valid, Y_pred, average='micro')
        f1 = f1_score(Y_valid, Y_pred, average='micro')
        return clf, accuracy, precision, recall, f1

    #SVM with C = 1 and gamma = 0.1
    optimal_C, optimal_gamma = 1, 0.1
    optimal_classifier, optimal_accuracy, _, _, optimal_f1 = evaluate(optimal_C, optimal_gamma)

    c_range = [0.001, 1, 100]
    gamma_range = [ 0.01, 10, 1000]

    for C in c_range:
        for gamma in gamma_range:
            clf, accuracy, precision, recall, f1 = evaluate(C, gamma)

            # append right away so finished rows survive an interrupted run
            with open(results_path, 'a') as f:
                f.write('{}, {}, {}, {}, {}, {}\n'.format(C, gamma, accuracy, precision, recall, f1))

            if f1 > optimal_f1:
                optimal_f1 = f1
                optimal_C = C
                optimal_gamma = gamma
                optimal_classifier = clf
                optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C, 'gamma': optimal_gamma}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics

def grid_search_linear(X_train, Y_train, X_valid, Y_valid):
    """Search the regularisation strength ``C`` of a linear-kernel SVC.

    A baseline model with ``C = 1`` is fitted first.  Every candidate in the
    grid is then fitted on the training split and scored on the validation
    split, and one row per candidate is appended to
    ``binary_svm/results_linear.csv``.  A candidate replaces the current best
    only when its micro-averaged F1 is strictly higher.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels passed to ``SVC.fit``.
    X_valid, Y_valid
        Validation features and labels used for every metric.

    Returns
    -------
    tuple
        ``(optimal_classifier, optimal_hyperparameters, optimal_metrics)``
        where the hyperparameters dict has the key ``'C'`` and the metrics
        dict has the keys ``'accuracy'`` and ``'f1'``.

    Raises
    ------
    FileNotFoundError
        If the ``binary_svm`` directory does not exist when the CSV is opened.
    """
    results_path = 'binary_svm/results_linear.csv'

    # Start a fresh results file.  The ", " separators are kept on purpose:
    # the readers in this file look columns up by names such as ' accuracy'.
    with open(results_path, 'w') as f:
        f.write('C, accuracy, precision, recall, f1\n')

    # Baseline (C = 1) that the grid candidates have to beat.
    clf = SVC(kernel='linear', C=1)
    clf.fit(X_train, Y_train)
    Y_pred = clf.predict(X_valid)
    optimal_f1 = f1_score(Y_valid, Y_pred, average='micro')
    optimal_C = 1
    optimal_classifier = clf
    optimal_accuracy = clf.score(X_valid, Y_valid)

    c_range = [0.001, 1, 100]

    for C in c_range:
        clf = SVC(kernel='linear', C=C)
        clf.fit(X_train, Y_train)
        Y_pred = clf.predict(X_valid)

        f1 = f1_score(Y_valid, Y_pred, average='micro')
        accuracy = clf.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, Y_pred, average='micro')
        recall = recall_score(Y_valid, Y_pred, average='micro')

        # Append immediately so partial results survive an interrupted run.
        with open(results_path, 'a') as f:
            f.write('{}, {}, {}, {}, {}\n'.format(C, accuracy, precision, recall, f1))

        # Strict comparison: on a tie the earlier model is kept.
        if f1 > optimal_f1:
            optimal_f1 = f1
            optimal_C = C
            optimal_classifier = clf
            optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics



#this function plots colormap where x and y coordinates of colourmap are the hyperparameters and colourmap is the accuracy of the classifier
def plot_hyperparameter_accuracy(csv_file, midpoint, title, name, hyperparameters_to_plot=('C', ' gamma')):
    """Draw a heatmap of validation accuracy over two hyperparameters.

    The CSV is expected to have the layout written by the grid-search
    functions in this file: a header row whose column names keep the space
    that follows each comma (for example ``' accuracy'``), then one row per
    fitted model.

    Parameters
    ----------
    csv_file : str
        Path of the results CSV to read.
    midpoint : float
        Accuracy value mapped to the centre of the colormap.
    title, name : str
        Kernel description and dataset name appended to the figure title.
    hyperparameters_to_plot : sequence of two str
        CSV column names shown on the y axis and on the x axis, in that order.

    Raises
    ------
    KeyError
        If ``' accuracy'`` or a requested column is missing from the header.
    """
    print()
    hyperparameter_1 = hyperparameters_to_plot[0]
    hyperparameter_2 = hyperparameters_to_plot[1]

    accuracy_list = []
    c_list = []
    gamma_list = []
    with open(csv_file, 'r', newline='') as csvfile:
        # DictReader consumes the header row itself; every `row` is data.
        for row in csv.DictReader(csvfile):
            accuracy_list.append(row[' accuracy'])
            c_list.append(row[hyperparameter_1])
            gamma_list.append(row[hyperparameter_2])
    # Echo the raw (string) hyperparameter columns that were read.
    print(c_list)
    print(gamma_list)

    accuracy_array = np.array([float(i) for i in accuracy_list])
    # np.unique returns the sorted distinct values (the axis ticks) and, for
    # every CSV row, the index of its value on that axis.
    c_array, c_index = np.unique(np.array([float(i) for i in c_list]), return_inverse=True)
    gamma_array, gamma_index = np.unique(np.array([float(i) for i in gamma_list]), return_inverse=True)

    # Place every accuracy by its coordinates instead of reshaping the column:
    # a plain reshape is only correct when the rows are stored in ascending,
    # C-major order with exactly one row per cell.  Cells without data stay
    # NaN; when several rows share a cell (e.g. a third hyperparameter was
    # varied) the best accuracy is shown.
    accuracy_grid = np.full((len(c_array), len(gamma_array)), np.nan)
    for row_idx, col_idx, accuracy in zip(c_index, gamma_index, accuracy_array):
        current = accuracy_grid[row_idx, col_idx]
        if np.isnan(current) or accuracy > current:
            accuracy_grid[row_idx, col_idx] = accuracy

    # MidpointNormalize (defined in this file) interpolates over
    # [vmin, midpoint, vmax] with np.interp, which needs increasing anchors.
    # Keep the usual lower bound of 0.2 unless the midpoint is not above it.
    vmin = 0.2 if midpoint > 0.2 else 0.0

    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(left=0.2, right=0.95, bottom=0.15, top=0.95)
    plt.imshow(
        accuracy_grid,
        interpolation="nearest",
        cmap=plt.cm.hot,
        norm=MidpointNormalize(vmin=vmin, midpoint=midpoint, vmax=1)
    )
    plt.xlabel(hyperparameter_2)
    plt.ylabel(hyperparameter_1)
    plt.colorbar()
    plt.xticks(np.arange(len(gamma_array)), gamma_array, rotation=45)
    plt.yticks(np.arange(len(c_array)), c_array)
    plt.title("Validation accuracy " + title + " " + name)
    plt.show()


def extract_accuracy(csv_file):
    """Read the accuracy-versus-C series from a grid-search results CSV.

    The CSV must have the columns ``'C'`` and ``' accuracy'`` (with the
    leading space that the grid-search writers in this file produce).

    Parameters
    ----------
    csv_file : str
        Path of the results CSV to read.

    Returns
    -------
    tuple of numpy.ndarray
        ``(accuracy_array, c_array)``.  ``c_array`` holds the distinct C
        values in ascending order and ``accuracy_array[i]`` is the accuracy of
        the first CSV row whose C equals ``c_array[i]``, so both arrays always
        have the same length and can be plotted against each other.

    Raises
    ------
    KeyError
        If one of the two columns is missing from the header.
    """
    accuracy_list = []
    c_list = []

    with open(csv_file, 'r', newline='') as csvfile:
        # DictReader consumes the header row itself; every `row` is data.
        for row in csv.DictReader(csvfile):
            accuracy_list.append(float(row[' accuracy']))
            c_list.append(float(row['C']))

    accuracy_array = np.array(accuracy_list)

    # Sort/deduplicate C and reorder the accuracies with the same indices;
    # sorting C alone would pair each accuracy with the wrong C whenever the
    # file is not already in ascending order or contains repeated C values.
    c_array, first_rows = np.unique(np.array(c_list), return_index=True)

    return accuracy_array[first_rows], c_array




#plotting the results
def plot_results():
    """Show every result figure for the PneumoniaMNIST kernel searches.

    Three accuracy heatmaps are drawn from the CSV files in ``binary_svm``
    (sigmoid, Gaussian, polynomial), followed by a line plot of accuracy
    against C for the linear kernel.
    """
    dataset_name = "PneumoniaMNIST"

    # Heatmaps over the default (C, gamma) columns: sigmoid first, then Gaussian.
    gamma_heatmaps = (
        ('binary_svm/results_sigmoid.csv', 0.72, 'Sigmoid Kernel'),
        ('binary_svm/results_gaussian.csv', 0.74, 'Gaussian Kernel'),
    )
    for results_csv, colour_midpoint, kernel_title in gamma_heatmaps:
        plot_hyperparameter_accuracy(results_csv, colour_midpoint, kernel_title, dataset_name)

    # The polynomial heatmap is drawn over (C, degree) instead.
    plot_hyperparameter_accuracy(
        'binary_svm/results_poly_best.csv', 0.7, 'Polynomial Kernel', dataset_name, ['C', ' degree'])

    # Linear kernel: C is the only hyperparameter, so a line plot is enough.
    linear_accuracy, linear_c = extract_accuracy('binary_svm/results_linear.csv')
    plt.plot(linear_c, linear_accuracy)
    plt.xlabel('C')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs C for Linear Kernel ' + dataset_name)
    plt.show()

    # #plotting the graph of polynomial kernel
    #
    # accuracy_array, c_array = extract_accuracy('binary_svm/results_poly.csv')
    # # plot the accuracy vs c graph
    # plt.plot(c_array, accuracy_array)
    # plt.xlabel('degree')
    # plt.ylabel('Accuracy')
    # plt.title('Accuracy vs degree for Linear Kernel ' + name)
    # # show
    # plt.show()




#plotting AUC curve of the best classifier
def plot_AUC(classifier, X_test, Y_test):
    """Plot the ROC curve of a fitted binary classifier on the test split.

    Parameters
    ----------
    classifier
        Fitted estimator exposing ``predict_proba`` or ``decision_function``.
    X_test, Y_test
        Test features and their true binary labels.
    """
    name = "PneumoniaMNIST"

    # An SVC built without ``probability=True`` (as the grid searches in this
    # file do) does not expose predict_proba, so fall back to the signed
    # distance to the separating hyperplane; roc_curve only needs a score
    # that ranks the positive class higher.
    if hasattr(classifier, 'predict_proba'):
        Y_score = classifier.predict_proba(X_test)[:, 1]
    else:
        Y_score = classifier.decision_function(X_test)

    fpr, tpr, _ = roc_curve(Y_test, Y_score)

    plt.plot(fpr, tpr)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for ' + name)
    plt.show()


def SVM(path):
    """Run all four kernel grid searches on the binary dataset and print the winners.

    The data splits come from ``data_preprocessing(path)``.  The polynomial,
    sigmoid, Gaussian and linear searches are run in that order on the
    training/validation splits; the test split is loaded but not used here.
    """
    X_train, Y_train, X_test, Y_test, X_valid, Y_valid = data_preprocessing(path)

    def _report(hyperparameters, metrics, hyperparameter_lines):
        # One line per selected hyperparameter, then the two headline metrics.
        for template, key in hyperparameter_lines:
            print(template.format(hyperparameters[key]))
        print("Optimal f1: {}".format(metrics['f1']))
        print("Optimal accuracy: {}".format(metrics['accuracy']))

    c_line = ("Optimal C: {}", 'C')
    degree_line = ("Optimal degree: {}", 'degree')
    gamma_line = ("Optimal gamma: {}", 'gamma')

    # Polynomial kernel
    print("Grid Search for Polynomial Kernel")
    _, best_params, best_metrics = grid_search_poly(X_train, Y_train, X_valid, Y_valid)
    _report(best_params, best_metrics, (c_line, degree_line, gamma_line))

    # Sigmoid kernel
    print("Grid Search for Sigmoid Kernel")
    _, best_params, best_metrics = grid_search_sigmoid(X_train, Y_train, X_valid, Y_valid)
    _report(best_params, best_metrics, (c_line, gamma_line))

    # Gaussian (RBF) kernel
    print("Grid Search for Gaussian Kernel")
    _, best_params, best_metrics = grid_search_gaussian(X_train, Y_train, X_valid, Y_valid)
    _report(best_params, best_metrics, (c_line, gamma_line))

    # Linear kernel
    print("Grid Search for Linear Kernel")
    _, best_params, best_metrics = grid_search_linear(X_train, Y_train, X_valid, Y_valid)
    _report(best_params, best_metrics, (c_line,))


    # plotting
    # plotting the colormap of polynomial kernel
    # plot_hyperparameter_accuracy('binary_svm/results_poly.csv', 0.9, 'Polynomial Kernel', "PneumoniaMNIST", ['C', ' degree'])

    # # plotting the graph of linear kernel
    # accuracy_array, c_array = extract_accuracy('binary_svm/results_linear.csv')
    #
    # #print accuracy_array and c_array
    # print(accuracy_array)
    # print(c_array)
    #
    # # plot the accuracy vs c graph
    # plt.plot(c_array, accuracy_array)
    # plt.xlabel('C')
    # plt.ylabel('Accuracy')
    # plt.title('Accuracy vs C for Linear Kernel ' + "PneumoniaMNIST")
    # # show
    # plt.show()













#main function
def main():
    """Run the binary-SVM grid searches and then plot their results.

    The ``binary_svm`` output directory is created first because the grid
    searches write their CSV files into it.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs('binary_svm', exist_ok=True)

    # Folder that holds the dataset; alternatives kept for local runs:
    path = 'drive/MyDrive/Colab Notebooks/PRNN_A1_DATA'
    # path = os.getcwd()
    # path  = 'pneumoniamnist.npz'

    SVM(path)
    plot_results()





# Run the binary-SVM pipeline only when this code is executed as the main
# module (not when it is imported).
if __name__ == '__main__':
    main()


In [None]:
#SVM multi class bloodmnist
# we are implementing a simple SVM classifier for the  BloodMNIST dataset with 8 classes
# the dataset is at the folder: bloodmnist.npz in the same directory as this file


#global variables
# NOTE(review): neither name is referenced by the code visible in this cell;
# they presumably mirror the SVC options of the same names — confirm before
# relying on them.
decision_function_shape = 'ovr'
break_ties = False



#importing libraries
import csv
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_curve
from sklearn.preprocessing import StandardScaler
#importing svm

from sklearn.svm import SVC

import matplotlib.pyplot as plt
from matplotlib.colors import Normalize


from sklearn import svm
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score


class MidpointNormalize(Normalize):
    """Colour normalisation that pins a chosen value to the middle of the colormap.

    Data values are mapped piecewise-linearly: ``vmin`` to 0, ``midpoint`` to
    0.5 and ``vmax`` to 1.
    """

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        # Store the pivot, then let Normalize handle vmin/vmax/clip.
        self.midpoint = midpoint
        super().__init__(vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # `clip` is accepted for signature compatibility but is not used.
        anchors = [self.vmin, self.midpoint, self.vmax]
        targets = [0, 0.5, 1]
        interpolated = np.interp(value, anchors, targets)
        return np.ma.masked_array(interpolated)



def data_preprocessing(path):
    """Load ``bloodmnist.npz`` from ``path`` and return standardised splits.

    Images are flattened to one feature vector per sample and converted to
    float; labels are flattened to 1-D integer arrays and left unchanged
    otherwise.  The scaler is fitted on the training images only and the same
    mean/variance is then applied to the test and validation images.

    Parameters
    ----------
    path : str
        Directory that contains ``bloodmnist.npz``.

    Returns
    -------
    tuple
        ``(X_train, Y_train, X_test, Y_test, X_valid, Y_valid)``.
    """
    data_path = path + '/bloodmnist.npz'

    def _flatten_images(images):
        # One row per sample; the sizes are taken from the array itself
        # instead of being hard-coded.
        return np.reshape(images, (images.shape[0], -1)).astype(float)

    def _flatten_labels(labels):
        return np.reshape(labels, (-1,)).astype(int)

    # The context manager closes the underlying .npz file once the arrays
    # have been read into memory.
    with np.load(data_path) as data:
        train1_data = _flatten_images(data['train_images'])
        train1_labels = _flatten_labels(data['train_labels'])

        valid1_data = _flatten_images(data['val_images'])
        valid1_labels = _flatten_labels(data['val_labels'])

        test1_data = _flatten_images(data['test_images'])
        test1_labels = _flatten_labels(data['test_labels'])

    # Feature scaling: fit on the training split only.  Re-fitting on the
    # test/validation splits would scale each split with different statistics
    # and leak information from the evaluation data.
    sc = StandardScaler()
    X_train = sc.fit_transform(train1_data)
    X_test = sc.transform(test1_data)
    X_valid = sc.transform(valid1_data)

    Y_train = train1_labels
    Y_test = test1_labels
    Y_valid = valid1_labels

    return X_train, Y_train, X_test, Y_test, X_valid, Y_valid


# def validation_dataset(path):
#     data = np.load(path)
#     # print(data.files)
#     # Reshape images and save in numpy arrays





#  Different grid search methods based on types of kernels

def grid_search_poly(X_train, Y_train, X_valid, Y_valid):
    """Grid-search ``C`` and ``degree`` for a polynomial-kernel SVC.

    A baseline model (``C = 1``, ``degree = 2``) is fitted first.  Each grid
    candidate is then fitted on the training split, scored on the validation
    split and logged as one row of ``multi_svm/results_poly.csv``.  A candidate
    becomes the new best only when its micro-averaged F1 is strictly higher.

    NOTE(review): the ``gamma`` value of the grid is written to the CSV but is
    not passed to ``SVC``, so the models are fitted with SVC's default gamma —
    confirm whether that is intended.

    Returns
    -------
    tuple
        ``(classifier, {'C', 'degree'}, {'accuracy', 'f1'})`` for the best model.
    """
    log_path = 'multi_svm/results_poly.csv'

    # Truncate any previous log and write the header row.
    with open(log_path, 'w') as log:
        log.write('C, degree, gamma, accuracy, precision, recall, f1\n')

    # Baseline that the candidates have to beat.
    baseline = SVC(kernel='poly', C=1, degree=2)
    baseline.fit(X_train, Y_train)
    baseline_predictions = baseline.predict(X_valid)
    best_f1 = f1_score(Y_valid, baseline_predictions, average='micro')
    best_c, best_degree = 1, 2
    best_model = baseline
    best_accuracy = baseline.score(X_valid, Y_valid)

    c_values = [0.001, 1, 100]
    degree_values = [2, 10, 100]
    gamma_values = [0.1]

    # Same visiting order as three nested loops: C outermost, gamma innermost.
    candidates = [
        (c_value, degree_value, gamma_value)
        for c_value in c_values
        for degree_value in degree_values
        for gamma_value in gamma_values
    ]

    for c_value, degree_value, gamma_value in candidates:
        model = SVC(kernel='poly', C=c_value, degree=degree_value)
        model.fit(X_train, Y_train)
        predictions = model.predict(X_valid)

        candidate_f1 = f1_score(Y_valid, predictions, average='micro')
        candidate_accuracy = model.score(X_valid, Y_valid)
        candidate_precision = precision_score(Y_valid, predictions, average='micro')
        candidate_recall = recall_score(Y_valid, predictions, average='micro')

        with open(log_path, 'a') as log:
            log.write('{}, {}, {}, {}, {}, {}, {}\n'.format(
                c_value, degree_value, gamma_value,
                candidate_accuracy, candidate_precision, candidate_recall, candidate_f1))

        # Ties keep the earlier model.
        if not (candidate_f1 > best_f1):
            continue
        best_f1 = candidate_f1
        best_c = c_value
        best_degree = degree_value
        best_model = model
        best_accuracy = candidate_accuracy

    return (
        best_model,
        {'C': best_c, 'degree': best_degree},
        {'accuracy': best_accuracy, 'f1': best_f1},
    )



def grid_search_sigmoid(X_train, Y_train, X_valid, Y_valid):
    """Grid-search ``C`` and ``gamma`` for a sigmoid-kernel SVC.

    A baseline model (``C = 1``, ``gamma = 0.1``) is fitted first.  Each grid
    candidate is then fitted on the training split, scored on the validation
    split and logged as one row of ``multi_svm/results_sigmoid.csv``.  A
    candidate becomes the new best only when its micro-averaged F1 is
    strictly higher.

    Returns
    -------
    tuple
        ``(classifier, {'C', 'gamma'}, {'accuracy', 'f1'})`` for the best model.
    """
    log_path = 'multi_svm/results_sigmoid.csv'

    # Truncate any previous log and write the header row.
    with open(log_path, 'w') as log:
        log.write('C, gamma, accuracy, precision, recall, f1\n')

    # Baseline that the candidates have to beat.
    baseline = SVC(kernel='sigmoid', C=1, gamma=0.1)
    baseline.fit(X_train, Y_train)
    baseline_predictions = baseline.predict(X_valid)
    best_f1 = f1_score(Y_valid, baseline_predictions, average='micro')
    best_c, best_gamma = 1, 0.1
    best_model = baseline
    best_accuracy = baseline.score(X_valid, Y_valid)

    c_values = [0.001, 1, 100]
    gamma_values = [0.01, 10, 1000]

    # Same visiting order as two nested loops with C outermost.
    candidates = [(c_value, gamma_value) for c_value in c_values for gamma_value in gamma_values]

    for c_value, gamma_value in candidates:
        model = SVC(kernel='sigmoid', C=c_value, gamma=gamma_value)
        model.fit(X_train, Y_train)
        predictions = model.predict(X_valid)

        candidate_f1 = f1_score(Y_valid, predictions, average='micro')
        candidate_accuracy = model.score(X_valid, Y_valid)
        candidate_precision = precision_score(Y_valid, predictions, average='micro')
        candidate_recall = recall_score(Y_valid, predictions, average='micro')

        with open(log_path, 'a') as log:
            log.write('{}, {}, {}, {}, {}, {}\n'.format(
                c_value, gamma_value,
                candidate_accuracy, candidate_precision, candidate_recall, candidate_f1))

        # Ties keep the earlier model.
        if not (candidate_f1 > best_f1):
            continue
        best_f1 = candidate_f1
        best_c = c_value
        best_gamma = gamma_value
        best_model = model
        best_accuracy = candidate_accuracy

    return (
        best_model,
        {'C': best_c, 'gamma': best_gamma},
        {'accuracy': best_accuracy, 'f1': best_f1},
    )

def grid_search_gaussian(X_train, Y_train, X_valid, Y_valid):
    """Grid-search ``C`` and ``gamma`` for a Gaussian (RBF) kernel SVC.

    A baseline model (``C = 1``, ``gamma = 0.1``) is fitted first.  Each grid
    candidate is then fitted on the training split, scored on the validation
    split and logged as one row of ``multi_svm/results_gaussian.csv``.  A
    candidate becomes the new best only when its micro-averaged F1 is
    strictly higher.

    Returns
    -------
    tuple
        ``(classifier, {'C', 'gamma'}, {'accuracy', 'f1'})`` for the best model.
    """
    log_path = 'multi_svm/results_gaussian.csv'

    def _validation_scores(model):
        # Metrics in the order: f1, accuracy, precision, recall.
        predictions = model.predict(X_valid)
        micro_f1 = f1_score(Y_valid, predictions, average='micro')
        accuracy_value = model.score(X_valid, Y_valid)
        precision_value = precision_score(Y_valid, predictions, average='micro')
        recall_value = recall_score(Y_valid, predictions, average='micro')
        return micro_f1, accuracy_value, precision_value, recall_value

    # Truncate any previous log and write the header row.
    with open(log_path, 'w') as log:
        log.write('C, gamma, accuracy, precision, recall, f1\n')

    # Baseline that the candidates have to beat.
    best_model = SVC(kernel='rbf', C=1, gamma=0.1)
    best_model.fit(X_train, Y_train)
    best_f1 = f1_score(Y_valid, best_model.predict(X_valid), average='micro')
    best_c = 1
    best_gamma = 0.1
    best_accuracy = best_model.score(X_valid, Y_valid)

    for c_value in [0.001, 1, 100]:
        for gamma_value in [0.01, 10, 1000]:
            model = SVC(kernel='rbf', C=c_value, gamma=gamma_value)
            model.fit(X_train, Y_train)
            micro_f1, accuracy_value, precision_value, recall_value = _validation_scores(model)

            with open(log_path, 'a') as log:
                log.write('{}, {}, {}, {}, {}, {}\n'.format(
                    c_value, gamma_value, accuracy_value, precision_value, recall_value, micro_f1))

            # Ties keep the earlier model.
            if micro_f1 > best_f1:
                best_model = model
                best_f1, best_accuracy = micro_f1, accuracy_value
                best_c, best_gamma = c_value, gamma_value

    chosen = {'C': best_c, 'gamma': best_gamma}
    scores = {'accuracy': best_accuracy, 'f1': best_f1}
    return best_model, chosen, scores

def grid_search_linear(X_train, Y_train, X_valid, Y_valid):
    """Search the regularisation strength ``C`` of a linear-kernel SVC.

    A baseline model with ``C = 1`` is fitted first.  Every candidate in the
    grid is then fitted on the training split and scored on the validation
    split, and one row per candidate is appended to
    ``multi_svm/results_linear.csv``.  A candidate replaces the current best
    only when its micro-averaged F1 is strictly higher.

    Parameters
    ----------
    X_train, Y_train
        Training features and labels passed to ``SVC.fit``.
    X_valid, Y_valid
        Validation features and labels used for every metric.

    Returns
    -------
    tuple
        ``(optimal_classifier, optimal_hyperparameters, optimal_metrics)``
        where the hyperparameters dict has the key ``'C'`` and the metrics
        dict has the keys ``'accuracy'`` and ``'f1'``.

    Raises
    ------
    FileNotFoundError
        If the ``multi_svm`` directory does not exist when the CSV is opened.
    """
    results_path = 'multi_svm/results_linear.csv'

    # Start a fresh results file.  The ", " separators are kept on purpose:
    # the readers in this file look columns up by names such as ' accuracy'.
    with open(results_path, 'w') as f:
        f.write('C, accuracy, precision, recall, f1\n')

    # Baseline (C = 1) that the grid candidates have to beat.
    clf = SVC(kernel='linear', C=1)
    clf.fit(X_train, Y_train)
    Y_pred = clf.predict(X_valid)
    optimal_f1 = f1_score(Y_valid, Y_pred, average='micro')
    optimal_C = 1
    optimal_classifier = clf
    optimal_accuracy = clf.score(X_valid, Y_valid)

    c_range = [0.001, 1, 100]

    for C in c_range:
        clf = SVC(kernel='linear', C=C)
        clf.fit(X_train, Y_train)
        Y_pred = clf.predict(X_valid)

        f1 = f1_score(Y_valid, Y_pred, average='micro')
        accuracy = clf.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, Y_pred, average='micro')
        recall = recall_score(Y_valid, Y_pred, average='micro')

        # Append immediately so partial results survive an interrupted run.
        with open(results_path, 'a') as f:
            f.write('{}, {}, {}, {}, {}\n'.format(C, accuracy, precision, recall, f1))

        # Strict comparison: on a tie the earlier model is kept.
        if f1 > optimal_f1:
            optimal_f1 = f1
            optimal_C = C
            optimal_classifier = clf
            optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics



#this function plots colormap where x and y coordinates of colourmap are the hyperparameters and colourmap is the accuracy of the classifier
def plot_hyperparameter_accuracy(csv_file, midpoint, title, name, hyperparameters_to_plot=('C', ' gamma')):
    """Draw a heatmap of validation accuracy over two hyperparameters.

    The CSV is expected to have the layout written by the grid-search
    functions in this file: a header row whose column names keep the space
    that follows each comma (for example ``' accuracy'``), then one row per
    fitted model.

    Parameters
    ----------
    csv_file : str
        Path of the results CSV to read.
    midpoint : float
        Accuracy value mapped to the centre of the colormap.
    title, name : str
        Kernel description and dataset name appended to the figure title.
    hyperparameters_to_plot : sequence of two str
        CSV column names shown on the y axis and on the x axis, in that order.

    Raises
    ------
    KeyError
        If ``' accuracy'`` or a requested column is missing from the header.
    """
    print()
    hyperparameter_1 = hyperparameters_to_plot[0]
    hyperparameter_2 = hyperparameters_to_plot[1]

    accuracy_list = []
    c_list = []
    gamma_list = []
    with open(csv_file, 'r', newline='') as csvfile:
        # DictReader consumes the header row itself; every `row` is data.
        for row in csv.DictReader(csvfile):
            accuracy_list.append(row[' accuracy'])
            c_list.append(row[hyperparameter_1])
            gamma_list.append(row[hyperparameter_2])
    # Echo the raw (string) hyperparameter columns that were read.
    print(c_list)
    print(gamma_list)

    accuracy_array = np.array([float(i) for i in accuracy_list])
    # np.unique returns the sorted distinct values (the axis ticks) and, for
    # every CSV row, the index of its value on that axis.
    c_array, c_index = np.unique(np.array([float(i) for i in c_list]), return_inverse=True)
    gamma_array, gamma_index = np.unique(np.array([float(i) for i in gamma_list]), return_inverse=True)

    # Place every accuracy by its coordinates instead of reshaping the column:
    # a plain reshape is only correct when the rows are stored in ascending,
    # C-major order with exactly one row per cell.  Cells without data stay
    # NaN; when several rows share a cell (e.g. a third hyperparameter was
    # varied) the best accuracy is shown.
    accuracy_grid = np.full((len(c_array), len(gamma_array)), np.nan)
    for row_idx, col_idx, accuracy in zip(c_index, gamma_index, accuracy_array):
        current = accuracy_grid[row_idx, col_idx]
        if np.isnan(current) or accuracy > current:
            accuracy_grid[row_idx, col_idx] = accuracy

    # MidpointNormalize interpolates over [vmin, midpoint, vmax] with
    # np.interp, which needs increasing anchors.  Keep the usual lower bound
    # of 0.2 unless the midpoint is not above it.
    vmin = 0.2 if midpoint > 0.2 else 0.0

    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(left=0.2, right=0.95, bottom=0.15, top=0.95)
    plt.imshow(
        accuracy_grid,
        interpolation="nearest",
        cmap=plt.cm.hot,
        norm=MidpointNormalize(vmin=vmin, midpoint=midpoint, vmax=1)
    )
    plt.xlabel(hyperparameter_2)
    plt.ylabel(hyperparameter_1)
    plt.colorbar()
    plt.xticks(np.arange(len(gamma_array)), gamma_array, rotation=45)
    plt.yticks(np.arange(len(c_array)), c_array)
    plt.title("Validation accuracy " + title + " " + name)
    plt.show()


def extract_accuracy(csv_file):
    """Read the accuracy-versus-C series from a grid-search results CSV.

    The CSV must have the columns ``'C'`` and ``' accuracy'`` (with the
    leading space that the grid-search writers in this file produce).

    Parameters
    ----------
    csv_file : str
        Path of the results CSV to read.

    Returns
    -------
    tuple of numpy.ndarray
        ``(accuracy_array, c_array)``.  ``c_array`` holds the distinct C
        values in ascending order and ``accuracy_array[i]`` is the accuracy of
        the first CSV row whose C equals ``c_array[i]``, so both arrays always
        have the same length and can be plotted against each other.

    Raises
    ------
    KeyError
        If one of the two columns is missing from the header.
    """
    accuracy_list = []
    c_list = []

    with open(csv_file, 'r', newline='') as csvfile:
        # DictReader consumes the header row itself; every `row` is data.
        for row in csv.DictReader(csvfile):
            accuracy_list.append(float(row[' accuracy']))
            c_list.append(float(row['C']))

    accuracy_array = np.array(accuracy_list)

    # Sort/deduplicate C and reorder the accuracies with the same indices;
    # sorting C alone would pair each accuracy with the wrong C whenever the
    # file is not already in ascending order or contains repeated C values.
    c_array, first_rows = np.unique(np.array(c_list), return_index=True)

    return accuracy_array[first_rows], c_array




#plotting the results
def plot_results():
    """Show the result figures for the BloodMNIST kernel searches.

    Two accuracy heatmaps are drawn from the CSV files in ``multi_svm``
    (sigmoid, then Gaussian), followed by a line plot of accuracy against C
    for the linear kernel.  The polynomial heatmap is currently disabled.
    """
    dataset_name = "BloodMNIST"

    # Heatmaps over the default (C, gamma) columns.
    gamma_heatmaps = (
        ('multi_svm/results_sigmoid.csv', 0.4, 'Sigmoid Kernel'),
        ('multi_svm/results_gaussian.csv', 0.18, 'Gaussian Kernel'),
    )
    for results_csv, colour_midpoint, kernel_title in gamma_heatmaps:
        plot_hyperparameter_accuracy(results_csv, colour_midpoint, kernel_title, dataset_name)

    # Linear kernel: C is the only hyperparameter, so a line plot is enough.
    linear_accuracy, linear_c = extract_accuracy('multi_svm/results_linear.csv')
    plt.plot(linear_c, linear_accuracy)
    plt.xlabel('C')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs C for Linear Kernel ' + dataset_name)
    plt.show()




#plotting AUC curve of the best classifier
def plot_AUC(classifier, X_test, Y_test):
    """Plot ROC curves of a fitted classifier on the test split.

    ``roc_curve`` only handles binary targets, so for a multi-class problem
    one one-vs-rest curve is drawn per class; a two-class problem gets a
    single curve for the second entry of ``classifier.classes_``.

    Parameters
    ----------
    classifier
        Fitted estimator exposing ``classes_`` and either ``predict_proba``
        or ``decision_function``.
    X_test, Y_test
        Test features and their true labels.

    Raises
    ------
    ValueError
        If the score matrix does not have one column per class (for example
        an SVC configured with ``decision_function_shape='ovo'``).
    """
    name = "BloodMNIST"
    classes = classifier.classes_
    y_true = np.asarray(Y_test)

    # An SVC built without ``probability=True`` (as the grid searches in this
    # file do) does not expose predict_proba; the decision values rank the
    # samples just as well for a ROC curve.
    if hasattr(classifier, 'predict_proba'):
        scores = np.asarray(classifier.predict_proba(X_test))
    else:
        scores = np.asarray(classifier.decision_function(X_test))

    if len(classes) == 2:
        # Binary case: one curve for the second class.
        positive_scores = scores[:, 1] if scores.ndim == 2 else scores
        curves = [(classes[1], positive_scores)]
    else:
        if scores.ndim != 2 or scores.shape[1] != len(classes):
            raise ValueError(
                'expected one score column per class, got shape {}'.format(scores.shape))
        curves = [(label, scores[:, column]) for column, label in enumerate(classes)]

    for label, class_scores in curves:
        # One-vs-rest: the current class is the positive label.
        fpr, tpr, _ = roc_curve((y_true == label).astype(int), class_scores)
        plt.plot(fpr, tpr, label='class {}'.format(label))

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for ' + name)
    plt.legend()
    plt.show()



def Multi_Class_SVM(path):
    """Run the polynomial-kernel grid search on BloodMNIST and print the winner.

    The data splits come from ``data_preprocessing(path)``; only the training
    and validation splits are used here.  The searches for the other kernels
    are kept below as commented-out code.
    """
    splits = data_preprocessing(path)
    X_train, Y_train, X_test, Y_test, X_valid, Y_valid = splits

    # Polynomial kernel
    print("Grid Search for Polynomial Kernel")
    search_outcome = grid_search_poly(X_train, Y_train, X_valid, Y_valid)
    _, best_params, best_metrics = search_outcome

    # Selected hyperparameters first, then the validation metrics.
    print("Optimal C: {}".format(best_params['C']))
    print("Optimal degree: {}".format(best_params['degree']))
    print("Optimal f1: {}".format(best_metrics['f1']))
    print("Optimal accuracy: {}".format(best_metrics['accuracy']))

    # # calling grid search sigmoid
    # print("Grid Search for Sigmoid Kernel")
    # optimal_classifier, optimal_hyperparameters, optimal_metrics = grid_search_sigmoid(X_train, Y_train, X_valid,
    #                                                                                    Y_valid)
    # # printing the results
    # print("Optimal C: {}".format(optimal_hyperparameters['C']))
    # print("Optimal gamma: {}".format(optimal_hyperparameters['gamma']))
    # print("Optimal f1: {}".format(optimal_metrics['f1']))
    # print("Optimal accuracy: {}".format(optimal_metrics['accuracy']))
    #
    # # calling grid search gaussian
    # print("Grid Search for Gaussian Kernel")
    # optimal_classifier, optimal_hyperparameters, optimal_metrics = grid_search_gaussian(X_train, Y_train, X_valid,
    #                                                                                     Y_valid)
    # # printing the results
    # print("Optimal C: {}".format(optimal_hyperparameters['C']))
    # print("Optimal gamma: {}".format(optimal_hyperparameters['gamma']))
    # print("Optimal f1: {}".format(optimal_metrics['f1']))
    # print("Optimal accuracy: {}".format(optimal_metrics['accuracy']))
    #
    # # # calling grid search linear
    # print("Grid Search for Linear Kernel")
    # optimal_classifier, optimal_hyperparameters, optimal_metrics = grid_search_linear(X_train, Y_train, X_valid,
    #                                                                                   Y_valid)
    # # printing the results
    # print("Optimal C: {}".format(optimal_hyperparameters['C']))
    # print("Optimal f1: {}".format(optimal_metrics['f1']))
    # print("Optimal accuracy: {}".format(optimal_metrics['accuracy']))

    # plotting
    # plotting the colormap of polynomial kernel
    # plot_hyperparameter_accuracy('multi_svm/results_poly.csv', 0.9, 'Polynomial Kernel', "BloodMNIST", ['C', ' degree'])

    # # plotting the graph of linear kernel
    # accuracy_array, c_array = extract_accuracy('multi_svm/results_linear.csv')
    #
    # #print accuracy_array and c_array
    # print(accuracy_array)
    # print(c_array)
    #
    # # plot the accuracy vs c graph
    # plt.plot(c_array, accuracy_array)
    # plt.xlabel('C')
    # plt.ylabel('Accuracy')
    # plt.title('Accuracy vs C for Linear Kernel ' + "BloodMNIST")
    # # show
    # plt.show()


#main function
def main():
    """Entry point of the multi-class SVM cell.

    Ensures the ``multi_svm`` output directory is present and then runs
    ``Multi_Class_SVM`` with the current working directory as its path.
    """
    output_dir = 'multi_svm'
    # create the output directory on first run
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # locations used previously instead of the working directory:
    #   'drive/MyDrive/Colab Notebooks/PRNN_A1_DATA/bloodmnist.npz'
    #   'bloodmnist.npz'
    Multi_Class_SVM(os.getcwd())





# Script entry guard: call main() only when this file/cell is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()




In [None]:
#SVM audio data
#this code will implement SVM for binary audio data




# we are implementing a simple SVM classifier for the TIMIT audio dataset (vowel vs. non-vowel phoneme segments)
# the dataset is at the folder stored in the global variable `path` below

#result folder is: audio_data



#importing libraries
import csv
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_curve
from sklearn.preprocessing import StandardScaler
#importing svm

from sklearn.svm import SVC

import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import IPython.display as ipd  # To play sound in the notebook
from scipy.io import wavfile # for reading wave files as numpy arrays
import wave # opening .wav files
import struct # for padding
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # visualizations
from sklearn.model_selection import train_test_split
import os # operating system
from os.path import join
import time

import os

import matplotlib.pyplot as plt


# Global variables
# PLEASE SAVE THE PATH TO THE TIMIT DATASET IN THE `path` VARIABLE.
# NOTE: `path` is joined to file names with plain `+` below, so it must end
# with a path separator.
path = 'darpa/'
# path = 'drive/MyDrive/Colab Notebooks/PRNN_A1_DATA'
# Passed as `tol` / `max_iter` to the sigmoid-kernel SVC in grid_search_sigmoid.
tolerence = 0.1
max_iteration = 4000


# Sample rate (Hz) every wav file must have; read_audio asserts on it.
RATE = 16000
# Root directory of the audio files and the CSV listings of the two splits.
data_dir = path +"data"
train_csv_file = path+"train_data.csv"
test_csv_file = path+"test_data.csv"

class MidpointNormalize(Normalize):
    """Colour normalisation that pins a chosen midpoint to 0.5.

    Values are piecewise-linearly interpolated so that ``vmin`` maps to 0,
    ``midpoint`` maps to 0.5 and ``vmax`` maps to 1.
    """

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        # store the pivot first, then let Normalize handle vmin/vmax/clip
        self.midpoint = midpoint
        super().__init__(vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # `clip` is accepted for signature compatibility but is not used
        anchors = [self.vmin, self.midpoint, self.vmax]
        targets = [0, 0.5, 1]
        interpolated = np.interp(value, anchors, targets)
        return np.ma.masked_array(interpolated)











def add_padding(data, length) :
    """Force every signal in `data` to exactly `length` samples.

    `data` is an iterable of pairs ``(signal, label)``. Signals longer than
    `length` are cut to their first `length` samples; all others are copied
    into the front of a zero-filled array of size `length`.
    Returns a new list of ``(fixed_signal, label)`` tuples.
    """
    def _fit(signal):
        n_samples = signal.shape[0]
        if n_samples > length:
            # too long: keep only the leading samples
            return signal[:length]
        # short enough: right-pad with zeros
        buffer = np.zeros(length)
        buffer[:n_samples] = signal
        return buffer

    return [(_fit(row[0]), row[1]) for row in data]








def make_data_set(train_csv_file):
    """Build a flat list of ``(samples, label)`` segments from a CSV listing.

    The listing is filtered by `get_good_audio_files`, each row is turned into
    a wav path with `join_dirs`, the audio and the matching ``.PHN`` file are
    read, and `align_data` cuts every recording into labelled segments. The
    per-recording segment lists are concatenated into one list.
    """
    listing = get_good_audio_files(train_csv_file)
    listing['filepath'] = listing.apply(join_dirs, axis=1)
    wav_paths = listing['filepath']

    # read all recordings first, then all transcriptions
    recordings = [read_audio(wav_path) for wav_path in wav_paths]
    phn_paths = [wav_path.replace('.WAV.wav', '') + '.PHN' for wav_path in wav_paths]
    transcriptions = [parse_phn_timestamps(phn_path) for phn_path in phn_paths]

    segments = []
    for samples, entries in zip(recordings, transcriptions):
        segments.extend(align_data(samples, entries))
    return segments


def align_data(data, words, verbose=False):
    """Cut `data` into the labelled slices described by `words`.

    Each entry of `words` is indexable as ``(start, end, label, ...)``. The
    first and last entries are skipped (presumably boundary markers; confirm
    against the .PHN format). Returns a list of ``(data[start:end], label)``.
    Asserts that every slice lies inside `data`.
    """
    if verbose:
        print('len(data)', len(data))
        print('len(words)', len(words))
        print('data', data)
        print('words', words)

    segments = []
    for entry in words[1:-1]:
        if verbose:
            print('tup', entry)
        begin, finish, label = int(entry[0]), int(entry[1]), entry[2]
        assert begin >= 0
        assert finish <= len(data)
        segments.append((data[begin:finish], label))
    return segments


def parse_word_waves(time_aligned_words, audio_data, verbose=False):
    """Run `align_data` on every (recording, transcription) pair.

    `audio_data` and `time_aligned_words` are walked in lockstep; the result
    is one list of segments per recording.
    """
    aligned = []
    for samples, entries in zip(audio_data, time_aligned_words):
        aligned.append(align_data(samples, entries, verbose))
    return aligned




def read_audio(wave_path, verbose=False):
    """Read a wav file and return its sample array.

    Asserts that the file's sample rate equals the module-level `RATE`.
    With `verbose` set, the rate and the sample array shape are printed.
    """
    sample_rate, samples = wavfile.read(wave_path)
    # every file must share the expected sampling rate
    assert sample_rate == RATE
    if verbose:
        print("Sampling (frame) rate = ", sample_rate)
        print("Total samples (frames) = ", samples.shape)
    return samples

def join_dirs(row):
    """Return the audio file path for one row of the CSV listing.

    The path is `data_dir` followed by the row's split, dialect region,
    speaker id and file name columns.
    """
    columns = ('test_or_train', 'dialect_region', 'speaker_id', 'filename')
    components = [row[column] for column in columns]
    return os.path.join(data_dir, *components)

def parse_phn_timestamps(wrd_path, verbose=False):
    """Parse a ``.PHN`` file into a list of tuples.

    Every non-empty line of the file is split on single spaces and extended
    with the speaker id (the parent directory name in `wrd_path`) and the
    sentence id (the file name without ``.PHN``).

    `wrd_path` must use ``/`` as separator and contain at least one directory
    component. Returns a list of tuples of strings, one per non-empty line.
    """
    if verbose:
        print('phn_path', wrd_path)

    path_parts = wrd_path.split('/')
    speaker_id = path_parts[-2]
    sentence_id = path_parts[-1].replace('.PHN', '')

    # the context manager closes the handle even if reading fails
    with open(wrd_path) as wrd_file:
        lines = wrd_file.read().split('\n')

    return [tuple(line.split(' ') + [speaker_id, sentence_id])
            for line in lines if line != '']

def get_good_audio_files(filename):
    """Load a CSV listing and keep only the rows marked as converted audio.

    Returns an independent DataFrame holding the rows whose
    ``is_converted_audio`` column equals True.
    """
    df = pd.read_csv(filename)
    # `== True` is kept on purpose: it also works when the column is not a
    # pure boolean dtype (e.g. contains missing values).
    # `.copy()` returns an independent frame so callers can add columns
    # without writing into a slice of `df`.
    return df[df['is_converted_audio'] == True].copy()

def data_preprocessing():
    """Load the audio segments and return scaled train/test/validation sets.

    Steps:
      1. build the segment lists for the train and test CSV listings,
      2. pad/truncate every segment to 100 samples,
      3. label each segment +1 if its phoneme is in the vowel set, else -1,
      4. hold out 20% of the training data as a validation set,
      5. standardise the features.

    The scaler is fitted on the training split only and then applied to the
    validation and test splits, so all three share the same statistics and
    nothing about the held-out data leaks into the preprocessing.

    Returns ``(X_train, Y_train, X_test, Y_test, X_valid, Y_valid)``.
    """
    print()

    train_data = make_data_set(train_csv_file)
    test_data = make_data_set(test_csv_file)

    # a set gives O(1) membership tests for the per-segment labelling
    vowels = {"iy", "ih", "eh", "ey", "ae", "aa", "aw", "ay", "ah", "ao", "oy", "ow", "uh", "uw", "ux", "er", "ax",
              "ix", "axr", "ax-h"}

    segment_length = 100
    train_data = add_padding(train_data, segment_length)
    test_data = add_padding(test_data, segment_length)

    def _to_arrays(samples):
        # features as float, labels as int in {-1, +1}
        features = np.array([item[0] for item in samples]).astype(float)
        labels = np.array([1 if item[1] in vowels else -1 for item in samples]).astype(int)
        return features, labels

    train_features, train_labels = _to_arrays(train_data)
    test_features, Y_test = _to_arrays(test_data)

    # split the training data into train and validation parts
    train_features, valid_features, Y_train, Y_valid = train_test_split(
        train_features, train_labels, test_size=0.2, random_state=42)

    # feature scaling: fit on train only, reuse the fitted scaler elsewhere
    sc = StandardScaler()
    X_train = sc.fit_transform(train_features)
    X_valid = sc.transform(valid_features)
    X_test = sc.transform(test_features)

    return X_train, Y_train, X_test, Y_test, X_valid, Y_valid


#
    #
    # train1_data ,valid1_data, train1_labels ,valid1_labels  = train_test_split(trainX, trainY,  test_size=0.2, random_state=42)
    #
    # train1_data = train1_data.astype(float)
    # #change datatype of train1_labels to int
    # train1_labels = train1_labels.astype(int)
    #
    # #validation data
    #
    # #change datatype of valid1_data to float
    # valid1_data = valid1_data.astype(float)
    # #change datatype of valid1_labels to int
    # valid1_labels = valid1_labels.astype(int)
    #
    #
    # test1_data = testX
    # test1_labels = testY
    # #change data type to float of test1_data
    # test1_data = test1_data.astype(float)
    # #change datatype of test1_labels to int
    # test1_labels = test1_labels.astype(int)
    #
    # # feature scaling
    # sc = StandardScaler()
    # X_train = sc.fit_transform(train1_data , train1_labels)
    # X_test = sc.fit_transform(test1_data, test1_labels)
    # X_valid = sc.fit_transform(valid1_data, valid1_labels)
    # #change label of train1_labels to -1
    # train1_labels[train1_labels == 0] = -1
    # Y_train = train1_labels
    # #change label of test1_labels to -1
    # test1_labels[test1_labels == 0] = -1
    # Y_test = test1_labels
    # #change label of valid1_labels to -1
    # valid1_labels[valid1_labels == 0] = -1
    # Y_valid = valid1_labels
    #
    # return X_train, Y_train, X_test, Y_test , X_valid, Y_valid



# def validation_dataset(path):
#     data = np.load(path)
#     # print(data.files)
#     # Reshape images and save in numpy arrays




def grid_search_poly(X_train, Y_train,  X_valid, Y_valid ):
    """Grid-search C, degree and gamma for a polynomial-kernel SVC.

    Every candidate is fitted on the training data and scored on the
    validation data; one row per candidate is appended to
    ``audio_data/results_poly.csv``. The candidate with the highest
    micro-averaged F1 wins; ties keep the earlier candidate. A baseline
    model (C=1, degree=2, gamma=0.1) is the starting optimum.

    Returns ``(optimal_classifier, optimal_hyperparameters, optimal_metrics)``
    where the hyperparameter dict has keys 'C', 'degree', 'gamma' and the
    metrics dict has keys 'accuracy' and 'f1'.
    """
    results_file = 'audio_data/results_poly.csv'

    # create csv file for the results
    with open(results_file, 'w') as f:
        f.write('C, degree, gamma, accuracy, precision, recall, f1\n')

    # baseline SVM with C = 1, degree = 2 and gamma = 0.1
    clf = SVC(kernel='poly', C=1, degree=2, gamma=0.1)
    clf.fit(X_train, Y_train)
    Y_pred = clf.predict(X_valid)

    optimal_f1 = f1_score(Y_valid, Y_pred, average='micro')
    optimal_C = 1
    optimal_degree = 2
    optimal_gamma = 0.1
    optimal_classifier = clf
    optimal_accuracy = clf.score(X_valid, Y_valid)

    c_range = np.logspace(-2, 10, 13)
    degree_range = [2, 3, 4, 5, 6, 7, 8, 9, 10, 100, 1000, 10000]
    gamma_range = np.logspace(-9, 3, 13)

    for C in c_range:
        for degree in degree_range:
            for gamma in gamma_range:
                # gamma must be handed to the estimator, otherwise every
                # iteration of this loop trains the same model
                clf = SVC(kernel='poly', C=C, degree=degree, gamma=gamma)
                clf.fit(X_train, Y_train)
                Y_pred = clf.predict(X_valid)

                f1 = f1_score(Y_valid, Y_pred, average='micro')
                accuracy = clf.score(X_valid, Y_valid)
                precision = precision_score(Y_valid, Y_pred, average='micro')
                recall = recall_score(Y_valid, Y_pred, average='micro')

                # append right away so partial results survive an interruption
                with open(results_file, 'a') as f:
                    f.write('{}, {}, {}, {}, {}, {}, {}\n'.format(C, degree, gamma, accuracy, precision, recall, f1))

                if f1 > optimal_f1:
                    optimal_f1 = f1
                    optimal_C = C
                    optimal_degree = degree
                    optimal_gamma = gamma
                    optimal_classifier = clf
                    optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C, 'degree': optimal_degree, 'gamma': optimal_gamma}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics



def grid_search_sigmoid(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C and gamma for a sigmoid-kernel SVC.

    Each candidate is fitted on the training data (with the module-level
    `tolerence` and `max_iteration` as `tol` / `max_iter`) and scored on the
    validation data; one row per candidate is appended to
    ``audio_data/results_sigmoid.csv``. The candidate with the strictly
    highest micro-averaged F1 wins, starting from a C=1, gamma=0.1 baseline.

    Returns ``(optimal_classifier, optimal_hyperparameters, optimal_metrics)``.
    """
    csv_path = 'audio_data/results_sigmoid.csv'
    row_template = '{}, {}, {}, {}, {}, {}\n'

    def _fit(C, gamma):
        # train one candidate and predict the validation labels
        model = SVC(kernel='sigmoid', C=C, gamma=gamma , tol=tolerence, max_iter=max_iteration)
        model.fit(X_train, Y_train)
        return model, model.predict(X_valid)

    # start a fresh results file with the header row
    with open(csv_path, 'w') as out:
        out.write('C, gamma, accuracy, precision, recall, f1\n')

    # baseline: C = 1, gamma = 0.1
    best_model, baseline_pred = _fit(1, 0.1)
    best_f1 = f1_score(Y_valid, baseline_pred, average='micro')
    best_C, best_gamma = 1, 0.1
    best_accuracy = best_model.score(X_valid, Y_valid)

    gamma_range = np.logspace(-9, 3, 13)
    c_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]

    for C in c_range:
        for gamma in gamma_range:
            model, predicted = _fit(C, gamma)
            f1 = f1_score(Y_valid, predicted, average='micro')
            accuracy = model.score(X_valid, Y_valid)
            precision = precision_score(Y_valid, predicted, average='micro')
            recall = recall_score(Y_valid, predicted, average='micro')

            # log this candidate
            with open(csv_path, 'a') as out:
                out.write(row_template.format(C, gamma, accuracy, precision, recall, f1))

            if f1 > best_f1:
                best_f1, best_accuracy = f1, accuracy
                best_C, best_gamma = C, gamma
                best_model = model

    return (
        best_model,
        {'C': best_C, 'gamma': best_gamma},
        {'accuracy': best_accuracy, 'f1': best_f1},
    )

def grid_search_gaussian(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C and gamma for an RBF-kernel SVC.

    Each candidate is fitted on the training data and scored on the
    validation data; one row per candidate is appended to
    ``audio_data/results_gaussian.csv``. The candidate with the strictly
    highest micro-averaged F1 wins, starting from a C=1, gamma=0.1 baseline.

    Returns ``(optimal_classifier, optimal_hyperparameters, optimal_metrics)``.
    """
    csv_path = 'audio_data/results_gaussian.csv'

    # header row of the results file
    with open(csv_path, 'w') as out:
        out.write('C, gamma, accuracy, precision, recall, f1\n')

    # baseline model
    baseline = SVC(kernel='rbf', C=1, gamma=0.1)
    baseline.fit(X_train, Y_train)
    baseline_pred = baseline.predict(X_valid)
    best = {
        'f1': f1_score(Y_valid, baseline_pred, average='micro'),
        'C': 1,
        'gamma': 0.1,
        'model': baseline,
        'accuracy': baseline.score(X_valid, Y_valid),
    }

    gamma_range = np.logspace(-9, 3, 13)
    c_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    # C varies slowest, gamma fastest
    candidates = [(C, gamma) for C in c_range for gamma in gamma_range]

    for C, gamma in candidates:
        model = SVC(kernel='rbf', C=C, gamma=gamma)
        model.fit(X_train, Y_train)
        predicted = model.predict(X_valid)

        f1 = f1_score(Y_valid, predicted, average='micro')
        accuracy = model.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, predicted, average='micro')
        recall = recall_score(Y_valid, predicted, average='micro')

        with open(csv_path, 'a') as out:
            out.write('{}, {}, {}, {}, {}, {}\n'.format(C, gamma, accuracy, precision, recall, f1))

        if f1 > best['f1']:
            best = {'f1': f1, 'C': C, 'gamma': gamma, 'model': model, 'accuracy': accuracy}

    optimal_hyperparameters = {'C': best['C'], 'gamma': best['gamma']}
    optimal_metrics = {'accuracy': best['accuracy'], 'f1': best['f1']}
    return best['model'], optimal_hyperparameters, optimal_metrics

def grid_search_linear(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C for a linear-kernel SVC.

    Each candidate is fitted on the training data and scored on the
    validation data; one row per candidate is appended to
    ``audio_data/results_linear.csv``. The candidate with the strictly
    highest micro-averaged F1 wins, starting from a C=1 baseline.

    Returns ``(optimal_classifier, optimal_hyperparameters, optimal_metrics)``.
    """
    csv_path = 'audio_data/results_linear.csv'
    row_template = '{}, {}, {}, {}, {}\n'

    # header row of the results file
    with open(csv_path, 'w') as out:
        out.write('C, accuracy, precision, recall, f1\n')

    # baseline: C = 1
    best_model = SVC(kernel='linear', C=1)
    best_model.fit(X_train, Y_train)
    baseline_pred = best_model.predict(X_valid)
    best_f1 = f1_score(Y_valid, baseline_pred, average='micro')
    best_C = 1
    best_accuracy = best_model.score(X_valid, Y_valid)

    for C in np.logspace(-2, 10, 13):
        model = SVC(kernel='linear', C=C)
        model.fit(X_train, Y_train)
        predicted = model.predict(X_valid)

        f1 = f1_score(Y_valid, predicted, average='micro')
        accuracy = model.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, predicted, average='micro')
        recall = recall_score(Y_valid, predicted, average='micro')

        # log this candidate
        with open(csv_path, 'a') as out:
            out.write(row_template.format(C, accuracy, precision, recall, f1))

        if f1 > best_f1:
            best_f1, best_C = f1, C
            best_model, best_accuracy = model, accuracy

    return best_model, {'C': best_C}, {'accuracy': best_accuracy, 'f1': best_f1}



#this function plots colormap where x and y coordinates of colourmap are the hyperparameters and colourmap is the accuracy of the classifier
def plot_hyperparameter_accuracy(csv_file, midpoint, title, name , hyperparameters_to_plot= ['C', ' gamma']):
    """Show a heatmap of validation accuracy over two hyperparameters.

    `csv_file` is a results file whose header contains ' accuracy' and the two
    column names in `hyperparameters_to_plot` (first -> y axis, second ->
    x axis). The accuracy column is reshaped, in file order, to
    (number of unique y values, number of unique x values), so the file must
    hold exactly one row per combination with the y hyperparameter varying
    slowest. `midpoint` is the accuracy mapped to the middle of the colour
    scale; `title` and `name` are appended to the plot title.
    """
    print()
    y_key = hyperparameters_to_plot[0]
    x_key = hyperparameters_to_plot[1]

    # DictReader consumes the header row itself; every record is data
    with open(csv_file, 'r') as csvfile:
        records = list(csv.DictReader(csvfile))

    accuracy_text = []
    y_text = []
    x_text = []
    for record in records:
        accuracy_text.append(record[' accuracy'])
        y_text.append(record[y_key])
        x_text.append(record[x_key])

    # echo the raw hyperparameter columns
    print(y_text)
    print(x_text)

    # strings -> floats -> arrays
    accuracy_array = np.array([float(text) for text in accuracy_text])
    y_values = np.array([float(text) for text in y_text])
    x_values = np.array([float(text) for text in x_text])

    # np.unique returns the distinct values already sorted ascending
    y_ticks = np.unique(y_values)
    x_ticks = np.unique(x_values)

    grid = np.reshape(accuracy_array, (len(y_ticks), len(x_ticks)))

    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(left=0.2, right=0.95, bottom=0.15, top=0.95)
    plt.imshow(
        grid,
        interpolation="nearest",
        cmap=plt.cm.hot,
        norm=MidpointNormalize(vmin=0.2, midpoint=midpoint, vmax=1)
    )
    plt.xlabel(x_key)
    plt.ylabel(y_key)
    plt.colorbar()
    plt.xticks(np.arange(len(x_ticks)), x_ticks, rotation=45)
    plt.yticks(np.arange(len(y_ticks)), y_ticks)
    plt.title("Validation accuracy " + title + " "+ name)
    plt.show()


def extract_accuracy(csv_file):
    """Read the accuracy and C columns from a results CSV.

    Returns ``(accuracy_array, c_array)``: the ' accuracy' column as floats in
    file order, and the distinct values of the 'C' column sorted ascending.
    """
    # DictReader consumes the header row itself; every record is data
    with open(csv_file, 'r') as csvfile:
        records = list(csv.DictReader(csvfile))

    accuracy_text = []
    c_text = []
    for record in records:
        accuracy_text.append(record[' accuracy'])
        c_text.append(record['C'])

    accuracy_array = np.array([float(text) for text in accuracy_text])
    # np.unique both de-duplicates and sorts ascending
    c_array = np.unique(np.array([float(text) for text in c_text]))

    return accuracy_array, c_array




#plotting the results
def plot_results():
    """Plot the stored grid-search results of all four kernels.

    Draws one accuracy heatmap each for the sigmoid, Gaussian and polynomial
    result files, then a line plot of accuracy against C for the linear
    kernel.
    """
    name = "TIMIT"

    # positional arguments for plot_hyperparameter_accuracy, one tuple per
    # heatmap; only the polynomial plot overrides the default axes
    heatmaps = [
        ('audio_data/results_sigmoid.csv', 0.5, 'Sigmoid Kernel', name),
        ('audio_data/results_gaussian.csv', 0.5, 'Gaussian Kernel', name),
        ('audio_data/results_poly_best.csv', 0.5, 'Polynomial Kernel', name, ['C', ' degree']),
    ]
    for arguments in heatmaps:
        plot_hyperparameter_accuracy(*arguments)

    # accuracy vs C graph of the linear kernel
    accuracy_array, c_array = extract_accuracy('audio_data/results_linear.csv')
    plt.plot(c_array, accuracy_array)
    plt.xlabel('C')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs C for Linear Kernel ' + name)
    plt.show()

    # #plotting the graph of polynomial kernel
    #
    # accuracy_array, c_array = extract_accuracy('audio_data/results_poly.csv')
    # # plot the accuracy vs c graph
    # plt.plot(c_array, accuracy_array)
    # plt.xlabel('degree')
    # plt.ylabel('Accuracy')
    # plt.title('Accuracy vs degree for Linear Kernel ' + name)
    # # show
    # plt.show()




#plotting AUC curve of the best classifier
def plot_AUC(classifier, X_test, Y_test):
    """Plot the ROC curve of `classifier` on the test data.

    Uses the positive-class probability when the classifier offers
    `predict_proba`. An SVC built without ``probability=True`` (as in the grid
    searches of this file) does not, so the signed distance returned by
    `decision_function` is used as the ranking score instead.
    """
    name = "TIMIT"

    if hasattr(classifier, 'predict_proba'):
        # column 1 holds the probability of the positive class
        scores = classifier.predict_proba(X_test)[:, 1]
    else:
        scores = classifier.decision_function(X_test)

    fpr, tpr, _thresholds = roc_curve(Y_test, scores)

    plt.plot(fpr, tpr)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for ' + name)
    plt.show()


def SVM():
    """Run the grid search of every kernel and print the best settings.

    Loads the preprocessed data once, then runs the polynomial, sigmoid,
    Gaussian and linear grid searches in that order, each on the training and
    validation splits. For every search the optimal hyperparameters, F1 and
    accuracy are printed.
    """
    # getting the data
    X_train, Y_train, X_test, Y_test, X_valid, Y_valid = data_preprocessing()

    # (banner, search function, hyperparameters to report)
    searches = [
        ("Grid Search for Polynomial Kernel", grid_search_poly, ('C', 'degree', 'gamma')),
        ("Grid Search for Sigmoid Kernel", grid_search_sigmoid, ('C', 'gamma')),
        ("Grid Search for Gaussian Kernel", grid_search_gaussian, ('C', 'gamma')),
        ("Grid Search for Linear Kernel", grid_search_linear, ('C',)),
    ]

    for banner, search, reported in searches:
        print(banner)
        optimal_classifier, optimal_hyperparameters, optimal_metrics = search(X_train, Y_train, X_valid, Y_valid)

        # printing the results; a hyperparameter is reported only when the
        # search actually returned it, so a missing key cannot abort the run
        for key in reported:
            if key in optimal_hyperparameters:
                print("Optimal {}: {}".format(key, optimal_hyperparameters[key]))
        print("Optimal f1: {}".format(optimal_metrics['f1']))
        print("Optimal accuracy: {}".format(optimal_metrics['accuracy']))


    # plotting
    # plotting the colormap of polynomial kernel
    # plot_hyperparameter_accuracy('audio_data/results_poly.csv', 0.9, 'Polynomial Kernel', "PneumoniaMNIST", ['C', ' degree'])

    # # plotting the graph of linear kernel
    # accuracy_array, c_array = extract_accuracy('audio_data/results_linear.csv')
    #
    # #print accuracy_array and c_array
    # print(accuracy_array)
    # print(c_array)
    #
    # # plot the accuracy vs c graph
    # plt.plot(c_array, accuracy_array)
    # plt.xlabel('C')
    # plt.ylabel('Accuracy')
    # plt.title('Accuracy vs C for Linear Kernel ' + "PneumoniaMNIST")
    # # show
    # plt.show()













#main function
def main():
    """Entry point of the audio SVM cell.

    Ensures the ``audio_data`` output directory is present, runs all grid
    searches and then plots the stored results. The dataset location comes
    from the module-level `path` settings, not from this function.
    """
    output_dir = 'audio_data'
    # create the output directory on first run
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    SVM()
    plot_results()





# Script entry guard: call main() only when this file/cell is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()




In [None]:
# we are implementing an SVM-based model for the object localization dataset
# the dataset is the file object_localization.npy in the same directory as this file

#result folder is: binary_svr



#importing libraries
import csv
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_curve
from sklearn.preprocessing import StandardScaler
#importing svm

from sklearn.svm import SVC

import matplotlib.pyplot as plt
from matplotlib.colors import Normalize

import os
import random
import math
from datetime import datetime
from collections import Counter
import pandas as pd
import numpy as np

from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

#Import libraries for processing
import numpy as np
from skimage import color
from skimage.transform import rescale

# Global variables

# Directory that contains object_localization.npy (read by data_preprocessing).
# path = 'drive/MyDrive/Colab Notebooks/PRNN_A1_DATA'
path = os.getcwd()




class MidpointNormalize(Normalize):
    """Colour normalisation that pins a chosen midpoint to 0.5.

    Values are piecewise-linearly interpolated so that ``vmin`` maps to 0,
    ``midpoint`` maps to 0.5 and ``vmax`` maps to 1.
    """

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        # store the pivot first, then let Normalize handle vmin/vmax/clip
        self.midpoint = midpoint
        super().__init__(vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # `clip` is accepted for signature compatibility but is not used
        anchors = [self.vmin, self.midpoint, self.vmax]
        targets = [0, 0.5, 1]
        interpolated = np.interp(value, anchors, targets)
        return np.ma.masked_array(interpolated)




#Compute Mean Square Error
def compute_mse(test_data, test_label,test_output) :
    """Print the mean squared error between predicted and true 4-vectors.

    For each of the 4 coordinates the squared differences are averaged over
    the ``len(test_data)`` samples; the printed value is the mean of those 4
    per-coordinate errors. `test_data` is only used for its length.
    """
    n_samples = len(test_data)
    totals = [0, 0, 0, 0]

    # accumulate the squared error of every coordinate
    for i in range(n_samples):
        predicted = test_output[i]
        actual = test_label[i]
        for j in range(4):
            diff = predicted[j] - actual[j]
            totals[j] += diff * diff

    per_coordinate = [total / n_samples for total in totals]

    print("Mean squared error: ", sum(per_coordinate) / len(per_coordinate))



#Compute Mean Absolute Error
def compute_mae(test_data, test_label,test_output) :
    """Print the mean absolute error between predicted and true 4-vectors.

    For each of the 4 coordinates the absolute differences are averaged over
    the ``len(test_data)`` samples; the printed value is the mean of those 4
    per-coordinate errors. `test_data` is only used for its length.
    """
    n_samples = len(test_data)
    totals = [0, 0, 0, 0]

    # accumulate the absolute error of every coordinate
    for i in range(n_samples):
        predicted = test_output[i]
        actual = test_label[i]
        for j in range(4):
            totals[j] += abs(predicted[j] - actual[j])

    per_coordinate = [total / n_samples for total in totals]

    print("Mean absolute error: ", sum(per_coordinate) / len(per_coordinate))


#Compute Mean Intersection over Union Value
def compute_mIoU(test_data, test_label,test_output) :
    """Print and return the mean Intersection over Union of two box sets.

    Boxes are indexed as ``[x1, y1, x2, y2]``. `test_data` is only used for
    its length. The overlap width and height are clamped at zero so that
    boxes which do not intersect contribute an IoU of 0 (an unclamped
    product of two negative extents would look like a real overlap). A pair
    whose union area is not positive also contributes 0.

    Returns the mean IoU over all samples (also printed).
    """
    miou = 0
    for i in range(len(test_data)):
        predicted = test_output[i]
        actual = test_label[i]

        # extents of the overlap rectangle; zero when the boxes are disjoint
        overlap_w = max(0, min(predicted[2], actual[2]) - max(predicted[0], actual[0]))
        overlap_h = max(0, min(predicted[3], actual[3]) - max(predicted[1], actual[1]))
        int_area = overlap_w * overlap_h

        predicted_area = (predicted[2] - predicted[0]) * (predicted[3] - predicted[1])
        actual_area = (actual[2] - actual[0]) * (actual[3] - actual[1])
        union_area = predicted_area + actual_area - int_area

        # degenerate boxes give no usable union; count them as no overlap
        iou = int_area / union_area if union_area > 0 else 0.0
        miou += iou

    mean_iou = miou / len(test_data)
    print("Mean Intersection over Union error: ", mean_iou)
    return mean_iou








# preprocessing the data
def data_preprocessing():
    """Load object_localization.npy and return scaled train/test/valid sets.

    Every image is converted to grayscale, rescaled by 0.37, flattened and
    extended with a constant 1 feature; the label of each image
    (``data[i][2]``, presumably its bounding box; confirm against the data
    file) is multiplied by the same factor. The first 80% of the images form
    the training pool, of which 20% is held out for validation; the remaining
    images are the test set. Labels are cast to int, as before.

    The scaler is fitted on the training split only and then applied to the
    validation and test splits so that all three share the same statistics.

    Returns ``(X_train, Y_train, X_test, Y_test, X_valid, Y_valid)``.
    """
    data_path = path + '/object_localization.npy'

    # NOTE(security): allow_pickle=True unpickles arbitrary objects and can
    # execute code; only load files from a trusted source.
    data = np.load(data_path,allow_pickle=True,encoding='latin1')

    num_images = data.shape[0]
    # 80% of the data to be training data
    num_train_data = int(num_images * 0.8)

    # convert each image to grayscale, downscale it and flatten it
    scale = 0.37
    r_data = []
    labels = []
    for i in range(num_images):
        g_img = color.rgb2gray(data[i][0])
        ds_img = rescale(g_img, scale).flatten()
        # append a constant 1 feature
        r_data.append([*ds_img, 1])
        # the label lives in image coordinates, so scale it the same way
        labels.append(data[i][2] * scale)

    # split the training and test data
    train_data = np.array(r_data[:num_train_data]).astype(float)
    test_data = np.array(r_data[num_train_data:]).astype(float)
    train_label = np.array(labels[:num_train_data])
    test_label = np.array(labels[num_train_data:])

    # split train data into train and validation data
    train_data, val_data, train_label, val_label = train_test_split(
        train_data, train_label, test_size=0.2, random_state=42)

    # targets returned to the caller
    Y_train = train_label.astype(int)
    Y_valid = val_label.astype(int)
    Y_test = test_label.astype(int)

    # feature scaling: fit on train only, reuse the fitted scaler elsewhere
    sc = StandardScaler()
    X_train = sc.fit_transform(train_data)
    X_valid = sc.transform(val_data)
    X_test = sc.transform(test_data)

    return X_train, Y_train, X_test, Y_test, X_valid, Y_valid



    # # save np.load
    # np_load_old = np.load
    #
    # # modify the default parameters of np.load
    # np.load = lambda *a, **k: np_load_old(*a, allow_pickle=True, **k)
    #
    # # call load_data with allow_pickle implicitly set to true
    # # (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
    # data = np.load(data_path )
    #
    # # restore np.load for future normal usage
    # np.load = np_load_old
    #
    # # data = pickle.load(data_path, encoding='iso-8859-1')
    # # print(data.files)
    # # Reshape images and save in numpy arrays
    # # train1_data = np.reshape(data['train_images'], (4708, 784))
    # # train1_labels = np.reshape(data['train_labels'], (4708))
    #
    # #print
    # print(data.files)












def grid_search_poly(X_train, Y_train,  X_valid, Y_valid ):
    """Grid-search C, degree and gamma for a polynomial-kernel SVC.

    Each combination is fitted on the training split and scored on the
    validation split; one row per combination is appended to
    'binary_svr/results_poly.csv'.  The model with the highest micro-averaged
    F1 on the validation split wins (ties keep the earlier model).

    Args:
        X_train, Y_train: training features and labels.
        X_valid, Y_valid: validation features and labels.

    Returns:
        Tuple (optimal_classifier, optimal_hyperparameters, optimal_metrics):
        the best fitted SVC, a dict with keys 'C', 'degree' and 'gamma', and a
        dict with keys 'accuracy' and 'f1'.
    """
    results_path = 'binary_svr/results_poly.csv'

    # create csv file for the results (truncates the file of any previous run)
    with open(results_path, 'w') as f:
        f.write('C, degree, gamma, accuracy, precision, recall, f1\n')

    # baseline: SVM with C = 1, degree = 2 and gamma = 0.1; a grid point only
    # replaces it when its F1 is strictly higher
    clf = SVC(kernel='poly', C=1, degree=2, gamma=0.1)
    clf.fit(X_train, Y_train)
    Y_pred = clf.predict(X_valid)
    optimal_f1 = f1_score(Y_valid, Y_pred, average='micro')
    optimal_C = 1
    optimal_degree = 2
    optimal_gamma = 0.1
    optimal_classifier = clf
    optimal_accuracy = clf.score(X_valid, Y_valid)

    # small grid for testing; wider sweeps tried earlier:
    #   c_range = np.logspace(-2, 10, 13)
    #   degree_range = [2, 3, 4, 5, 6, 7, 8, 9, 10, 100, 1000, 10000]
    #   gamma_range = np.logspace(-9, 3, 13)
    c_range = [0.1, 1, 10]
    degree_range = [2, 3]
    gamma_range = [0.1]

    for C in c_range:
        for degree in degree_range:
            for gamma in gamma_range:
                # gamma has to be passed explicitly: without it SVC silently
                # uses its own default and the logged gamma column is wrong
                clf = SVC(kernel='poly', C=C, degree=degree, gamma=gamma)
                clf.fit(X_train, Y_train)
                Y_pred = clf.predict(X_valid)

                f1 = f1_score(Y_valid, Y_pred, average='micro')
                accuracy = clf.score(X_valid, Y_valid)
                precision = precision_score(Y_valid, Y_pred, average='micro')
                recall = recall_score(Y_valid, Y_pred, average='micro')

                # write results to csv file
                with open(results_path, 'a') as f:
                    f.write('{}, {}, {}, {}, {}, {}, {}\n'.format(C, degree, gamma, accuracy, precision, recall, f1))

                if f1 > optimal_f1:
                    optimal_f1 = f1
                    optimal_C = C
                    optimal_degree = degree
                    optimal_gamma = gamma
                    optimal_classifier = clf
                    optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C, 'degree': optimal_degree, 'gamma': optimal_gamma}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics



def grid_search_sigmoid(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C and gamma for a sigmoid-kernel SVC.

    Every (C, gamma) pair is fitted on the training split, scored on the
    validation split and appended as a row to 'binary_svr/results_sigmoid.csv'.
    The model with the highest micro-averaged validation F1 is returned; a
    reference model (C = 1, gamma = 0.1) is the starting point and is only
    replaced by a strictly better score.

    Returns:
        Tuple (classifier, {'C', 'gamma'} dict, {'accuracy', 'f1'} dict).
    """
    log_path = 'binary_svr/results_sigmoid.csv'

    # start a fresh results file containing only the header row
    with open(log_path, 'w') as log:
        log.write('C, gamma, accuracy, precision, recall, f1\n')

    # reference model: C = 1, gamma = 0.1
    best_model = SVC(kernel='sigmoid', C=1, gamma=0.1)
    best_model.fit(X_train, Y_train)
    best_f1 = f1_score(Y_valid, best_model.predict(X_valid), average='micro')
    best_accuracy = best_model.score(X_valid, Y_valid)
    best_C, best_gamma = 1, 0.1

    # minimal grid used for testing; wider sweeps tried earlier:
    #   c_range = np.logspace(-2, 10, 13), gamma_range = np.logspace(-9, 3, 13)
    #   c_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    #   gamma_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    c_range = [1]
    gamma_range = [0.1]

    candidates = [(c_value, gamma_value) for c_value in c_range for gamma_value in gamma_range]
    for C, gamma in candidates:
        model = SVC(kernel='sigmoid', C=C, gamma=gamma)
        model.fit(X_train, Y_train)
        predictions = model.predict(X_valid)

        f1 = f1_score(Y_valid, predictions, average='micro')
        accuracy = model.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, predictions, average='micro')
        recall = recall_score(Y_valid, predictions, average='micro')

        # one csv row per candidate
        with open(log_path, 'a') as log:
            log.write('{}, {}, {}, {}, {}, {}\n'.format(C, gamma, accuracy, precision, recall, f1))

        if not f1 > best_f1:
            continue
        best_f1, best_accuracy = f1, accuracy
        best_C, best_gamma = C, gamma
        best_model = model

    return (
        best_model,
        {'C': best_C, 'gamma': best_gamma},
        {'accuracy': best_accuracy, 'f1': best_f1},
    )

def grid_search_gaussian(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C and gamma for an RBF (Gaussian) kernel SVC.

    Every (C, gamma) pair is fitted on the training split, scored on the
    validation split and appended as a row to 'binary_svr/results_gaussian.csv'.
    The model with the highest micro-averaged validation F1 is returned; a
    reference model (C = 1, gamma = 0.1) is the starting point and is only
    replaced by a strictly better score.

    Returns:
        Tuple (classifier, {'C', 'gamma'} dict, {'accuracy', 'f1'} dict).
    """
    log_path = 'binary_svr/results_gaussian.csv'

    # start a fresh results file containing only the header row
    with open(log_path, 'w') as log:
        log.write('C, gamma, accuracy, precision, recall, f1\n')

    # reference model: C = 1, gamma = 0.1
    best_model = SVC(kernel='rbf', C=1, gamma=0.1)
    best_model.fit(X_train, Y_train)
    best_f1 = f1_score(Y_valid, best_model.predict(X_valid), average='micro')
    best_accuracy = best_model.score(X_valid, Y_valid)
    best_C, best_gamma = 1, 0.1

    # coarse grid; alternatives tried earlier:
    #   c_range = [1], gamma_range = [0.1]                     (for testing)
    #   c_range = np.logspace(-2, 10, 13), gamma_range = np.logspace(-9, 3, 13)
    #   c_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    #   gamma_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
    c_range = [0.001, 1, 100]
    gamma_range = [0.01, 10, 1000]

    candidates = [(c_value, gamma_value) for c_value in c_range for gamma_value in gamma_range]
    for C, gamma in candidates:
        model = SVC(kernel='rbf', C=C, gamma=gamma)
        model.fit(X_train, Y_train)
        predictions = model.predict(X_valid)

        f1 = f1_score(Y_valid, predictions, average='micro')
        accuracy = model.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, predictions, average='micro')
        recall = recall_score(Y_valid, predictions, average='micro')

        # one csv row per candidate
        with open(log_path, 'a') as log:
            log.write('{}, {}, {}, {}, {}, {}\n'.format(C, gamma, accuracy, precision, recall, f1))

        if not f1 > best_f1:
            continue
        best_f1, best_accuracy = f1, accuracy
        best_C, best_gamma = C, gamma
        best_model = model

    return (
        best_model,
        {'C': best_C, 'gamma': best_gamma},
        {'accuracy': best_accuracy, 'f1': best_f1},
    )

def grid_search_linear(X_train, Y_train,  X_valid, Y_valid):
    """Grid-search C for a linear-kernel SVC.

    Every C is fitted on the training split, scored on the validation split
    and appended as a row to 'binary_svr/results_linear.csv'.  The model with
    the highest micro-averaged validation F1 is returned; a baseline with
    C = 1 is the starting point and is only replaced by a strictly better
    score.

    Args:
        X_train, Y_train: training features and labels.
        X_valid, Y_valid: validation features and labels.

    Returns:
        Tuple (optimal_classifier, optimal_hyperparameters, optimal_metrics):
        the best fitted SVC, a dict with key 'C', and a dict with keys
        'accuracy' and 'f1'.
    """
    results_path = 'binary_svr/results_linear.csv'

    # create csv file for the results (truncates the file of any previous run)
    with open(results_path, 'w') as f:
        f.write('C, accuracy, precision, recall, f1\n')

    # baseline: SVM with C = 1
    clf = SVC(kernel='linear', C=1)
    clf.fit(X_train, Y_train)
    Y_pred = clf.predict(X_valid)
    optimal_f1 = f1_score(Y_valid, Y_pred, average='micro')
    optimal_C = 1
    optimal_classifier = clf
    optimal_accuracy = clf.score(X_valid, Y_valid)

    # coarse grid; alternatives tried earlier:
    #   c_range = [1]                                           (for testing)
    #   c_range = np.logspace(-2, 10, 13)
    #   c_range = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
    # A linear kernel has no gamma or degree, so only C is searched.
    c_range = [0.001, 1, 100]

    for C in c_range:
        clf = SVC(kernel='linear', C=C)
        clf.fit(X_train, Y_train)
        Y_pred = clf.predict(X_valid)

        f1 = f1_score(Y_valid, Y_pred, average='micro')
        accuracy = clf.score(X_valid, Y_valid)
        precision = precision_score(Y_valid, Y_pred, average='micro')
        recall = recall_score(Y_valid, Y_pred, average='micro')

        # write results to csv file
        with open(results_path, 'a') as f:
            f.write('{}, {}, {}, {}, {}\n'.format(C, accuracy, precision, recall, f1))

        if f1 > optimal_f1:
            optimal_f1 = f1
            optimal_C = C
            optimal_classifier = clf
            optimal_accuracy = accuracy

    optimal_hyperparameters = {'C': optimal_C}
    optimal_metrics = {'accuracy': optimal_accuracy, 'f1': optimal_f1}
    return optimal_classifier, optimal_hyperparameters, optimal_metrics



# Plots a heat map whose y and x axes are two hyperparameters and whose colour encodes the classifier's validation accuracy.
def plot_hyperparameter_accuracy(csv_file, midpoint, title, name , hyperparameters_to_plot=('C', ' gamma')):
    """Draw a heat map of validation accuracy over two hyperparameters.

    Args:
        csv_file: results file written by one of the grid searches.  Column
            names are looked up exactly as they appear in the header, which is
            why every column but the first carries a leading space
            (' accuracy', ' gamma', ...).
        midpoint: accuracy value placed at the middle of the colour map.
        title, name: appended to the figure title.
        hyperparameters_to_plot: two column names; the first is drawn on the
            y axis, the second on the x axis.

    Each accuracy is placed in the grid cell that matches its own
    hyperparameter values, so the rows of the file may come in any order.
    Combinations missing from the file are left as NaN; if a combination occurs
    more than once the last row wins.
    """
    print()
    hyperparameter_1 = hyperparameters_to_plot[0]
    hyperparameter_2 = hyperparameters_to_plot[1]

    accuracy_list = []
    c_list = []
    gamma_list = []
    with open(csv_file, 'r', newline='') as csvfile:
        # DictReader consumes the header line itself, every `row` is data
        for row in csv.DictReader(csvfile):
            accuracy_list.append(row[' accuracy'])
            c_list.append(row[hyperparameter_1])
            gamma_list.append(row[hyperparameter_2])
    # print the raw values read for both hyperparameters
    print(c_list)
    print(gamma_list)

    # convert the lists of strings to float arrays
    accuracy_array = np.array([float(i) for i in accuracy_list])
    c_values = np.array([float(i) for i in c_list])
    gamma_values = np.array([float(i) for i in gamma_list])

    # sorted unique axis values plus, for every csv row, its position on each axis
    c_array, row_index = np.unique(c_values, return_inverse=True)
    gamma_array, col_index = np.unique(gamma_values, return_inverse=True)

    # scatter the accuracies into the grid instead of relying on file order
    accuracy_grid = np.full((len(c_array), len(gamma_array)), np.nan)
    accuracy_grid[row_index, col_index] = accuracy_array

    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(left=0.2, right=0.95, bottom=0.15, top=0.95)
    plt.imshow(
        accuracy_grid,
        interpolation="nearest",
        cmap=plt.cm.hot,
        norm=MidpointNormalize(vmin=0.2, midpoint=midpoint, vmax=1)
    )
    plt.xlabel(hyperparameter_2)
    plt.ylabel(hyperparameter_1)
    plt.colorbar()
    plt.xticks(np.arange(len(gamma_array)), gamma_array, rotation=45)
    plt.yticks(np.arange(len(c_array)), c_array)
    plt.title("Validation accuracy " + title + " "+ name)
    plt.show()


def extract_accuracy(csv_file):
    """Read the accuracy obtained for every value of C from a results file.

    Args:
        csv_file: results file with a 'C' column and an ' accuracy' column
            (the leading space comes from the ', ' separated header written by
            the grid searches).

    Returns:
        Tuple (accuracy_array, c_array).  `c_array` holds the unique C values
        in ascending order and `accuracy_array[k]` is the accuracy logged for
        `c_array[k]`, whatever the row order of the file.  If a C value occurs
        more than once, its first row is used.
    """
    accuracy_list = []
    c_list = []
    with open(csv_file, 'r', newline='') as csvfile:
        # DictReader consumes the header line itself, every `row` is data
        for row in csv.DictReader(csvfile):
            accuracy_list.append(float(row[' accuracy']))
            c_list.append(float(row['C']))

    accuracy_array = np.array(accuracy_list)

    # sorted unique C values and the index of the row each one came from, so
    # that the accuracies can be reordered to match
    c_array, source_rows = np.unique(np.array(c_list), return_index=True)

    return accuracy_array[source_rows], c_array




#plotting the results
def plot_results():
    """Plot the stored grid-search results for all four kernels.

    Heat maps are drawn for the sigmoid, Gaussian and polynomial result files,
    followed by an accuracy-versus-C line plot for the linear kernel.
    """
    name = "PneumoniaMNIST"

    # (results file, colour-map midpoint, title, extra positional arguments)
    heatmaps = [
        ('binary_svr/results_sigmoid.csv', 0.72, 'Sigmoid Kernel', ()),
        ('binary_svr/results_gaussian.csv', 0.74, 'Gaussian Kernel', ()),
        # the polynomial map is drawn over C and degree instead of C and gamma
        ('binary_svr/results_poly_best.csv', 0.7, 'Polynomial Kernel', (['C', ' degree'],)),
    ]
    for results_file, midpoint, kernel_title, extra in heatmaps:
        plot_hyperparameter_accuracy(results_file, midpoint, kernel_title, name, *extra)

    # linear kernel: only one hyperparameter, so a line plot is enough
    linear_accuracy, linear_c = extract_accuracy('binary_svr/results_linear.csv')
    plt.plot(linear_c, linear_accuracy)
    plt.xlabel('C')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs C for Linear Kernel ' + name)
    plt.show()

    # #plotting the graph of polynomial kernel
    #
    # accuracy_array, c_array = extract_accuracy('binary_svr/results_poly.csv')
    # # plot the accuracy vs c graph
    # plt.plot(c_array, accuracy_array)
    # plt.xlabel('degree')
    # plt.ylabel('Accuracy')
    # plt.title('Accuracy vs degree for Linear Kernel ' + name)
    # # show
    # plt.show()




#plotting AUC curve of the best classifier
def plot_AUC(classifier, X_test, Y_test):
    """Plot the ROC curve of a fitted binary classifier on the test split.

    Args:
        classifier: fitted estimator.  Its positive-class probabilities are
            used when `predict_proba` is available; otherwise the scores of
            `decision_function` are used.  The fallback matters for the SVC
            models built by the grid searches, which are created without
            `probability=True` and therefore expose no `predict_proba`.
        X_test: test features.
        Y_test: true binary test labels.
    """
    name = "PneumoniaMNIST"

    # roc_curve only needs a score that ranks the samples, so either
    # probabilities or signed distances to the decision boundary will do
    if hasattr(classifier, 'predict_proba'):
        scores = classifier.predict_proba(X_test)[:, 1]
    else:
        scores = classifier.decision_function(X_test)

    fpr, tpr, _ = roc_curve(Y_test, scores)

    plt.plot(fpr, tpr)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for ' + name)
    plt.show()


def SVM(path):
    """Load the data set at `path`, run the sigmoid-kernel grid search and print the best result."""

    # fetch the pre-processed splits (the test split is not used by the live code below)
    X_train, Y_train, X_test, Y_test, X_valid, Y_valid = data_preprocessing(path)
    # SVM
    # clf = SVC(kernel='linear', C=1)
    # clf.fit(X_train, Y_train)
    # Y_pred = clf.predict(X_test)
    # # print(classification_report(Y_test, Y_pred))
    # print(y_pred)
    # print(test1_labels)
    # print(y_pred == test1_labels)
    # print(np.sum(y_pred == test1_labels))
    # print(np.sum(y_pred == test1_labels) / 624)
    # print(np.sum(y_pred == test1_labels) / 624 * 100)

    # print("Accuracy of SVM classifier on test set: {:.2f}%".format(np.sum(Y_pred == Y_test) / 624 * 100))

    # print(clf.score(X_test, test1_labels))
    # print(clf.score(X_train, train1_labels))



    # #calling grid search poly
    # print("Grid Search for Polynomial Kernel")
    # optimal_classifier, optimal_hyperparameters, optimal_metrics = grid_search_poly(X_train, Y_train, X_valid, Y_valid)
    # #printing the results
    # print("Optimal C: {}".format(optimal_hyperparameters['C']))
    # print("Optimal degree: {}".format(optimal_hyperparameters['degree']))
    # print("Optimal gamma: {}".format(optimal_hyperparameters['gamma']))
    # print("Optimal f1: {}".format(optimal_metrics['f1']))
    # print("Optimal accuracy: {}".format(optimal_metrics['accuracy']))


    # sigmoid-kernel grid search: the only search currently enabled
    print("Grid Search for Sigmoid Kernel")
    search_outcome = grid_search_sigmoid(X_train, Y_train, X_valid, Y_valid)
    _best_classifier, best_params, best_scores = search_outcome

    # report the winning hyperparameters and their validation metrics
    best_c = best_params['C']
    best_gamma = best_params['gamma']
    best_f1 = best_scores['f1']
    best_accuracy = best_scores['accuracy']
    print("Optimal C: {}".format(best_c))
    print("Optimal gamma: {}".format(best_gamma))
    print("Optimal f1: {}".format(best_f1))
    print("Optimal accuracy: {}".format(best_accuracy))

    # # calling grid search gaussian
    # print("Grid Search for Gaussian Kernel")
    # optimal_classifier, optimal_hyperparameters, optimal_metrics = grid_search_gaussian(X_train, Y_train, X_valid, Y_valid)
    # #printing the results
    # print("Optimal C: {}".format(optimal_hyperparameters['C']))
    # print("Optimal gamma: {}".format(optimal_hyperparameters['gamma']))
    # print("Optimal f1: {}".format(optimal_metrics['f1']))
    # print("Optimal accuracy: {}".format(optimal_metrics['accuracy']))
    # 
    # # # calling grid search linear
    # print("Grid Search for Linear Kernel")
    # optimal_classifier, optimal_hyperparameters, optimal_metrics = grid_search_linear(X_train, Y_train, X_valid, Y_valid)
    # #printing the results
    # print("Optimal C: {}".format(optimal_hyperparameters['C']))
    # print("Optimal f1: {}".format(optimal_metrics['f1']))
    # print("Optimal accuracy: {}".format(optimal_metrics['accuracy']))


    # plotting
    # plotting the colormap of polynomial kernel
    # plot_hyperparameter_accuracy('binary_svr/results_poly.csv', 0.9, 'Polynomial Kernel', "PneumoniaMNIST", ['C', ' degree'])

    # # plotting the graph of linear kernel
    # accuracy_array, c_array = extract_accuracy('binary_svr/results_linear.csv')
    #
    # #print accuracy_array and c_array
    # print(accuracy_array)
    # print(c_array)
    #
    # # plot the accuracy vs c graph
    # plt.plot(c_array, accuracy_array)
    # plt.xlabel('C')
    # plt.ylabel('Accuracy')
    # plt.title('Accuracy vs C for Linear Kernel ' + "PneumoniaMNIST")
    # # show
    # plt.show()















#main function
import os


def main():
    """Entry point of the SVM script.

    Currently only runs `data_preprocessing()` and discards its result; the
    grid-search driver `SVM()` is commented out.
    """

    # Creating the output directory is disabled.
    # NOTE(review): the grid_search_* functions open files under 'binary_svr/'
    # for writing, so that directory has to exist before they are called.
    #if directory binary_svr does not exist, create it
    # if not os.path.exists('binary_svr'):
    #     os.makedirs('binary_svr')



    # NOTE(review): called without arguments here, while SVM() passes a path --
    # presumably data_preprocessing has a default path; confirm.
    data_preprocessing()


    # SVM()





# run main() only when the file is executed as a script
if __name__ == '__main__':
    main()




In [None]:
#MLP 
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the data set paths used by main() below live under this mount point.
drive.mount('/content/drive')

"""For Dataset-2 we use output activation = Softmax, Loss = Categorical Cross-entropy"""

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

def process_data(path):
    """Load an .npz image data set and prepare it for the Keras MLP.

    The archive must contain the arrays 'train_images', 'train_labels',
    'val_images', 'val_labels', 'test_images' and 'test_labels'.  Pixel values
    are divided by 255, every image is flattened to one feature row, and the
    labels are one-hot encoded over 8 classes.

    Args:
        path: location of the .npz archive.

    Returns:
        Tuple (train_x, train_y, test_x, test_y, val_x, val_y, test_images):
        flattened inputs and one-hot labels for each split (note the order:
        train, test, validation) followed by the unflattened, rescaled test
        images for plotting.
    """
    num_classes = 8

    # the context manager closes the archive once the arrays have been read
    with np.load(path) as dataset:
        train_x, train_y = dataset['train_images']/255.0, dataset['train_labels']
        val_x, val_y = dataset['val_images']/255.0, dataset['val_labels']
        test_x, test_y = dataset['test_images']/255.0, dataset['test_labels']

    # keep the image-shaped test data before it is flattened
    test_images = test_x

    # -1 flattens all remaining axes, so colour and grey-scale images both work
    train_x = train_x.reshape((train_x.shape[0], -1))
    val_x = val_x.reshape((val_x.shape[0], -1))
    test_x = test_x.reshape((test_x.shape[0], -1))

    def _one_hot(labels):
        # one row of `num_classes` indicator values per sample
        return tf.reshape(tf.one_hot(labels, depth=num_classes), [labels.shape[0], num_classes])

    return train_x, _one_hot(train_y), \
        test_x, _one_hot(test_y), \
        val_x, _one_hot(val_y), test_images

def MLP_train(x, y, val_x, val_y):
    """Build and fit a one-hidden-layer Keras MLP with a softmax output.

    The network has a ReLU hidden layer of 80000 units and an 8-way softmax
    output, is optimised with RMSprop on categorical cross-entropy, and is
    trained for at most 40 epochs with early stopping on the validation loss
    (patience 5).

    Returns:
        Tuple (history, model): the Keras training history and the fitted model.
    """
    hidden_layer = tf.keras.layers.Dense(80000, activation='relu')
    output_layer = tf.keras.layers.Dense(8, activation='softmax')
    model = tf.keras.Sequential([hidden_layer, output_layer])

    # the output layer already applies softmax, hence from_logits=False
    loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    model.compile(optimizer='rmsprop', loss=loss_fn, metrics=['accuracy'])

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    history = model.fit(
        x,
        y,
        epochs=40,
        batch_size=32,
        callbacks=[early_stopping],
        validation_data=(val_x, val_y),
    )
    return history, model

def plot_accuracy(history):
    """Plot the per-epoch training accuracy stored in a Keras training history."""
    training_accuracy = history.history['accuracy']
    plt.plot(training_accuracy)
    plt.grid()
    plt.ylabel("Accuracy")
    plt.xlabel("Epochs")
    plt.show()

def plot_example_predictions(test_images, test_inputs, test_labels, model):
    """Show the first test images together with predicted and true classes.

    Up to 2 x 4 images are drawn in a grid, each titled with the arg-max of the
    model output and the arg-max of its label row.  The figure is saved as
    'd2_test_predictions.png' and then displayed.

    Args:
        test_images: image-shaped test data, used for display only.
        test_inputs: flattened test data in the layout the model was trained on.
        test_labels: one-hot test labels.
        model: fitted model exposing `predict`.
    """
    rows = 2
    cols = 4
    # never index past the end of a short test set
    n_examples = min(rows*cols, len(test_inputs))

    fig, axes = plt.subplots(nrows=rows, ncols=cols)

    # a single batched call replaces one predict() call per image
    predictions = model.predict(test_inputs[:n_examples]) if n_examples else []

    for i, ax in enumerate(axes.ravel()[:n_examples]):
        ax.imshow(test_images[i])
        title = str("\nPredicted \nclass =" + str(np.argmax(predictions[i]))+",\nTrue class = " + str(np.argmax(test_labels[i])))
        ax.set_title(title)

    fig.tight_layout()
    plt.savefig("d2_test_predictions.png")
    plt.show()
    return

def main():
    """Train the Keras MLP on the BloodMNIST archive, report test accuracy and plot the results."""
    path = '/content/drive/MyDrive/PRNN/bloodmnist.npz'

    # process_data returns the splits in the order train, test, validation
    splits = process_data(path)
    train_x, train_y, test_x, test_y, val_x, val_y, test_images = splits

    history, trained_model = MLP_train(train_x, train_y, val_x, val_y)

    # evaluate() yields (loss, accuracy); only the accuracy is reported
    _test_loss, test_acc = trained_model.evaluate(test_x, test_y, verbose=2)
    print("Test Accuracy = ", test_acc)

    plot_accuracy(history)
    plot_example_predictions(test_images, test_x, test_y, trained_model)


if __name__ == "__main__":
    main()



import numpy as np
from tqdm import tqdm
from numba import jit
import matplotlib.pyplot as plt
from skimage.transform import downscale_local_mean, rescale


def process_data(path):
    """Load an .npz image data set and turn every image into a small feature row.

    The archive must contain the arrays 'train_images', 'train_labels',
    'val_images', 'val_labels', 'test_images' and 'test_labels'.  Pixel values
    are divided by 255, every image is shrunk by averaging 4 x 4 pixel blocks,
    and the result is flattened to one row per image.

    Args:
        path: location of the .npz archive.

    Returns:
        Tuple (train_x_, train_y, test_x_, test_y, val_x_, val_y, test_x):
        downscaled inputs and unchanged labels for each split (note the order:
        train, test, validation) followed by the full-size, rescaled test
        images.
    """
    scale = 4

    def _downscale_and_flatten(images):
        # downscale_local_mean pads images whose sides are not a multiple of
        # the factor, so every output side is the rounded-up quotient
        out_rows = -(-images.shape[1] // scale)
        out_cols = -(-images.shape[2] // scale)
        flat = np.zeros((images.shape[0], out_rows*out_cols))
        for idx, image in enumerate(images):
            flat[idx, :] = downscale_local_mean(image, (scale, scale)).ravel()
        return flat

    # the context manager closes the archive once the arrays have been read
    with np.load(path) as dataset:
        train_x, train_y = dataset['train_images']/255.0, dataset['train_labels']
        val_x, val_y = dataset['val_images']/255.0, dataset['val_labels']
        test_x, test_y = dataset['test_images']/255.0, dataset['test_labels']

    train_x_ = _downscale_and_flatten(train_x)
    val_x_ = _downscale_and_flatten(val_x)
    test_x_ = _downscale_and_flatten(test_x)

    return train_x_, train_y, test_x_, test_y, val_x_, val_y, test_x


@jit(nopython=True)
def ReLU(x):
    """Return `x` multiplied element-wise by the mask `x > 0`."""
    positive = x > 0
    return x * positive


@jit(nopython=True)
def dReLU(x):
    """Return the ReLU derivative: 1.0 where `x > 0`, 0.0 elsewhere."""
    positive = x > 0
    return 1.0 * positive


@jit(nopython=True)
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of `x`."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator


def forward_pass(h0, theta):
    """Propagate one input vector through the two-layer network.

    Args:
        h0: 1-D input vector; it is reshaped to a column before use.
        theta: sequence (W1, b1, W2, b2) of weights and biases.

    Returns:
        List [h1, a1, a2, y_hat]: hidden activation, hidden pre-activation,
        output pre-activation and sigmoid output.
    """
    column = h0.reshape((h0.shape[0], 1))
    W1, b1, W2, b2 = theta

    # hidden layer: affine map followed by ReLU
    a1 = b1 + np.matmul(W1, column)
    h1 = ReLU(a1)

    # output layer: affine map followed by sigmoid
    a2 = b2 + np.matmul(W2, h1)
    y_hat = sigmoid(a2)

    return [h1, a1, a2, y_hat]


def backward_pass(h0, h1, a1, a2, y_hat, y, W2):
    """Compute the gradients of the loss for one sample.

    The output error is taken as `y_hat - y` (presumably the gradient of a
    sigmoid / cross-entropy loss with respect to `a2` -- confirm).  `a2` is
    accepted but not used.

    Returns:
        List [grad_W1, grad_W2, grad_b2, grad_b1] -- note that the bias
        gradients come in the order b2, b1.
    """
    # output layer
    delta_output = y_hat - y
    weight_grad_2 = np.outer(delta_output, h1.T)

    # hidden layer: push the error back through W2 and the ReLU
    back_propagated = np.matmul(W2.T, delta_output)
    delta_hidden = back_propagated * dReLU(a1)
    weight_grad_1 = np.outer(delta_hidden, h0.T)

    # each bias gradient equals the error of its own layer
    return [weight_grad_1, weight_grad_2, delta_output, delta_hidden]


def grad_descent(train_x, train_y, parameters):
    """Train on a batch one sample at a time and score the predictions.

    For every sample, `max_iters` gradient steps with learning rate `lr` are
    taken on that sample alone.  The network output of the sample's last
    forward pass, rounded to 0/1, is then compared with its label to update
    the confusion counts.

    Args:
        train_x: 2-D array, one input row per sample.
        train_y: labels (0 or 1), one row per sample.
        parameters: sequence (W1, b1, W2, b2) of initial weights and biases.

    Returns:
        Tuple (W1, b1, W2, b2, acc, f1): the updated parameters plus accuracy
        and F1 over the batch.  `acc` is 0.0 for an empty batch and `f1` is
        0.0 when there are no true positives, false positives or false
        negatives.
    """
    W1, b1, W2, b2 = parameters
    max_iters = 100
    lr = 1e-4
    tp = 0
    tn = 0
    fp = 0
    fn = 0

    for h0, y in zip(train_x, train_y):
        # a fresh range per sample: a counter shared across samples would be
        # exhausted by the first one and leave all others untrained
        for _ in range(max_iters):
            # forward pass with the current, already updated parameters
            h1, a1, a2, y_hat = forward_pass(h0, [W1, b1, W2, b2])
            grad_W1_L, grad_W2_L, grad_b2_L, grad_b1_L = backward_pass(h0, h1, a1, a2, y_hat, y, W2)
            W1 = W1 - lr*grad_W1_L
            W2 = W2 - lr*grad_W2_L
            b2 = b2 - lr*grad_b2_L
            b1 = b1 - lr*grad_b1_L

        # y_hat now belongs to this sample (output of its last forward pass)
        if np.round(y_hat) == 1:
            if y == 1:
                tp += 1
            else:
                fp += 1
        else:
            if y == 0:
                tn += 1
            else:
                fn += 1

    total = tp + tn + fp + fn
    acc = (tp+tn)/total if total else 0.0
    f1_denominator = tp + 0.5*(fp+fn)
    f1 = tp/f1_denominator if f1_denominator else 0.0
    return W1, b1, W2, b2, acc, f1

def train_loop(train_x, train_y, val_x, val_y, epochs):
    """Initialise the network and train it on `epochs` consecutive mini-batches.

    The training set is cut into `epochs` contiguous, non-overlapping chunks
    that together cover every sample; `grad_descent` is run once per chunk and
    the accuracy it reports is collected and plotted.

    Args:
        train_x: 2-D array, one input row per sample.
        train_y: 2-D array, one label row per sample.
        val_x, val_y: validation split; accepted but not used.
        epochs: number of mini-batches the training set is divided into.

    Returns:
        Tuple (W1, b1, W2, b2) of trained weights and biases.
    """
    input_dim = train_x[0].shape[0]
    hidden_layer_dim = 2*input_dim
    output_dim = train_y[0].shape[0]

    accuracy = []

    # all entries of W1 share one normally distributed scale factor
    W1 = np.random.rand(hidden_layer_dim, input_dim)*np.random.normal(size=1)
    b1 = np.random.rand(hidden_layer_dim, 1)
    W2 = np.random.rand(output_dim, hidden_layer_dim)
    b2 = np.random.rand(output_dim, 1)

    # array_split hands out every sample exactly once, even when the set size
    # is not a multiple of `epochs`
    x_batches = np.array_split(train_x, epochs) if epochs > 0 else []
    y_batches = np.array_split(train_y, epochs) if epochs > 0 else []

    for i in tqdm(range(len(x_batches))):
        batch_x, batch_y = x_batches[i], y_batches[i]
        if batch_x.shape[0] == 0:
            # more batches requested than samples available
            continue
        W1, b1, W2, b2, acc, _ = grad_descent(batch_x, batch_y, [W1, b1, W2, b2])
        accuracy.append(acc)

    plot_accuracy(accuracy)
    return W1, b1, W2, b2

def evaluate(test_x, test_y, parameters, name_data):
    """Score the network on a labelled set and print accuracy and F1.

    Args:
        test_x: 2-D array, one input row per sample.
        test_y: labels (0 or 1), one row per sample.
        parameters: sequence (W1, b1, W2, b2) passed to `forward_pass`.
        name_data: label for the printed lines, e.g. 'Test'.

    The accuracy is reported as 0.0 for an empty set, and the F1 score as 0.0
    when there are no true positives, false positives or false negatives,
    instead of raising ZeroDivisionError.
    """
    tp = 0
    tn = 0
    fp = 0
    fn = 0

    for features, label in zip(test_x, test_y):
        _, _, _, y_hat = forward_pass(features, parameters)
        # the rounded sigmoid output is the predicted class
        if np.round(y_hat) == 1:
            if label == 1:
                tp += 1
            else:
                fp += 1
        else:
            if label == 0:
                tn += 1
            else:
                fn += 1

    total = tp + tn + fp + fn
    acc = (tp+tn)/total if total else 0.0
    f1_denominator = tp + 0.5*(fp+fn)
    f1 = tp/f1_denominator if f1_denominator else 0.0
    print("\n\n", name_data, "accuracy = ", acc)
    print("\n", name_data, "F1-Score = ", f1)
    return

def plot_accuracy(acc):
    """Plot the training accuracy curve on a fixed 0..1 axis.

    The last five entries of `acc` are left out of the plot.
    """
    shown = acc[:-5]
    plt.plot(shown)
    plt.title("Training Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.ylim(0.0, 1.0)
    plt.grid()
    plt.show()

def main():
    """Train the NumPy MLP on the PneumoniaMNIST archive and print its test scores."""
    path = '/content//drive/MyDrive/PRNN/pneumoniamnist.npz'
    n_batches = 200

    # process_data returns the splits in the order train, test, validation
    splits = process_data(path)
    train_x, train_y, test_x, test_y, val_x, val_y, test_images = splits

    W1, b1, W2, b2 = train_loop(train_x, train_y, val_x, val_y, n_batches)
    trained_parameters = [W1, b1, W2, b2]
    evaluate(test_x, test_y, trained_parameters, 'Test')


if __name__ == '__main__':
    main()

"""Backpropagation of single layer MLP with L2 regularization"""

import numpy as np
from tqdm import tqdm
from numba import jit
import matplotlib.pyplot as plt
from skimage.transform import downscale_local_mean, rescale


def process_data(path):
    """Load an .npz image data set and turn every image into a small feature row.

    The archive must contain the arrays 'train_images', 'train_labels',
    'val_images', 'val_labels', 'test_images' and 'test_labels'.  Pixel values
    are divided by 255, every image is shrunk by averaging 4 x 4 pixel blocks,
    and the result is flattened to one row per image.

    Args:
        path: location of the .npz archive.

    Returns:
        Tuple (train_x_, train_y, test_x_, test_y, val_x_, val_y, test_x):
        downscaled inputs and unchanged labels for each split (note the order:
        train, test, validation) followed by the full-size, rescaled test
        images.
    """
    scale = 4

    def _downscale_and_flatten(images):
        # downscale_local_mean pads images whose sides are not a multiple of
        # the factor, so every output side is the rounded-up quotient
        out_rows = -(-images.shape[1] // scale)
        out_cols = -(-images.shape[2] // scale)
        flat = np.zeros((images.shape[0], out_rows*out_cols))
        for idx, image in enumerate(images):
            flat[idx, :] = downscale_local_mean(image, (scale, scale)).ravel()
        return flat

    # the context manager closes the archive once the arrays have been read
    with np.load(path) as dataset:
        train_x, train_y = dataset['train_images']/255.0, dataset['train_labels']
        val_x, val_y = dataset['val_images']/255.0, dataset['val_labels']
        test_x, test_y = dataset['test_images']/255.0, dataset['test_labels']

    train_x_ = _downscale_and_flatten(train_x)
    val_x_ = _downscale_and_flatten(val_x)
    test_x_ = _downscale_and_flatten(test_x)

    return train_x_, train_y, test_x_, test_y, val_x_, val_y, test_x


@jit(nopython=True)
def ReLU(x):
    """Return `x` multiplied element-wise by the mask `x > 0`."""
    positive = x > 0
    return x * positive


@jit(nopython=True)
def dReLU(x):
    """Return the ReLU derivative: 1.0 where `x > 0`, 0.0 elsewhere."""
    positive = x > 0
    return 1.0 * positive


@jit(nopython=True)
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of `x`."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator


def forward_pass(h0, theta):
    """Propagate one input vector through the two-layer network.

    Args:
        h0: 1-D input vector; it is reshaped to a column before use.
        theta: sequence (W1, b1, W2, b2) of weights and biases.

    Returns:
        List [h1, a1, a2, y_hat]: hidden activation, hidden pre-activation,
        output pre-activation and sigmoid output.
    """
    column = h0.reshape((h0.shape[0], 1))
    W1, b1, W2, b2 = theta

    # hidden layer: affine map followed by ReLU
    a1 = b1 + np.matmul(W1, column)
    h1 = ReLU(a1)

    # output layer: affine map followed by sigmoid
    a2 = b2 + np.matmul(W2, h1)
    y_hat = sigmoid(a2)

    return [h1, a1, a2, y_hat]


def backward_pass(h0, h1, a1, a2, y_hat, y, W2):
    """Compute the gradients of the loss for one sample.

    The output error is taken as `y_hat - y` (presumably the gradient of a
    sigmoid / cross-entropy loss with respect to `a2` -- confirm).  `a2` is
    accepted but not used.

    Returns:
        List [grad_W1, grad_W2, grad_b2, grad_b1] -- note that the bias
        gradients come in the order b2, b1.
    """
    # output layer
    delta_output = y_hat - y
    weight_grad_2 = np.outer(delta_output, h1.T)

    # hidden layer: push the error back through W2 and the ReLU
    back_propagated = np.matmul(W2.T, delta_output)
    delta_hidden = back_propagated * dReLU(a1)
    weight_grad_1 = np.outer(delta_hidden, h0.T)

    # each bias gradient equals the error of its own layer
    return [weight_grad_1, weight_grad_2, delta_output, delta_hidden]


def grad_descent(train_x, train_y, parameters, lambda_):
    """Train on a batch one sample at a time with weight decay and score it.

    For every sample, `max_iters` gradient steps with learning rate `lr` are
    taken on that sample alone; every step first shrinks each parameter by the
    factor (1 - lr*lambda_), biases included.  The network output of the
    sample's last forward pass, rounded to 0/1, is then compared with its
    label to update the confusion counts.

    Args:
        train_x: 2-D array, one input row per sample.
        train_y: labels (0 or 1), one row per sample.
        parameters: sequence (W1, b1, W2, b2) of initial weights and biases.
        lambda_: regularisation strength used in the decay factor.

    Returns:
        Tuple (W1, b1, W2, b2, acc, f1): the updated parameters plus accuracy
        and F1 over the batch.  `acc` is 0.0 for an empty batch and `f1` is
        0.0 when there are no true positives, false positives or false
        negatives.
    """
    W1, b1, W2, b2 = parameters
    max_iters = 100
    lr = 1e-4
    decay = 1 - lr*lambda_
    tp = 0
    tn = 0
    fp = 0
    fn = 0

    for h0, y in zip(train_x, train_y):
        # a fresh range per sample: a counter shared across samples would be
        # exhausted by the first one and leave all others untrained
        for _ in range(max_iters):
            # forward pass with the current, already updated parameters
            h1, a1, a2, y_hat = forward_pass(h0, [W1, b1, W2, b2])
            grad_W1_L, grad_W2_L, grad_b2_L, grad_b1_L = backward_pass(h0, h1, a1, a2, y_hat, y, W2)
            W1 = decay*W1 - lr*grad_W1_L
            W2 = decay*W2 - lr*grad_W2_L
            b2 = decay*b2 - lr*grad_b2_L
            b1 = decay*b1 - lr*grad_b1_L

        # y_hat now belongs to this sample (output of its last forward pass)
        if np.round(y_hat) == 1:
            if y == 1:
                tp += 1
            else:
                fp += 1
        else:
            if y == 0:
                tn += 1
            else:
                fn += 1

    total = tp + tn + fp + fn
    acc = (tp+tn)/total if total else 0.0
    f1_denominator = tp + 0.5*(fp+fn)
    f1 = tp/f1_denominator if f1_denominator else 0.0
    return W1, b1, W2, b2, acc, f1

def train_loop(train_x, train_y, val_x, val_y, epochs):
    """Initialise the network and train it with weight decay on `epochs` mini-batches.

    The training set is cut into `epochs` contiguous, non-overlapping chunks
    that together cover every sample; `grad_descent` is run once per chunk
    with regularisation strength 0.1 and the accuracy it reports is collected
    and plotted.

    Args:
        train_x: 2-D array, one input row per sample.
        train_y: 2-D array, one label row per sample.
        val_x, val_y: validation split; accepted but not used.
        epochs: number of mini-batches the training set is divided into.

    Returns:
        Tuple (W1, b1, W2, b2) of trained weights and biases.
    """
    input_dim = train_x[0].shape[0]
    hidden_layer_dim = 2*input_dim
    output_dim = train_y[0].shape[0]
    lambda_ = 0.1

    accuracy = []

    # every parameter array is uniform noise times one normally distributed
    # scale factor.
    # NOTE(review): scale factors around 100 / 10 / -10 give very large initial
    # pre-activations -- confirm these values are intended.
    W1 = np.random.rand(hidden_layer_dim, input_dim)*np.random.normal(100, 5)
    b1 = np.random.rand(hidden_layer_dim, 1)*np.random.normal(10, 2)
    W2 = np.random.rand(output_dim, hidden_layer_dim)*np.random.normal(-10, 2)
    b2 = np.random.rand(output_dim, 1)*np.random.normal(size=1)

    # array_split hands out every sample exactly once, even when the set size
    # is not a multiple of `epochs`
    x_batches = np.array_split(train_x, epochs) if epochs > 0 else []
    y_batches = np.array_split(train_y, epochs) if epochs > 0 else []

    for i in tqdm(range(len(x_batches))):
        batch_x, batch_y = x_batches[i], y_batches[i]
        if batch_x.shape[0] == 0:
            # more batches requested than samples available
            continue
        W1, b1, W2, b2, acc, _ = grad_descent(batch_x, batch_y, [W1, b1, W2, b2], lambda_)
        accuracy.append(acc)

    plot_accuracy(accuracy)
    return W1, b1, W2, b2

def evaluate(test_x, test_y, parameters, name_data):
    """Score the network on a labelled set and print accuracy and F1.

    Args:
        test_x: 2-D array, one input row per sample.
        test_y: labels (0 or 1), one row per sample.
        parameters: sequence (W1, b1, W2, b2) passed to `forward_pass`.
        name_data: label for the printed lines, e.g. 'Test'.

    The accuracy is reported as 0.0 for an empty set, and the F1 score as 0.0
    when there are no true positives, false positives or false negatives,
    instead of raising ZeroDivisionError.
    """
    tp = 0
    tn = 0
    fp = 0
    fn = 0

    for features, label in zip(test_x, test_y):
        _, _, _, y_hat = forward_pass(features, parameters)
        # the rounded sigmoid output is the predicted class
        if np.round(y_hat) == 1:
            if label == 1:
                tp += 1
            else:
                fp += 1
        else:
            if label == 0:
                tn += 1
            else:
                fn += 1

    total = tp + tn + fp + fn
    acc = (tp+tn)/total if total else 0.0
    f1_denominator = tp + 0.5*(fp+fn)
    f1 = tp/f1_denominator if f1_denominator else 0.0
    print("\n\n", name_data, "accuracy = ", acc)
    print("\n", name_data, "F1-Score = ", f1)
    return

def plot_accuracy(acc):
    """Plot the training accuracy curve on a fixed 0..1 axis.

    The last five entries of `acc` are left out of the plot.
    """
    shown = acc[:-5]
    plt.plot(shown)
    plt.title("Training Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.ylim(0.0, 1.0)
    plt.grid()
    plt.show()

def main():
    """Train the L2-regularised NumPy MLP on the PneumoniaMNIST archive and print its test scores."""
    path = '/content//drive/MyDrive/PRNN/pneumoniamnist.npz'
    n_batches = 200

    # process_data returns the splits in the order train, test, validation
    splits = process_data(path)
    train_x, train_y, test_x, test_y, val_x, val_y, test_images = splits

    W1, b1, W2, b2 = train_loop(train_x, train_y, val_x, val_y, n_batches)
    trained_parameters = [W1, b1, W2, b2]
    evaluate(test_x, test_y, trained_parameters, 'Test')


if __name__ == '__main__':
    main()

"""Backpropagation of single layer MLP with L1 regularization"""

import numpy as np
from tqdm import tqdm
from numba import jit
import matplotlib.pyplot as plt
from skimage.transform import downscale_local_mean, rescale


def process_data(path):
    """Load an .npz image archive and return flattened, downscaled splits.

    The archive must provide the keys 'train_images', 'train_labels',
    'val_images', 'val_labels', 'test_images' and 'test_labels'. Pixel values
    are divided by 255, every image is average-pooled by a factor of 4 per
    axis and unrolled into one row.

    Returns:
        (train_x, train_y, test_x, test_y, val_x, val_y, test_images) where the
        `*_x` entries are the pooled/flattened arrays and `test_images` holds
        the scaled, full-resolution test images.
    """
    scale = 4

    def _pool_and_flatten(images):
        # One row per image; the row length assumes square images
        # (shape[1]**2 pixels) shrunk by `scale` along each axis.
        row_length = images.shape[1]**2//scale**2
        rows = np.zeros((images.shape[0], row_length))
        for idx in range(images.shape[0]):
            pooled = downscale_local_mean(images[idx, :, :], (scale, scale))
            rows[idx, :] = pooled.reshape(row_length)
        return rows

    dataset = np.load(path)
    train_images, train_y = dataset['train_images']/255.0, dataset['train_labels']
    val_images, val_y = dataset['val_images']/255.0, dataset['val_labels']
    test_images, test_y = dataset['test_images']/255.0, dataset['test_labels']

    train_flat = _pool_and_flatten(train_images)
    val_flat = _pool_and_flatten(val_images)
    test_flat = _pool_and_flatten(test_images)

    return train_flat, train_y, test_flat, test_y, val_flat, val_y, test_images


@jit(nopython=True)
def ReLU(x):
    """Rectified linear unit: entries of `x` that are not > 0 are multiplied by 0."""
    positive = x > 0
    return x * positive


@jit(nopython=True)
def dReLU(x):
    """Derivative of ReLU: 1.0 where `x` > 0, otherwise 0.0."""
    active = x > 0
    return 1.0 * active


@jit(nopython=True)
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator


def forward_pass(h0, theta):
    """Forward pass of the one-hidden-layer network for a single sample.

    Args:
        h0: 1-D input vector; it is reshaped to a column before use.
        theta: sequence (W1, b1, W2, b2).

    Returns:
        [h1, a1, a2, y_hat]: hidden activation, hidden pre-activation, output
        pre-activation and sigmoid output.
    """
    column = h0.reshape((h0.shape[0], 1))
    W1, b1, W2, b2 = theta

    # Hidden layer: affine map followed by ReLU.
    a1 = np.matmul(W1, column) + b1
    h1 = ReLU(a1)

    # Output layer: affine map followed by a sigmoid.
    a2 = np.matmul(W2, h1) + b2
    y_hat = sigmoid(a2)

    return [h1, a1, a2, y_hat]


def backward_pass(h0, h1, a1, a2, y_hat, y, W2):
    """Back-propagate the output error `y_hat - y` through both layers.

    `a2` is accepted for symmetry with `forward_pass` but not used.

    Returns:
        [grad_W1, grad_W2, grad_b2, grad_b1] -- note that the bias gradients
        come in the order b2, b1.
    """
    # Error at the output pre-activation.
    delta_out = y_hat - y
    # Error pushed back through W2 and the ReLU of the hidden layer.
    delta_hidden = np.matmul(W2.T, delta_out) * dReLU(a1)

    weight_grad_hidden = np.outer(delta_hidden, h0.T)
    weight_grad_out = np.outer(delta_out, h1.T)

    return [weight_grad_hidden, weight_grad_out, delta_out, delta_hidden]


def grad_descent(train_x, train_y, parameters, lambda_):
    """Train on every sample in turn with L1-regularised gradient steps.

    Each sample receives `max_iters` consecutive updates. After the updates
    the last forward output of that sample is rounded to 0/1 and counted
    towards the batch accuracy / F1-score.

    Fixes relative to the previous version:
      * the step counter is per sample (it used to be shared, so only the
        first sample was ever trained and every later sample was scored with
        the first sample's prediction);
      * the counter is incremented once per step, not twice;
      * the forward pass uses the current weights rather than the list that
        was passed in;
      * empty batches no longer raise ZeroDivisionError.

    Args:
        train_x: 2-D array, one flattened sample per row.
        train_y: labels indexed like `train_x`.
        parameters: [W1, b1, W2, b2].
        lambda_: L1 penalty weight (applied to weights and biases alike).

    Returns:
        (W1, b1, W2, b2, acc, f1).
    """
    W1, b1, W2, b2 = parameters
    max_iters = 100
    lr = 1e-4
    tp = tn = fp = fn = 0

    for i in range(train_x.shape[0]):
        h0 = train_x[i]
        y = train_y[i]

        for _ in range(max_iters):
            h1, a1, a2, y_hat = forward_pass(h0, [W1, b1, W2, b2])
            grad_W1_L, grad_W2_L, grad_b2_L, grad_b1_L = backward_pass(h0, h1, a1, a2, y_hat, y, W2)
            # Sub-gradient of the L1 penalty is lambda_ * sign(parameter).
            W1 = W1 + (-lr*lambda_)*np.sign(W1) - lr*grad_W1_L
            W2 = W2 + (-lr*lambda_)*np.sign(W2) - lr*grad_W2_L
            b2 = b2 + (-lr*lambda_)*np.sign(b2) - lr*grad_b2_L
            b1 = b1 + (-lr*lambda_)*np.sign(b1) - lr*grad_b1_L

        if np.round(y_hat) == 1:
            if train_y[i] == 1:
                tp += 1
            else:
                fp += 1
        else:
            if train_y[i] == 0:
                tn += 1
            else:
                fn += 1

    total = tp + tn + fp + fn
    acc = (tp + tn) / total if total else 0.0
    f1_denominator = tp + 0.5 * (fp + fn)
    f1 = tp / f1_denominator if f1_denominator else 0.0
    return W1, b1, W2, b2, acc, f1

def train_loop(train_x, train_y, val_x, val_y, epochs):
    """Initialise the network and train it on one mini-batch per epoch.

    The training set is cut into consecutive, non-overlapping mini-batches of
    `len(train_x) // epochs` rows (at least one row); epoch `i` trains on
    batch `i`. The previous slice `i : i - 1 + n // epochs` was a window that
    slid by one row per epoch, so only the first rows of the data were ever
    used and the slice was empty whenever `epochs` exceeded the sample count.

    Args:
        train_x: 2-D array, one flattened sample per row.
        train_y: 2-D label array indexed like `train_x`.
        val_x, val_y: accepted for interface compatibility; not used here.
        epochs: number of mini-batch passes.

    Returns:
        (W1, b1, W2, b2) after the last epoch. The per-epoch training
        accuracy is plotted with `plot_accuracy` before returning.
    """
    h0 = train_x[0]
    y = train_y[0]

    input_dim = h0.shape[0]
    hidden_layer_dim = 2*input_dim
    output_dim = y.shape[0]

    accuracy = []

    # NOTE(review): the uniform draws are scaled by samples from N(100, 5),
    # N(10, 2) and N(-10, 2), which is far larger than common initialisation
    # schemes -- confirm this is intended.
    W1 = np.random.rand(hidden_layer_dim, input_dim)*np.random.normal(100, 5)
    b1 = np.random.rand(hidden_layer_dim, 1)*np.random.normal(10, 2)
    W2 = np.random.rand(output_dim, hidden_layer_dim)*np.random.normal(-10, 2)
    b2 = np.random.rand(output_dim, 1)*np.random.normal(size=1)

    n_samples = train_x.shape[0]
    batch_size = max(1, n_samples // epochs)
    for i in tqdm(range(epochs)):
        # Wrap around so a batch is never empty, even if epochs > n_samples.
        start = (i * batch_size) % n_samples
        stop = start + batch_size
        W1, b1, W2, b2, acc, f1 = grad_descent(train_x[start:stop, :], train_y[start:stop, :], [W1, b1, W2, b2], 0.1)
        accuracy.append(acc)

    plot_accuracy(accuracy)
    return W1, b1, W2, b2

def evaluate(test_x, test_y, parameters, name_data):
    """Print accuracy and F1-score of the network on a labelled data set.

    Each row of `test_x` goes through `forward_pass`; the rounded sigmoid
    output is the predicted class and is compared with `test_y`.

    Args:
        test_x: 2-D array with one flattened sample per row.
        test_y: labels indexed like `test_x` (1 = positive, 0 = negative).
        parameters: [W1, b1, W2, b2] as consumed by `forward_pass`.
        name_data: text printed in front of the metrics (e.g. 'Test').

    Returns:
        None; the metrics are only printed.
    """
    tp = tn = fp = fn = 0

    for i in range(test_x.shape[0]):
        _, _, _, y_hat = forward_pass(test_x[i], parameters)
        if np.round(y_hat) == 1:
            if test_y[i] == 1:
                tp += 1
            else:
                fp += 1
        else:
            if test_y[i] == 0:
                tn += 1
            else:
                fn += 1

    # Avoid ZeroDivisionError for an empty data set or when there is no
    # positive prediction/label at all.
    total = tp + tn + fp + fn
    acc = (tp + tn) / total if total else 0.0
    f1_denominator = tp + 0.5 * (fp + fn)
    f1 = tp / f1_denominator if f1_denominator else 0.0
    print("\n\n", name_data, "accuracy = ", acc)
    print("\n", name_data, "F1-Score = ", f1)
    return

def plot_accuracy(acc):
    """Draw the per-epoch training accuracy and display the figure.

    The final five values of `acc` are excluded from the plot by the slice
    below.
    """
    visible = acc[:-5]
    plt.plot(visible)
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.grid()
    # Fixed axis range so that runs can be compared visually.
    plt.ylim(0.0, 1.0)
    plt.title("Training Accuracy")
    plt.show()
    return

def main():
    """Run the L1-regularised experiment: load data, train 200 epochs, score the test split."""
    dataset_path = '/content//drive/MyDrive/PRNN/pneumoniamnist.npz'
    (train_x, train_y,
     test_x, test_y,
     val_x, val_y,
     test_images) = process_data(dataset_path)

    W1, b1, W2, b2 = train_loop(train_x, train_y, val_x, val_y, 200)
    trained = [W1, b1, W2, b2]
    evaluate(test_x, test_y, trained, 'Test')
    return


# Execute only when this file is run directly.
if __name__ == '__main__':
    main()

"""Backpropagation of single layer MLP with Dropout"""

import numpy as np
from tqdm import tqdm
from numba import jit
from skimage.transform import downscale_local_mean, rescale


def process_data(path):
    """Read the .npz archive at `path` and prepare flattened, downscaled splits.

    Expected keys: 'train_images', 'train_labels', 'val_images', 'val_labels',
    'test_images', 'test_labels'. Images are divided by 255, average-pooled
    with a 4x4 window and unrolled into rows.

    Returns:
        (train_x, train_y, test_x, test_y, val_x, val_y, test_images); the
        last entry is the scaled test image stack at full resolution.
    """
    scale = 4

    def _pool_and_flatten(images):
        # Row length assumes square images reduced by `scale` on both axes.
        row_length = images.shape[1]**2//scale**2
        rows = np.zeros((images.shape[0], row_length))
        for idx in range(images.shape[0]):
            pooled = downscale_local_mean(images[idx, :, :], (scale, scale))
            rows[idx, :] = pooled.reshape(row_length)
        return rows

    dataset = np.load(path)
    train_images, train_y = dataset['train_images']/255.0, dataset['train_labels']
    val_images, val_y = dataset['val_images']/255.0, dataset['val_labels']
    test_images, test_y = dataset['test_images']/255.0, dataset['test_labels']

    train_flat = _pool_and_flatten(train_images)
    val_flat = _pool_and_flatten(val_images)
    test_flat = _pool_and_flatten(test_images)

    return train_flat, train_y, test_flat, test_y, val_flat, val_y, test_images


@jit(nopython=True)
def ReLU(x):
    """Element-wise ReLU implemented as `x` times the mask `x > 0`."""
    keep = x > 0
    return x * keep


@jit(nopython=True)
def dReLU(x):
    """Element-wise ReLU derivative: 1.0 for entries > 0, else 0.0."""
    keep = x > 0
    return 1.0 * keep


@jit(nopython=True)
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator


def forward_pass(h0, theta, p=0.9, training=True):
    """Forward pass of the one-hidden-layer network with dropout on the hidden layer.

    Args:
        h0: 1-D input vector; reshaped to a column before use.
        theta: sequence (W1, b1, W2, b2).
        p: probability of keeping a hidden unit.
        training: when True (the default, identical to the previous
            behaviour) a fresh random keep-mask is applied to the hidden
            pre-activation. When False no unit is dropped and the
            pre-activation is scaled by `p` instead, which gives a
            deterministic pass whose hidden activations match the training
            expectation (ReLU is positively homogeneous).

    Returns:
        [h1, a1, a2, y_hat], where `a1` is the masked (or scaled) hidden
        pre-activation that was fed into the ReLU.
    """
    h0 = h0.reshape((h0.shape[0], 1))
    W1, b1, W2, b2 = theta

    a1 = b1 + np.matmul(W1, h0)
    if training:
        keep_mask = np.random.rand(a1.shape[0], 1) < p
        a1 = a1 * keep_mask
    else:
        a1 = a1 * p
    h1 = ReLU(a1)
    a2 = b2 + np.matmul(W2, h1)
    y_hat = sigmoid(a2)
    outputs = [h1, a1, a2, y_hat]
    return outputs


def backward_pass(h0, h1, a1, a2, y_hat, y, W2):
    """Compute the gradients for both layers from the output error `y_hat - y`.

    `a2` is part of the signature but is not read.

    Returns:
        [grad_W1, grad_W2, grad_b2, grad_b1] (bias gradients ordered b2, b1).
    """
    # Output-layer error term.
    delta_out = y_hat - y
    # Hidden-layer error term: propagate through W2, gate by the ReLU slope.
    delta_hidden = np.matmul(W2.T, delta_out) * dReLU(a1)

    weight_grad_hidden = np.outer(delta_hidden, h0.T)
    weight_grad_out = np.outer(delta_out, h1.T)

    return [weight_grad_hidden, weight_grad_out, delta_out, delta_hidden]


def grad_descent(train_x, train_y, parameters):
    """Train on every sample in turn and print the resulting training accuracy.

    Each sample receives `max_iters` consecutive gradient steps; its last
    forward output, rounded to 0/1, is then compared with its label.

    Fixes relative to the previous version:
      * the step counter is per sample (it used to be shared, so only the
        first sample was trained and all later ones reused its prediction);
      * the forward pass uses the current weights rather than the list that
        was passed in;
      * the prediction is rounded before the comparison (a raw sigmoid output
        is practically never exactly equal to 0 or 1);
      * an empty data set no longer raises ZeroDivisionError.

    Args:
        train_x: 2-D array, one flattened sample per row.
        train_y: labels indexed like `train_x`.
        parameters: [W1, b1, W2, b2].

    Returns:
        (W1, b1, W2, b2) after all updates.
    """
    W1, b1, W2, b2 = parameters
    max_iters = 100
    lr = 1e-2
    no_correct = 0

    n_samples = train_x.shape[0]
    for i in range(n_samples):
        h0 = train_x[i]
        y = train_y[i]

        for _ in range(max_iters):
            h1, a1, a2, y_hat = forward_pass(h0, [W1, b1, W2, b2])
            grad_W1_L, grad_W2_L, grad_b2_L, grad_b1_L = backward_pass(h0, h1, a1, a2, y_hat, y, W2)
            W1 = W1 - lr*grad_W1_L
            W2 = W2 - lr*grad_W2_L
            b2 = b2 - lr*grad_b2_L
            b1 = b1 - lr*grad_b1_L

        if np.round(y_hat) == train_y[i]:
            no_correct += 1

    acc = no_correct/n_samples if n_samples else 0.0
    print("\nTraining Accuracy = ", acc)
    return W1, b1, W2, b2

def train_loop(train_x, train_y, val_x, val_y, epochs):
    """Initialise the network uniformly in [0, 1) and run `epochs` training passes.

    Passes alternate between the two data sets: even-numbered passes call
    `grad_descent` on the training split, odd-numbered passes on the
    validation split.

    Returns:
        (W1, b1, W2, b2) after the last pass.
    """
    first_sample = train_x[0]
    first_label = train_y[0]

    n_inputs = first_sample.shape[0]
    n_hidden = 2*n_inputs
    n_outputs = first_label.shape[0]

    W1 = np.random.rand(n_hidden, n_inputs)
    b1 = np.random.rand(n_hidden, 1)
    W2 = np.random.rand(n_outputs, n_hidden)
    b2 = np.random.rand(n_outputs, 1)

    for i in tqdm(range(epochs)):
        on_training_split = (i % 2 == 0)
        features = train_x if on_training_split else val_x
        labels = train_y if on_training_split else val_y
        W1, b1, W2, b2 = grad_descent(features, labels, [W1, b1, W2, b2])

    return W1, b1, W2, b2

def evaluate(test_x, test_y, parameters, name_data):
    """Print the classification accuracy of the network on a labelled data set.

    The sigmoid output of `forward_pass` is rounded to 0/1 before it is
    compared with the label; the previous exact comparison of the raw float
    output with the label could practically never succeed. An empty data set
    reports an accuracy of 0.0 instead of raising ZeroDivisionError.

    Note that `forward_pass`, as called here, draws a random dropout mask, so
    repeated evaluations can differ.

    Args:
        test_x: 2-D array with one flattened sample per row.
        test_y: labels indexed like `test_x`.
        parameters: [W1, b1, W2, b2] as consumed by `forward_pass`.
        name_data: text printed in front of the metric (e.g. 'Test').

    Returns:
        None; the accuracy is only printed.
    """
    no_correct = 0
    n_samples = test_x.shape[0]
    for i in range(n_samples):
        _, _, _, y_hat = forward_pass(test_x[i], parameters)
        if np.round(y_hat) == test_y[i]:
            no_correct += 1
    acc = no_correct/n_samples if n_samples else 0.0
    print("\n\n", name_data, "accuracy = ", acc)
    return


def main():
    """Run the dropout experiment: load data, train for 100 passes, score the test split."""
    dataset_path = '/content/drive/MyDrive/PRNN/pneumoniamnist.npz'
    (train_x, train_y,
     test_x, test_y,
     val_x, val_y,
     test_images) = process_data(dataset_path)

    W1, b1, W2, b2 = train_loop(train_x, train_y, val_x, val_y, epochs=100)
    trained = [W1, b1, W2, b2]
    evaluate(test_x, test_y, trained, 'Test')
    return


# Execute only when this file is run directly.
if __name__ == '__main__':
    main()

"""# Backpropagation with Dropout"""

import numpy as np
#import datasets.mnist.loader as mnist
import matplotlib.pylab as plt
from sklearn .preprocessing import OneHotEncoder
 
 
class ANN:
    """Fully connected network with sigmoid hidden layers and a softmax output.

    `fit` performs full-batch gradient descent with an L2 penalty weighted by
    `lam`. The "dropout" of this class acts on the parameter updates: for
    every step each gradient is multiplied by a random 0/1 mask containing a
    fraction `p` of ones (see `droputMatrix`), so only those entries change.

    The constructor copies `layers_size`, and `fit` rebuilds the full layer
    list from that copy. Previously `fit` inserted the input width into the
    caller's list, which modified the caller's object and made a second call
    to `fit` build a network with the wrong shapes.

    Attributes:
        layers_size: layer widths; after `fit`, prefixed with the input width.
        parameters: dict with arrays "W<l>" and "b<l>" for l = 1..L.
        L: number of weight layers (length of the constructor's list).
        n: number of samples of the last `fit` call.
        p: fraction of gradient entries applied per update.
        costs: cost recorded every 10th iteration of the last `fit` call.
    """

    def __init__(self, layers_size, p):
        # Private copy: the widths of all layers after the input layer.
        self._layer_widths = list(layers_size)
        self.layers_size = list(layers_size)
        self.parameters = {}
        self.L = len(self.layers_size)
        self.n = 0
        self.p = p
        self.costs = []

    def sigmoid(self, Z):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-Z))

    def softmax(self, Z):
        """Softmax over axis 0 (one column per sample), shifted by the global max."""
        expZ = np.exp(Z - np.max(Z))
        return expZ / expZ.sum(axis=0, keepdims=True)

    def initialize_parameters(self):
        """Draw weights ~ N(0, 1/fan_in) with a fixed seed and zero the biases."""
        np.random.seed(1)
        print(self.layers_size)
        for l in range(1, len(self.layers_size)):
            fan_in = self.layers_size[l - 1]
            self.parameters["W" + str(l)] = np.random.randn(self.layers_size[l], fan_in) / np.sqrt(fan_in)
            self.parameters["b" + str(l)] = np.zeros((self.layers_size[l], 1))

    def _propagate(self, X, scale_cached_hidden_weights):
        """Shared forward pass; returns (softmax output, cache of A/W/Z per layer).

        When `scale_cached_hidden_weights` is true the hidden-layer weights
        written to the cache are multiplied by `p`; the activations are
        computed from the unscaled parameters in both cases.
        """
        store = {}

        A = X.T
        for l in range(1, self.L):
            W = self.parameters["W" + str(l)]
            Z = W.dot(A) + self.parameters["b" + str(l)]
            A = self.sigmoid(Z)
            store["A" + str(l)] = A
            store["W" + str(l)] = W * self.p if scale_cached_hidden_weights else W
            store["Z" + str(l)] = Z

        W = self.parameters["W" + str(self.L)]
        Z = W.dot(A) + self.parameters["b" + str(self.L)]
        A = self.softmax(Z)
        store["A" + str(self.L)] = A
        store["W" + str(self.L)] = W
        store["Z" + str(self.L)] = Z
        return A, store

    def forward(self, X):
        """Forward pass used during training; X has one sample per row."""
        return self._propagate(X, scale_cached_hidden_weights=False)

    def forward2(self, X):
        """Forward pass used by `predict`; caches hidden weights scaled by `p`."""
        return self._propagate(X, scale_cached_hidden_weights=True)

    def sigmoid_derivative(self, Z):
        """Derivative of the logistic function evaluated at pre-activation Z."""
        s = 1 / (1 + np.exp(-Z))
        return s * (1 - s)

    def backward(self, X, Y, lam, store):
        """Back-propagate through all layers using the cache from `forward`.

        Args:
            X: inputs, one sample per row.
            Y: one-hot targets, one sample per row.
            lam: L2 penalty weight; contributes (lam / n) * W to each dW.
            store: cache returned by `forward`; "A0" is added to it here.

        Returns:
            dict with "dW<l>" and "db<l>" for l = 1..L.
        """
        derivatives = {}

        store["A0"] = X.T

        # Output layer: softmax with cross-entropy gives dZ = A - Y.
        A = store["A" + str(self.L)]
        dZ = A - Y.T
        dW = dZ.dot(store["A" + str(self.L - 1)].T) / self.n + (lam / self.n) * store['W' + str(self.L)]
        db = np.sum(dZ, axis=1, keepdims=True) / self.n
        dAPrev = store["W" + str(self.L)].T.dot(dZ)

        derivatives["dW" + str(self.L)] = dW
        derivatives["db" + str(self.L)] = db

        for l in range(self.L - 1, 0, -1):
            dZ = dAPrev * self.sigmoid_derivative(store["Z" + str(l)])
            dW = 1. / self.n * dZ.dot(store["A" + str(l - 1)].T) + (lam / self.n) * store['W' + str(l)]
            db = 1. / self.n * np.sum(dZ, axis=1, keepdims=True)
            if l > 1:
                dAPrev = store["W" + str(l)].T.dot(dZ)

            derivatives["dW" + str(l)] = dW
            derivatives["db" + str(l)] = db

        return derivatives

    def fit(self, X, Y, lam, learning_rate=0.01, n_iterations=2500):
        """Train from scratch on (X, Y) with randomly masked gradient updates.

        Args:
            X: inputs, one sample per row.
            Y: one-hot targets, one sample per row.
            lam: L2 penalty weight.
            learning_rate: step size.
            n_iterations: number of full-batch updates.

        Side effects: seeds NumPy's global RNG with 1, re-initialises
        `parameters`, resets `costs`, and prints the cost and training
        accuracy every 100 iterations.
        """
        np.random.seed(1)

        self.n = X.shape[0]

        # Rebuild instead of inserting so that repeated calls stay consistent.
        self.layers_size = [X.shape[1]] + self._layer_widths
        self.costs = []

        self.initialize_parameters()
        for loop in range(n_iterations):
            A, store = self.forward(X)
            cost = -np.mean(Y * np.log(A.T + 1e-8))
            derivatives = self.backward(X, Y, lam, store)

            for l in range(1, self.L + 1):
                dW = derivatives["dW" + str(l)]
                db = derivatives["db" + str(l)]
                # Mask for dW is drawn before the one for db (RNG order).
                self.parameters["W" + str(l)] = self.parameters["W" + str(l)] - learning_rate * dW * self.droputMatrix(self.p, dW)
                self.parameters["b" + str(l)] = self.parameters["b" + str(l)] - learning_rate * db * self.droputMatrix(self.p, db)

            if loop % 100 == 0:
                print("Cost: ", cost, "Train Accuracy:", self.predict(X, Y))

            if loop % 10 == 0:
                self.costs.append(cost)

    def predict(self, X, Y):
        """Return the accuracy in percent of the arg-max prediction against one-hot Y."""
        A, cache = self.forward2(X)
        y_hat = np.argmax(A, axis=0)
        Y = np.argmax(Y, axis=1)
        accuracy = (y_hat == Y).mean()
        return accuracy * 100

    def plot_cost(self):
        """Plot the recorded costs (one point per 10 iterations)."""
        plt.figure()
        plt.plot(np.arange(len(self.costs)), self.costs)
        plt.xlabel("epochs")
        plt.ylabel("cost")
        plt.show()

    def droputMatrix(self, p, Mat):
        """Return a random 0/1 array shaped like 2-D `Mat` with int(size * p) ones."""
        n_entries = Mat.shape[0] * Mat.shape[1]
        n_ones = int(n_entries * p)
        mask = np.concatenate((np.ones(n_ones), np.zeros(n_entries - n_ones)))
        np.random.shuffle(mask)
        return mask.reshape((Mat.shape[0], Mat.shape[1]))
    
        
        
 
 
def pre_process_data(train_x, train_y, test_x, test_y):
    """Scale images to [0, 1], flatten them and one-hot encode the labels.

    Args:
        train_x, test_x: image stacks with the sample index on axis 0 and
            pixel values on a 0..255 scale.
        train_y, test_y: label arrays with one label per sample.

    Returns:
        (train_x, train_y, test_x, test_y): images flattened to one row per
        sample, labels as dense one-hot arrays. The encoder is fitted on the
        training labels only and reused for the test labels.
    """
    # Normalize
    train_x = train_x / 255.
    test_x = test_x / 255.

    # scikit-learn renamed `sparse` to `sparse_output` (1.2) and later removed
    # the old name; try the new spelling first and fall back for old releases.
    try:
        enc = OneHotEncoder(sparse_output=False, categories='auto')
    except TypeError:
        enc = OneHotEncoder(sparse=False, categories='auto')
    train_y = enc.fit_transform(train_y.reshape(len(train_y), -1))

    test_y = enc.transform(test_y.reshape(len(test_y), -1))

    # -1 flattens whatever the image size is (previously hard-coded to 28*28).
    return train_x.reshape(len(train_x), -1), train_y, test_x.reshape(len(test_x), -1), test_y
 
 
if __name__ == '__main__':

    # Xtrain, ytrain, Xtest and ytest must already exist in the session;
    # they are not defined in this part of the file.
    train_x, train_y, test_x, test_y = pre_process_data(Xtrain, ytrain, Xtest, ytest)

    # Two hidden layers (50 and 25 units) followed by a 2-way softmax.
    layers_dims = [50, 25, 2]

    ann = ANN(layers_dims, 0.4)
    ann.fit(train_x, train_y, lam=0, learning_rate=0.1, n_iterations=10000)

    train_score = ann.predict(train_x, train_y)
    print("Train Accuracy:", train_score)
    test_score = ann.predict(test_x, test_y)
    print("Test Accuracy:", test_score)

    ann.plot_cost()



In [None]:
#CNN for pneumoniamnist
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below /kaggle/input.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt  #for visualization
# %matplotlib inline

# Load the three splits of the archive.
data = np.load("../input/pneumoniamnist/pneumoniamnist.npz" )
train_dataset = data["train_images"]
train_labels = data["train_labels"]

valid_dataset = data["val_images"]
valid_labels = data["val_labels"]

test_dataset = data["test_images"]
test_labels = data["test_labels"]

print(train_dataset.shape)
print(train_labels.shape)
print(valid_dataset.shape)
print(valid_labels.shape)
print(test_dataset.shape)
print(test_labels.shape)

#Normalize the data
# Convert to float BEFORE subtracting: on an unsigned 8-bit image array
# `array - 127` stays uint8 and wraps around for every pixel below 127.
train_dataset = (train_dataset.astype(np.float64)-127)/255
valid_dataset = (valid_dataset.astype(np.float64)-127)/255
test_dataset = (test_dataset.astype(np.float64)-127)/255

image_size = 28
num_labels = 2
num_channels = 1 # grayscale


def _one_hot(labels, num_classes):
    """Return an (n, num_classes) float array with a 1 in each row's label column."""
    labels = np.asarray(labels).reshape(-1)
    encoded = np.zeros((labels.shape[0], num_classes))
    encoded[np.arange(labels.shape[0]), labels] = 1
    return encoded


#One Hot Encoding the label
neural_train_labels = _one_hot(train_labels, num_labels)
neural_valid_labels = _one_hot(valid_labels, num_labels)
neural_test_labels = _one_hot(test_labels, num_labels)

print("Train labels = {}".format(neural_train_labels.shape))
print("Valid labels = {}".format(neural_valid_labels.shape))
print("Test labels  = {}".format(neural_test_labels.shape))


def reformat(dataset):
    """Reshape to (n, image_size, image_size, num_channels) as float32."""
    dataset = dataset.reshape(-1,image_size,image_size,num_channels).astype(np.float32)
    return dataset

train_dataset = reformat(train_dataset)
valid_dataset = reformat(valid_dataset)
test_dataset = reformat(test_dataset)

# Both shapes are printed now; the second format argument used to be
# dropped because the template had a single placeholder.
print('Training set   = {} {}'.format(train_dataset.shape, neural_train_labels.shape))
print('Validation set = {} {}'.format(valid_dataset.shape, neural_valid_labels.shape))
print('Test set       = {} {}'.format(test_dataset.shape, neural_test_labels.shape))

##HYPER_PARAMETERS
# side length of the square input images, in pixels
image_size = 28

# number of input channels (1 = greyscale)
num_channels = 1

# side length of the square convolution kernel
patch_size = 3

# number of filters in the first conv layer (the second layer uses depth*4)
depth = 4

# width of the first fully connected layer
hidden1 = 256

# step size applied by update_parameters
learning_rate = 0.1

# weight-penalty coefficient used in the loss and in the weight gradients
beta = 0.01

# number of output classes
num_classes = 2

#Activation layers used in between and final

#sigmoid
def sigmoid(X):
    """Element-wise logistic function 1 / (1 + exp(-X))."""
    negated = -1 * X
    denominator = 1 + np.exp(negated)
    return 1 / denominator

#sigmoid_derivative
def sigmoid_derivative(x):
    """Sigmoid slope expressed through the sigmoid OUTPUT `x`: x * (1 - x)."""
    complement = 1 - x
    return x * complement

#Relu activation function
def relu(x):
    """Element-wise ReLU computed as `x` times the mask `x > 0`."""
    is_positive = x > 0
    return x * is_positive

#relu_derivative
def relu_derivative(x):
    """ReLU slope: 1.0 where `x` > 0 and 0.0 elsewhere."""
    is_positive = x > 0
    return 1. * is_positive

#softmax
def softmax(X):
    """Row-wise softmax of a 2-D array (one sample per row).

    The row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps `exp` from overflowing, which
    previously turned rows with large logits into NaN.
    """
    shifted = X - np.max(X, axis=1, keepdims=True)
    exp_X = np.exp(shifted)
    # Sum over the class axis of each row, kept as a column for broadcasting.
    sum_exp_X = np.sum(exp_X, axis=1, keepdims=True)
    return exp_X / sum_exp_X

def initialize_parameters():
    """Create the initial CNN parameters.

    All weights are drawn from N(0, 0.5); all biases start at zero. Shapes
    are derived from the module-level settings `patch_size`, `num_channels`,
    `depth`, `image_size`, `hidden1` and `num_classes`.

    Returns:
        dict with keys 'cw1', 'cb1', 'cw2', 'cb2' (conv layers) and
        'fw1', 'fb1', 'fw2', 'fb2' (fully connected layers).
    """
    mean, std = 0, 0.5

    conv2_depth = depth*4
    # Flattened size of the second conv output fed to the first dense layer.
    flat_features = (image_size//4-1) * (image_size//4-1) * depth * 4

    # The dict literal is evaluated top to bottom, so the random draws happen
    # in the order cw1, cw2, fw1, fw2.
    return {
        'cw1': np.random.normal(mean, std, (patch_size, patch_size, num_channels, depth)),
        'cb1': np.zeros([1, depth]),
        'cw2': np.random.normal(mean, std, (patch_size, patch_size, depth, conv2_depth)),
        'cb2': np.zeros([1, conv2_depth]),
        'fw1': np.random.normal(mean, std, (flat_features, hidden1)),
        'fb1': np.zeros([hidden1]),
        'fw2': np.random.normal(mean, std, (hidden1, num_classes)),
        'fb2': np.zeros([num_classes]),
    }

def conv_multiply(image, weights, stride=2):
    """Valid (unpadded) strided convolution of one image with a filter bank.

    The kernel size is taken from `weights` itself; previously the output
    size used `weights.shape` while the loops used the global `patch_size`,
    which only agreed when both happened to be equal.

    Args:
        image: array of shape (height, width, in_channels).
        weights: array of shape (k_h, k_w, in_channels, out_channels).
        stride: step between neighbouring patches (default 2, the value that
            used to be hard-coded).

    Returns:
        Array of shape (out_h, out_w, out_channels) where
        out_h = (height - k_h) // stride + 1, and likewise for out_w.
    """
    k_h, k_w = weights.shape[0], weights.shape[1]
    hsize = (image.shape[0] - k_h) // stride + 1
    vsize = (image.shape[1] - k_w) // stride + 1
    logits = np.zeros([hsize, vsize, weights.shape[3]])
    for row in range(hsize):
        rpos = row * stride
        for col in range(vsize):
            cpos = col * stride
            patch = image[rpos:rpos + k_h, cpos:cpos + k_w, :]
            # Contract (k_h, k_w, in_channels) against every filter at once.
            logits[row, col, :] = np.tensordot(patch, weights, axes=3)
    return logits

def conv_multiply_stride1(image, weights):
    """Valid (unpadded) convolution with stride 1 of one image with a filter bank.

    The kernel size is read from `weights` rather than from the global
    `patch_size`, so the loop bounds always agree with the output size.

    Args:
        image: array of shape (height, width, in_channels).
        weights: array of shape (k_h, k_w, in_channels, out_channels).

    Returns:
        Array of shape (height - k_h + 1, width - k_w + 1, out_channels).
    """
    k_h, k_w = weights.shape[0], weights.shape[1]
    hsize = image.shape[0] - k_h + 1
    vsize = image.shape[1] - k_w + 1
    logits = np.zeros([hsize, vsize, weights.shape[3]])
    for row in range(hsize):
        for col in range(vsize):
            patch = image[row:row + k_h, col:col + k_w, :]
            # Contract (k_h, k_w, in_channels) against every filter at once.
            logits[row, col, :] = np.tensordot(patch, weights, axes=3)
    return logits

#FORWARD PROPAGATION
def forward_propagation(dataset,parameters):
    """Run a batch through two conv layers and two fully connected layers.

    Every image goes through `conv_multiply` twice (sigmoid after each), the
    second feature map is flattened, followed by a sigmoid dense layer and a
    softmax output layer.

    Args:
        dataset: batch of images indexed along axis 0.
        parameters: dict with 'cw1', 'cb1', 'cw2', 'cb2', 'fw1', 'fb1',
            'fw2', 'fb2'.

    Returns:
        (cache, output): `cache` holds 'ca1', 'ca2' (conv activations),
        'fa1' (first dense activation) and 'output'; `output` is the softmax
        result.
    """
    batch = dataset.shape[0]

    conv1_w, conv1_b = parameters['cw1'], parameters['cb1']
    conv2_w, conv2_b = parameters['cw2'], parameters['cb2']
    dense1_w, dense1_b = parameters['fw1'], parameters['fb1']
    dense2_w, dense2_b = parameters['fw2'], parameters['fb2']

    # Spatial side lengths of the two conv outputs and the flattened size.
    side1 = image_size // 2 - 1
    side2 = image_size // 4 - 1
    flat_features = side2 * side2 * depth * 4

    first_maps = []
    second_maps = []
    # The convolution helper works on one image at a time.
    for i in range(batch):
        act1 = sigmoid(conv_multiply(dataset[i], conv1_w) + conv1_b)
        act2 = sigmoid(conv_multiply(act1, conv2_w) + conv2_b)
        first_maps.append(act1)
        second_maps.append(act2.reshape(flat_features))

    conv_activation1 = np.array(first_maps).reshape(batch, side1, side1, depth)
    conv_activation2 = np.array(second_maps).reshape(batch, side2, side2, depth * 4)

    # One row of features per image for the dense layers.
    flattened = conv_activation2.reshape(batch, flat_features)

    full_activation1 = sigmoid(np.matmul(flattened, dense1_w) + dense1_b)
    scores = np.matmul(full_activation1, dense2_w) + dense2_b
    output = softmax(scores)

    cache = {
        'ca1': conv_activation1,
        'ca2': conv_activation2,
        'fa1': full_activation1,
        'output': output,
    }
    return cache,output

#calculate conv deltas or errors only for one example
def conv_delta(next_error, weights):
    """Propagate the error of a stride-2 conv layer back to its input (one example).

    Every error entry is scattered back onto the input patch it was computed
    from, weighted by the filter values.

    Fix: the column loop used to be bounded by `delta.shape[2]` (the channel
    count) instead of the width, so only the first column of `next_error`
    was ever propagated. Kernel size and channel count are now read from
    `weights` instead of the global `patch_size` and a hard-coded `// 4`.

    Args:
        next_error: array (out_h, out_w, out_channels), error at the layer
            output.
        weights: array (k_h, k_w, in_channels, out_channels) of that layer.

    Returns:
        Array ((out_h - 1) * 2 + k_h, (out_w - 1) * 2 + k_w, in_channels);
        for a 3x3 kernel this is (2 * out_h + 1, 2 * out_w + 1, in_channels).
    """
    stride = 2
    k_h, k_w, in_channels = weights.shape[0], weights.shape[1], weights.shape[2]
    out_h, out_w = next_error.shape[0], next_error.shape[1]
    delta = np.zeros([(out_h - 1) * stride + k_h, (out_w - 1) * stride + k_w, in_channels])
    for row in range(out_h):
        rpos = row * stride
        for col in range(out_w):
            cpos = col * stride
            # Sum over output channels of filter * error -> (k_h, k_w, in_channels).
            delta[rpos:rpos + k_h, cpos:cpos + k_w, :] += np.tensordot(
                weights, next_error[row, col, :], axes=([3], [0]))
    return delta

#conv partial derivatives only for single example
def conv_derivatives(delta,activation):
    """Filter gradients of a stride-2 conv layer for a single example.

    For every output channel and every output position, the input patch that
    produced the position is scaled by the error at that position and added
    to the gradient of that channel's filter.

    Args:
        delta: error at the layer output, shape (out_h, out_w, out_channels).
        activation: layer input, shape (height, width, in_channels).

    Returns:
        Array (patch_size, patch_size, in_channels, out_channels).
    """
    in_channels = activation.shape[2]
    out_channels = delta.shape[2]
    grads = np.zeros([patch_size, patch_size, in_channels, out_channels])

    # Top-left corners of all patches visited with stride 2.
    row_starts = range(0, activation.shape[0] - patch_size + 1, 2)
    col_starts = range(0, activation.shape[1] - patch_size + 1, 2)

    for channel in range(0, out_channels):
        for row, rpos in enumerate(row_starts):
            for col, cpos in enumerate(col_starts):
                window = activation[rpos:rpos + patch_size, cpos:cpos + patch_size, :]
                grads[:, :, :, channel] += np.multiply(window, delta[row, col, channel])
    return grads

def backward_propagation(dataset,labels,cache,parameters):
    """Compute the weight gradients of the CNN for one batch.

    Fix: the first-layer error is now stored per image
    (`error_ca1[i] = error`). The previous `error_ca1 += error` broadcast
    each image's error onto every image of the batch, so all entries ended
    up holding the sum over the whole batch.

    Only weight gradients are produced; no gradients are computed for the
    bias terms.

    Args:
        dataset: batch of input images indexed along axis 0.
        labels: one-hot targets, one row per image.
        cache: activations returned by `forward_propagation`
            ('output', 'fa1', 'ca2', 'ca1').
        parameters: dict holding at least 'cw1', 'cw2', 'fw1', 'fw2'.

    Returns:
        dict with 'cd1', 'cd2' (conv filters) and 'fd1', 'fd2' (dense
        weights); each is the batch sum plus `beta * weights`, divided by
        the batch size.
    """
    #get activations
    output = cache['output']
    fa1 = cache['fa1']
    ca2 = cache['ca2']
    ca1 = cache['ca1']

    # Second conv activation flattened to one row per image.
    temp_act = np.array(ca2).reshape(-1,(image_size // 4-1) * (image_size // 4 -1)* depth * 4)

    #get parameters
    cw1 = parameters['cw1']
    cw2 = parameters['cw2']
    fw1 = parameters['fw1']
    fw2 = parameters['fw2']

    # Errors of the fully connected layers. sigmoid_derivative is evaluated
    # on the stored activations (sigmoid outputs).
    error_fa2 = output - labels
    error_fa1 = np.matmul(error_fa2,fw2.T)
    error_fa1 = np.multiply(error_fa1,sigmoid_derivative(fa1))
    error_temp = np.matmul(error_fa1,fw1.T)
    error_temp = np.multiply(error_temp,sigmoid_derivative(temp_act))

    m = dataset.shape[0]

    # Errors of the conv layers, computed image by image.
    error_ca2 = np.array(error_temp).reshape(-1,image_size//4-1,image_size//4-1,depth*4)
    error_ca1 = np.zeros(ca1.shape)
    for i in range(m):
        error = conv_delta(error_ca2[i],cw2)
        error = np.multiply(error,sigmoid_derivative(ca1[i]))
        error_ca1[i] = error

    # Partial derivatives of the fully connected layers.
    fd2 = (np.matmul(fa1.T,error_fa2) + beta*fw2)/m
    fd1 = (np.matmul(temp_act.T,error_fa1) + beta*fw1)/m

    # Partial derivatives of the conv layers, accumulated image by image.
    cd2 = np.zeros(cw2.shape)
    cd1 = np.zeros(cw1.shape)
    for i in range(m):
        cd2 = cd2 + conv_derivatives(error_ca2[i],ca1[i])
        cd1 = cd1 + conv_derivatives(error_ca1[i],dataset[i])
    cd2 = (cd2 + beta*cw2)/m
    cd1 = (cd1 + beta*cw1)/m

    #store the derivatives in dict
    derivatives = dict()

    derivatives['cd1'] = cd1
    derivatives['cd2'] = cd2
    derivatives['fd1'] = fd1
    derivatives['fd2'] = fd2

    return derivatives

def update_parameters(derivatives,parameters):
    """Apply one gradient-descent step to the four weight arrays.

    `parameters` is modified in place (entries 'cw1', 'cw2', 'fw1', 'fw2')
    using the module-level `learning_rate`, and is also returned. Other
    entries of the dict are left as they are.
    """
    # Weight key -> key of its gradient in `derivatives`.
    gradient_key = (('cw1', 'cd1'), ('cw2', 'cd2'), ('fw1', 'fd1'), ('fw2', 'fd2'))

    current = {weight: parameters[weight] for weight, _ in gradient_key}
    gradients = {weight: derivatives[grad] for weight, grad in gradient_key}

    # Compute every new value first, then write them back together.
    stepped = {weight: current[weight] - learning_rate*gradients[weight]
               for weight, _ in gradient_key}
    for weight, _ in gradient_key:
        parameters[weight] = stepped[weight]

    return parameters

def cal_loss_accuracy(true_labels,predictions,parameters):
    """Return (loss, accuracy) of a batch of predictions.

    The loss is the cross-entropy summed over both label columns plus
    `beta` times the squared weights, divided by the batch size. Predictions
    are clipped away from exactly 0 and 1 before taking logarithms, because
    saturated outputs previously produced inf/NaN losses. Accuracy compares
    the arg-max of each row and is unaffected by the clipping.

    Args:
        true_labels: one-hot targets, one row per sample.
        predictions: predicted probabilities with the same shape.
        parameters: dict holding at least 'cw1', 'cw2', 'fw1', 'fw2'.
    """
    #get parameters
    cw1 = parameters['cw1']
    cw2 = parameters['cw2']
    fw1 = parameters['fw1']
    fw2 = parameters['fw2']

    m = len(true_labels)

    # Keep log() finite for saturated outputs.
    eps = 1e-12
    safe_predictions = np.clip(predictions, eps, 1 - eps)

    #cal loss
    loss = -1*(np.sum(np.multiply(np.log(safe_predictions),true_labels),1) + np.sum(np.multiply(np.log(1-safe_predictions),1-true_labels),1))
    loss = np.sum(loss)
    loss = loss + beta*(np.sum(cw1**2) + np.sum(cw2**2) + np.sum(fw1**2) + np.sum(fw2**2))
    loss = loss/m

    #cal accuracy
    accuracy = np.sum(np.argmax(true_labels,1)==np.argmax(predictions,1))/m

    return loss,accuracy

#train function
def train(train_dataset,train_labels,batch_size=32,iters=101,stride=2):
    """Train the CNN with mini-batch gradient descent.

    Args:
        train_dataset: images indexed along axis 0 (4-D array).
        train_labels: one-hot labels, one row per image.
        batch_size: number of images per step.
        iters: number of update steps.
        stride: accepted but not used by this function.

    Returns:
        (J, A, parameters): per-step losses, per-step accuracies and the
        trained parameter dict.
    """
    parameters = initialize_parameters()

    losses = []      # loss of every batch
    accuracies = []  # accuracy of every batch

    for step in range(iters):
        # Walk through the data in order, wrapping around at the end.
        first = (step*batch_size)%(train_dataset.shape[0])
        last = first + batch_size
        batch_dataset = train_dataset[first:last,:,:,:]
        batch_labels = train_labels[first:last,:]

        # Forward pass, metrics, gradients and update for this batch.
        cache,output = forward_propagation(batch_dataset,parameters)
        loss,accuracy = cal_loss_accuracy(batch_labels,output,parameters)
        derivatives = backward_propagation(batch_dataset,batch_labels,cache,parameters)
        parameters = update_parameters(derivatives,parameters)

        losses.append(loss)
        accuracies.append(accuracy)

        # Progress report every 100 steps.
        if step%100 == 0:
            print('Step : %d'%step)
            print('Loss : %f'%loss)
            print('Accuracy : %f%%'%(round(accuracy*100,2)))

    return losses,accuracies,parameters

# Train for 1000 steps, keeping the per-step loss/accuracy history.
J,A,parameters = train(train_dataset,neural_train_labels,iters=1000)

# Score each split with the trained parameters; only the predictions and
# the accuracy are kept (the cache and the loss are discarded into `_`).

# training split
_,train_pred = forward_propagation(train_dataset,parameters)
_,train_accuracy = cal_loss_accuracy(neural_train_labels,train_pred,parameters)

# validation split
_,valid_pred = forward_propagation(valid_dataset,parameters)
_,valid_accuracy = cal_loss_accuracy(neural_valid_labels,valid_pred,parameters)

# test split
_,test_pred = forward_propagation(test_dataset,parameters)
_,test_accuracy = cal_loss_accuracy(neural_test_labels,test_pred,parameters)

# Accuracies are reported as percentages rounded to two decimals.
print(f'Accuracy of Train Set = {round(train_accuracy*100,2)}')
print(f'Accuracy of Valid Set = {round(valid_accuracy*100,2)}')
print(f'Accuracy of Test  Set = {round(test_accuracy*100,2)}')

import sklearn.metrics
# F1 needs hard class decisions, so it uses the arg-max class.
F1= sklearn.metrics.f1_score(test_labels, np.argmax(test_pred,1))
# ROC AUC ranks samples by score, so it is given the predicted probability
# of class 1 instead of the thresholded labels (which collapse the ROC curve
# to a single operating point).
# NOTE(review): assumes column 1 of test_pred is the positive class -- confirm.
AUC = sklearn.metrics.roc_auc_score(test_labels,test_pred[:,1])

print('F1 score : {}'.format(F1))
print('AUC : {}'.format(AUC))

import seaborn as sns
from sklearn.metrics import classification_report,confusion_matrix
# Confusion matrix of true vs. arg-max predicted labels, drawn as a heatmap
# whose cells are annotated with the raw counts.
cnfn_mat=confusion_matrix(test_labels,np.argmax(test_pred,1))
sns.heatmap(cnfn_mat,annot=cnfn_mat,fmt='',xticklabels=['0','1'],yticklabels=['0','1'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()



# Smooth the per-batch history by averaging consecutive groups of 30
# batches (the last group may contain fewer than 30 entries).
avg_loss = [np.mean(J[start:start+30]) for start in range(0,len(J),30)]
avg_acc = [np.mean(A[start:start+30]) for start in range(0,len(J),30)]

# Averaged loss curve; the x axis is the index of the 30-batch group.
loss_x = list(range(len(avg_loss)))
plt.plot(loss_x,avg_loss)
plt.xlabel("x")
plt.ylabel("Loss (Avg of 30 batches)")
plt.title("Loss Graph")
plt.show()

# Averaged accuracy curve on the same kind of x axis.
acc_x = list(range(len(avg_acc)))
plt.plot(acc_x,avg_acc)
plt.xlabel("x")
plt.ylabel("Accuracy (Avg of 30 batches)")
plt.title("Accuracy Graph")
plt.show()

# Display one test image together with its true and predicted class.
index = 10
test_image = test_dataset[index].reshape(1,28,28,1)
plt.imshow(test_image[0,:,:,0],cmap='gray')
print("Image")
plt.show()
true_label = np.argmax(neural_test_labels[index])
pred_label = np.argmax(test_pred[index])
print(f"True_Label = {true_label}")
print(f"Pred_Label = {pred_label}")

In [None]:
#CNN from scratch for bloodmnist
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for path in (os.path.join(dirname, name) for name in filenames):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt  #for visualization
# %matplotlib inline

# Read every split inside a context manager so the archive's file handle
# is closed as soon as the arrays have been pulled into memory.
with np.load("../input/bloodmnist/bloodmnist.npz" ) as data:
    train_dataset = data["train_images"]
    train_labels = data["train_labels"]

    valid_dataset = data["val_images"]
    valid_labels = data["val_labels"]

    test_dataset = data["test_images"]
    test_labels = data["test_labels"]

# Shapes of each split, images first and labels second.
print(train_dataset.shape)
print(train_labels.shape)
print(valid_dataset.shape)
print(valid_labels.shape)
print(test_dataset.shape)
print(test_labels.shape)

#Normalize the data
# Convert to float BEFORE subtracting: when the images are stored as unsigned
# 8-bit integers, `x - 127` is evaluated in uint8 and every pixel below 127
# wraps around to a large positive value instead of becoming negative.
train_dataset = (train_dataset.astype(np.float64)-127)/255
valid_dataset = (valid_dataset.astype(np.float64)-127)/255
test_dataset = (test_dataset.astype(np.float64)-127)/255

# One-hot encode the labels: one row of 8 zeros per sample, with a 1 written
# at the position given by that sample's label entry.
neural_train_labels = np.zeros((train_labels.shape[0],8))
neural_valid_labels = np.zeros((valid_labels.shape[0],8))
neural_test_labels = np.zeros((test_labels.shape[0],8))

# Each `row` is a view into the zero matrix, so assigning into it fills the
# matrix in place.
for row,value in zip(neural_train_labels,train_labels):
    row[value] = 1

for row,value in zip(neural_valid_labels,valid_labels):
    row[value] = 1

for row,value in zip(neural_test_labels,test_labels):
    row[value] = 1

print(f"Train labels = {neural_train_labels.shape}")
print(f"Valid labels = {neural_valid_labels.shape}")
print(f"Test labels  = {neural_test_labels.shape}")

# Side length, in pixels, used by reformat() for both image dimensions.
image_size = 28
# Number of label classes; not referenced by the reshape helper below.
num_labels = 8
# Channels per pixel used by reformat() for the last axis.
num_channels = 3 # color

def reformat(dataset):
    """Return `dataset` reshaped to (-1, image_size, image_size, num_channels) as float32.

    The leading dimension is inferred from the data; the other three come
    from the module-level `image_size` and `num_channels`.
    """
    target_shape = (-1,image_size,image_size,num_channels)
    return dataset.reshape(target_shape).astype(np.float32)

# Bring every split into the (N, image_size, image_size, num_channels)
# float32 layout.
train_dataset = reformat(train_dataset)
valid_dataset = reformat(valid_dataset)
test_dataset = reformat(test_dataset)

# Show the image shape followed by the label shape of each split. The format
# string needs one placeholder per argument, otherwise the label shape is
# silently dropped from the output.
print('Training set   = {} {}'.format(train_dataset.shape, neural_train_labels.shape))
print('Validation set = {} {}'.format(valid_dataset.shape, neural_valid_labels.shape))
print('Test set       = {} {}'.format(test_dataset.shape, neural_test_labels.shape))

## Hyper-parameters: module-level globals read by the network functions below.
# Height and width of the square input images, in pixels.
image_size = 28

# Number of color channels per input image (input depth of the first conv layer).
num_channels = 3

# Side length of the square convolution kernels.
patch_size = 3

# Output channels of the first conv layer; the second conv layer uses depth*4.
depth = 4

# Number of units in the hidden fully connected layer.
hidden1 = 512

# Step size applied to every weight gradient in update_parameters().
learning_rate = 0.05

# Weight of the squared-weight penalty added to the loss and gradients.
beta = 0.0001

# Number of output classes (columns of the final fully connected layer).
num_classes = 8

#Activation layers used in between and final

#sigmoid
def sigmoid(X):
    """Return the element-wise logistic function 1 / (1 + exp(-X))."""
    denominator = 1+np.exp(-1*X)
    return 1/denominator

#sigmoid_derivative
def sigmoid_derivative(x):
    """Return x * (1 - x), the sigmoid derivative expressed in terms of the sigmoid OUTPUT x."""
    complement = 1-x
    return x * complement

#Relu activation function
def relu(x):
    """Return x where x is positive and zero elsewhere (x multiplied by its positivity mask)."""
    positive = x > 0
    return x * positive

#relu_derivative
def relu_derivative(x):
    """Return 1.0 where x is positive and 0.0 elsewhere."""
    positive = x > 0
    return positive * 1.

#softmax
def softmax(X):
    """Return the row-wise softmax of a 2-D array.

    Args:
        X: array of shape (rows, classes) holding the raw scores.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the result mathematically
    # unchanged but keeps exp() from overflowing to inf (and the division
    # from producing NaN) when the scores are large.
    shifted = X - np.max(X,axis=1,keepdims=True)
    exp_X = np.exp(shifted)
    return exp_X/np.sum(exp_X,axis=1,keepdims=True)

def initialize_parameters():
    """Create the initial weights and biases of the two-conv, two-dense network.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.5; all biases start at zero. Layer sizes come from the
    module-level hyper-parameters.

    Returns:
        dict with keys 'cw1', 'cb1', 'cw2', 'cb2' (conv layers) and
        'fw1', 'fb1', 'fw2', 'fb2' (fully connected layers).
    """
    mean, std = 0, 0.5

    # Number of features fed to the first dense layer: the second conv
    # output is (image_size//4-1) pixels per side with depth*4 channels.
    conv2_side = image_size//4-1
    flat_features = conv2_side * conv2_side * depth * 4

    return {
        # convolution layers
        'cw1': np.random.normal(mean,std,(patch_size,patch_size,num_channels,depth)),
        'cb1': np.zeros([1,depth]),
        'cw2': np.random.normal(mean,std,(patch_size,patch_size,depth,depth*4)),
        'cb2': np.zeros([1,depth*4]),
        # fully connected layers
        'fw1': np.random.normal(mean,std,(flat_features,hidden1)),
        'fb1': np.zeros([hidden1]),
        'fw2': np.random.normal(mean,std,(hidden1,num_classes)),
        'fb2': np.zeros([num_classes]),
    }

#Convolution operation i.e multiplying Image with the weights.
#stride defaults to 2
#padding  = 0
def conv_multiply(image,weights,stride=2):
    """Apply an un-padded ("valid") convolution of `weights` over one image.

    Args:
        image: array of shape (height, width, in_channels).
        weights: kernel bank of shape (k_h, k_w, in_channels, out_channels).
        stride: step between neighbouring kernel positions (default 2).

    Returns:
        Array of shape (out_h, out_w, out_channels) where
        out_h = (height - k_h)//stride + 1 and likewise for out_w.
    """
    # The kernel extent is taken from the weights themselves so the window
    # size always agrees with the output size computed below.
    k_h, k_w = weights.shape[0], weights.shape[1]
    hsize = (image.shape[0]-k_h)//stride + 1
    vsize = (image.shape[1]-k_w)//stride + 1
    logits = np.zeros([hsize,vsize,weights.shape[3]])
    for row in range(hsize):
        rpos = row*stride
        for col in range(vsize):
            cpos = col*stride
            patch = image[rpos:rpos+k_h, cpos:cpos+k_w, :]
            # Broadcast the patch over the output-channel axis and reduce the
            # three kernel axes: one value per output channel.
            logits[row,col,:] = np.sum(patch[:,:,:,None]*weights,axis=(0,1,2))
    return logits

#stride = 1
def conv_multiply_stride1(image,weights):
    """Apply an un-padded ("valid") convolution with stride 1 over one image.

    Args:
        image: array of shape (height, width, in_channels).
        weights: kernel bank of shape (k_h, k_w, in_channels, out_channels).

    Returns:
        Array of shape (height - k_h + 1, width - k_w + 1, out_channels).
    """
    # Use the kernel's own extent for the sliding window so it always matches
    # the output size derived from weights.shape.
    k_h, k_w = weights.shape[0], weights.shape[1]
    hsize = image.shape[0]-k_h + 1
    vsize = image.shape[1]-k_w + 1
    logits = np.zeros([hsize,vsize,weights.shape[3]])
    for rpos in range(hsize):
        for cpos in range(vsize):
            patch = image[rpos:rpos+k_h, cpos:cpos+k_w, :]
            # One dot product per output channel, computed for all channels at once.
            logits[rpos,cpos,:] = np.sum(patch[:,:,:,None]*weights,axis=(0,1,2))
    return logits

#FORWARD PROPAGATION
def forward_propagation(dataset,parameters):
    """Run the network forward over a batch of images.

    Two sigmoid convolution layers are applied image by image, the result is
    flattened, passed through a sigmoid dense layer and finally a softmax
    output layer.

    Args:
        dataset: batch of images with samples on the first axis.
        parameters: dict holding 'cw1', 'cb1', 'cw2', 'cb2', 'fw1', 'fb1',
            'fw2' and 'fb2'.

    Returns:
        (cache, output): `cache` stores the activations needed by the
        backward pass under 'ca1', 'ca2', 'fa1' and 'output'; `output` is
        the softmax result.
    """
    m = dataset.shape[0]

    cw1, cb1 = parameters['cw1'], parameters['cb1']
    cw2, cb2 = parameters['cw2'], parameters['cb2']
    fw1, fb1 = parameters['fw1'], parameters['fb1']
    fw2, fb2 = parameters['fw2'], parameters['fb2']

    # Spatial sizes the two conv outputs are reshaped to, and the length of
    # the flattened second conv output.
    side1 = image_size // 2 -1
    side2 = image_size // 4 -1
    flat_features = side2 * side2 * depth * 4

    # Convolution part, one image at a time.
    conv_activation1 = []
    conv_activation2 = []
    for image in dataset:
        ca1 = sigmoid(conv_multiply(image,cw1) + cb1)
        ca2 = sigmoid(conv_multiply(ca1,cw2) + cb2)
        conv_activation1.append(ca1)
        conv_activation2.append(ca2.reshape(flat_features))

    conv_activation1 = np.array(conv_activation1).reshape(m,side1,side1,depth)
    conv_activation2 = np.array(conv_activation2).reshape(m,side2,side2,depth * 4)

    # Flatten to (m, flat_features) for the dense layers.
    temp_activation = conv_activation2.reshape(m,flat_features)

    # Fully connected part.
    full_activation1 = sigmoid(np.matmul(temp_activation,fw1) + fb1)
    output = softmax(np.matmul(full_activation1,fw2) + fb2)

    cache = {
        'ca1': conv_activation1,
        'ca2': conv_activation2,
        'fa1': full_activation1,
        'output': output,
    }
    return cache,output

#calculate conv deltas or errors only for one example
def conv_delta(next_error,weights):
    """Propagate one example's error back through a stride-2, un-padded conv layer.

    Args:
        next_error: error at the layer's output, shape (out_h, out_w, out_channels).
        weights: the layer's kernels, shape (k_h, k_w, in_channels, out_channels).

    Returns:
        Error with respect to the layer's input, shape
        ((out_h-1)*2 + k_h, (out_w-1)*2 + k_w, in_channels); for 3x3 kernels
        that is (2*out_h+1, 2*out_w+1, in_channels).
    """
    k_h, k_w, in_channels = weights.shape[0], weights.shape[1], weights.shape[2]
    out_h, out_w = next_error.shape[0], next_error.shape[1]
    delta = np.zeros([(out_h-1)*2+k_h,(out_w-1)*2+k_w,in_channels])
    # Visit every output position along BOTH spatial axes; each one scatters
    # its error back onto the input window that produced it.
    for row in range(out_h):
        rpos = row*2
        for col in range(out_w):
            cpos = col*2
            # Contract the output-channel axis: (k_h,k_w,in,out) x (out,) -> (k_h,k_w,in).
            delta[rpos:rpos+k_h,cpos:cpos+k_w,:] += np.tensordot(weights,next_error[row,col,:],axes=([3],[0]))
    return delta

#conv partial derivatives only for single example
def conv_derivatives(delta,activation):
    """Accumulate one example's kernel gradients for a stride-2 conv layer.

    Args:
        delta: error at the layer's output, indexed [row, col, out_channel].
        activation: the layer's input, indexed [row, col, in_channel].

    Returns:
        Array of shape (patch_size, patch_size, in_channels, out_channels):
        for every output position, the input window under the kernel scaled
        by that position's error, summed over all positions.
    """
    out_channels = delta.shape[2]
    partial_derivatives = np.zeros([patch_size,patch_size,activation.shape[2],out_channels])

    row_starts = range(0,activation.shape[0]-patch_size+1,2)
    col_starts = range(0,activation.shape[1]-patch_size+1,2)

    for row,rpos in enumerate(row_starts):
        for col,cpos in enumerate(col_starts):
            # The window is the same for every output channel, so slice it once.
            window = activation[rpos:rpos+patch_size, cpos:cpos+patch_size, :]
            for d2 in range(out_channels):
                partial_derivatives[:,:,:,d2] += np.multiply(window,delta[row,col,d2])
    return partial_derivatives

def backward_propagation(dataset,labels,cache,parameters):
    """Compute the weight gradients of the network for one batch.

    Args:
        dataset: the batch of input images used in the forward pass.
        labels: target rows matching the softmax output.
        cache: activations from forward_propagation ('output', 'fa1', 'ca2', 'ca1').
        parameters: dict holding the weights 'cw1', 'cw2', 'fw1' and 'fw2'.

    Returns:
        dict with the batch-averaged, regularised gradients 'cd1', 'cd2',
        'fd1' and 'fd2'. Bias gradients are not computed.
    """
    #get activations
    output = cache['output']
    fa1 = cache['fa1']
    ca2 = cache['ca2']
    ca1 = cache['ca1']

    side2 = image_size // 4 -1
    temp_act = np.array(ca2).reshape(-1,side2 * side2 * depth * 4)

    #get parameters
    cw1 = parameters['cw1']
    cw2 = parameters['cw2']
    fw1 = parameters['fw1']
    fw2 = parameters['fw2']

    m = dataset.shape[0]

    #errors of the fully connected layers, propagated from the output back
    #to the flattened second conv activation
    error_fa2 = output - labels
    error_fa1 = np.multiply(np.matmul(error_fa2,fw2.T),sigmoid_derivative(fa1))
    error_temp = np.multiply(np.matmul(error_fa1,fw1.T),sigmoid_derivative(temp_act))

    #errors of the conv layers
    error_ca2 = np.array(error_temp).reshape(-1,side2,side2,depth*4)
    error_ca1 = np.zeros(ca1.shape)
    for i in range(m):
        error = conv_delta(error_ca2[i],cw2)
        # Store each image's error in its own slot. Adding it to the whole
        # array would broadcast it onto every image and leave all slots
        # holding the sum over the batch.
        error_ca1[i] = np.multiply(error,sigmoid_derivative(ca1[i]))

    #partial derivatives of the fully connected layers
    fd2 = (np.matmul(fa1.T,error_fa2) + beta*fw2)/m
    fd1 = (np.matmul(temp_act.T,error_fa1) + beta*fw1)/m

    #partial derivatives of the conv layers, accumulated image by image
    cd2 = np.zeros(cw2.shape)
    cd1 = np.zeros(cw1.shape)
    for i in range(m):
        cd2 = cd2 + conv_derivatives(error_ca2[i],ca1[i])
        cd1 = cd1 + conv_derivatives(error_ca1[i],dataset[i])
    cd2 = (cd2 + beta*cw2)/m
    cd1 = (cd1 + beta*cw1)/m

    return {
        'cd1': cd1,
        'cd2': cd2,
        'fd1': fd1,
        'fd2': fd2,
    }

def update_parameters(derivatives,parameters):
    """Apply one gradient-descent step to the four weight tensors.

    Each weight is replaced by ``weight - learning_rate * gradient``. The
    `parameters` dict is updated in place and also returned; entries other
    than the four weights are left untouched.
    """
    weight_to_gradient = (
        ('cw1','cd1'),
        ('cw2','cd2'),
        ('fw1','fd1'),
        ('fw2','fd2'),
    )
    for weight_key,gradient_key in weight_to_gradient:
        parameters[weight_key] = parameters[weight_key] - learning_rate*derivatives[gradient_key]

    return parameters

def cal_loss_accuracy(true_labels,predictions,parameters):
    """Return the regularised loss and the accuracy of a batch of predictions.

    Args:
        true_labels: 2-D array of target rows (arg-max gives the true class).
        predictions: 2-D array of predicted probabilities, same shape.
        parameters: dict holding the weights 'cw1', 'cw2', 'fw1' and 'fw2'.

    Returns:
        (loss, accuracy): the element-wise cross-entropy summed over the
        batch plus beta times the sum of squared weights, divided by the
        number of samples; and the fraction of rows whose arg-max matches.
    """
    weights = [parameters[key] for key in ('cw1','cw2','fw1','fw2')]

    m = len(true_labels)

    # Keep probabilities strictly inside (0, 1): a saturated prediction would
    # otherwise give log(0) = -inf, and -inf * 0 turns the whole loss into NaN.
    eps = 1e-12
    safe_predictions = np.clip(np.asarray(predictions,dtype=np.float64),eps,1-eps)

    #cal loss
    positive_term = np.sum(np.multiply(np.log(safe_predictions),true_labels),1)
    negative_term = np.sum(np.multiply(np.log(1-safe_predictions),1-true_labels),1)
    loss = np.sum(-1*(positive_term + negative_term))
    loss = loss + beta*sum(np.sum(w**2) for w in weights)
    loss = loss/m

    #cal accuracy
    accuracy = np.sum(np.argmax(true_labels,1)==np.argmax(predictions,1))/m

    return loss,accuracy

#train function
def train(train_dataset,train_labels,batch_size=16,iters=101,stride=2):
    """Train the from-scratch CNN with sequential mini-batch gradient descent.

    Args:
        train_dataset: training images; sliced as ``[start:end,:,:,:]`` so it
            must be 4-D with samples on the first axis.
        train_labels: 2-D label array sliced row-wise in step with the images.
        batch_size: number of consecutive samples taken per step.
        iters: number of parameter updates to run.
        stride: kept for backward compatibility; it is not used.

    Returns:
        ``(J, A, parameters)``: the loss of every step, the accuracy of every
        step, and the trained parameter dictionary.
    """
    parameters = initialize_parameters()

    J = []  # loss of every batch
    A = []  # accuracy of every batch

    num_samples = train_dataset.shape[0]

    for step in range(iters):
        # Batches are consecutive windows over the data that wrap around at
        # the end; slicing truncates the window that reaches past the last
        # sample, so that batch can be smaller than batch_size.
        start = (step*batch_size)%num_samples
        end = start + batch_size

        batch_dataset = train_dataset[start:end,:,:,:]
        batch_labels = train_labels[start:end,:]

        cache,output = forward_propagation(batch_dataset,parameters)

        # Loss and accuracy describe the parameters before this step's update.
        loss,accuracy = cal_loss_accuracy(batch_labels,output,parameters)

        derivatives = backward_propagation(batch_dataset,batch_labels,cache,parameters)
        parameters = update_parameters(derivatives,parameters)

        J.append(loss)
        A.append(accuracy)

        # Progress report every 100 steps.
        if(step%100==0):
            print('Step : %d'%step)
            print('Loss : %f'%loss)
            print('Accuracy : %f%%'%(round(accuracy*100,2)))

    return J,A,parameters

# Train for 2000 steps, keeping the per-step loss/accuracy history.
J,A,parameters = train(train_dataset,neural_train_labels,iters=2000)

# Evaluate the trained parameters on each split. Only the predictions and
# the accuracy are kept; the cache and the loss go into `_`.

# training split
_,train_pred = forward_propagation(train_dataset,parameters)
_,train_accuracy = cal_loss_accuracy(neural_train_labels,train_pred,parameters)

# validation split
_,valid_pred = forward_propagation(valid_dataset,parameters)
_,valid_accuracy = cal_loss_accuracy(neural_valid_labels,valid_pred,parameters)

# test split
_,test_pred = forward_propagation(test_dataset,parameters)
_,test_accuracy = cal_loss_accuracy(neural_test_labels,test_pred,parameters)

# Report the accuracies as percentages rounded to two decimals.
print(f'Accuracy of Train Set = {round(train_accuracy*100,2)}')
print(f'Accuracy of Valid Set = {round(valid_accuracy*100,2)}')
print(f'Accuracy of Test  Set = {round(test_accuracy*100,2)}')

import seaborn as sns
from sklearn.metrics import classification_report,confusion_matrix
# Pin the label set to every class id so the matrix always has one row and
# one column per class, in class order, even if a class never occurs in the
# test labels or the predictions.
class_ids = list(range(num_classes))
cnfn_mat=confusion_matrix(test_labels,np.argmax(test_pred,1),labels=class_ids)
# One tick label per class; the cells are annotated with the raw counts.
class_names = [str(class_id) for class_id in class_ids]
sns.heatmap(cnfn_mat,annot=cnfn_mat,fmt='',xticklabels=class_names,yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()



# Average the per-batch history over consecutive groups of 30 batches to
# smooth the curves (the final group may hold fewer than 30 entries).
avg_loss = [np.mean(J[start:start+30]) for start in range(0,len(J),30)]
avg_acc = [np.mean(A[start:start+30]) for start in range(0,len(J),30)]

# Averaged loss; the x axis counts the 30-batch groups.
loss_x = list(range(len(avg_loss)))
plt.plot(loss_x,avg_loss)
plt.xlabel("x")
plt.ylabel("Loss (Avg of 30 batches)")
plt.title("Loss Graph")
plt.show()

# Averaged accuracy on the same kind of x axis.
acc_x = list(range(len(avg_acc)))
plt.plot(acc_x,avg_acc)
plt.xlabel("x")
plt.ylabel("Accuracy (Avg of 30 batches)")
plt.title("Accuracy Graph")
plt.show()

# Show one test sample (only its first channel, in grayscale) with its true
# and predicted class.
index = 29
test_image = test_dataset[index].reshape(1,28,28,3)
plt.imshow(test_image[0,:,:,0],cmap='gray')
print("Image")
plt.show()
true_label = np.argmax(neural_test_labels[index])
pred_label = np.argmax(test_pred[index])
print(f"True_Label = {true_label}")
print(f"Pred_Label = {pred_label}")

In [None]:
#resnet50 on pneumoniamnist
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of each file.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for path in (os.path.join(dirname, name) for name in filenames):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import make_grid
from IPython.display import Image
from keras.utils.np_utils import to_categorical   
# Set random seed for reproducibility
# The same fixed value seeds Python's `random` module and PyTorch's default
# generator; NumPy's generator is not seeded in this block.
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

# Load the training split of the archive and bring it into the
# (N, height, width, 3) uint8 layout the torchvision transform expects.
DATA_DIR = "../input/pneumoniamnist/pneumoniamnist.npz"
X_train = np.load(DATA_DIR)
print(f"Data type: {type(X_train)}")
data = X_train["train_images"].astype(np.float64)

# Replicate each image along a new last axis to obtain three channels.
x = np.array([np.dstack([img,img,img]) for img in data])

# Each label row contributes its first entry, giving a flat label vector.
label_data = X_train["train_labels"]
y = np.array([label[0] for label in label_data])
print(y.shape)

# NOTE(review): the scaling by 255 before the uint8 cast is only safe when the
# stored pixel values lie in [0, 1]; if the archive already holds 0-255 values
# the cast overflows -- confirm the value range of "train_images".
data = 255 * x
img = data.astype(np.uint8)
X_train = img

class MyDataset(Dataset):
    """Indexable dataset pairing in-memory samples with integer targets.

    `data` is stored as given, `targets` is converted to a LongTensor, and
    the optional `transform` is applied to a sample each time it is fetched.
    """

    def __init__(self, data, targets, transform=None):
        self.transform = transform
        self.targets = torch.LongTensor(targets)
        self.data = data

    def __len__(self):
        """Number of samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return (sample, target) for `index`, transforming the sample if a transform is set."""
        sample, target = self.data[index], self.targets[index]
        if not self.transform:
            return sample, target
        return self.transform(sample), target
    
# Per-sample preprocessing: convert the array to a PIL image, resize and
# center-crop to 224, convert to a tensor, then normalize each channel with
# the given mean and standard deviation.
transform = transforms.Compose([transforms.ToPILImage(),
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),])
# Training dataset over the prepared images and labels.
cropped_dataset = MyDataset(X_train, y, transform=transform)
print(len(cropped_dataset))

# Shuffled training batches of 128 samples, loaded by two worker processes.
train_dataloader = torch.utils.data.DataLoader(cropped_dataset, batch_size=128,
                                         shuffle=True, num_workers=2, pin_memory=True)

def imshow(inp, title=None):
    """Display a normalized image tensor with matplotlib.

    Args:
        inp: tensor in channels-first layout (C, H, W) that was normalized
            with the mean/std used by the dataset transform.
        title: optional figure title.
    """
    # Always move to host memory: this is a no-op for tensors that already
    # live on the CPU, and it removes the need for a global `device` flag.
    inp = inp.cpu().numpy().transpose((1, 2, 0))

    # Undo the normalization and clamp to the displayable [0, 1] range.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = np.clip(std * inp + mean, 0, 1)

    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)
    
# Pull a single batch from the training loader and report its shape.
images, labels = next(iter(train_dataloader)) 
print("images-size:", images.shape)

# Tile the batch into one image grid and report the grid's shape.
out = make_grid(images)
print("out-size:", out.shape)

# Disabled preview call (it refers to a `classes` attribute on the dataset).
#imshow(out, title=[cropped_dataset.classes[x] for x in labels])

# Load the test and validation splits and prepare them exactly like the
# training split: three stacked channels, flat labels, uint8 pixels.
DATA_DIR = "../input/pneumoniamnist/pneumoniamnist.npz"
X_test = np.load(DATA_DIR)
print(f"Data type: {type(X_test)}")
data = X_test["test_images"].astype(np.float64)
val_data = X_test["val_images"].astype(np.float64)

# Replicate every image along a new last axis to obtain three channels.
x = np.array([np.dstack([img,img,img]) for img in data])
x_val = np.array([np.dstack([img,img,img]) for img in val_data])

# Keep the first entry of every label row.
label_data = np.array(X_test["test_labels"])
val_label = np.array(X_test["val_labels"])
y = np.array([label[0] for label in label_data])
y_val = np.array([label[0] for label in val_label])
print(y.shape)

# NOTE(review): as for the training split, multiplying by 255 before the
# uint8 cast assumes pixel values in [0, 1] -- confirm the stored range.
data = 255 * x
img = data.astype(np.uint8)
X_test = img
X_val = (x_val*255).astype(np.uint8)

# Wrap the prepared test and validation arrays with the same transform that
# the training dataset uses.
test_dataset = MyDataset(X_test, y, transform=transform)
val_dataset =  MyDataset(X_val, y_val, transform=transform)
print(len(test_dataset))
print(len(val_dataset))

# Batches of 128 for evaluation; note that both loaders shuffle the samples.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=128,
                                         shuffle=True, num_workers=2, pin_memory=True)

val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=128,
                                         shuffle=True, num_workers=2, pin_memory=True)

# Text shown above each preview image for the two class ids.
idx2lbl={0:'False',1:'True'}
# Inverse of the dataset's Normalize(mean, std) step: normalizing with
# mean' = -mean/std and std' = 1/std maps z back to z*std + mean. The third
# channel must use 0.225, the same standard deviation as the forward transform.
inv_normalize = transforms.Normalize(
    mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
    std=[1/0.229, 1/0.224, 1/0.225]
)

# Preview twenty images of one training batch in a 2 x 10 grid, titled with
# their labels.
images,labels=next(iter(train_dataloader))
fig=plt.figure(figsize=(25,4))

for i in range(1,21):
    ax=fig.add_subplot(2,10,i,xticks=[],yticks=[])
    ax.imshow(transforms.ToPILImage()(inv_normalize(images[i])))
    ax.set_title(str(idx2lbl[labels[i].item()]))

import torch
from torch import nn
from torch import optim,no_grad
from torch.utils.data import DataLoader
from torchvision import models,transforms,datasets
from torch.nn import Dropout,ReLU,Linear,Sequential,LogSoftmax,Softmax
import pickle
from tqdm import tqdm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,confusion_matrix

# ResNet50 created with pretrained weights; it is used as a frozen feature
# extractor with a new classification head.
model=models.resnet50(pretrained=True)

#set requires_grad to False in order to freeze parameter updates
for param in model.parameters():
    param.requires_grad=False

# Notebook display of the original classification layer.
model.fc

#create custom classification layer: dropout, 2048 -> 512 with ReLU, then
#512 -> 2 followed by log-softmax over the class dimension
classifier=Sequential(
    Dropout(p=0.5),
    Linear(in_features=2048, out_features=512, bias=True),
    ReLU(),
    Linear(in_features=512, out_features=2, bias=True),
    LogSoftmax(dim=1)
)

#replacing resnet classifier layer with custom one; its freshly created
#parameters are the only ones left trainable
model.fc=classifier

# Negative log-likelihood loss, matching the log-softmax output of the head.
criterion=nn.NLLLoss()

# Adam optimizes only the parameters of the new head.
optimizer=optim.Adam(model.fc.parameters(),lr=0.003)

# Notebook display of the assembled model.
model

def evaluate(model,dataLoader):
    """Evaluate `model` on every batch of `dataLoader` without tracking gradients.

    Loss and accuracy are averaged per sample, so a smaller final batch does
    not carry more weight than its share of the data. The model is put in
    evaluation mode for the pass and returned to whichever mode it was in
    before the call.

    Args:
        model: network whose output the module-level `criterion` accepts.
        dataLoader: iterable yielding (images, labels) batches.

    Returns:
        (avg_loss, avg_accuracy, predicted_labels, actual_labels) where
        avg_loss is a float, avg_accuracy is a 0-dim float tensor, and the
        two lists hold one Python int per evaluated sample.

    Raises:
        ValueError: if `dataLoader` yields no samples.
    """
    predicted_labels_list=list()
    actual_labels_list=list()

    loss_sum=0.0
    correct=0
    total=0

    was_training=model.training
    model.eval() #evaluation mode, i.e. all dropout is deactivated
    try:
        with no_grad(): #turning off gradient calculation
            for images,labels in dataLoader:
                output=model(images)
                batch_size=len(labels)

                # criterion returns the batch mean; weight it by the batch size
                # so the final average is per sample.
                loss_sum+=criterion(output,labels).item()*batch_size

                # arg-max of the class scores is the predicted label
                result_labels=torch.argmax(output,dim=1)
                correct+=(result_labels==labels).sum().item()
                total+=batch_size

                predicted_labels_list.extend(result_labels.tolist())
                actual_labels_list.extend(labels.tolist())
    finally:
        #restore the mode the caller had set
        model.train(was_training)

    if total==0:
        raise ValueError("dataLoader yielded no samples to evaluate")

    avg_test_loss=loss_sum/total
    # Kept as a tensor because callers call .item() on the accuracy.
    avg_test_acc=torch.tensor(correct/total)

    return (avg_test_loss,avg_test_acc,predicted_labels_list,actual_labels_list)

epochs=5

# Per-epoch history of loss and accuracy on the training and validation data.
train_loss_pe=list()
val_loss_pe=list()

train_acc_pe=list()
val_acc_pe=list()

for epoch in range(1,epochs+1):
    running_loss=0
    running_acc=0

    for images,labels in tqdm(train_dataloader):

        #set optimizer grad to zero
        optimizer.zero_grad()

        #forward pass
        output=model(images)

        #loss
        loss=criterion(output,labels)
        #train accuracy of this batch
        result=torch.argmax(output,dim=1)
        running_loss+=loss.item()
        running_acc+=torch.mean((result==labels).type(torch.float))

        #backprop
        loss.backward()

        #descent
        optimizer.step()

    # End-of-epoch bookkeeping: average the batch metrics, validate, record.
    train_loss=running_loss/len(train_dataloader)
    train_acc=running_acc/len(train_dataloader)

    val_loss,val_acc,pred,actual=evaluate(model,val_dataloader)

    train_loss_pe.append(train_loss)
    val_loss_pe.append(val_loss)

    train_acc_pe.append(train_acc.item())
    val_acc_pe.append(val_acc.item())

    print("EPOCH: {}".format(epoch))
    print("Train Loss: {:.3f}".format(train_loss))
    print("Val Loss: {:.3f}".format(val_loss))
    print("Train Accuracy: {:.2f}%".format(train_acc*100))
    print("Val Accuracy: {:.2f}%".format(val_acc*100))

    # Checkpoint the whole model after every epoch, pickled in evaluation
    # mode, then switch back to training mode for the next epoch.
    with open("./resnet50_tl_{}.pth".format(epoch),"wb") as f:
        model.eval()
        pickle.dump(model,f)
        model.train()

# Draw the loss curves and the accuracy curves side by side, training and
# validation together in each panel.
plots=[(train_loss_pe,val_loss_pe),(train_acc_pe,val_acc_pe)]
plt_labels=[("Training Loss","Validation Loss"),("Training Accuracy","Validation Accuracy")]
plt_titles=["Loss","Accuracy"]
plt.figure(figsize=(20,7))
for i,(curves,names,heading) in enumerate(zip(plots,plt_labels,plt_titles)):
    ax=plt.subplot(1,2,i+1)
    train_curve,val_curve=curves
    train_name,val_name=names
    ax.plot(train_curve,label=train_name)
    ax.plot(val_curve,label=val_name)
    ax.set_title(heading)
    ax.legend()

#load the checkpoint written after the final epoch; the path follows
#`epochs` so it stays valid when the number of epochs is changed
# NOTE: pickle.load executes code stored in the file -- only load checkpoints
# that this notebook wrote itself.
with open("./resnet50_tl_{}.pth".format(epochs),"rb") as f:
    loaded_model=pickle.load(f)

# Score the loaded model on the test loader.
avg_test_loss,avg_test_acc,predicted_label,actual_label=evaluate(loaded_model,test_dataloader)
print("Test Loss:{:.3f}".format(avg_test_loss))
print("Test Accuracy:{:.2f}%".format(avg_test_acc*100))

# Confusion matrix of actual vs. predicted labels with the counts annotated.
cnfn_mat=confusion_matrix(actual_label,predicted_label)
sns.heatmap(cnfn_mat,annot=cnfn_mat,fmt='',xticklabels=['0','1'],yticklabels=['0','1'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Classification report as a table, one row per class/average, restricted to
# the four listed columns.
report=pd.DataFrame.from_dict(classification_report(actual_label,predicted_label,output_dict=True)).T
report=report[['f1-score','precision','recall','support']]
report

In [None]:
#resnet50 for bloodmnist
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file below the input directory, one full path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for path in (os.path.join(dirname, name) for name in filenames):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import make_grid
from IPython.display import Image
from keras.utils.np_utils import to_categorical   
# Set random seed for reproducibility
# One fixed value seeds both Python's `random` module and PyTorch's default
# generator; NumPy's generator is not seeded in this block.
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

# Load the training split and bring it into the uint8 image layout the
# torchvision transform expects.
DATA_DIR = "../input/bloodmnist/bloodmnist.npz" #change to pneumoniamnist/pneumoniamnist.npz
X_train = np.load(DATA_DIR)
print(f"Data type: {type(X_train)}")
data = X_train["train_images"].astype(np.float64)

# The images are used as they are; for pneumoniamnist, stack each image three
# times instead: np.dstack([img,img,img]).
x = np.array(list(data))

# Keep the first entry of every label row to get a flat label vector.
label_data = X_train["train_labels"]
y = np.array([label[0] for label in label_data])
print(y.shape)

# NOTE(review): multiplying by 255 before the uint8 cast assumes pixel values
# in [0, 1]; if the archive already stores 0-255 values the cast overflows --
# confirm the value range of "train_images".
data = 255 * x
img = data.astype(np.uint8)
X_train = img

class MyDataset(Dataset):
    """Dataset over in-memory samples and their integer class targets.

    Samples are kept as passed in; targets are stored as a LongTensor. When a
    transform is supplied it is applied to each sample on access.
    """

    def __init__(self, data, targets, transform=None):
        self.data, self.transform = data, transform
        self.targets = torch.LongTensor(targets)

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the (sample, target) pair stored at `index`."""
        sample = self.data[index]
        target = self.targets[index]
        sample = self.transform(sample) if self.transform else sample
        return sample, target
    
# Per-sample preprocessing: array -> PIL image, resize and center-crop to
# 224, convert to a tensor, then normalize each channel with the given mean
# and standard deviation.
transform = transforms.Compose([transforms.ToPILImage(),
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),])
# Training dataset over the prepared images and labels.
cropped_dataset = MyDataset(X_train, y, transform=transform)
print(len(cropped_dataset))

# Shuffled training batches of 128 samples, loaded by two worker processes.
train_dataloader = torch.utils.data.DataLoader(cropped_dataset, batch_size=128,
                                         shuffle=True, num_workers=2, pin_memory=True)

def imshow(inp, title=None):
    """Display a normalized image tensor with matplotlib.

    Args:
        inp: tensor in channels-first layout (C, H, W) that was normalized
            with the mean/std used by the dataset transform.
        title: optional figure title.
    """
    # .cpu() returns the tensor unchanged when it is already on the CPU, so
    # it can be called unconditionally instead of consulting a global
    # `device` variable.
    inp = inp.cpu().numpy().transpose((1, 2, 0))

    # Reverse the normalization, then clamp to the displayable [0, 1] range.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = np.clip(std * inp + mean, 0, 1)

    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)
    
# Pull a single batch from the training loader and report its shape.
images, labels = next(iter(train_dataloader)) 
print("images-size:", images.shape)

# Tile the batch into one image grid and report the grid's shape.
out = make_grid(images)
print("out-size:", out.shape)

# Disabled preview call (it refers to a `classes` attribute on the dataset).
#imshow(out, title=[cropped_dataset.classes[x] for x in labels])

# Load the test and validation splits and prepare them the same way as the
# training split: flat labels and uint8 pixels.
DATA_DIR = "../input/bloodmnist/bloodmnist.npz"
X_test = np.load(DATA_DIR)
print(f"Data type: {type(X_test)}")
data = X_test["test_images"].astype(np.float64)
val_data = X_test["val_images"].astype(np.float64)

# The images are used unchanged.
x = np.array(list(data))
x_val = np.array(list(val_data))

# Keep the first entry of every label row.
label_data = np.array(X_test["test_labels"])
val_label = np.array(X_test["val_labels"])
y = np.array([label[0] for label in label_data])
y_val = np.array([label[0] for label in val_label])
print(y.shape)

# NOTE(review): as for the training split, the scaling by 255 before the
# uint8 cast assumes pixel values in [0, 1] -- confirm the stored range.
data = 255 * x
img = data.astype(np.uint8)
X_test = img
X_val = (x_val*255).astype(np.uint8)

test_dataset = MyDataset(X_test, y, transform=transform)
val_dataset =  MyDataset(X_val, y_val, transform=transform)
print(len(test_dataset))
print(len(val_dataset))

test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=128,
                                         shuffle=True, num_workers=2, pin_memory=True)

val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=128,
                                         shuffle=True, num_workers=2, pin_memory=True)

# Class index -> title shown above each preview image.
idx2lbl={0:'0',1:'1',2:'2',3:'3',4:'4',5:'5',6:'6',7:'7'}

# Inverse of Normalize(mean, std): since z = (x - mean) / std, the original
# x = z * std + mean is obtained from another Normalize with
# mean' = -mean/std and std' = 1/std. The statistics must match the forward
# transform, whose third-channel std is 0.225 (it was mistyped as 0.255 here).
inv_normalize = transforms.Normalize(
    mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
    std=[1/0.229, 1/0.224, 1/0.225]
)

images,labels=next(iter(train_dataloader))
fig=plt.figure(figsize=(25,4))

# Show the first (up to) 20 samples of the batch in a 2x10 grid. Subplot
# positions are 1-based while tensor indices are 0-based, so image i goes to
# position i+1; the min() guards against batches shorter than 20.
n_show=min(20,len(images))
for i in range(n_show):
    ax=fig.add_subplot(2,10,i+1,xticks=[],yticks=[])
    # clamp() keeps float round-off from leaving [0, 1] before the PIL conversion
    ax.imshow(transforms.ToPILImage()(inv_normalize(images[i]).clamp(0,1)))
    ax.set_title(str(idx2lbl[labels[i].item()]))

import torch
from torch import nn
from torch import optim,no_grad
from torch.utils.data import DataLoader
from torchvision import models,transforms,datasets
from torch.nn import Dropout,ReLU,Linear,Sequential,LogSoftmax,Softmax
import pickle
from tqdm import tqdm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,confusion_matrix

# Start from the pretrained ResNet50 backbone.
model = models.resnet50(pretrained=True)

# Freeze every existing parameter so the optimizer never updates it.
for frozen in model.parameters():
    frozen.requires_grad = False

model.fc

# Custom classification head: dropout, 2048 -> 512 -> 8, log-probabilities out.
classifier = Sequential(
    Dropout(p=0.5),
    Linear(2048, 512, bias=True),
    ReLU(),
    Linear(512, 8, bias=True),
    LogSoftmax(dim=1),
)

# Swap the stock ResNet classifier for the custom head.
model.fc = classifier

# NLLLoss pairs with the LogSoftmax output of the head.
criterion = nn.NLLLoss()

# Only the parameters of the new head are handed to the optimizer.
optimizer = optim.Adam(model.fc.parameters(), lr=0.003)

model

def evaluate(model,dataLoader):
    """Run `model` over `dataLoader` without gradients and collect metrics.

    The loss is computed with the module-level `criterion`.

    Returns `(avg_loss, avg_acc, predicted, actual)` where
    `avg_loss` is the mean of the per-batch losses (float),
    `avg_acc` is the mean of the per-batch accuracies (a 0-dim tensor, so
    callers may use `.item()`), and `predicted` / `actual` are flat lists of
    class indices in iteration order.

    Raises ZeroDivisionError when `dataLoader` yields no batches.
    """
    predicted_labels_list=[]
    actual_labels_list=[]
    test_loss_acm=0
    accuracy_acm=0

    # Remember the mode the caller had the model in; previously the model was
    # always forced into train mode on exit, even if it arrived in eval mode.
    was_training=model.training
    model.eval() # evaluation mode, i.e. dropout is deactivated
    try:
        with no_grad(): # no gradient bookkeeping during evaluation
            for images,labels in dataLoader:
                # call the module (not .forward) so registered hooks still run
                output=model(images)
                test_loss_acm+=criterion(output,labels).item()

                # softmax is monotonic, so argmax over the raw output selects
                # the same class as argmax over the softmax probabilities
                result_labels=torch.argmax(output,dim=1)
                bools=(result_labels==labels)
                accuracy_acm+=bools.sum().type(torch.float)/len(bools)

                predicted_labels_list.extend(result_labels.tolist())
                actual_labels_list.extend(labels.tolist())
    finally:
        # restore whichever mode was active on entry
        model.train(was_training)

    avg_test_loss=test_loss_acm/len(dataLoader)
    avg_test_acc=accuracy_acm/len(dataLoader)

    return (avg_test_loss,avg_test_acc,predicted_labels_list,actual_labels_list)

epochs=5

# Per-epoch history, used for the curves plotted afterwards.
train_loss_pe=list()
val_loss_pe=list()

train_acc_pe=list()
val_acc_pe=list()

for epoch in range(1,epochs+1):
    running_loss=0
    running_acc=0

    for images,labels in tqdm(train_dataloader):

        #set optimizer grad to zero
        optimizer.zero_grad()

        #forward pass (call the module so that registered hooks run)
        output=model(images)

        #loss
        loss=criterion(output,labels)
        #train accuracy
        result=torch.argmax(output,dim=1)
        running_loss+=loss.item()
        running_acc+=torch.mean((result==labels).type(torch.float))

        #backprop
        loss.backward()

        #descent
        optimizer.step()

    # The inner loop never breaks, so the former `for ... else` clause always
    # ran; the epoch summary is now plain code following the loop.
    train_loss=running_loss/len(train_dataloader)
    train_acc=running_acc/len(train_dataloader)

    val_loss,val_acc,pred,actual=evaluate(model,val_dataloader)

    train_loss_pe.append(train_loss)
    val_loss_pe.append(val_loss)

    train_acc_pe.append(train_acc.item())
    val_acc_pe.append(val_acc.item())

    # `end=" "` used to be passed to str.format(), which silently ignores
    # unused keyword arguments; it is dropped, so the output is unchanged.
    print("EPOCH: {}".format(epoch))
    print("Train Loss: {:.3f}".format(train_loss))
    print("Val Loss: {:.3f}".format(val_loss))
    print("Train Accuracy: {:.2f}%".format(train_acc*100))
    print("Val Accuracy: {:.2f}%".format(val_acc*100))

    # Checkpoint the whole model object once per epoch; it is pickled in eval
    # mode and switched back to train mode afterwards.
    with open("./resnet50_tl_{}.pth".format(epoch),"wb") as f:
        model.eval()
        pickle.dump(model,f)
        model.train()

# Loss and accuracy curves (training vs validation), one subplot each.
plt_titles=["Loss","Accuracy"]
plt_labels=[("Training Loss","Validation Loss"),("Training Accuracy","Validation Accuracy")]
plots=[(train_loss_pe,val_loss_pe),(train_acc_pe,val_acc_pe)]
plt.figure(figsize=(20,7))
for i,(title,series,names) in enumerate(zip(plt_titles,plots,plt_labels)):
    ax=plt.subplot(1,2,i+1)
    for values,name in zip(series,names):
        ax.plot(values,label=name)
    ax.set_title(title)
    ax.legend()

#selecting the best model
# NOTE: pickle.load executes code embedded in the file; only load checkpoints
# written by this notebook.
with open("./resnet50_tl_3.pth","rb") as f:
    loaded_model=pickle.load(f)

avg_test_loss,avg_test_acc,predicted_label,actual_label=evaluate(loaded_model,test_dataloader)
# `end=` was previously passed to str.format(), which ignored it; dropped.
print("Test Loss:{:.3f}".format(avg_test_loss))
print("Test Accuracy:{:.2f}%".format(avg_test_acc*100))

cnfn_mat=confusion_matrix(actual_label,predicted_label)
# One tick label per row/column of the matrix. The labels were hard-coded to
# ['0','1'], which does not fit the 8-way classifier trained above.
class_names=[str(i) for i in range(cnfn_mat.shape[0])]
sns.heatmap(cnfn_mat,annot=cnfn_mat,fmt='',xticklabels=class_names,yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Per-class precision / recall / F1 as a table, one row per class.
report=pd.DataFrame.from_dict(classification_report(actual_label,predicted_label,output_dict=True)).T
report=report[['f1-score','precision','recall','support']]
report

In [None]:
#vgg16 for pneumoniamnist
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import make_grid
from IPython.display import Image
from keras.utils.np_utils import to_categorical   
# Fix the RNG seeds so that runs are repeatable
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
for _seed_fn in (random.seed, torch.manual_seed):
    _seed_fn(manualSeed)

# Use the first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

import torch.optim as optim
import time

from torchvision import models

DATA_DIR = "../input/pneumoniamnist/pneumoniamnist.npz" #change to pneumoniamnist/pneumoniamnist.npz
archive = np.load(DATA_DIR)
#print(f"Shape of training data: {X_train.shape}")
data = archive['train_images'].astype(np.float64)
print('Hi')

# Stack each image three times along the depth axis to obtain 3 channels.
x = np.array([np.dstack((plane, plane, plane)) for plane in data]) # for pneumoniamnist

# Each label row is indexed with [0] to obtain one value per sample.
label_data = archive['train_labels']
y = np.array([row[0] for row in label_data])
print(y.shape)
#y = to_categorical(np.array(X_train["train_labels"]),num_classes=2)

# NOTE(review): if the archive already stores pixel values in 0-255, scaling
# by 255 before the uint8 cast overflows — confirm the stored value range.
data = 255 * x
img = data.astype(np.uint8)
X_train = img

class MyDataset(Dataset):
    """Dataset pairing in-memory samples with integer class targets.

    `data`: indexable collection of samples.
    `targets`: labels, converted once to a `torch.LongTensor`.
    `transform`: optional callable applied to a sample when it is fetched.
    """

    def __init__(self, data, targets, transform=None):
        self.transform = transform
        self.targets = torch.LongTensor(targets)
        self.data = data

    def __len__(self):
        """Number of samples, taken from `data`."""
        return len(self.data)

    def __getitem__(self, index):
        """Return `(sample, target)` for `index`, transforming the sample if requested."""
        sample, label = self.data[index], self.targets[index]
        if not self.transform:
            return sample, label
        return self.transform(sample), label
    
# Preprocessing applied to every sample: array -> PIL image, resize and
# centre-crop to 224, convert to a tensor, then normalise each channel with
# mean 0.5 and std 0.225.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.225, 0.225, 0.225)),
])

# Training set wrapped with the preprocessing pipeline.
cropped_dataset = MyDataset(X_train, y, transform=transform)
print(len(cropped_dataset))

# Shuffled mini-batches of 128 samples, loaded by two worker processes.
train_dataloader = torch.utils.data.DataLoader(
    cropped_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

def imshow(inp, title=None, mean=(0.5, 0.5, 0.5), std=(0.225, 0.225, 0.225)):
    """Display a normalised CHW image tensor with matplotlib.

    `inp`: tensor laid out as (channels, height, width).
    `title`: optional title drawn above the image.
    `mean`, `std`: per-channel statistics used to undo the normalisation;
    the defaults are the values this function previously hard-coded.
    """
    # .cpu() is a no-op for tensors that already live on the CPU, so there is
    # no need to consult the global `device`; detach() lets tensors that track
    # gradients be converted to numpy as well.
    img = inp.detach().cpu().numpy().transpose((1, 2, 0))

    # Undo the normalisation (x = z * std + mean) and clip to the displayable range.
    img = np.asarray(std) * img + np.asarray(mean)
    img = np.clip(img, 0, 1)

    plt.imshow(img)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)
    
# Pull one batch from the training loader to sanity-check the tensor shapes.
_first_batch = next(iter(train_dataloader))
images, labels = _first_batch
print("images-size:", images.shape)

# Tile the whole batch into a single image grid.
out = make_grid(images)
print("out-size:", out.shape)

#imshow(out, title=[cropped_dataset.classes[x] for x in labels])

# Re-open the PneumoniaMNIST archive for the test and validation splits.
DATA_DIR = "../input/pneumoniamnist/pneumoniamnist.npz"
archive = np.load(DATA_DIR)
#print(f"Shape of training data: {X_train.shape}")
print(f"Data type: {type(archive)}")
data = archive["test_images"].astype(np.float64)
val_data = archive["val_images"].astype(np.float64)

# Stack each image three times along the depth axis to obtain 3 channels.
x = np.array([np.dstack((plane, plane, plane)) for plane in data])
x_val = np.array([np.dstack((plane, plane, plane)) for plane in val_data])

# Each label row is indexed with [0] to obtain one value per sample.
label_data = np.array(archive["test_labels"])
val_label = np.array(archive["val_labels"])
y = np.array([row[0] for row in label_data])
y_val = np.array([row[0] for row in val_label])
print(y.shape)
#y = to_categorical(np.array(X_test["test_labels"]),num_classes=2)

# NOTE(review): if the archive already stores pixel values in 0-255, scaling
# by 255 before the uint8 cast overflows — confirm the stored value range.
data = 255 * x
img = data.astype(np.uint8)
X_test = img
X_val = (x_val*255).astype(np.uint8)

test_dataset = MyDataset(X_test, y, transform=transform)
val_dataset =  MyDataset(X_val, y_val, transform=transform)
print(len(test_dataset))
print(len(val_dataset))

# Same loader settings as the training loader.
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)

model=models.vgg16(pretrained=True)

# change the number of classes
# Assigning to `out_features` only edits an attribute: the layer's weight
# matrix keeps its original shape and the network keeps emitting the original
# number of logits. The final layer has to be replaced by a new Linear layer
# with the wanted output size instead.
num_classes = 2
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

# freeze convolution weights
for param in model.features.parameters():
    param.requires_grad = False

# Move to the target device only after the head was replaced, so that the
# freshly created layer ends up on the same device as the rest of the model.
model.to(device)
model

# optimizer (built after the head swap so it tracks the new layer's parameters)
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001,weight_decay=1e-4)

# loss function
criterion = nn.CrossEntropyLoss()

"""In the validate() method, we are calculating the loss and accuracy. But we are not backpropagating the gradients. Backpropagation is only required during training."""
# Training and Validation Functions
# validation function
def validate(model, test_dataloader):
    """Evaluate `model` on `test_dataloader` and print/return loss and accuracy.

    Uses the module-level `criterion` and `device`. The model is left in
    evaluation mode.

    Returns `(val_loss, val_accuracy)`: the summed per-batch losses divided by
    the number of samples in `test_dataloader.dataset`, and the percentage of
    correctly classified samples.

    NOTE(review): `criterion` is called once per batch, so if it already
    averages over the batch the reported loss is additionally scaled by the
    batch size; fit() normalises the same way — confirm before changing either.
    """
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0

    # No backpropagation happens here, so skip building autograd graphs.
    with torch.no_grad():
        for batch in test_dataloader:
            data, target = batch[0].to(device), batch[1].to(device)
            # call the module (not .forward) so registered hooks still run
            output = model(data)
            loss = criterion(output, target)

            val_running_loss += loss.item()
            _, preds = torch.max(output, 1)
            val_running_correct += (preds == target).sum().item()

    val_loss = val_running_loss/len(test_dataloader.dataset)
    val_accuracy = 100. * val_running_correct/len(test_dataloader.dataset)

    print(f'Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.2f}')

    return val_loss, val_accuracy

# fit(): one optimisation pass over the training data.
# training function
def fit(model, train_dataloader):
    """Train `model` for one pass over `train_dataloader`.

    Relies on the module-level `optimizer`, `criterion` and `device`.
    Prints and returns `(train_loss, train_accuracy)`: the summed per-batch
    losses divided by the dataset size, and the percentage of samples whose
    predicted class matched the target.
    """
    model.train()
    loss_total = 0.0
    hits = 0

    for batch in train_dataloader:
        inputs = batch[0].to(device)
        targets = batch[1].to(device)

        optimizer.zero_grad()
        logits = model.forward(inputs)
        batch_loss = criterion(logits, targets)

        # Book-keeping happens before the weights are updated.
        loss_total += batch_loss.item()
        predicted = torch.max(logits.data, 1)[1]
        hits += (predicted == targets).sum().item()

        batch_loss.backward()
        optimizer.step()

    n_samples = len(train_dataloader.dataset)
    train_loss = loss_total/n_samples
    train_accuracy = 100. * hits/n_samples

    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')

    return train_loss, train_accuracy

import torch.optim as optim
import time
"""Let’s train the model for 10 epochs. For each epoch, we will call the fit() and validate() method."""
# Per-epoch history of the metrics returned by fit() and validate().
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []

start = time.time()

for epoch in range(10):
    train_epoch_loss, train_epoch_accuracy = fit(model, train_dataloader)
    val_epoch_loss, val_epoch_accuracy = validate(model, val_dataloader)
    # Append each metric of this epoch to its history list.
    for history, value in (
        (train_loss, train_epoch_loss),
        (train_accuracy, train_epoch_accuracy),
        (val_loss, val_epoch_loss),
        (val_accuracy, val_epoch_accuracy),
    ):
        history.append(value)

end = time.time()
# Wall-clock duration of the whole run, in minutes.
print((end-start)/60, 'minutes')

# Visualizing the Plots
# One figure per metric; each entry lists the curves to draw as
# (values, colour, legend label) and the file the figure is saved to.
# The legend labels previously read "validataion".
for curves, out_file in (
    (((train_accuracy, 'green', 'train accuracy'),
      (val_accuracy, 'blue', 'validation accuracy')), 'accuracy.png'),
    (((train_loss, 'orange', 'train loss'),
      (val_loss, 'red', 'validation loss')), 'loss.png'),
):
    plt.figure(figsize=(10, 7))
    for values, color, label in curves:
        plt.plot(values, color=color, label=label)
    plt.legend()
    # save before show(): the figure is written to disk first, then displayed
    plt.savefig(out_file)
    plt.show()

def evaluate(model,dataLoader):
    """Run `model` over `dataLoader` without gradients and collect metrics.

    Batches are moved to the module-level `device`; the loss is computed with
    the module-level `criterion`.

    Returns `(avg_loss, avg_acc, predicted, actual)` where
    `avg_loss` is the mean of the per-batch losses (float),
    `avg_acc` is the mean of the per-batch accuracies (a 0-dim tensor), and
    `predicted` / `actual` are flat lists of class indices in iteration order.

    Raises ZeroDivisionError when `dataLoader` yields no batches.
    """
    predicted_labels_list=[]
    actual_labels_list=[]
    test_loss_acm=0
    accuracy_acm=0

    # Remember the mode the caller had the model in; previously the model was
    # always forced into train mode on exit, even if it arrived in eval mode.
    was_training=model.training
    model.eval() # evaluation mode, i.e. dropout is deactivated
    try:
        with no_grad(): # no gradient bookkeeping during evaluation
            for batch in dataLoader:
                images, labels = batch[0].to(device), batch[1].to(device)
                # call the module (not .forward) so registered hooks still run
                output=model(images)
                test_loss_acm+=criterion(output,labels).item()

                # softmax is monotonic, so argmax over the raw output selects
                # the same class as argmax over the softmax probabilities
                result_labels=torch.argmax(output,dim=1)
                bools=(result_labels==labels)
                accuracy_acm+=bools.sum().type(torch.float)/len(bools)

                predicted_labels_list.extend(result_labels.tolist())
                actual_labels_list.extend(labels.tolist())
    finally:
        # restore whichever mode was active on entry
        model.train(was_training)

    avg_test_loss=test_loss_acm/len(dataLoader)
    avg_test_acc=accuracy_acm/len(dataLoader)

    return (avg_test_loss,avg_test_acc,predicted_labels_list,actual_labels_list)

from torch import optim,no_grad

# Score the trained model on the held-out test split.
test_metrics=evaluate(model,test_dataloader)
avg_test_loss,avg_test_acc,predicted_label,actual_label=test_metrics
print(f"Test Loss:{avg_test_loss:.3f}")
print(f"Test Accuracy:{avg_test_acc*100:.2f}%")

from sklearn.metrics import classification_report,confusion_matrix
import seaborn as sns

# Confusion matrix rendered as an annotated heatmap with ticks '0' and '1'.
cnfn_mat=confusion_matrix(actual_label,predicted_label)
binary_ticks=['0','1']
sns.heatmap(cnfn_mat,annot=cnfn_mat,fmt='',xticklabels=binary_ticks,yticklabels=binary_ticks)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Per-class metrics as a table: transpose so that each class is a row.
report_dict=classification_report(actual_label,predicted_label,output_dict=True)
report=pd.DataFrame.from_dict(report_dict).T
report=report[['f1-score','precision','recall','support']]
report

In [None]:
#vgg16 for bloodmnist
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import make_grid
from IPython.display import Image
from keras.utils.np_utils import to_categorical 
from torch import optim,no_grad
from torch.utils.data import DataLoader
from torchvision import models
from torch.nn import Dropout,ReLU,Linear,Sequential,LogSoftmax,Softmax
import pickle
from tqdm import tqdm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,confusion_matrix
# Fix the RNG seeds so that runs are repeatable
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
for _seed_fn in (random.seed, torch.manual_seed):
    _seed_fn(manualSeed)

# Use the first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

import torch.optim as optim
import time

# Load all three BloodMNIST splits from a single archive.
DATA_DIR = "../input/bloodmnist/bloodmnist.npz"
archive = np.load(DATA_DIR)
#print(f"Shape of training data: {X_train.shape}")
print(f"Data type: {type(archive)}")

# Images, cast to float64 and copied into fresh arrays.
data = archive["train_images"].astype(np.float64)
test_data = archive["test_images"].astype(np.float64)
val_data = archive["val_images"].astype(np.float64)
x, x_test, x_val = np.array(data), np.array(test_data), np.array(val_data)

# Labels: each row is indexed with [0] to obtain one value per sample.
label_data = archive["train_labels"]
test_label = archive["test_labels"]
val_label = archive["val_labels"]
y = np.array([row[0] for row in label_data])
y_test = np.array([row[0] for row in test_label])
y_val = np.array([row[0] for row in val_label])
print(y.shape)
#y = to_categorical(np.array(X_train["train_labels"]),num_classes=2)

# NOTE(review): if the archive already stores pixel values in 0-255, scaling
# by 255 before the uint8 cast overflows — confirm the stored value range.
data = 255 * x
img = data.astype(np.uint8)
X_train = img
X_val = (x_val*255).astype(np.uint8)
X_test  = (x_test*255).astype(np.uint8)
class MyDataset(Dataset):
    """Dataset pairing in-memory samples with integer class targets.

    `data`: indexable collection of samples.
    `targets`: labels, converted once to a `torch.LongTensor`.
    `transform`: optional callable applied to a sample when it is fetched.
    """

    def __init__(self, data, targets, transform=None):
        self.transform = transform
        self.targets = torch.LongTensor(targets)
        self.data = data

    def __len__(self):
        """Number of samples, taken from `data`."""
        return len(self.data)

    def __getitem__(self, index):
        """Return `(sample, target)` for `index`, transforming the sample if requested."""
        sample, label = self.data[index], self.targets[index]
        if not self.transform:
            return sample, label
        return self.transform(sample), label
    
# Preprocessing applied to every sample: array -> PIL image, resize and
# centre-crop to 224, convert to a tensor, then normalise each channel with
# the fixed mean/std values below.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Aliases for the three label arrays.
y1_train, y1_test, y1_val = y, y_test, y_val

# One dataset per split, all sharing the same preprocessing.
cropped_dataset = MyDataset(X_train, y, transform=transform)
test_dataset = MyDataset(X_test, y_test, transform=transform)
val_dataset =  MyDataset(X_val, y_val, transform=transform)

# Identical loader settings for every split.
train_dataloader = torch.utils.data.DataLoader(
    cropped_dataset, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=128, shuffle=True, num_workers=2, pin_memory=True)

model=models.vgg16(pretrained=True)

# change the number of classes
# Assigning to `out_features` only edits an attribute: the layer's weight
# matrix keeps its original shape and the network keeps emitting the original
# number of logits. The final layer has to be replaced by a new Linear layer
# with the wanted output size instead.
num_classes = 8
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

# freeze convolution weights
for param in model.features.parameters():
    param.requires_grad = False

# Move to the target device only after the head was replaced, so that the
# freshly created layer ends up on the same device as the rest of the model.
model.to(device)

model

# optimizer (built after the head swap so it tracks the new layer's parameters)
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

# loss function
criterion = nn.CrossEntropyLoss()

"""In the validate() method, we are calculating the loss and accuracy. But we are not backpropagating the gradients. Backpropagation is only required during training."""
# Training and Validation Functions
# validation function
def validate(model, test_dataloader):
    """Evaluate `model` on `test_dataloader` and print/return loss and accuracy.

    Uses the module-level `criterion` and `device`. The model is left in
    evaluation mode.

    Returns `(val_loss, val_accuracy)`: the summed per-batch losses divided by
    the number of samples in `test_dataloader.dataset`, and the percentage of
    correctly classified samples.

    NOTE(review): `criterion` is called once per batch, so if it already
    averages over the batch the reported loss is additionally scaled by the
    batch size; fit() normalises the same way — confirm before changing either.
    """
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0

    # No backpropagation happens here, so skip building autograd graphs.
    with torch.no_grad():
        for batch in test_dataloader:
            data, target = batch[0].to(device), batch[1].to(device)
            # call the module (not .forward) so registered hooks still run
            output = model(data)
            loss = criterion(output, target)

            val_running_loss += loss.item()
            _, preds = torch.max(output, 1)
            val_running_correct += (preds == target).sum().item()

    val_loss = val_running_loss/len(test_dataloader.dataset)
    val_accuracy = 100. * val_running_correct/len(test_dataloader.dataset)

    print(f'Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.2f}')

    return val_loss, val_accuracy

# fit(): one optimisation pass over the training data.
# training function
def fit(model, train_dataloader):
    """Train `model` for one pass over `train_dataloader`.

    Relies on the module-level `optimizer`, `criterion` and `device`.
    Prints and returns `(train_loss, train_accuracy)`: the summed per-batch
    losses divided by the dataset size, and the percentage of samples whose
    predicted class matched the target.
    """
    model.train()
    loss_total = 0.0
    hits = 0

    for batch in train_dataloader:
        inputs = batch[0].to(device)
        targets = batch[1].to(device)

        optimizer.zero_grad()
        logits = model.forward(inputs)
        batch_loss = criterion(logits, targets)

        # Book-keeping happens before the weights are updated.
        loss_total += batch_loss.item()
        predicted = torch.max(logits.data, 1)[1]
        hits += (predicted == targets).sum().item()

        batch_loss.backward()
        optimizer.step()

    n_samples = len(train_dataloader.dataset)
    train_loss = loss_total/n_samples
    train_accuracy = 100. * hits/n_samples

    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')

    return train_loss, train_accuracy

import torch.optim as optim
import time
"""Let’s train the model for 10 epochs. For each epoch, we will call the fit() and validate() method."""
# Per-epoch history of the metrics returned by fit() and validate().
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []

start = time.time()

for epoch in range(10):
    train_epoch_loss, train_epoch_accuracy = fit(model, train_dataloader)
    val_epoch_loss, val_epoch_accuracy = validate(model, val_dataloader)
    # Append each metric of this epoch to its history list.
    for history, value in (
        (train_loss, train_epoch_loss),
        (train_accuracy, train_epoch_accuracy),
        (val_loss, val_epoch_loss),
        (val_accuracy, val_epoch_accuracy),
    ):
        history.append(value)

end = time.time()
# Wall-clock duration of the whole run, in minutes.
print((end-start)/60, 'minutes')

# Visualizing the Plots
# One figure per metric; each entry lists the curves to draw as
# (values, colour, legend label) and the file the figure is saved to.
# The legend labels previously read "validataion".
for curves, out_file in (
    (((train_accuracy, 'green', 'train accuracy'),
      (val_accuracy, 'blue', 'validation accuracy')), 'accuracy.png'),
    (((train_loss, 'orange', 'train loss'),
      (val_loss, 'red', 'validation loss')), 'loss.png'),
):
    plt.figure(figsize=(10, 7))
    for values, color, label in curves:
        plt.plot(values, color=color, label=label)
    plt.legend()
    # save before show(): the figure is written to disk first, then displayed
    plt.savefig(out_file)
    plt.show()

def evaluate(model,dataLoader):
    """Run `model` over `dataLoader` without gradients and collect metrics.

    Batches are moved to the module-level `device`; the loss is computed with
    the module-level `criterion`.

    Returns `(avg_loss, avg_acc, predicted, actual)` where
    `avg_loss` is the mean of the per-batch losses (float),
    `avg_acc` is the mean of the per-batch accuracies (a 0-dim tensor), and
    `predicted` / `actual` are flat lists of class indices in iteration order.

    Raises ZeroDivisionError when `dataLoader` yields no batches.
    """
    predicted_labels_list=[]
    actual_labels_list=[]
    test_loss_acm=0
    accuracy_acm=0

    # Remember the mode the caller had the model in; previously the model was
    # always forced into train mode on exit, even if it arrived in eval mode.
    was_training=model.training
    model.eval() # evaluation mode, i.e. dropout is deactivated
    try:
        with no_grad(): # no gradient bookkeeping during evaluation
            for batch in dataLoader:
                images, labels = batch[0].to(device), batch[1].to(device)
                # call the module (not .forward) so registered hooks still run
                output=model(images)
                test_loss_acm+=criterion(output,labels).item()

                # softmax is monotonic, so argmax over the raw output selects
                # the same class as argmax over the softmax probabilities
                result_labels=torch.argmax(output,dim=1)
                bools=(result_labels==labels)
                accuracy_acm+=bools.sum().type(torch.float)/len(bools)

                predicted_labels_list.extend(result_labels.tolist())
                actual_labels_list.extend(labels.tolist())
    finally:
        # restore whichever mode was active on entry
        model.train(was_training)

    avg_test_loss=test_loss_acm/len(dataLoader)
    avg_test_acc=accuracy_acm/len(dataLoader)

    return (avg_test_loss,avg_test_acc,predicted_labels_list,actual_labels_list)

# Score the trained model on the held-out test split.
test_metrics=evaluate(model,test_dataloader)
avg_test_loss,avg_test_acc,predicted_label,actual_label=test_metrics
print(f"Test Loss:{avg_test_loss:.3f}")
print(f"Test Accuracy:{avg_test_acc*100:.2f}%")

# Confusion matrix rendered as an annotated heatmap, ticks '0' through '7'.
cnfn_mat=confusion_matrix(actual_label,predicted_label)
class_ticks=['0','1','2','3','4','5','6','7']
sns.heatmap(cnfn_mat,annot=cnfn_mat,fmt='',xticklabels=class_ticks,yticklabels=class_ticks)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Per-class metrics as a table: transpose so that each class is a row.
report_dict=classification_report(actual_label,predicted_label,output_dict=True)
report=pd.DataFrame.from_dict(report_dict).T
report=report[['f1-score','precision','recall','support']]
report

In [None]:
#FLDA
"""
Fisher Linear Discriminant

FisherLD builds the matrix pinv(S_W) . S_B (within-class and between-class scatter) from the training data; Pred_FisherLD applies a weight matrix to the test samples.
"""
def FisherLD(trainX, trainY, nClasses, ndim, dim) :
    """Compute the Fisher discriminant matrix pinv(S_W) . S_B.

    `trainX`: 2-D array of training samples, one row per sample with `ndim` columns.
    `trainY`: labels; the class index of each sample is read from `trainY.T[0]`.
    `nClasses`: number of classes; class indices run from 0 to nClasses-1.
    `ndim`: number of features per sample.
    `dim`: unused; kept so existing callers keep working.

    Returns an (ndim, ndim) array: the pseudo-inverse of the within-class
    scatter matrix multiplied by the between-class scatter matrix.
    """
    labels = trainY.T[0]
    overall_mean = np.mean(trainX, axis=0).reshape(ndim, 1)

    SW = np.zeros((ndim, ndim))   # within-class scatter
    SB = np.zeros((ndim, ndim))   # between-class scatter
    for cl in range(nClasses):
        # Rows of *this* class. The earlier implementation selected rows with
        # a stale index left over from a previous loop, so every class mean
        # was compared against the samples of the last class only.
        rows = trainX[labels == cl]
        class_mean = np.mean(rows, axis=0)

        # Sum of outer products (row - mean)(row - mean)^T over the class.
        centered = rows - class_mean
        SW += centered.T.dot(centered)

        # Between-class contribution, weighted by the class size.
        diff = class_mean.reshape(ndim, 1) - overall_mean
        SB += rows.shape[0] * diff.dot(diff.T)

    return np.linalg.pinv(SW).dot(SB)

def Pred_FisherLD(testX, W, nClasses) :
    """Apply `W` to every row of `testX`.

    Returns an array of shape (number of rows of testX, nClasses) whose i-th
    row holds `W.dot(testX[i])`.
    """
    n_samples = testX.shape[0]
    scores = np.zeros((n_samples, nClasses))
    for idx in range(n_samples):
        # copy of the idx-th sample as a 1-D vector
        sample = np.array(testX[idx])
        scores[idx] = W.dot(sample)
    return scores

# Build the discriminant matrix from the pneumonia training split.
W = FisherLD(pnuTrainX, pnuTrainY, nClasses=len(pnuClasses), ndim=len(pnuTrainX[0]), dim=2)

# Score every test sample with it.
FLDProbDensR = Pred_FisherLD(pnuTestX, W, nClasses=2)

# Predicted class = column with the largest score.
pnuFLDTestR = np.argmax(FLDProbDensR, axis=1)

from __future__ import print_function, division

class LDA():
    """Two-class linear discriminant.

    After `fit`, `w` holds the projection vector
    pinv(S1 + S2) . (mean1 - mean2), where S1 and S2 are the scatter matrices
    of the two classes.
    """

    def __init__(self):
        self.w = None

    def transform(self, X, y):
        """Fit on `X` split by `y` (0 / 1) and return `X` projected onto `w`."""
        self.fit(X[y == 0], X[y == 1], y)
        # Project data onto vector
        X_transform = X.dot(self.w)
        return X_transform

    def fit(self, X1, X2, y):
        """Compute `w` from the class-0 samples `X1` and class-1 samples `X2`.

        `X1`, `X2`: 2-D arrays with one sample per row.
        `y`: unused; kept so existing callers keep working.
        """
        mean1 = X1.mean(0)
        mean2 = X2.mean(0)

        # Scatter matrix of each class: sum over samples of the outer product
        # (x - mean)(x - mean)^T, each class centred on its OWN mean. The
        # earlier per-sample loop mixed the two means, added the class-2 terms
        # to cov1, and `.T.dot` on 1-D vectors produced a scalar instead of an
        # outer product.
        centered1 = X1 - mean1
        centered2 = X2 - mean2
        cov1 = centered1.T.dot(centered1)
        cov2 = centered2.T.dot(centered2)
        cov_tot = cov1 + cov2

        mean_diff = np.atleast_1d(mean1 - mean2)
        # Determine the vector which when X is projected onto it best separates the
        # data by class. w = (mean1 - mean2) / (cov1 + cov2)
        self.w = np.linalg.pinv(cov_tot).dot(mean_diff)

    def predict(self, X):
        """Return a list with one label per sample of `X`.

        A sample is labelled 1 when its projection onto `w` is negative and 0
        otherwise (`w` points from the class-1 mean towards the class-0 mean).
        """
        return [1 * (sample.dot(self.w) < 0) for sample in X]

# Split the training samples by class label (read from pnuTrainY.T[0]).
y = pnuTrainY.T[0]
X1 = pnuTrainX[y == 0]
X2 = pnuTrainX[y == 1]

lda = LDA()

lda.fit(X1, X2, y)

pnuFLDTestR = lda.predict(pnuTestX)

FLDCM = confMatrix(pnuTestY, pnuFLDTestR, pnuClasses)
print("pnu FLD Accuracy:", accuracy(FLDCM))
print("pnu FLD F1 Score:", f1Score(FLDCM))
# roc_auc_score expects (y_true, y_score): ground truth first, predictions
# second. The arguments were previously passed the other way round.
print("pnu FLD AUC Score:", roc_auc_score(pnuTestY, pnuFLDTestR))

In [None]:
#LSTM
# Commented out IPython magic to ensure Python compatibility.
import IPython.display as ipd  # To play sound in the notebook
from scipy.io import wavfile # for reading wave files as numpy arrays
import wave # opening .wav files
import struct # for padding
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # visualizations
# %matplotlib inline
import os # operating system
from os.path import join
import time

import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Expected sample rate of every audio file; read_audio() asserts it.
RATE = 16000
# Root directory that join_dirs() prefixes to each audio file path.
data_dir = "../input/darpa-timit-acousticphonetic-continuous-speech/data"
# CSV indexes of the train and test recordings, consumed by make_data_set().
train_csv_file = "../input/darpa-timit-acousticphonetic-continuous-speech/train_data.csv"
test_csv_file = "../input/darpa-timit-acousticphonetic-continuous-speech/test_data.csv"

def get_good_audio_files(filename):
    """Read the CSV at `filename` and keep the rows flagged `is_converted_audio`."""
    table = pd.read_csv(filename)
    is_converted = table['is_converted_audio'] == True
    return table[is_converted]

def parse_phn_timestamps(wrd_path, verbose=False):
    """Parse a .PHN annotation file into a list of tuples.

    `wrd_path`: '/'-separated path; the second-to-last component is used as
    the speaker id and the file name without '.PHN' as the sentence id.
    `verbose`: print the path being parsed.

    Returns one tuple per non-empty line: the space-separated fields of the
    line (as strings) followed by the speaker id and the sentence id.
    """
    if verbose:
        print('phn_path', wrd_path)

    path_parts = wrd_path.split('/')
    speaker_id = path_parts[-2]
    sentence_id = path_parts[-1].replace('.PHN', '')

    # `with` guarantees the handle is closed even if reading fails.
    with open(wrd_path) as wrd_file:
        lines = wrd_file.read().split('\n')

    return [tuple(line.split(' ') + [speaker_id, sentence_id])
            for line in lines if line != '']

def read_audio(wave_path, verbose=False):
    """Read the wave file at `wave_path` and return its sample array.

    Asserts that the file's sample rate equals the module-level `RATE`.
    With `verbose`, prints the rate and the shape of the data.
    """
    rate, data = wavfile.read(wave_path)
    # every file must use the sample rate the rest of the pipeline expects
    assert rate == RATE
    if verbose:
        print("Sampling (frame) rate = ", rate)
        print("Total samples (frames) = ", data.shape)
    return data

def join_dirs(row):
    """Build the file path for `row` beneath the module-level `data_dir`.

    The path components are taken, in order, from the row's
    'test_or_train', 'dialect_region', 'speaker_id' and 'filename' entries.
    """
    components = [row[key] for key in
                  ('test_or_train', 'dialect_region', 'speaker_id', 'filename')]
    return os.path.join(data_dir, *components)

def parse_word_waves(time_aligned_words, audio_data, verbose=False):
    """Run align_data() on each (audio, annotations) pair and collect the results."""
    aligned = []
    for data, words in zip(audio_data, time_aligned_words):
        aligned.append(align_data(data, words, verbose))
    return aligned
    
def align_data(data, words, verbose=False):
    """Cut `data` into the segments described by `words`.

    Each entry of `words` is indexed as (start, end, label, ...). The first
    and last entries are skipped. Returns a list of
    `(data[start:end], label)` pairs; asserts that every segment lies inside
    `data`.
    """
    if verbose:
        print('len(data)', len(data))
        print('len(words)', len(words))
        print('data', data)
        print('words', words)

    segments = []
    for tup in words[1:-1]:
        if verbose:
            print('tup',tup)
        start, end, word = int(tup[0]), int(tup[1]), tup[2]
        assert start >= 0
        assert end <= len(data)
        segments.append((data[start:end], word))
    return segments

def make_data_set(train_csv_file):
    """Build a flat list of (audio segment, phoneme label) pairs for one split.

    Args:
        train_csv_file: path to the split's CSV manifest. Despite the name,
            the same function is used for the test manifest.

    Returns:
        A list of `(samples, label)` tuples, concatenated over every
        converted recording in manifest order.
    """
    usable_rows = get_good_audio_files(train_csv_file)
    # Build the paths as a plain list instead of writing a new 'filepath'
    # column onto the filtered frame: assigning to that selection is the
    # chained-assignment pattern pandas reports as SettingWithCopyWarning.
    # A manifest with no usable rows simply yields an empty list.
    wave_paths = [join_dirs(row) for _, row in usable_rows.iterrows()]
    audio_data = [read_audio(path) for path in wave_paths]
    # Each recording's phoneme annotation sits next to it with a .PHN suffix.
    phn_paths = [path.replace('.WAV.wav', '') + '.PHN' for path in wave_paths]
    phn_data = [parse_phn_timestamps(path) for path in phn_paths]
    return [
        segment
        for audio, phones in zip(audio_data, phn_data)
        for segment in align_data(audio, phones)
    ]

def add_padding(data, length):
    """Force every segment in `data` to exactly `length` samples.

    Args:
        data: iterable of rows whose first item is a 1-D NumPy array of
            samples and whose second item is its label.
        length: number of samples each returned segment must have.

    Returns:
        A list of `(samples, label)` tuples. Shorter segments are
        right-padded with zeros and longer ones are truncated. Every
        returned array is a freshly allocated float64 array. The original
        returned a slice view for truncated rows, which aliased the caller's
        array and kept its dtype, unlike the padded rows.
    """
    padded_data = []
    for row in data:
        samples = row[0]
        fixed = np.zeros(length)
        # Copy at most `length` samples; any remaining tail stays zero.
        keep = min(samples.shape[0], length)
        fixed[:keep] = samples[:keep]
        padded_data.append((fixed, row[1]))
    return padded_data

# Flat lists of (samples, phoneme label) pairs for each split.
train_data = make_data_set(train_csv_file)
test_data = make_data_set(test_csv_file)

# Phoneme labels that count as the positive (vowel) class.
vowels = ["iy", "ih", "eh", "ey", "ae", "aa", "aw", "ay", "ah", "ao", "oy", "ow", "uh", "uw", "ux", "er", "ax", "ix", "axr", "ax-h"]

# Pad or truncate every segment to 100 samples so they stack into one array.
train_data = add_padding(train_data, 100)
test_data = add_padding(test_data, 100)

# Features are the raw samples; labels are 1 for vowels and 0 otherwise.
trainX = np.array([i[0] for i in train_data])
trainY = np.array([int(i[1] in vowels) for i in train_data])

testX = np.array([i[0] for i in test_data])
testY = np.array([int(i[1] in vowels) for i in test_data])

trainX = torch.from_numpy(trainX).type(torch.Tensor)
trainY = torch.from_numpy(trainY).type(torch.LongTensor)

testX = torch.from_numpy(testX).type(torch.Tensor)
testY = torch.from_numpy(testY).type(torch.LongTensor)

# Add a trailing feature axis of size 1: (segments, samples) -> (segments, samples, 1).
# NOTE(review): the LSTM in this file builds nn.LSTM without batch_first, so
# it reads dimension 0 as time; confirm this layout is the intended one.
trainX = torch.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
# Fix: the original assigned this result to trainX, replacing the training
# tensor with test data and leaving testX without its feature axis.
testX = torch.reshape(testX, (testX.shape[0], testX.shape[1], 1))

class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer and a log-softmax over classes.

    The recurrent layer is created without `batch_first`, so `forward` treats
    dimension 0 of its input as the time axis, i.e. PyTorch's default
    `(seq_len, batch, input_dim)` layout.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the LSTM hidden state.
        output_dim: number of classes scored by the linear layer.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim=1, num_layers=2):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # setup LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)

        # setup output layer
        self.linear = nn.Linear(self.hidden_dim, output_dim)

    def forward(self, input, hidden=None):
        """Score a batch of sequences from their final time step.

        Args:
            input: tensor whose dimension 0 is the time axis.
            hidden: optional `(h_0, c_0)` state passed through to the LSTM;
                None starts from the LSTM's default initial state.

        Returns:
            `(log_probs, hidden)`: the log-softmax scores over dimension 1
            and the LSTM state after the last step.
        """
        # lstm_out holds the output of every time step; only the last one,
        # lstm_out[-1], feeds the linear layer (like Keras'
        # return_sequences=False).
        lstm_out, hidden = self.lstm(input, hidden)
        logits = self.linear(lstm_out[-1])
        log_probs = F.log_softmax(logits, dim=1)
        return log_probs, hidden

    def get_accuracy(self, logits, target):
        """Return the percentage of predictions in `logits` that match `target`.

        The prediction for each row is the index of its largest score along
        dimension 1.
        """
        predicted = torch.max(logits, 1)[1].view(target.size())
        corrects = (predicted.data == target.data).sum()
        # Normalise by the number of targets scored. The original divided by
        # self.batch_size, an attribute this class never defines, so every
        # call raised AttributeError.
        accuracy = 100.0 * corrects / target.numel()
        return accuracy.item()

num_epochs = 10

# Define model
print("Build LSTM RNN model ...")
# Two output units, one per class (non-vowel / vowel). With a single unit the
# log-softmax is constantly 0 and the 0/1 integer targets built above are out
# of range for NLLLoss, which expects one log-probability per class.
model = LSTM(input_dim=1, hidden_dim=10, output_dim=2, num_layers=2)
loss_function = nn.NLLLoss()  # expects outputs from LogSoftmax

optimizer = optim.Adam(model.parameters(), lr=0.001)

# To keep the LSTM stateful between batches, set stateful = True; this is not
# suggested for training.
stateful = False

train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print("\nTraining on GPU")
else:
    print("\nNo GPU, training on CPU")

print("Training ...")
for epoch in range(1):

    # Running totals for this epoch.
    train_running_loss = 0.0
    train_acc = 0.0

    # Begin the epoch without any carried-over recurrent state.
    hidden_state = None
    # Clear gradients so they do not accumulate between passes.
    model.zero_grad()

    # Forward pass over the whole training tensor at once.
    y_pred, hidden_state = model(trainX, hidden_state)

    if stateful:
        # Keep the state for the next pass, detached in place so that
        # backpropagation stops at this boundary.
        h_0, c_0 = hidden_state
        h_0.detach_()
        c_0.detach_()
        hidden_state = (h_0, c_0)
    else:
        # Stateless training: drop the state returned by the forward pass.
        hidden_state = None

    print(y_pred.shape)
#     loss = loss_function(y_pred, trainY)  # compute loss
#     loss.backward()  # backward pass
#     optimizer.step()  # parameter update

#     train_running_loss += loss.detach().item()  # unpacks the tensor into a scalar value
#     train_acc += model.get_accuracy(y_pred, trainY)

#     print(
#         "Epoch:  %d | NLLoss: %.4f | Train Accuracy: %.2f"
#         % (epoch, train_running_loss / num_batches, train_acc / num_batches)
#     )

# # visualization loss
# plt.plot(epoch_list, val_loss_list)
# plt.xlabel("# of epochs")
# plt.ylabel("Loss")
# plt.title("LSTM: Loss vs # epochs")
# plt.show()

# # visualization accuracy
# plt.plot(epoch_list, val_accuracy_list, color="red")
# plt.xlabel("# of epochs")
# plt.ylabel("Accuracy")
# plt.title("LSTM: Accuracy vs # epochs")
# # plt.savefig('graph.png')
# plt.show()