In [1]:
import os
import cv2
import pickle
import numpy as np
import pdb
import requests
from collections import defaultdict
import random 
import time

from sklearn import tree
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from tqdm import *

import matplotlib.pyplot as plt
from functools import wraps
from time import time as _timenow 
from sys import stderr


## Load CIFAR-10 Data

In [2]:
def load_cifar(n_batches=1, data_dir='./data'):
    '''Load CIFAR-10 training batches and the test batch from disk.

    Args:
        n_batches: number of training batch files to read, i.e.
            ``data_batch_1`` .. ``data_batch_<n_batches>``. Defaults to 1,
            which is what this notebook originally hard-coded.
        data_dir: directory containing the pickled batch files.
            Defaults to ``./data``.

    Returns:
        Tuple ``(trn_data, trn_labels, tst_data, tst_labels)``. Each element
        is a Python list built by extending it with the ``'data'`` or
        ``'labels'`` entry of the corresponding batch dictionaries.

    Raises:
        OSError: propagated from ``open`` if a batch file is missing.
    '''
    def unpickle(file):
        # NOTE(security): pickle.load can execute arbitrary code, so only
        # point this at batch files obtained from a trusted source.
        with open(file, 'rb') as fo:
            return pickle.load(fo, encoding='latin1')

    trn_data, trn_labels, tst_data, tst_labels = [], [], [], []

    # Batch files are numbered from 1, hence the ``i + 1``.
    for i in trange(n_batches):
        batch = unpickle(os.path.join(data_dir, 'data_batch_{0}'.format(i + 1)))
        trn_data.extend(batch['data'])
        trn_labels.extend(batch['labels'])

    test_batch = unpickle(os.path.join(data_dir, 'test_batch'))
    tst_data.extend(test_batch['data'])
    tst_labels.extend(test_batch['labels'])
    return trn_data, trn_labels, tst_data, tst_labels


## Image preprocessing

In [3]:
def image_prep(train, test):
    '''Standardize both feature sets with a scaler fitted on ``train``.

    The StandardScaler is fitted on the training samples only and then
    applied to the training and the test samples, so no statistics are
    taken from ``test``.

    Returns:
        (train_data, test_data): the transformed training and test sets.
    '''
    standardizer = preprocessing.StandardScaler()
    standardizer.fit(train)
    scaled_train = standardizer.transform(train)
    scaled_test = standardizer.transform(test)
    return scaled_train, scaled_test

In [4]:
def unpickle(file):
    '''Read one pickled object from the path ``file`` and return it.

    The file is opened in binary mode and decoded with ``encoding='latin1'``.
    '''
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='latin1')

## Dimensionality reduction using PCA or LDA

In [None]:
def reduce_dim(**kwargs):
    '''Fit a dimensionality reducer on the training set and project both sets.

    Keyword Args:
        method: ``'pca'`` or ``'lda'``.
        train: training features; the reducer is fitted on these only.
        test: test features, transformed with the fitted reducer.
        numbercomponents: number of output components. Required for
            ``'pca'``. Optional for ``'lda'``; when omitted, scikit-learn
            picks its default, and any explicit value must not exceed
            ``min(n_classes - 1, n_features)``.
        train_label: training labels; required for ``'lda'`` only.

    Returns:
        (train_new, test_new): the projected training and test features.

    Raises:
        KeyError: if a required keyword argument is missing.
        ValueError: if ``method`` is neither ``'pca'`` nor ``'lda'``.
    '''
    method = kwargs['method']
    train_x = kwargs['train']
    test_x = kwargs['test']

    if method == 'pca':
        reducer = PCA(n_components=kwargs['numbercomponents'])
        reducer.fit(train_x)
    elif method == 'lda':
        # The estimator's keyword is ``n_components`` (not ``numbercomponents``),
        # and LDA cannot produce more than n_classes - 1 components, so the
        # previous hard-coded 600 could never work. Default to None and let
        # scikit-learn choose the maximum allowed value.
        reducer = LinearDiscriminantAnalysis(n_components=kwargs.get('numbercomponents'))
        reducer.fit(train_x, kwargs['train_label'])
    else:
        # Fail loudly instead of returning None, which callers would only
        # notice as an obscure tuple-unpacking error.
        raise ValueError("unsupported method {!r}; expected 'pca' or 'lda'".format(method))

    return reducer.transform(train_x), reducer.transform(test_x)
        

## Classification using a multi-layer perceptron (MLP)

In [None]:
def classify_mlp(X, Y, hidden_layer1, hidden_layer2, hidden_layer3, i, **kwargs):
    '''Fit a three-hidden-layer MLPClassifier on (X, Y) and return it.

    ``hidden_layer1``..``hidden_layer3`` are the sizes of the three hidden
    layers and ``i`` is passed as ``max_iter``. Extra keyword arguments are
    accepted but not used.
    '''
    layer_sizes = (hidden_layer1, hidden_layer2, hidden_layer3)
    classifier = MLPClassifier(hidden_layer_sizes=layer_sizes, max_iter=i)
    # fit() returns the estimator itself, so the trained model can be returned directly.
    return classifier.fit(X, Y)
        


## Evaluation 

In [None]:
def evaluate(target, predicted):
    '''Score predictions against the true labels.

    Returns:
        (f1, acc): the micro-averaged F1 score and the accuracy.
    '''
    return (
        f1_score(target, predicted, average='micro'),
        accuracy_score(target, predicted),
    )

In [None]:
def test(**kwargs):
    '''takes test data and trained classifier model,
    performs classification and prints accuracy and f1-score'''
    if kwargs['method'] == 'MLP':
        if kwargs['method'] == 'MLP':
            mlp = kwargs['model']
            X_test = kwargs['test']
            o = mlp.predict(X_test)
        return o

In [None]:
def number_hidden_layers():
    '''Measure validation accuracy for MLPs with 1 to 5 hidden layers.

    Loads the data, holds out 20% of the training set for validation,
    standardizes it, reduces it to 90 PCA components, and then trains one
    MLP per depth (100 units per hidden layer, ``max_iter=5000``). For each
    depth the F1 score and accuracy are printed; finally accuracy is plotted
    against the number of hidden layers.

    Returns:
        (acc, x_axis): the accuracies and the matching hidden-layer counts.
    '''
    trn_data, trn_labels, _, _ = load_cifar()
    X_train, X_test, Y_train, Y_test = train_test_split(trn_data, trn_labels, test_size=0.20)

    # Scaling and PCA do not depend on the network depth, so compute them
    # once and reuse the same features for every model.
    trn_data, tst_data = image_prep(X_train, X_test)
    trn_data, tst_data = reduce_dim(train=trn_data, test=tst_data, method='pca', numbercomponents=90)

    acc = []
    x_axis = []
    for n_layers in range(1, 6):
        # (100,) * n_layers -> (100,), (100, 100), ... one entry per hidden layer.
        model = MLPClassifier(hidden_layer_sizes=(100,) * n_layers, max_iter=5000)
        model.fit(trn_data, Y_train)
        output = test(test=tst_data, model=model, method='MLP')
        f_score, accuracy = evaluate(Y_test, output)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))
        acc.append(accuracy)
        x_axis.append(n_layers)

    plt.plot(x_axis, acc)
    plt.xlabel('No. of hidden layers')
    plt.ylabel('Accuracy')
    plt.show()

    return acc, x_axis

In [None]:
def find_pca_components():
    '''Sweep the number of PCA components and report MLP validation accuracy.

    Loads the data, holds out 20% of the training set for validation and
    standardizes it. For every component count from 50 to 150 (step 10) the
    features are reduced with PCA, a 300-300-300 MLP is trained
    (``max_iter=5000``), and the F1 score and accuracy are printed. Finally
    accuracy is plotted against the number of components.

    Returns:
        (acc, xaxis): the accuracies and the matching component counts.
    '''
    trn_data, trn_labels, _, _ = load_cifar()
    X_train, X_test, Y_train, Y_test = train_test_split(trn_data, trn_labels, test_size=0.20)

    # Standardization is independent of the component count: do it once.
    scaled_train, scaled_test = image_prep(X_train, X_test)

    acc = []
    xaxis = []
    for n_components in range(50, 151, 10):
        trn_data, tst_data = reduce_dim(train=scaled_train, test=scaled_test,
                                        method='pca', numbercomponents=n_components)
        model = classify_mlp(trn_data, Y_train, 300, 300, 300, 5000, method='MLP')
        output = test(test=tst_data, model=model, method='MLP')
        f_score, accuracy = evaluate(Y_test, output)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))
        acc.append(accuracy)
        xaxis.append(n_components)

    plt.plot(xaxis, acc)
    plt.xlabel('No. of components for PCA')
    plt.ylabel('Accuracy')
    plt.show()
    return acc, xaxis


In [None]:
def max_no_iter():
    '''Measure MLP validation accuracy for max_iter = 1000, 2000, ..., 5000.

    Loads the data, holds out 20% of the training set for validation,
    standardizes it and reduces it to 100 PCA components. A 200-200-200 MLP
    is then trained once per iteration budget; F1 score and accuracy are
    printed for each, and accuracy is plotted against the budget.

    Returns:
        (acc, x_axis): the accuracies and the matching ``max_iter`` values.
    '''
    # Load, split and preprocess once: every iteration budget is then
    # compared on the same train/validation split instead of a fresh
    # random split per run.
    trn_data, trn_labels, _, _ = load_cifar()
    X_train, X_test, y_train, y_test = train_test_split(trn_data, trn_labels, test_size=0.20)
    trn_data, tst_data = image_prep(X_train, X_test)
    trn_data, tst_data = reduce_dim(train=trn_data, test=tst_data, method='pca', numbercomponents=100)

    acc = []
    x_axis = []
    for i in range(1, 6):
        max_iter = i * 1000
        model = classify_mlp(trn_data, y_train, 200, 200, 200, max_iter, method='MLP')
        output = test(test=tst_data, model=model, method='MLP')
        f_score, accuracy = evaluate(y_test, output)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))

        acc.append(accuracy)
        x_axis.append(max_iter)

    plt.plot(x_axis, acc)
    plt.xlabel('No. of max iterations')
    plt.ylabel('Accuracy')
    plt.show()
    return acc, x_axis

In [None]:
# varying number of nodes in each hidden layer 1000 to 4000

def number_nodes():
    '''Measure MLP validation accuracy for 1000 to 4000 nodes per hidden layer.

    Loads the data, holds out 20% of the training set for validation,
    standardizes it and reduces it to 90 PCA components. A three-hidden-layer
    MLP (``max_iter=5000``) is then trained with 1000, 2000, 3000 and 4000
    nodes in every hidden layer; F1 score and accuracy are printed for each,
    and accuracy is plotted against the layer width.

    Returns:
        (acc, x_axis): the accuracies and the matching nodes-per-layer values.
    '''
    # Load, split and preprocess once so that every width is evaluated on
    # the same train/validation split.
    trn_data, trn_labels, _, _ = load_cifar()
    X_train, X_test, Y_train, Y_test = train_test_split(trn_data, trn_labels, test_size=0.20)
    trn_data, tst_data = image_prep(X_train, X_test)
    # reduce_dim reads the component count from 'numbercomponents'.
    trn_data, tst_data = reduce_dim(train=trn_data, test=tst_data, method='pca', numbercomponents=90)

    acc = []
    x_axis = []
    for i in range(1, 5):
        n_nodes = 1000 * i
        model = classify_mlp(trn_data, Y_train, n_nodes, n_nodes, n_nodes, 5000, method='MLP')
        output = test(test=tst_data, model=model, method='MLP')
        f_score, accuracy = evaluate(Y_test, output)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))
        acc.append(accuracy)
        x_axis.append(n_nodes)

    plt.plot(x_axis, acc)
    plt.xlabel('No. nodes in a hidden layer')
    plt.ylabel('Accuracy')
    plt.show()
    return acc, x_axis

In [None]:
def final_mlp_pca():
    '''Train and score the final MLP on PCA-reduced features.

    Pipeline: load data -> 80/20 train/validation split -> standardize ->
    PCA with 80 components -> 2000-2000-2000 MLP (max_iter 5000) ->
    print validation F1 score and accuracy.
    '''
    images, labels, _, _ = load_cifar()
    X_train, X_val, Y_train, Y_val = train_test_split(images, labels, test_size=0.20)

    scaled_train, scaled_val = image_prep(X_train, X_val)
    feats_train, feats_val = reduce_dim(
        train=scaled_train, test=scaled_val, method='pca', numbercomponents=80
    )

    mlp = classify_mlp(feats_train, Y_train, 2000, 2000, 2000, 5000, method='MLP')
    predictions = test(test=feats_val, model=mlp, method='MLP')

    scores = evaluate(Y_val, predictions)
    print('Val - F1 score: {}\n Accuracy: {}'.format(*scores))

In [None]:
def final_mlp_lda():
    '''Train and score the final MLP on LDA-reduced features.

    Pipeline: load data -> 80/20 train/validation split -> standardize ->
    LDA fitted with the training labels -> 2000-2000-2000 MLP
    (max_iter 5000) -> print validation F1 score and accuracy.
    '''
    images, labels, _, _ = load_cifar()
    X_train, X_val, Y_train, Y_val = train_test_split(images, labels, test_size=0.20)

    scaled_train, scaled_val = image_prep(X_train, X_val)
    feats_train, feats_val = reduce_dim(
        train=scaled_train, test=scaled_val, train_label=Y_train, method='lda'
    )

    mlp = classify_mlp(feats_train, Y_train, 2000, 2000, 2000, 5000, method='MLP')
    predictions = test(test=feats_val, model=mlp, method='MLP')

    scores = evaluate(Y_val, predictions)
    print('Val - F1 score: {}\n Accuracy: {}'.format(*scores))

In [None]:
def final_mlp_raw():
    '''Train and score the final MLP on standardized features without reduction.

    Pipeline: load data -> 80/20 train/validation split -> standardize ->
    2000-2000-2000 MLP (max_iter 5000) -> print validation F1 score and
    accuracy. No dimensionality reduction step is applied.
    '''
    images, labels, _, _ = load_cifar()
    X_train, X_val, Y_train, Y_val = train_test_split(images, labels, test_size=0.20)

    feats_train, feats_val = image_prep(X_train, X_val)

    mlp = classify_mlp(feats_train, Y_train, 2000, 2000, 2000, 5000, method='MLP')
    predictions = test(test=feats_val, model=mlp, method='MLP')

    scores = evaluate(Y_val, predictions)
    print('Val - F1 score: {}\n Accuracy: {}'.format(*scores))

In [None]:
if __name__ == '__main__':
    
    # Entry point: run the three final MLP experiments in sequence
    # (PCA features, LDA features, then features without reduction).
    #
    # The commented-out calls below are disabled experiments. Apart from
    # find_pca_components(), the functions they name are not defined in the
    # cells shown here, so they cannot simply be re-enabled.
#     find_pca_components()
#     find_min_samples_split()
#     find_n_estimators()
#     final_pca()
#     final_lda()
#     find_pca_components_svm()
    final_mlp_pca()
    final_mlp_lda()
    final_mlp_raw()

    

100%|██████████| 1/1 [00:00<00:00,  5.85it/s]
