In [26]:
import os
import cv2
import pickle
import numpy as np
import pdb
import requests
from collections import defaultdict
import random 
import time
import matplotlib.pyplot as plt 

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import *

from functools import wraps
from time import time as _timenow 
from sys import stderr


## Load CIFAR-10 Data

In [27]:
def load_cifar(data_dir='./data', num_batches=1):
    '''Read pickled CIFAR-10 batches from disk.

    Args:
        data_dir: directory containing the ``data_batch_<k>`` files and the
            ``test_batch`` file. Defaults to ``./data``.
        num_batches: number of training batches to read, starting from
            ``data_batch_1``. Defaults to 1.

    Returns:
        A tuple ``(trn_data, trn_labels, tst_data, tst_labels)`` of Python
        lists, built from the ``'data'`` and ``'labels'`` entries of each
        unpickled batch.
    '''
    def unpickle(file):
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only read batches obtained from a trusted source.
        with open(file, 'rb') as fo:
            return pickle.load(fo, encoding='latin1')

    trn_data, trn_labels = [], []
    for i in trange(num_batches):
        batch_path = os.path.join(data_dir, 'data_batch_{0}'.format(i + 1))
        batch = unpickle(batch_path)
        trn_data.extend(batch['data'])
        trn_labels.extend(batch['labels'])
        print("Data Read")

    test_batch = unpickle(os.path.join(data_dir, 'test_batch'))
    tst_data = list(test_batch['data'])
    tst_labels = list(test_batch['labels'])
    return trn_data, trn_labels, tst_data, tst_labels

## Image preprocessing

In [28]:
def image_prep(train, test):
    '''Standardize features using statistics learned from the training set.

    A StandardScaler is fitted on ``train`` only and then applied to both
    ``train`` and ``test``.

    Returns:
        (train_data, test_data): the scaled versions of the two inputs.
    '''
    normalizer = preprocessing.StandardScaler()
    normalizer.fit(train)
    return normalizer.transform(train), normalizer.transform(test)

## Dimensionality reduction using PCA or LDA

In [29]:
def reduce_dim(**kwargs):
    '''Project train and test features into a lower-dimensional space.

    Keyword Args:
        method: ``'pca'`` or ``'lda'``.
        train: training features; the projection is fitted on these.
        test: test features; transformed with the fitted projection.
        train_label: training labels (required when method is ``'lda'``).
        n_components: requested number of components. Defaults to 100.

    Returns:
        (train_new, test_new): the transformed train and test features.

    Raises:
        KeyError: if a required keyword argument is missing.
        ValueError: if ``method`` is neither ``'pca'`` nor ``'lda'``.
    '''
    method = kwargs['method']
    if method not in ('lda', 'pca'):
        # Previously an unknown method fell through and returned None.
        raise ValueError(
            "unknown dimensionality reduction method: {!r}".format(method))

    n_components = kwargs.get('n_components', 100)
    train, test = kwargs['train'], kwargs['test']

    if method == 'lda':
        labels = kwargs['train_label']
        # LDA can produce at most min(n_features, n_classes - 1) components;
        # asking for more is rejected by scikit-learn, so cap the request.
        upper_bound = min(np.shape(train)[1], len(np.unique(labels)) - 1)
        reducer = LinearDiscriminantAnalysis(
            n_components=min(n_components, upper_bound))
        reducer.fit(train, labels)
    else:
        reducer = PCA(n_components=n_components, whiten=True)
        reducer.fit(train)

    return reducer.transform(train), reducer.transform(test)

## Classification using kernel SVM

In [30]:
def classify(X, y, **kwargs):
    '''Train a classifier on the given features and targets.

    Args:
        X: training features.
        y: training targets.

    Keyword Args:
        method: classifier to train; only ``'SVM'`` is supported.
        kernel: SVC kernel. Defaults to ``'rbf'``.
        gamma: SVC gamma. Defaults to ``'auto'``.
        decision_function_shape: Defaults to ``'ovr'``.
        max_iter: solver iteration limit. Defaults to -1 (no limit).

    Returns:
        The fitted classifier.

    Raises:
        KeyError: if ``method`` is not supplied.
        ValueError: if ``method`` is not ``'SVM'``.
    '''
    method = kwargs['method']
    if method != 'SVM':
        raise ValueError("unknown classification method: {!r}".format(method))

    # Defaults mirror the SVC settings used by the parameter searches.
    model = SVC(
        kernel=kwargs.get('kernel', 'rbf'),
        gamma=kwargs.get('gamma', 'auto'),
        decision_function_shape=kwargs.get('decision_function_shape', 'ovr'),
        max_iter=kwargs.get('max_iter', -1),
    )
    model.fit(X, y)
    return model

## Evaluation 

In [31]:
def evaluate(target, predicted):
    '''Score predictions against the true labels.

    Returns:
        (f1, acc): the micro-averaged F1 score and the accuracy.
    '''
    scores = (
        f1_score(target, predicted, average='micro'),
        accuracy_score(target, predicted),
    )
    return scores

In [32]:
def test(train, test):
    '''Standardize train and test features with a scaler fitted on train.

    Despite its name, this function does not run a classifier or print any
    metrics: it performs the same standardization as ``image_prep`` and is
    kept only so existing callers continue to work.

    Returns:
        (train_data, test_data): the scaled versions of the two inputs.
    '''
    # Delegate instead of duplicating the scaling logic.
    return image_prep(train, test)
    

In [39]:
def param1_search():
    '''Sweep the number of PCA components and plot validation accuracy.

    For each component count in 90, 190, ..., 890 the training data is
    projected with whitened PCA, 20% of it is held out for validation, an
    RBF-kernel SVC is trained on the rest, and the validation accuracy is
    recorded. The accuracies are then plotted against the component count.

    NOTE(review): unlike the kernel and max-iteration searches, this sweep
    does not standardize the data with image_prep first, and PCA is fitted
    before the train/validation split - confirm both are intended.
    '''
    trn_data, trn_labels, _, _ = load_cifar()
    # Convert once; the array does not change between iterations.
    trn_data = np.asarray(trn_data)

    component_counts = []
    accuracies = []
    for n_components in range(90, 900, 100):
        print(n_components)
        pca = PCA(n_components=n_components, whiten=True).fit(trn_data)
        x_train_pca = pca.transform(trn_data)

        # A fixed seed gives every component count the same held-out rows,
        # so accuracy differences reflect the parameter, not the split.
        x_fit, x_val, y_fit, y_val = train_test_split(
            x_train_pca, trn_labels, test_size=0.20, random_state=0)
        svc = SVC(gamma='auto', decision_function_shape='ovr', kernel='rbf')
        svc.fit(x_fit, y_fit)
        y_pred_svm = svc.predict(x_val)

        f_score, accuracy = evaluate(y_val, y_pred_svm)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))
        component_counts.append(n_components)
        accuracies.append(accuracy)

    plt.plot(component_counts, accuracies)
    plt.xlabel('No. of PCA Components')
    plt.ylabel('Accuracy')
    plt.show()

In [40]:
def param2_search():
    '''Compare SVC kernels on a held-out validation split and plot accuracy.

    The training data is standardized, projected onto 100 whitened PCA
    components, and split 80/20 once. An SVC is trained per kernel on the
    80% portion and scored on the 20% portion.
    '''
    kernels = ['poly', 'sigmoid', 'linear', 'rbf']
    trn_data, trn_labels, tst_data, _ = load_cifar()
    trn_data, tst_data = image_prep(trn_data, tst_data)
    trn_data = np.asarray(trn_data)

    pca = PCA(n_components=100, whiten=True).fit(trn_data)
    x_train_all = pca.transform(trn_data)

    # Split once with a fixed seed so every kernel is trained and scored on
    # exactly the same rows.
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_train_all, trn_labels, test_size=0.20, random_state=0)

    accuracies = []
    for kernel in kernels:
        print(kernel)
        svc = SVC(gamma='auto', decision_function_shape='ovr', kernel=kernel)
        svc.fit(x_fit, y_fit)
        y_pred_svm = svc.predict(x_val)
        # evaluate expects (target, predicted).
        f_score, accuracy = evaluate(y_val, y_pred_svm)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))
        accuracies.append(accuracy)

    positions = list(range(len(kernels)))
    plt.plot(positions, accuracies)
    # Label the ticks with kernel names instead of bare indices.
    plt.xticks(positions, kernels)
    plt.xlabel('Kernel used')
    plt.ylabel('Accuracy')
    plt.show()
    print(kernels)

In [41]:
def param3_search():
    '''Compare SVC iteration limits on a held-out split and plot accuracy.

    The training data is standardized, projected onto 100 whitened PCA
    components, and split 80/20 once. An RBF-kernel SVC is trained per
    ``max_iter`` value on the 80% portion and scored on the 20% portion.
    '''
    iteration_limits = [10000, 20000, 30000, 40000]
    trn_data, trn_labels, tst_data, _ = load_cifar()
    trn_data, tst_data = image_prep(trn_data, tst_data)
    trn_data = np.asarray(trn_data)

    pca = PCA(n_components=100, whiten=True).fit(trn_data)
    x_train_all = pca.transform(trn_data)

    # Split once with a fixed seed so every iteration limit is trained and
    # scored on exactly the same rows.
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_train_all, trn_labels, test_size=0.20, random_state=0)

    accuracies = []
    for max_iter in iteration_limits:
        print(max_iter)
        svc = SVC(gamma='auto', decision_function_shape='ovr', kernel='rbf',
                  max_iter=max_iter)
        svc.fit(x_fit, y_fit)
        y_pred_svm = svc.predict(x_val)
        # evaluate expects (target, predicted).
        f_score, accuracy = evaluate(y_val, y_pred_svm)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))
        accuracies.append(accuracy)

    plt.plot(iteration_limits, accuracies)
    plt.xlabel('Max iterations')
    plt.ylabel('Accuracy')
    plt.show()
    print(iteration_limits)

In [46]:
def lda_method():
    '''Train an RBF SVC on LDA-reduced features and report test metrics.

    The data is standardized, projected with LinearDiscriminantAnalysis
    fitted on the training set, and an SVC trained on the projected training
    data is scored on the projected test data.
    '''
    trn_data, trn_labels, tst_data, tst_labels = load_cifar()
    trn_data, tst_data = image_prep(trn_data, tst_data)
    trn_data = np.asarray(trn_data)
    tst_data = np.asarray(tst_data)

    # LDA supports at most min(n_features, n_classes - 1) components;
    # requesting more is rejected by scikit-learn, so cap the request.
    n_components = min(100, trn_data.shape[1], len(np.unique(trn_labels)) - 1)
    clf = LinearDiscriminantAnalysis(n_components=n_components)
    clf = clf.fit(trn_data, trn_labels)
    x_train_lda = clf.transform(trn_data)
    x_test_lda = clf.transform(tst_data)

    svc = SVC(gamma='auto', decision_function_shape='ovo', kernel='rbf',
              max_iter=20000)
    svc.fit(x_train_lda, trn_labels)
    y_pred_svm = svc.predict(x_test_lda)
    # evaluate expects (target, predicted); these are test-set metrics.
    f_score, accuracy = evaluate(tst_labels, y_pred_svm)
    print('Test - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))

In [47]:
def main():
    '''Run the three parameter searches, the LDA baseline, and the final
    PCA + SVC pipeline, printing the test-set F1 score and accuracy of the
    final pipeline.
    '''
    print("Search for Parameter 1-PCA")
    param1_search()
    print("Search for Parameter 2-Kernel")
    param2_search()
    print("Search for Parameter 3-Max Iterations")
    param3_search()
    print("Accuracy for LDA method")
    lda_method()

    # Final pipeline: standardize -> whitened PCA (90 components) -> RBF SVC.
    trn_data, trn_labels, tst_data, tst_labels = load_cifar()
    trn_data, tst_data = image_prep(trn_data, tst_data)
    trn_data = np.asarray(trn_data)
    tst_data = np.asarray(tst_data)
    pca = PCA(n_components=90, whiten=True)
    pca = pca.fit(trn_data)
    x_train_pca = pca.transform(trn_data)
    x_test_pca = pca.transform(tst_data)

    svc = SVC(gamma='auto', decision_function_shape='ovo', kernel='rbf',
              max_iter=20000)
    svc.fit(x_train_pca, trn_labels)
    y_pred_svm = svc.predict(x_test_pca)
    # evaluate expects (target, predicted); these are test-set metrics.
    f_score, accuracy = evaluate(tst_labels, y_pred_svm)
    print('Test - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))

In [48]:
# Entry point: call main() when this code runs with __name__ set to '__main__'.
if __name__ == '__main__':
    main()

100%|██████████| 1/1 [00:00<00:00,  9.64it/s]

Search for Parameter 1-PCA
Search for Parameter 2-Kernel
Search for Parameter 3-Max Iterations
Accuracy for LDA method
Data Read



  0%|          | 0/1 [00:00<?, ?it/s]

Val - F1 score: 0.2565
 Accuracy: 0.2565


100%|██████████| 1/1 [00:00<00:00,  8.98it/s]


Data Read
Val - F1 score: 0.4762
 Accuracy: 0.4762
