In [1]:
import os
import cv2
import pickle
import numpy as np
import pdb
import requests
from collections import defaultdict
import random 
import time

from sklearn import tree
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from tqdm import *

import matplotlib.pyplot as plt
from functools import wraps
from time import time as _timenow 
from sys import stderr


## Load CIFAR-10 Data

In [2]:
def load_cifar():
    
    trn_data, trn_labels, tst_data, tst_labels = [], [], [], []
    def unpickle(file):
        with open(file, 'rb') as fo:
            data = pickle.load(fo, encoding ='latin1')
        return data
    
    for i in trange(1):
        batchName = './data/data_batch_{0}'.format(i + 1)
        unpickled = unpickle(batchName)
        trn_data.extend(unpickled['data'])
        trn_labels.extend(unpickled['labels'])
    unpickled = unpickle('./data/test_batch')
    tst_data.extend(unpickled['data'])
    tst_labels.extend(unpickled['labels'])
    return trn_data, trn_labels, tst_data, tst_labels


## Image preprocessing

In [3]:
def image_prep(train, test):
    ''' pre-processes the given image
        performs mean normalization and other such operations'''
    scaler = preprocessing.StandardScaler().fit(train)
    test_data = scaler.transform(test)
    train_data = scaler.transform(train)
    return train_data, test_data

## Dimensionality reduction using PCA

In [4]:
def reduce_dim(**kwargs):
    ''' performs dimensionality reduction'''
    if kwargs['method'] == 'pca':
        pca = PCA(n_components = kwargs['numbercomponents'])
        pca = pca.fit(kwargs['train'])
        train_new = pca.transform(kwargs['train'])
        test_new = pca.transform(kwargs['test'])
        return train_new, test_new
    elif kwargs['method'] == 'lda':
        c = LinearDiscriminantAnalysis(numbercomponents = 600)
        c = c.fit(kwargs['train'], kwargs['train_label'])
        train_new = c.transform(kwargs['train'])
        test_new = c.transform(kwargs['test'])
        return train_new, test_new
        

## Classification using kernel SVM

In [5]:
def classify(X, Y, **kwargs):
    ''' trains a classifier by taking input features
        and their respective targets and returns the trained model'''
    if kwargs['method'] == 'SVM':
        c = LinearSVC()
        c.fit(X, Y)
        return c

## Evaluation 

In [6]:
def evaluate(target, predicted):
    f1 = f1_score(target, predicted, average='micro')
    acc = accuracy_score(target, predicted)
    return f1, acc

In [7]:
def test(**kwargs):
    '''takes test data and trained classifier model,
    performs classification and prints accuracy and f1-score'''
    if kwargs['method'] == 'CART':
        output = kwargs['model'].predict(kwargs['test'])
    if kwargs['method'] == 'SVM':
        output = kwargs['model'].predict(kwargs['test'])
    return output

In [8]:
def find_pca_components_svm():
    trn_data, trn_labels, tst_data, tst_labels = load_cifar()
    
    X_train, X_test, Y_train, Y_test = train_test_split(trn_data, trn_labels,test_size = 0.20) 
    acc = []
    xaxis = []
    i = 50
    while i <= 250:
        trn_data, tst_data = image_prep(X_train, X_test)
        trn_data, tst_data = reduce_dim(train = trn_data, test = tst_data, method ='pca', numbercomponents = i)
        model = classify(trn_data, Y_train, method ='SVM')
        output = test(test = tst_data, model = model, method ='SVM')
        f_score, accuracy = evaluate(Y_test, output)
        print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))
        acc.append(accuracy)
        xaxis.append(i)
        i += 10
        
    plt.plot(xaxis, acc)
    plt.xlabel('No. of components for PCA')
    plt.ylabel('Accuracy')
    plt.show()
    return acc, xaxis
    
    ''' perform dimesioality reduction/feature extraction and classify the features into one of 10 classses
        print accuracy and f1-score.
        '''

In [9]:
# SVM
def linear_svm():
    trn_data, trn_labels, tst_data, tst_labels = load_cifar()
    X_train, X_test, Y_train, Y_test = train_test_split(trn_data, trn_labels, test_size = 0.20)
    trn_data, tst_data = image_prep(X_train, tst_data)
    trn_data, tst_data = reduce_dim(train = trn_data, test = tst_data, train_label = Y_train, method = 'pca', numbercomponents = 100)
    model = classify(trn_data, Y_train, method = 'SVM')
    output = test(test = tst_data, model = model, method = 'SVM')
    f_score, accuracy = evaluate(tst_labels, output)
    print('Val - F1 score: {}\n Accuracy: {}'.format(f_score, accuracy))

In [10]:
if __name__ == '__main__':
    
#     find_pca_components()
#     find_min_samples_split()
#     find_n_estimators()
#     final_pca()
#     final_lda()
    linear_svm()
    

100%|██████████| 1/1 [00:00<00:00, 18.03it/s]


Val - F1 score: 0.2445
 Accuracy: 0.2445


