In [2]:
from __future__ import division
from __future__ import print_function

import time
import os, glob, sys
import numpy as np
import itertools
import pandas as pd
from sklearn import svm
from sklearn import metrics


# Get the current working directory
cwd = os.getcwd()
# Resolve the parent of the working directory and append it to the module
# search path (presumably where DataGenerator.py lives -- confirm).
mydir = os.path.abspath(os.path.join(cwd, ".."))
sys.path.append(mydir)
from DataGenerator import genTrainValidFolds

from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

# SVM Model
-------

In [3]:
# SVC grid: one classifier per (C, gamma) pair, all sharing the same kernel
class SVM_model():
    """Fit one ``svm.SVC`` per (C, gamma) combination and collect its predictions.

    Attributes:
        kernel: kernel name forwarded to ``svm.SVC``.
        c_arr: iterable of C values to try.
        gamma_arr: iterable of gamma values to try.
    """

    def __init__(self, kernel, c_arr, gamma_arr):
        self.kernel = kernel
        self.c_arr = c_arr
        self.gamma_arr = gamma_arr

    def classify(self, trainData, trainLabels, validData):
        """Train on the training split and predict the validation split.

        A fresh classifier is fitted for every element of the cartesian
        product ``c_arr x gamma_arr``.

        Returns:
            dict mapping the key ``"c<C>_gamma<gamma>"`` to the value returned
            by ``predict(validData)`` for that combination.
        """
        predictions = {}
        for c, gamma in itertools.product(self.c_arr, self.gamma_arr):
            # The key format is shared with the performance table index.
            key = "c" + str(c) + "_gamma" + str(gamma)
            model = svm.SVC(kernel=self.kernel, C=c, gamma=gamma)
            model.fit(trainData, trainLabels)
            predictions[key] = model.predict(validData)
        return predictions

In [4]:
# Provide the directory of the batches so that the folds can be generated one after another.
def buildModel(c_arr, gamma_arr, FeatureDir):
    """Run the rbf SVC grid on every fold yielded by ``genTrainValidFolds``.

    Args:
        c_arr: C values forwarded to ``SVM_model``.
        gamma_arr: gamma values forwarded to ``SVM_model``.
        FeatureDir: directory passed to ``genTrainValidFolds``.

    Returns:
        A pair ``(predictions, labels)``; both are dicts keyed by fold number.
        ``predictions[fold]`` is the dict returned by ``SVM_model.classify``
        and ``labels[fold]`` holds that fold's validation labels.
    """
    svmGrid = SVM_model(kernel='rbf', c_arr=c_arr, gamma_arr=gamma_arr)
    predictionsByFold = {}
    labelsByFold = {}

    foldNUM = 0
    for fold in genTrainValidFolds(FeatureDir, oneHot=False):
        trainData, trainLabels, validData, validLabels, labelDict = fold

        # Shapes and the label mapping are reported once, for the first fold only.
        if foldNUM == 0:
            print ('Validation Data and Labels shape: ', validData.shape, validLabels.shape)
            print ('Training Data and Labels shape: ', trainData.shape, trainLabels.shape)
            print ('The Label Dictionary is given as: ', labelDict)

        print ('Cross-Vaidation Fold: ', foldNUM)

        predictionsByFold[foldNUM] = svmGrid.classify(trainData=trainData,
                                                      trainLabels=trainLabels,
                                                      validData=validData)
        labelsByFold[foldNUM] = validLabels
        foldNUM += 1

    return predictionsByFold, labelsByFold

# Performance Metric
-------

In [5]:

def _safeRatio(numerator, denominator):
    """Return numerator/denominator, or NaN when the denominator is zero."""
    if denominator == 0:
        return np.nan
    return numerator / denominator


def _binaryConfusionCounts(trueLabels, estimates):
    """Cross-tabulate true labels (rows) against predictions (columns).

    Returns:
        (counts, total): ``counts`` is a 2x2 float array for the labels 0 and 1;
        a row/column is zero-filled when that label never occurs (e.g. the
        classifier predicted a single class). ``total`` is the number of
        samples in the full cross-tabulation.
    """
    crossTab = pd.crosstab(np.asarray(trueLabels), np.asarray(estimates))
    total = float(crossTab.values.sum())
    counts = crossTab.reindex(index=[0, 1], columns=[0, 1], fill_value=0)
    return counts.values.astype(float), total


def performanceMetric(c_arr, gamma_arr, estimatEach_c_gamma_Dict, validLabelDict):
    """Average per-fold confusion-matrix statistics for every (C, gamma) pair.

    Args:
        c_arr: C values used for the grid; defines the row labels.
        gamma_arr: gamma values used for the grid; defines the row labels.
        estimatEach_c_gamma_Dict: ``{fold: {"c<C>_gamma<gamma>": predictions}}``.
        validLabelDict: ``{fold: true validation labels}``.

    Returns:
        DataFrame indexed by ``"c<C>_gamma<gamma>"`` with the columns
        'avg-numAirplaneClssified', 'avg-numCatClassified',
        'avg-airplaneAccuracy', 'avg-catAccuracy' and 'avg-totAccuracy'.
        Each value is the sum over folds divided by the number of folds.
        Label 0 is treated as airplane and label 1 as cat. Rows that never
        receive a prediction stay NaN; with no folds the all-NaN frame is
        returned.

    Raises:
        KeyError: if a prediction key is not among the row labels, or a fold
            is missing from ``validLabelDict``.
    """
    metricColumns = ['avg-numAirplaneClssified', 'avg-numCatClassified',
                     'avg-airplaneAccuracy', 'avg-catAccuracy', 'avg-totAccuracy']
    # Row labels come from the function's own arguments (not notebook globals)
    # and must match the keys produced by SVM_model.classify.
    rowLabels = ["c" + str(c) + "_" + "gamma" + str(gamma)
                 for c, gamma in itertools.product(c_arr, gamma_arr)]
    performanceDF = pd.DataFrame(np.nan, index=rowLabels, columns=metricColumns)

    # Divide by the real fold count rather than a leaked loop variable.
    numFolds = len(estimatEach_c_gamma_Dict)
    if numFolds == 0:
        return performanceDF

    for foldNUM, c_gamma_prediction in estimatEach_c_gamma_Dict.items():
        for c_gamma, estimates in c_gamma_prediction.items():
            counts, total = _binaryConfusionCounts(validLabelDict[foldNUM], estimates)
            airplaneHits = counts[0, 0]
            catHits = counts[1, 1]
            foldScores = np.array([
                airplaneHits,
                catHits,
                _safeRatio(airplaneHits, counts[0].sum()),
                _safeRatio(catHits, counts[1].sum()),
                _safeRatio(airplaneHits + catHits, total),
            ])
            # A still-NaN cell means "nothing accumulated yet", so start from 0.
            runningSum = performanceDF.loc[c_gamma].fillna(0.0).values
            performanceDF.loc[c_gamma] = runningSum + foldScores

    return performanceDF / numFolds

# performanceDF



          

# Evaluate Models:
------

In [6]:
# All HOG feature sets share one root; each variant keeps its batches in "<variant>/batchData/".
_featureModelsRoot = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/featureModels/"
HOGp1batch_dir = _featureModelsRoot + "HOGp1/batchData/"
HOGp2batch_dir = _featureModelsRoot + "HOGp2/batchData/"
HOGp3batch_dir = _featureModelsRoot + "HOGp3/batchData/"
HOGp4batch_dir = _featureModelsRoot + "HOGp4/batchData/"

## HOG | Orientation=18 | kernel: (9,9) | c_arr = [0.1, 1.0, 10.0] |  gamma_arr = [0.1, 1.0, 10.0]
-----

In [6]:
# Hyper-parameter grid: every (C, gamma) pair below is fitted on each fold.
c_arr = [0.1, 1.0, 10.0]#, 1000.0]
gamma_arr = [0.1, 1, 10.0]

FeatureDir = HOGp1batch_dir

start_time = time.time()

estimatEach_c_gamma_Dict, validLabelDict = buildModel(c_arr=c_arr, gamma_arr=gamma_arr, FeatureDir=FeatureDir)

# Sample the clock once so seconds, minutes and hours describe the same interval.
elapsed = time.time() - start_time
print("--- %s seconds " % elapsed +
      '--- %s minutes' % str(elapsed/60) +
      '--- %s hours' % str(elapsed/3600))

Validation Data and Labels shape:  (1000, 162) (1000,)
Training Data and Labels shape:  (9000, 162) (9000,)
The Label Dictionary is given as:  {0: 'trainDataAirplane.pickle', 1: 'trainDataCat.pickle'}
Validation Data and Labels shape:  (1000, 162) (1000,)
Training Data and Labels shape:  (9000, 162) (9000,)
The Label Dictionary is given as:  {0: 'trainDataAirplane.pickle', 1: 'trainDataCat.pickle'}
Validation Data and Labels shape:  (1000, 162) (1000,)
Training Data and Labels shape:  (9000, 162) (9000,)
The Label Dictionary is given as:  {0: 'trainDataAirplane.pickle', 1: 'trainDataCat.pickle'}
Validation Data and Labels shape:  (1000, 162) (1000,)
Training Data and Labels shape:  (9000, 162) (9000,)
The Label Dictionary is given as:  {0: 'trainDataAirplane.pickle', 1: 'trainDataCat.pickle'}
Validation Data and Labels shape:  (1000, 162) (1000,)
Training Data and Labels shape:  (9000, 162) (9000,)
The Label Dictionary is given as:  {0: 'trainDataAirplane.pickle', 1: 'trainDataCat.pick

In [12]:
# Summarise the current run's per-fold predictions; the bare last expression displays the table.
performanceDF = performanceMetric(
    c_arr=c_arr,
    gamma_arr=gamma_arr,
    estimatEach_c_gamma_Dict=estimatEach_c_gamma_Dict,
    validLabelDict=validLabelDict,
)
performanceDF

Unnamed: 0,avg-numAirplaneClssified,avg-numCatClassified,avg-airplaneAccuracy,avg-catAccuracy,avg-totAccuracy
c0.1_gamma0.1,396.7,445.1,0.7934,0.8902,0.8418
c0.1_gamma1,430.0,434.9,0.86,0.8698,0.8649
c0.1_gamma10.0,499.5,27.1,0.999,0.0542,0.5266
c1.0_gamma0.1,419.4,439.6,0.8388,0.8792,0.859
c1.0_gamma1,433.6,444.6,0.8672,0.8892,0.8782
c1.0_gamma10.0,472.9,252.6,0.9458,0.5052,0.7255
c10.0_gamma0.1,427.2,442.4,0.8544,0.8848,0.8696
c10.0_gamma1,434.2,440.8,0.8684,0.8816,0.875
c10.0_gamma10.0,470.5,272.8,0.941,0.5456,0.7433


## HOG | Orientation=18 | kernel: (6,6) | c_arr = [0.1, 1.0, 10.0] |  gamma_arr = [0.1, 1.0, 10.0]
-----

In [8]:
# Hyper-parameter grid: every (C, gamma) pair below is fitted on each fold.
c_arr = [0.1, 1.0, 10.0]#, 1000.0]
gamma_arr = [0.1, 1, 10.0]

# NOTE(review): the section heading mentions a (6,6) kernel, yet this still
# points at HOGp1batch_dir -- confirm the intended feature directory.
FeatureDir = HOGp1batch_dir
print (FeatureDir)
start_time = time.time()

estimatEach_c_gamma_Dict, validLabelDict = buildModel(c_arr=c_arr, gamma_arr=gamma_arr, FeatureDir=FeatureDir)

# Sample the clock once so seconds, minutes and hours describe the same interval.
elapsed = time.time() - start_time
print("--- %s seconds " % elapsed +
      '--- %s minutes' % str(elapsed/60) +
      '--- %s hours' % str(elapsed/3600))

/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/featureModels/HOGp1/batchData/
Validation Data and Labels shape:  (1000, 1152) (1000,)
Training Data and Labels shape:  (9000, 1152) (9000,)
The Label Dictionary is given as:  {0: 'trainDataAirplane.pickle', 1: 'trainDataCat.pickle'}
Cross-Vaidation Fold:  0


KeyboardInterrupt: 