# Imports 

In [37]:
import sys 

import numpy as np 
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.externals import joblib

import torch
import torchvision 
import torchvision.transforms as transforms

import pickle
import pandas as pd
import os
from skimage import io

sys.path.append('../../Utils')

from SVC_Utils import *
from data_downloaders import *

In [5]:
# Download and preprocess LFW under ./data using the project helper get_lfw
# (star-imported above; presumably defined in data_downloaders — verify).
get_lfw('./data')
# Root directory scanned for images: each sub-directory is treated as one class.
# NOTE(review): "lfw_20" is presumably a subset produced by get_lfw — confirm.
data_dir = "./data/lfw/lfw_20/"

Downloading LFW.
LFW successfully downloaded and preprocessed.


In [8]:
# Collect every image path under data_dir. Each sub-directory is one class and
# its name is the class label. Entries are sorted so that the path order (and
# therefore the class indices assigned below) does not depend on the arbitrary
# order returned by os.listdir.
images = []
for class_dir in sorted(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_dir)
    if not os.path.isdir(class_path):
        # Skip stray files sitting next to the class folders.
        continue
    for fname in sorted(os.listdir(class_path)):
        images.append(os.path.join(class_path, fname))

# classes: class names in index order; classes_to_idx: class name -> integer label.
classes = []
classes_to_idx = {}
for path in images:
    # The label is the image's parent directory name. Reading it through
    # os.path avoids depending on how many components data_dir has.
    name = os.path.basename(os.path.dirname(path))
    if name not in classes_to_idx:
        classes_to_idx[name] = len(classes)
        classes.append(name)

# Shuffle the paths with a fixed seed so the train/test split is reproducible.
SPLIT_SEED = 0
images = np.random.RandomState(SPLIT_SEED).permutation(images)

In [16]:
# 80/20 train/test split over the shuffled image paths.
trainset_len = int(.8 * len(images))
train_imgs = images[:trainset_len]
test_imgs = images[trainset_len:]
# Report the split sizes from the path arrays themselves: the Dataset objects
# (trainset/testset) are only created in a later cell, so referencing them
# here fails on a fresh kernel.
print(len(train_imgs))
print(len(test_imgs))

2418
605


In [38]:
class LFW(torch.utils.data.Dataset):
    """Map-style dataset that reads images from disk and labels them by folder.

    Args:
        img_list: sequence of image file paths. Each file must sit directly
            inside a directory named after its class.
        classes_list: mapping from class name to integer label (despite the
            parameter name it is indexed like a dict).
        transform: optional callable applied to the loaded image before it is
            returned.
    """

    def __init__(self, img_list, classes_list, transform=None):
        self.classes_to_idx = classes_list
        self.img_list = img_list
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.img_list)

    def _label_for(self, path):
        """Return the integer label for ``path`` from its parent directory name.

        Using os.path instead of a fixed index into ``path.split('/')`` keeps
        the lookup correct regardless of how deep the data directory is.

        Raises:
            KeyError: if the parent directory name is not a known class.
        """
        class_name = os.path.basename(os.path.dirname(path))
        return self.classes_to_idx[class_name]

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(sample, label)``."""
        path = self.img_list[idx]
        sample = io.imread(path)
        label = self._label_for(path)
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label


In [39]:
# ToTensor is the only preprocessing applied to each loaded image.
transform = transforms.ToTensor()

# Wrap both path splits in LFW datasets that share one class-name -> index mapping.
trainset = LFW(train_imgs, classes_to_idx, transform=transform)
testset = LFW(test_imgs, classes_to_idx, transform=transform)

# Each loader yields its whole split as a single shuffled batch, using 2 workers.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=len(trainset),
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=len(testset),
    shuffle=True,
    num_workers=2,
)

In [40]:
# Turn each dataset into an (inputs, targets) pair via the project helper load.
# NOTE(review): load receives the Dataset objects, not trainloader/testloader —
# the loaders built in the previous cell appear unused here; confirm.
traininputs, traintargets=load(trainset)
testinputs, testtargets=load(testset)

# Model Training

In [41]:
# Hyper-parameter grid handed to hp_grid in the next cell.
# n_components is presumably the number of PCA components kept — verify in hp_grid.
n_components=180
C_range = np.logspace(-1,1,3) #[1.e-01 1.e+00 1.e+01]
gamma_range = np.logspace(-2, 0, 3) #[1.e-02 1.e-01 1.e+00]

In [42]:
# Build the candidate classifiers for the grid via the project helper hp_grid.
# NOTE(review): the evaluation cell walks the fitted list with C as the outer loop
# and gamma as the inner loop — presumably hp_grid returns one classifier per
# (C, gamma) pair in that order; verify.
clfs=hp_grid(n_components=n_components, C_range=C_range, gamma_range=gamma_range)

In [None]:
# Fit every candidate on the training data using the project helper train_grid;
# the result is indexed per classifier by the evaluation cell.
fitted_clfs=train_grid(clfs, traininputs, traintargets)

# Model Testing/Evaluation

In [None]:
# Store training and testing accuracies in matrices (rows: C_range, cols: gamma_range).
# The matrices are zero-initialised: every cell is overwritten below, and zeros
# (unlike random draws) neither consume global RNG state nor look like real
# accuracies if the loop is interrupted.
n_C, n_gamma = len(C_range), len(gamma_range)
train_accs = np.zeros((n_C, n_gamma))
test_accs = np.zeros((n_C, n_gamma))
test_preds = []

for i in range(n_C):
    for j in range(n_gamma):
        # fitted_clfs is read in row-major order: C is the outer index, gamma the inner.
        clf = fitted_clfs[i * n_gamma + j]
        # predict_eval returns (predictions, accuracy); only the accuracy is kept for training data.
        train_accs[i, j] = predict_eval(clf, traininputs, traintargets, training=True)[1]
        preds, test_accs[i, j] = predict_eval(clf, testinputs, testtargets)
        test_preds.append(preds)

In [None]:
# Label rows/columns directly from the searched grids so the labels always match
# the shape and the values of the accuracy matrices (rows: C, cols: gamma).
# Hard-coded label lists break as soon as the grid size changes.
idx = ['C = {:g}'.format(c) for c in C_range]
cols = ['gamma = {:g}'.format(g) for g in gamma_range]

trainacc_df = pd.DataFrame(data=train_accs, index=idx, columns=cols)
testacc_df = pd.DataFrame(data=test_accs, index=idx, columns=cols)

In [None]:
# Training accuracy for every (C, gamma) pair, shaded with the GnBu colormap.
trainacc_df.style.background_gradient(cmap='GnBu')

In [None]:
# Test accuracy for every (C, gamma) pair, shaded with the GnBu colormap.
testacc_df.style.background_gradient(cmap='GnBu')

# Save Models

In [None]:
# Select two classifiers from the grid using the accuracy matrices. maxacc_gen is a
# project helper; presumably it returns the highest-test-accuracy model and the one
# with the smallest train/test gap — verify in SVC_Utils.
# NOTE(review): this passes `clfs`, not `fitted_clfs` — confirm that is what maxacc_gen expects.
maxacc, gen=maxacc_gen(test_accs, train_accs, clfs)

# Output file names for the two selected models.
# NOTE(review): the names say CIFAR10 but this notebook works on LFW — looks like a
# copy-paste leftover that may overwrite another notebook's artifacts; confirm before renaming.
fn_max_acc = 'SVMCIFAR10_maxacc_proba.pkl'
fn_gen = 'SVMCIFAR10_gen_proba.pkl'

In [None]:
# Pass each selected classifier, the training data and its target file name to the
# project helper save_proba (presumably it refits with probability estimates and
# pickles the model — verify in SVC_Utils).
save_proba(fn_max_acc, maxacc, traininputs, traintargets)
save_proba(fn_gen, gen, traininputs, traintargets)