# Imports

In [19]:
import sys 
import numpy as np 
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import torch
import torchvision 
import torchvision.transforms as transforms
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
import pickle

device=torch.device("cuda:0")

# Load CIFAR100

In [21]:
def unpickle(file):
    with open(file, 'rb') as fo:
        res = pickle.load(fo, encoding='bytes')
    return res

transform = transforms.Compose(
    [transforms.ToTensor()])

#training data
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, 
                                         download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=300,
                                          shuffle=True, num_workers=2)

valtrainloader=trainloader = torch.utils.data.DataLoader(trainset, batch_size=trainset.__len__(),
                                          shuffle=True, num_workers=2)

#test data
testset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=testset.__len__(),
                                         shuffle=False, num_workers=2)

meta = unpickle('datasets/cifar-100-python/meta')

fine_label_names = [t.decode('utf8') for t in meta[b'fine_label_names']]

train = unpickle('datasets/cifar-100-python/train')

filenames = [t.decode('utf8') for t in train[b'filenames']]
fine_labels = train[b'fine_labels']
data = train[b'data']
classes=fine_label_names

Files already downloaded and verified
Files already downloaded and verified


In [44]:
def process(dataloader,transform_list):
    for data in dataloader:
        x,y=data
    x=x.view(x.shape[0],-1)
    for i in transform_list:
        i.fit(x)
        x=i.transform(x)
    y=y.numpy()
    return x,y

In [23]:
svcrbf=svm.SVC(kernel='rbf',gamma=.1,C=1)
parameters={'gamma': [1e-2, 1e-1,1],'C': [.1,1,10]}
for data in trainloader:
    traininputs,traintargets=data;
    traininputs=flatten(traininputs)
    traintargets=traintargets

In [42]:
pca=PCA(n_components=180) #isolates the most important image features
scaling = MinMaxScaler(feature_range=(-1,1)) #normalizes features between -1 and 1
pip=make_pipeline(pca,scaling,svcrbf)

tlist=[]
tlist.append(pca);tlist.append(scaling)

pip.fit(traininputs,traintargets)

In [31]:
pca.fit(traininputs)
traininputs=pca.transform(traininputs)
scaling.fit(traininputs);
traininputs=scaling.transform(traininputs)
type(traininputs)

numpy.ndarray

In [34]:
traini=torch.from_numpy(traininputs)

TypeError: expected np.ndarray (got Tensor)

In [27]:
type(traininputs)

numpy.ndarray

In [37]:
print(type(traini));print(type(traintargets))

<class 'torch.Tensor'>
<class 'torch.Tensor'>


In [39]:
a=traini.numpy()
b=traintargets.numpy()

In [40]:
svc = svm.SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(a,b)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'gamma': [0.01, 0.1, 1], 'C': [0.1, 1, 10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

svc=GridSearchCV(svcrbf, parameters)

svc.fit(traininputs,traintargets)

In [45]:
testinputs,testtargets=process(testloader,tlist)

In [48]:
preds=clf.predict(testinputs)

In [78]:
for i in clf.cv_results_:
    print(i)
    print(clf.cv_results_[i])

mean_fit_time
[440.41998974 424.45138351 478.85944112 410.06473041 316.68031462
 456.58601793 288.13716388 349.31616648 463.06717809]
std_fit_time
[ 1.08146599  1.20750973 29.32461559  0.82350787  0.52982706  1.14757614
  0.39982894  0.72883102  1.54844006]
mean_score_time
[178.72021635 163.55401174 179.88715418 164.98846332 166.63075987
 165.80191517 163.63871789 164.7410988  166.751791  ]
std_score_time
[11.86617793  0.43277718 14.93605932  0.69756003  2.60520886  0.81057945
  0.47005187  0.9253255   1.52683554]
param_C
[0.1 0.1 0.1 1 1 1 10 10 10]
param_gamma
[0.01 0.1 1 0.01 0.1 1 0.01 0.1 1]
params
[{'C': 0.1, 'gamma': 0.01}, {'C': 0.1, 'gamma': 0.1}, {'C': 0.1, 'gamma': 1}, {'C': 1, 'gamma': 0.01}, {'C': 1, 'gamma': 0.1}, {'C': 1, 'gamma': 1}, {'C': 10, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}, {'C': 10, 'gamma': 1}]
split0_test_score
[0.13083832 0.13964072 0.02982036 0.15934132 0.20592814 0.14844311
 0.19071856 0.22497006 0.16215569]
split1_test_score
[0.13251497 0.14077844 0.033



In [87]:
C=clf.best_params_['C']
C=1
gamma=clf.best_params_['gamma']
valinputs,valtargets=process(valtrainloader,tlist)

In [88]:
finsvc=svm.SVC(kernel='rbf',C=C,gamma=gamma)
finsvc.fit(valinputs,valtargets)


SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [89]:
finpreds=finsvc.predict(testinputs)


classifiers = []
C_range = np.logspace(-2,2,5) #[1.e-02 1.e-01 1.e+00 1.e+01 1.e+02]
gamma_range = np.logspace(-3, 1, 5) #[1.e-03 1.e-02 1.e-01 1.e+00 1.e+01]

for c in C_range:
    for gamma in gamma_range:
        svcrbf=svm.SVC(C=c,kernel='rbf',gamma=gamma)
        svc=make_pipeline(pca,scaling,svcrbf)
        classifiers.append([gamma,c,svc])

classif_idx=0;
for j in range(5):
    for data in trainloader:
        traininputs,traintargets=data;
        traininputs=flatten(traininputs)
        classifiers[classif_idx][2]=classifiers[classif_idx][2].fit(traininputs,traintargets)
        classif_idx+=1


# Evaluation

In [49]:
def accuracy(preds,targs,training=False):
    t=torch.from_numpy(preds)
    correct = (torch.eq(t,targs)).sum().item()
    accuracy=100*(correct/len(preds))
    sys.stdout.write("\033[1;31m")
    if training is False:
        print('Overall Testing Accuracy: ',accuracy)
    else:
        print('Overall Training Accuracy: ',accuracy)
    sys.stdout.write("\033[0;0m")    
    return accuracy

In [54]:
tars=torch.from_numpy(testtargets)

In [90]:
x=accuracy(finpreds,tars)

[1;31mOverall Testing Accuracy:  5.2
[0;0m

In [None]:
testing_accuracies=[]
for data in testloader:
    testinputs, testtargets=data;
    testinputs=flatten(testinputs)
for i in classifiers:
    x=accuracy(i[2].predict(testinputs),testtargets)
    testing_accuracies.append(x)


In [None]:
val_svm=svm.SVC(C=1,gamma=.1,kernel='rbf')
fin_svc=make_pipeline(pca,scaling,val_svm)
for data in valtrainloader:
    fininputs,fintargets=data;
    fininputs=flatten(fininputs);
fin_svc.fit(fininputs,fintargets)
x=accuracy(fin_svc.predict(testinputs),testtargets)

To try: rbf kernel, c/gamma values, pca/svd

classaccuracy=np.zeros(len(classes))
ntargets=testtargets.cpu().numpy()
for i in range(len(ntargets)):
    if TestPredictions[i]==ntargets[i]:
        classaccuracy[ntargets[i]]=classaccuracy[ntargets[i]]+1
classaccuracy=classaccuracy/10
for i in range(len(classes)):
    print('class accuracy:',classes[i],',',classaccuracy[i],'%')

from sklearn.metrics import confusion_matrix
cm=confusion_matrix(TestPredictions,ntargets)
print(classes)
print(cm);type(cm)