In [1]:
import pickle
import os
import numpy as np
import sklearn
import sklearn.preprocessing
import torch

In [2]:
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``, so 8-bit strings written by Python 2 come back as
    ``bytes`` objects (for a pickled dict this includes its keys).

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file;
    only call this on files obtained from a trusted source.

    Args:
        file: path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    with open(file, 'rb') as fo:
        # Local is called ``loaded`` (not ``dict``) so the builtin is not shadowed.
        loaded = pickle.load(fo, encoding='bytes')
    return loaded

def get_data_labels(dict_):
    """Return the ``'data'`` and ``'labels'`` entries of an unpickled batch dict.

    Keys that are ``bytes`` are decoded as ASCII; keys of any other type are
    used unchanged. Values that are ``bytes`` are decoded as ASCII as well;
    all other values are passed through untouched.

    Args:
        dict_: mapping whose keys may be ``bytes`` or ``str``.

    Returns:
        Tuple ``(data, labels)`` taken from the decoded mapping.

    Raises:
        KeyError: if the decoded mapping has no ``'data'`` or ``'labels'`` key.
    """
    decoded = {}
    for key, value in dict_.items():
        # Derive the output key from the *current* key on every iteration.
        # Without the else-branch a non-bytes key would reuse the key of the
        # previous iteration (overwriting that entry) or raise NameError if
        # it came first.
        key_new = key.decode('ascii') if isinstance(key, bytes) else key
        if isinstance(value, bytes):
            value = value.decode('ascii')
        decoded[key_new] = value
    return decoded['data'], decoded['labels']

In [3]:
# loading dataset files
path = 'cifar-10-python/cifar-10-batches-py/'
# os.listdir() returns entries in arbitrary order; sort them so the batches
# (and therefore the rows of X_train / y_train) are stacked in the same
# order on every run and on every machine.
files = sorted(os.listdir(path))
X_train = []
y_train = []
for name in files:
    if 'data_batch' in name:
        dict_ = unpickle(os.path.join(path, name))  # unpickling the data_batch file
        X_temp, y_temp = get_data_labels(dict_)  # getting data and labels from unpickled data
        X_train.append(np.asarray(X_temp))
        y_train.append(np.asarray(y_temp))
if not X_train:
    # Fail with a clear message instead of an opaque reshape/concatenate error.
    raise FileNotFoundError("no 'data_batch' files found in " + path)
# Stack the per-batch arrays along the sample axis instead of hard-coding the
# total sample count; labels end up as one flat vector aligned with the rows
# of X_train.
y_train = np.concatenate(y_train).astype(np.int32)
X_train = np.concatenate(X_train).astype(np.uint8).reshape(len(y_train), -1)
# Bring the uint8 pixel values into [0, 1].
X_train = X_train/255.0

In [4]:
# Read the held-out batch ('test_batch') from the same dataset directory
dict_ = unpickle(path + 'test_batch')
X_test, y_test = get_data_labels(dict_)

# Labels -> int32 array; pixels -> uint8 array, then divided by 255.0
# so the values lie in [0, 1].
y_test = np.asarray(y_test).astype(np.int32)
X_test = np.asarray(X_test).astype(np.uint8) / 255.0

In [5]:
# Standardise the features: the scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
scaler = sklearn.preprocessing.StandardScaler().fit(X_train)
X_train, X_test = (
    torch.tensor(scaler.transform(split)) for split in (X_train, X_test)
)

# Labels are wrapped as tensors too.
y_train, y_test = torch.tensor(y_train), torch.tensor(y_test)

In [6]:
# reshaping tensors for CNN
# Every flat row becomes a (3, 32, 32) image. The sample count is inferred
# with -1 for both splits (the training split previously hard-coded 50000
# while the test split already used -1).
X_train = X_train.reshape(-1, 3, 32, 32)
X_test = X_test.reshape(-1, 3, 32, 32)
print(X_train.shape)
y_train = y_train.reshape(-1)
print(y_train.shape)
y_test = y_test.reshape(-1)
print(y_test.shape)

# With inferred sizes a sample/label mismatch would no longer fail at the
# reshape, so check it explicitly.
assert X_train.shape[0] == y_train.shape[0], "X_train / y_train sample counts differ"
assert X_test.shape[0] == y_test.shape[0], "X_test / y_test sample counts differ"

torch.Size([50000, 3, 32, 32])
torch.Size([50000])
torch.Size([10000])


In [7]:
# preparing the model
torch.manual_seed(0) # Ensure model weights initialized with same random numbers

image_size = 32      # input height/width; inputs are reshaped to (N, 3, 32, 32)
num_classes = 10     # width of the final linear layer (one logit per class)
num_filters = 5      # output channels of the first convolution
num_filters_2 = 6    # output channels of the second convolution
filter_size_1 = 4    # kernel size of the first convolution
filter_size_2 = 6    # kernel size of the second convolution
pool_size = 2

# Spatial size after each stage, derived from the hyperparameters instead of
# a hard-coded 12. The convolutions are built with only a kernel size (no
# stride/padding arguments), and the pool uses kernel == stride:
#   32 -> 29 (conv 1) -> 24 (conv 2) -> 12 (max-pool)
conv1_size = image_size - filter_size_1 + 1
conv2_size = conv1_size - filter_size_2 + 1
pooled_size = conv2_size // pool_size

model = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=3,
                    out_channels=num_filters,
                    kernel_size=filter_size_1),
    torch.nn.ReLU(),
    # The channel count now has its own name; it used to borrow
    # filter_size_2, which only worked because both happen to be 6.
    torch.nn.Conv2d(in_channels=num_filters,
                    out_channels=num_filters_2,
                    kernel_size=filter_size_2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=pool_size, stride=pool_size),
    torch.nn.Flatten(),
    torch.nn.Linear(num_filters_2 * pooled_size**2, num_classes),
)

In [8]:
# Training hyper-parameters: minibatch size and number of passes over the data
batch_size, num_epoch = 100, 13

# Cross-entropy objective, minimised with SGD plus momentum
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.0060,
    momentum=0.9,
)

In [9]:
# Mini-batch training over X_train in stored order (no shuffling).
# The saved output of this cell shows a final-minibatch loss of 1.0880 for
# epoch 13 (an earlier note here quoted 1.0937, which did not match it).
for epoch in range(1, num_epoch+1):
    for i in range(0, len(X_train), batch_size):
        X = X_train[i:i+batch_size]
        y = y_train[i:i+batch_size]

        # Clear the gradients accumulated by the previous step. The optimizer
        # was built from model.parameters(), so this covers the same tensors
        # as model.zero_grad().
        optimizer.zero_grad()

        # Inputs are cast to float and targets to long for the loss call.
        y_pred = model(X.float())
        batch_loss = loss(y_pred, y.long())

        batch_loss.backward()
        optimizer.step()

    # Reports the loss of the last minibatch only, not an epoch average.
    print("Epoch %d final minibatch had loss %.4f" % (epoch, batch_loss.item()))

Epoch 1 final minibatch had loss 1.6065
Epoch 2 final minibatch had loss 1.5060
Epoch 3 final minibatch had loss 1.4220
Epoch 4 final minibatch had loss 1.3652
Epoch 5 final minibatch had loss 1.3126
Epoch 6 final minibatch had loss 1.2573
Epoch 7 final minibatch had loss 1.1978
Epoch 8 final minibatch had loss 1.1647
Epoch 9 final minibatch had loss 1.1392
Epoch 10 final minibatch had loss 1.1192
Epoch 11 final minibatch had loss 1.0889
Epoch 12 final minibatch had loss 1.0945
Epoch 13 final minibatch had loss 1.0880


In [10]:
# calculate training accuracy
from sklearn.metrics import accuracy_score
with torch.no_grad():
    train_logits = model(X_train.float())

# The network's last layer is a plain Linear, so its outputs are raw scores.
# The predicted class is the argmax of those scores directly: applying exp()
# first does not normalise them into probabilities, cannot change the argmax
# (exp is monotonic) and can overflow to inf for large scores.
y_pred = train_logits.argmax(dim=1).cpu().numpy()

# accuracy on training set
print('Training accuracy is '+str(accuracy_score(y_train, y_pred)))

Training accuracy is 0.60452


In [11]:
# calculate test accuracy
with torch.no_grad():
    test_logits = model(X_test.float())

# The network's last layer is a plain Linear, so its outputs are raw scores.
# The predicted class is the argmax of those scores directly: applying exp()
# first does not normalise them into probabilities, cannot change the argmax
# (exp is monotonic) and can overflow to inf for large scores.
y_test_pred = test_logits.argmax(dim=1).cpu().numpy()

# accuracy on test set
print('Testing accuracy is '+str(accuracy_score(y_test, y_test_pred)))

Testing accuracy is 0.56


In [None]:
# decision tree classifier
# Grid-searches a decision tree over the split criterion ('gini', 'entropy')
# and max_depth (1, 3, 5, 7, 9) with cv=3, then scores the best estimator on
# the test split and prints the accuracy as a percentage.
#
# NOTE(review): this cell has no execution count and uses names that none of
# the visible cells define: DecisionTreeClassifier, GridSearchCV, Y_train,
# Y_test, accuracy, counter and names (the cells above use lowercase
# y_train / y_test). It also calls sklearn.metrics, while the visible imports
# only bring in sklearn and sklearn.preprocessing. Confirm where these are
# meant to come from before running.
# NOTE(review): by this point X_train / X_test have been reshaped into
# (N, 3, 32, 32) torch tensors; presumably they need flattening back to 2-D
# before being passed to fit/predict -- TODO confirm.
# NOTE(review): `model` is rebound here, replacing the CNN built earlier.
model = DecisionTreeClassifier()
param_distribution = {'criterion':['gini','entropy'], 'max_depth': range(1,10,2)}

gridcv = GridSearchCV(model, param_distribution, verbose=1, n_jobs=4, cv=3)
gridcv.fit(X_train, Y_train)
grid_accuracy_test  = sklearn.metrics.accuracy_score(Y_test,  gridcv.best_estimator_.predict(X_test))
# Stored as a percentage in accuracy[counter]; both names are defined outside the visible cells.
accuracy[counter] = grid_accuracy_test*100
print("Accuracy for " + names[counter] + ":",accuracy[counter])