In [82]:
pip install py7zr

[0mNote: you may need to restart the kernel to use updated packages.


In [83]:
import py7zr
import os
import glob
import random
import cv2
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt

In [84]:
temp_path = '/kaggle/temp/'
if not os.path.exists(temp_path):
    os.mkdir(temp_path)
train_file_path = '../input/cifar-10/train.7z'
archive = py7zr.SevenZipFile(train_file_path, mode='r')
archive.extractall(path=temp_path)
archive.close()

In [85]:
images = glob.glob('/kaggle/temp/train/*.png')
len(images)

50000

In [86]:
train_labels = pd.read_csv('../input/cifar-10/trainLabels.csv')
train_labels

Unnamed: 0,id,label
0,1,frog
1,2,truck
2,3,truck
3,4,deer
4,5,automobile
...,...,...
49995,49996,bird
49996,49997,frog
49997,49998,truck
49998,49999,automobile


In [114]:
LABELS = ['frog', 'truck', 'deer', 'automobile', 'bird', 'horse', 'ship',
          'cat', 'dog', 'airplane']

X_train = []
y_train = []

X_val = []
y_val = []


for img in images:
    prob = random.random()
    label = train_labels.iloc[int(img[19:-4])-1]['label']
    img_arr = cv2.imread(img)
    img_arr = cv2.resize(img_arr, (32, 32))
    if prob > 0.8:
        X_val.append(list(img_arr))
        y_val.append(LABELS.index(label))
    else:
        X_train.append(list(img_arr))
        y_train.append(LABELS.index(label))

In [115]:
X_train = np.array(X_train)
X_val = np.array(X_val)
X_train_org = X_train.copy()
X_val_org = X_val.copy()

In [116]:
print(X_train.shape, X_val.shape)

(40054, 32, 32, 3) (9946, 32, 32, 3)


In [117]:
X_train = np.array([X_train[i].reshape(32*32*3) for i in range(X_train.shape[0])]).T / 255.0
X_val = np.array([X_val[i].reshape(32*32*3) for i in range(X_val.shape[0])]).T / 255.0

In [118]:
print(X_train.shape, X_val.shape)

(3072, 40054) (3072, 9946)


In [119]:
y_train = np.array(y_train)
y_val = np.array(y_val)
print(y_train.shape, y_val.shape)

(40054,) (9946,)


In [120]:
X_train

array([[0.12156863, 0.42352941, 0.09803922, ..., 0.31372549, 0.51372549,
        0.81176471],
       [0.10196078, 0.21176471, 0.10196078, ..., 0.30980392, 0.58039216,
        0.77254902],
       [0.08235294, 0.17254902, 0.08627451, ..., 0.55686275, 0.55294118,
        0.72941176],
       ...,
       [0.54509804, 0.84313725, 0.42352941, ..., 0.99215686, 0.76470588,
        0.50196078],
       [0.8       , 0.63921569, 0.41176471, ..., 0.99215686, 0.84313725,
        0.48235294],
       [0.79215686, 0.41176471, 0.39607843, ..., 0.99215686, 0.90980392,
        0.48627451]])

In [121]:
class Layer:
    def __init__(self, nodes_in, nodes_out, activation):
        self.nodes_in = nodes_in
        self.nodes_out = nodes_out
        self.activation = activation
        
        self.prev_layer = None
        self.next_layer = None
        self.input = None
        self.output = None

        self.weights = np.random.rand(self.nodes_out, self.nodes_in) - 0.5
        self.biases = np.random.rand(self.nodes_out, 1) - 0.5
        self.velocities = np.zeros((self.nodes_out, self.nodes_in))
        
    def forward(self, input):
        self.Z = self.weights.dot(input) + self.biases
        self.A = activation_functions(self.Z, False, self.activation)
        self.output = self.A
        
        if self.next_layer: self.next_layer.input = self.output
        return self.output
    
    def backward(self, lr, output_error, one_hot_Y=None, momentum = 0.5):
        if self.next_layer == None:
            output_error = self.A - one_hot_Y
            dZ = output_error
        else:
            dA = self.next_layer.weights.T.dot(output_error)
            dZ = dA * activation_functions(self.Z, True, self.activation)
            
        # Finding gradient wrt weights and biases    
        self.dW = (1 / self.A.shape[1]) * dZ.dot(self.input.T)
        self.db = (1 / self.A.shape[1]) * np.sum(dZ)
        
        # Updating weights and biases
        self.velocities = momentum * self.velocities + lr * self.dW
        self.weights -= self.velocities
        self.biases -= lr * self.db
        
        return dZ

In [122]:
class NeuralNetwork:
    def __init__(self):
        self.layers = []
    
    def add_layer(self, layer):
        self.layers.append(layer)
        if len(self.layers) > 1:
            self.layers[-2].next_layer = self.layers[-1]
            self.layers[-1].prev_layer = self.layers[-2]
        
    def forward_prop(self, input, i=0):
        if self.layers[i].next_layer == None:
            return self.layers[i].forward(input)
        return self.forward_prop(self.layers[i].forward(input), i+1)
    
    def backward_prop(self, lr, Y, dZ=0):
        for i in self.layers[::-1]:
            if i.next_layer==None:
                one_hot_Y = one_hot(Y)
                dZ = i.backward(lr, dZ, one_hot_Y)
            else:
                dZ = i.backward(lr, dZ)
                
    def get_predictions(self):
        return np.argmax(self.layers[-1].A, 0)

    def get_accuracy(self, predictions, Y):
        return np.sum(predictions == Y) / Y.size
    
    def sparse_categorical_crossentropy(self, y_true, y_pred, grad=False):
        y_pred = np.clip(y_pred, np.finfo(float).eps, 1.0 - np.finfo(float).eps)
        num_classes = y_pred.shape[1]
        y_true = y_true.astype(int)
        neg_log_likelihood = -np.sum(y_true * np.log(y_pred), axis=1)
        average_loss = np.mean(neg_log_likelihood)
        if not grad:
            return average_loss
        gradients = (y_pred - y_true) / len(y_true)
        return average_loss, gradients

    
    def gradient_descent(self, X, Y, lr, iterations):
        for i in range(iterations):
            A_final = self.forward_prop(X)
            loss = self.sparse_categorical_crossentropy(one_hot(Y), A_final, False)
            dZ = self.sparse_categorical_crossentropy(one_hot(Y), A_final, True)
            self.backward_prop(lr, Y, dZ)
            if i % 10 == 0:
                print("Iteration: ", i, end=', ')
                predictions = self.get_predictions()
                accuracy = self.get_accuracy(predictions, Y)
                print("Loss: ", loss, end=', ')
                print('Accuracy: ', round(accuracy*100, 2), end='%, ')
                
                dev_predictions = ann.make_predictions(X_val)
                accuracy = ann.get_accuracy(dev_predictions, y_val)
                print('Test Data Accuracy: ', round(accuracy*100, 2), end='%\n')

    def make_predictions(self, X):
        A_final = self.forward_prop(X)
        predictions = self.get_predictions()
        return predictions

In [123]:
def activation_functions(Z, deriv, activation):
    def relu(Z, deriv):
        if deriv:
            return Z > 0
        return np.maximum(Z, 0)

    def sigmoid(Z, deriv):
        if deriv:
            return sigmoid(Z, False) * (1 - sigmoid(Z, False))
        Z = np.clip(Z, -500, 500)
        return 1/(1+np.exp(-Z))

    def softmax(Z, deriv):
        exps = np.exp(Z - Z.max())
        if deriv:
             return softmax(Z, False) * (1 - softmax(Z, False))
        Z = np.clip(Z, -500, 500)
        exps = np.exp(Z - np.max(Z, axis=0))
        sum_exps = np.sum(exps, axis=0)
        softmax_output = exps / (sum_exps + np.finfo(float).eps)
        return softmax_output
    
    def tanh(Z, deriv):
        if deriv:
            return 1 - np.tanh(Z) ** 2
        return np.tanh(Z)
    
    activation_outputs = {
        'relu': relu(Z, deriv),
        'sigmoid': sigmoid(Z, deriv),
        'softmax': softmax(Z, deriv),
        'tanh': tanh(Z, deriv)
    }   
    return activation_outputs[f'{activation}']


def one_hot(Y):
    one_hot_Y = np.zeros((len(Y), 10))
    one_hot_Y[np.arange(len(Y)), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [124]:
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(3072, 40054) (3072, 9946) (40054,) (9946,)


In [133]:
lr = 0.1
momentum = 0.5
num_hidden = 3
sizes = (100,100,100)
activation = ('tanh', 'sigmoid', 'sigmoid')
loss = 'sce'
batch_size = 20
anneal = True
epochs = 300
# X_train, y_train, X_val, y_val already given

ann = NeuralNetwork()
def train(lr, momentum, num_hidden, sizes, activation, loss, batch_size, anneal, epochs, X_train, y_train, X_val, y_val):
    layer1 = Layer(3*32*32, sizes[0], activation[0])
    ann.add_layer(layer1)
    layer1.input = X_train
    for i in range(1, num_hidden):
        ann.add_layer(Layer(sizes[i-1], sizes[i], activation[i]))
    ann.add_layer(Layer(sizes[-1], 10, 'softmax'))
                  
    ann.forward_prop(X_train)
    ann.backward_prop(lr, y_train)

    ann.gradient_descent(X_train, y_train, lr=lr, iterations=epochs)

In [134]:
train(lr, momentum, num_hidden, sizes, activation, loss, batch_size, anneal, epochs, X_train, y_train, X_val, y_val)

Iteration:  0, Loss:  12290.739019974148, Accuracy:  9.87%, Test Data Accuracy:  10.36%
Iteration:  10, Loss:  9180.49203155152, Accuracy:  13.4%, Test Data Accuracy:  13.63%
Iteration:  20, Loss:  9007.365803162482, Accuracy:  17.55%, Test Data Accuracy:  17.7%
Iteration:  30, Loss:  8874.34550542806, Accuracy:  19.97%, Test Data Accuracy:  20.35%
Iteration:  40, Loss:  8763.05120870997, Accuracy:  21.63%, Test Data Accuracy:  21.97%
Iteration:  50, Loss:  8664.468020096214, Accuracy:  22.83%, Test Data Accuracy:  23.34%
Iteration:  60, Loss:  8575.411714883154, Accuracy:  23.83%, Test Data Accuracy:  23.99%
Iteration:  70, Loss:  8495.050970265018, Accuracy:  24.47%, Test Data Accuracy:  24.43%
Iteration:  80, Loss:  8422.955974534063, Accuracy:  25.19%, Test Data Accuracy:  24.94%
Iteration:  90, Loss:  8357.956189348957, Accuracy:  25.69%, Test Data Accuracy:  25.52%
Iteration:  100, Loss:  8298.5233518091, Accuracy:  26.18%, Test Data Accuracy:  26.05%
Iteration:  110, Loss:  8243

In [101]:
temp_path = '/kaggle/temp/'
if not os.path.exists(temp_path):
    os.mkdir(temp_path)
test_file_path = '../input/cifar-10/test.7z'
archive = py7zr.SevenZipFile(test_file_path, mode='r')
archive.extractall(path=temp_path)
archive.close()

In [102]:
images = glob.glob('/kaggle/temp/train/*.png')
len(images)

50000

In [135]:
X_test = []

for img in images:
    label = train_labels.iloc[int(img[19:-4])-1]['label']
    img_arr = cv2.imread(img)
    img_arr = cv2.resize(img_arr, (32, 32))
    X_test.append(list(img_arr))
    

In [136]:
X_test = np.array(X_test)
X_test_org = X_test.copy()
X_test = np.array([X_test[i].reshape(32*32*3) for i in range(X_test.shape[0])]).T / 255.0
print(X_test_org.shape, X_test.shape)

(50000, 32, 32, 3) (3072, 50000)


In [137]:
pred = ann.make_predictions(X_test)
print(pred)

[2 4 7 ... 0 2 3]


In [139]:
pred = ann.make_predictions(X_test)
print(pred)

[2 4 7 ... 0 2 3]


In [143]:
pred_values = [LABELS[i] for i in pred]

In [144]:
result = pd.DataFrame({'id': range(1, len(pred_values) + 1), 'value': pred_values})
print(result)

          id       value
0          1        deer
1          2        bird
2          3         cat
3          4    airplane
4          5         dog
...      ...         ...
49995  49996        frog
49996  49997        frog
49997  49998        frog
49998  49999        deer
49999  50000  automobile

[50000 rows x 2 columns]


In [145]:
result.to_csv('data.csv', index=False)