In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
sns.set_theme(style='whitegrid')

#### Define global variables

In [4]:
NAMES = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Shirt']

## Load the data

In [5]:
train = np.load('fashion_train.npy')
test = np.load('fashion_test.npy')

X_train, y_train = train[:, :784], train[:, 784]
X_test, y_test = test[:, :784], test[:, 784]

# Exploratory Data Analysis

#### Numerical variables

In [6]:
#shape of the data
print(f'Shape of the training data: {X_train.shape}')
print(f'Shape of the testing data: {X_test.shape}')

#mean, std, min, max
print(f'\nMean of the training data: {X_train.mean()}')
print(f'Standard Deviation of the training data: {X_train.std()}')

print(f'\nMin of the training data: {X_train.min()}')
print(f'Max of the training data: {X_train.max()}')


Shape of the training data: (10000, 784)
Shape of the testing data: (5000, 784)

Mean of the training data: 77.02673711734694
Standard Deviation of the training data: 89.9969501937854

Min of the training data: 0
Max of the training data: 255


#### What is the distribution of the target variable?

In [7]:
def plot_y_distribution(y):
    ''' Plot the distribution of the labels '''
    classes, classes_counts = np.unique(y, return_counts=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=classes, y=classes_counts, ax=ax);
    ax.set_title('Class distribution', fontsize=16, fontweight='bold');
    ax.set_ylabel('Count');
    ax.set_xticklabels(NAMES);


#plot_y_distribution(y_train)

#### What are the properties of images in the dataset?
We investigate mean, median, and stdev of the pixel values in the images.

In [8]:
def plot_images(X, y, measure='mean'):
    ''' Plot the mean, median, or std of the images'''
    classes = np.unique(y)
    fig, ax = plt.subplots(1, len(classes), figsize=(20, 8))
    for c in classes:
        imgs = X[y == c]

        if measure == 'mean': av_img = np.mean(imgs, axis=0).reshape(28, 28)
        elif measure == 'median': av_img = np.median(imgs, axis=0).reshape(28, 28)
        elif measure == 'std': av_img = np.std(imgs, axis=0).reshape(28, 28)

        ax[c].imshow(av_img, cmap='gray')
        ax[c].set_title(NAMES[c], fontsize=16, fontweight='bold')


#plot_images(X_train, y_train)
#plot_images(X_train, y_train, measure='std')
#plot_images(X_train, y_train, measure='median')

We also investigate the distribution of the pixel values in the images. As an input, we use the mean of the pixel values in the images.

In [9]:
def plot_color_distribution(X, y):
    ''' Plot the distribution of the colors '''
    classes = np.unique(y)
    av_imgs = np.zeros((len(classes), 28, 28))
    for c in classes:
        imgs = X[y == c]
        av_imgs[c] = np.mean(imgs, axis=0).reshape(28, 28)

    #plot the distribution of the average image
    fig, ax = plt.subplots(1, len(classes), figsize=(30, 6))
    for c in classes:
        sns.histplot(av_imgs[c].flatten(), ax=ax[c], kde=True)
        ax[c].set_title(NAMES[c], fontsize=16, fontweight='bold')
        ax[c].set_xlabel('Pixel value')
        ax[c].set_ylabel('Count')

#plot_color_distribution(X_train, y_train)

# Neural Network

In [118]:
class MiniBatchGD:
    def __init__(self, X, y, batch_size=32):
        self.X = X
        self.y = y
        self.batch_size = batch_size

    def sample(self):
        ''' Sample a batch of data '''
        idx = np.random.choice(self.X.shape[0], self.batch_size, replace=False)
        return self.X[idx], self.y[idx]

In [146]:
class ActivationFunction:
    def __init__(self, name, lr=0.01):
        self.name = name
        self.lr = lr

    def calculate(self, x, derivative=False):
        if self.name == 'sigmoid':
            return self.sigmoid(x, derivative=derivative)
        elif self.name == 'relu':
            return self.relu(x, derivative=derivative)
        elif self.name == 'softmax':
            return self.softmax(x, derivative=derivative)

    def sigmoid(self, x, derivative=False):
        ''' 
            sigmoid activation function and its derivative 
        '''
        if not derivative:
            return 1 / (1 + np.exp (-x))
        else:
            out = self.sigmoid(x)
            return out * (1 - out)

    def relu(self, x, derivative=False):
        ''' 
            relu activation function and its derivative 
        '''
        if not derivative:
            return np.where(x > 0, x, 0)
        else:
            return np.where(x > 0, 1, 0)
    
    def softmax(self, x, derivative=False):
        ''' 
            softmax activation function and its derivative 
        '''
        if not derivative:
            exps = np.exp(x - np.max(x))
            return exps / np.sum(exps)
        else:
            out = self.softmax(x)
            return out * (1 - out)

In [155]:

class NeuralNetwork:
    def __init__(self, input_size=784, hidden_size=300, output_size=5, layers_num=3, learning_rate=0.01, test=False, activation_name='sigmoid'):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size 
        self.layers_num = layers_num
        self.lr = learning_rate
        self.activation = ActivationFunction(activation_name, lr=learning_rate)

        self.weights = []
        self.bias = []

        self.weights.append(np.random.randn(self.input_size, self.hidden_size))
        self.bias.append(np.random.randn(1, self.hidden_size))

        self.weights.append(np.random.randn(self.hidden_size, self.output_size))
        self.bias.append(np.random.randn(1, self.output_size))
        
    def forward_pass(self, X):
        ''' 
            conduct the forward pass on the network 
        '''
        #X = X / 255
        self.z1 = np.dot(X, self.weights[0]) + self.bias[0]
        self.a1 = self.activation.calculate(self.z1)

        self.z2 = np.dot(self.a1, self.weights[1]) + self.bias[1]
        self.a2 = self.activation.calculate(self.z2)

        self.outputs = np.zeros((len(self.a2), self.output_size))
        for i in range(len(self.a2)):
            self.outputs[i][np.argmax(self.a2[i])] = 1

        self.outputs = np.array(self.outputs)


    def backward_pass(self, X, y):
        '''
            conduct the backward pass on the network
        '''
        #X = X / 255
        y_mtrix = np.zeros((len(y), int(self.output_size))) 
        #change y into 1-hot encoding by assigning 1 to the index of the label
        for i in range(len(y)):
            y_mtrix[i][y[i]] = 1

        #loss, used to check the accuracy of the network
        self.loss = np.sum((self.outputs - y_mtrix)**2) / (2*y_mtrix.size)

        #accuracy, used to check the accuracy of the network
        self.accuracy = np.sum(np.argmax(self.outputs, axis=1) == y) / len(y)

        #calculate the error of the hidden layer
        self.e1 = self.a2 - y_mtrix
        dw1 = self.e1 * self.activation.calculate(self.a2, True)
        
        #calculate the error of the input layer
        self.e2 = np.dot(dw1, self.weights[1].T)
        dw2 = self.e2 * self.activation.calculate(self.a1, True)

        #update the weights
        w2_update = np.dot(self.a1.T, dw1) / len(X)
        w1_update = np.dot(X.T, dw2) / len(X)

        #update the biases
        b2_update = self.lr * np.sum(dw1, axis=0, keepdims=True) / len(X)
        b1_update = self.lr * np.sum(dw2, axis=0, keepdims=True) / len(X) 

        self.weights[1] -= self.lr * w2_update
        self.weights[0] -= self.lr * w1_update

        self.bias[1] -= self.lr * b2_update
        self.bias[0] -= self.lr * b1_update

        
    def TRAIN(self, X, y, epochs=5, testing=False):
        '''
            train the network for a given number of epochs
        '''
        for epoch in range(epochs):
            X_sample, y_sample = MiniBatchGD(X, y, batch_size=64).sample()
            self.forward_pass(X_sample)
            self.backward_pass(X_sample, y_sample)
            if testing: print(f'Epoch {epoch}, loss: {self.loss}, accuracy: {self.accuracy}')

    def TEST(self, X, y):
        '''
            test the network
        '''
        self.forward_pass(X)
        self.backward_pass(X, y)
        print(f'loss: {self.loss}, accuracy: {self.accuracy}')
    

In [156]:
#activation function
# and regularization

nn = NeuralNetwork(test=True, activation_name='sigmoid')
nn.TRAIN(X_train, y_train, epochs=400, testing=True)
nn.TEST(X_test, y_test)


Epoch 0, loss: 0.153125, accuracy: 0.234375
Epoch 1, loss: 0.125, accuracy: 0.375
Epoch 2, loss: 0.15625, accuracy: 0.21875
Epoch 3, loss: 0.1625, accuracy: 0.1875
Epoch 4, loss: 0.121875, accuracy: 0.390625
Epoch 5, loss: 0.125, accuracy: 0.375
Epoch 6, loss: 0.1125, accuracy: 0.4375
Epoch 7, loss: 0.1375, accuracy: 0.3125
Epoch 8, loss: 0.078125, accuracy: 0.609375
Epoch 9, loss: 0.10625, accuracy: 0.46875
Epoch 10, loss: 0.078125, accuracy: 0.609375
Epoch 11, loss: 0.109375, accuracy: 0.453125
Epoch 12, loss: 0.08125, accuracy: 0.59375
Epoch 13, loss: 0.103125, accuracy: 0.484375
Epoch 14, loss: 0.08125, accuracy: 0.59375
Epoch 15, loss: 0.090625, accuracy: 0.546875
Epoch 16, loss: 0.059375, accuracy: 0.703125
Epoch 17, loss: 0.084375, accuracy: 0.578125
Epoch 18, loss: 0.1, accuracy: 0.5
Epoch 19, loss: 0.034375, accuracy: 0.828125
Epoch 20, loss: 0.059375, accuracy: 0.703125
Epoch 21, loss: 0.1, accuracy: 0.5
Epoch 22, loss: 0.071875, accuracy: 0.640625
Epoch 23, loss: 0.084375, a