In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
sns.set_theme(style='whitegrid')

#### Define global variables

In [4]:
NAMES = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Shirt']

## Load the data

In [5]:
train = np.load('fashion_train.npy')
test = np.load('fashion_test.npy')

X_train, y_train = train[:, :784], train[:, 784]
X_test, y_test = test[:, :784], test[:, 784]

# Exploratory Data Analysis

#### Numerical variables

In [6]:
#shape of the data
print(f'Shape of the training data: {X_train.shape}')
print(f'Shape of the testing data: {X_test.shape}')

#mean, std, min, max
print(f'\nMean of the training data: {X_train.mean()}')
print(f'Standard Deviation of the training data: {X_train.std()}')

print(f'\nMin of the training data: {X_train.min()}')
print(f'Max of the training data: {X_train.max()}')


Shape of the training data: (10000, 784)
Shape of the testing data: (5000, 784)

Mean of the training data: 77.02673711734694
Standard Deviation of the training data: 89.9969501937854

Min of the training data: 0
Max of the training data: 255


#### What is the distribution of the target variable?

In [7]:
def plot_y_distribution(y):
    ''' Plot the distribution of the labels '''
    classes, classes_counts = np.unique(y, return_counts=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=classes, y=classes_counts, ax=ax);
    ax.set_title('Class distribution', fontsize=16, fontweight='bold');
    ax.set_ylabel('Count');
    ax.set_xticklabels(NAMES);


#plot_y_distribution(y_train)

#### What are the properties of images in the dataset?
We investigate mean, median, and stdev of the pixel values in the images.

In [8]:
def plot_images(X, y, measure='mean'):
    ''' Plot the mean, median, or std of the images'''
    classes = np.unique(y)
    fig, ax = plt.subplots(1, len(classes), figsize=(20, 8))
    for c in classes:
        imgs = X[y == c]

        if measure == 'mean': av_img = np.mean(imgs, axis=0).reshape(28, 28)
        elif measure == 'median': av_img = np.median(imgs, axis=0).reshape(28, 28)
        elif measure == 'std': av_img = np.std(imgs, axis=0).reshape(28, 28)

        ax[c].imshow(av_img, cmap='gray')
        ax[c].set_title(NAMES[c], fontsize=16, fontweight='bold')


#plot_images(X_train, y_train)
#plot_images(X_train, y_train, measure='std')
#plot_images(X_train, y_train, measure='median')

We also investigate the distribution of the pixel values in the images. As an input, we use the mean of the pixel values in the images.

In [9]:
def plot_color_distribution(X, y):
    ''' Plot the distribution of the colors '''
    classes = np.unique(y)
    av_imgs = np.zeros((len(classes), 28, 28))
    for c in classes:
        imgs = X[y == c]
        av_imgs[c] = np.mean(imgs, axis=0).reshape(28, 28)

    #plot the distribution of the average image
    fig, ax = plt.subplots(1, len(classes), figsize=(30, 6))
    for c in classes:
        sns.histplot(av_imgs[c].flatten(), ax=ax[c], kde=True)
        ax[c].set_title(NAMES[c], fontsize=16, fontweight='bold')
        ax[c].set_xlabel('Pixel value')
        ax[c].set_ylabel('Count')

#plot_color_distribution(X_train, y_train)

# Neural Network

In [147]:

class NeuralNetwork:
    def __init__(self, input_size=784, hidden_size=300, output_size=5, layers_num=3, learning_rate=0.01, test=False):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size 
        self.layers_num = layers_num
        self.lr = learning_rate

        self.weights = []
        self.bias = []

        self.weights.append(np.random.randn(self.input_size, self.hidden_size))
        self.bias.append(np.random.randn(1, self.hidden_size))

        self.weights.append(np.random.randn(self.hidden_size, self.output_size))
        self.bias.append(np.random.randn(1, self.output_size))


    def sigmoid(self, x, Derivative=False):
        ''' 
            sigmoid activation function and its derivative 
        '''
        if not Derivative:
            return 1 / (1 + np.exp (-x))
        else:
            out = self.sigmoid(x)
            return out * (1 - out)
        
    def forward_pass(self, X):
        ''' 
            conduct the forward pass on the network 
        '''
        self.z1 = np.dot(X, self.weights[0]) + self.bias[0]
        #print(X.shape, self.weights[0].shape, self.bias[0].shape)
        self.a1 = self.sigmoid(self.z1)

        self.z2 = np.dot(self.a1, self.weights[1]) + self.bias[1]
        self.a2 = self.sigmoid(self.z2)

        self.outputs = np.zeros((len(self.a2), self.output_size))
        for i in range(len(self.a2)):
            self.outputs[i][np.argmax(self.a2[i])] = 1

        self.outputs = np.array(self.outputs)
        #print(self.outputs)


    def backward_pass(self, X, y):
        '''
            conduct the backward pass on the network
        '''

        y_mtrix = np.zeros((len(y), int(self.output_size))) 
        #change y into 1-hot encoding by assigning 1 to the index of the label
        for i in range(len(y)):
            y_mtrix[i][y[i]] = 1

        #loss, used to check the accuracy of the network
        self.loss = np.sum((self.outputs - y_mtrix)**2) / (2*y_mtrix.size)

        #calculate the error of the output layer
        self.e1 = self.a2 - y_mtrix
        dw1 = self.e1 * self.sigmoid(self.a2, True)
        
        #calculate the error of the hidden layer
        self.e2 = np.dot(dw1, self.weights[1].T)
        dw2 = self.e2 * self.sigmoid(self.a1, True)

        #update the weights
        w2_update = np.dot(self.a1.T, dw1) / len(X)
        w1_update = np.dot(X.T, dw2) / len(X)

        #update the biases
        #TODO

        self.weights[1] = self.weights[1] - self.lr * w2_update
        self.weights[0] = self.weights[0] - self.lr * w1_update
        #self.dz2 = self.error * self.sigmoid(self.z2, True)

        
    def TRAIN(self, X, y, epochs=5, testing=False):
        '''
            train the network for a given number of epochs
        '''
        for epoch in range(epochs):
            self.forward_pass(X)
            self.backward_pass(X, y)
            if testing: print(f'Epoch {epoch}, loss: {self.loss}')


    

In [149]:
nn = NeuralNetwork(test=True)
nn.TRAIN(X_train, y_train, epochs=10, testing=True)


Epoch 0, loss: 0.15716
Epoch 1, loss: 0.12902
Epoch 2, loss: 0.15942
Epoch 3, loss: 0.12986
Epoch 4, loss: 0.1119
Epoch 5, loss: 0.08586
Epoch 6, loss: 0.12926
Epoch 7, loss: 0.09808
Epoch 8, loss: 0.0712
Epoch 9, loss: 0.08672
