In [30]:
import torch
from torchvision.models.feature_extraction import get_graph_node_names

import numpy as np

In [31]:
train = np.load('fashion_train.npy')
test = np.load('fashion_test.npy')

X_train, y_train = train[:, :784], train[:, 784]
X_test, y_test = test[:, :784], test[:, 784]

In [32]:
#reshape the X_train and X_test to 28x28
X_train = X_train.reshape(-1, 28, 28)
X_test = X_test.reshape(-1, 28, 28)

In [33]:
import torch.nn as nn
import torch.nn.functional as F

In [38]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, 3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, 3, padding =1)
        # linear layers
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 5) 
        # dropout
        self.dropout = nn.Dropout(p=0.2)
        # max pooling
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flattening the image
        x = x.view(-1, 7*7*16)
        # linear layers
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.fc4(x)
        return x



net = Net()
print(net)


Net(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=5, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [39]:
#create a tensor from the X_train and X_test numpy arrays
#X_train = torch.from_numpy(X_train).float()
#X_test = torch.from_numpy(X_test).float()

net.forward(X_train)
    
#X_train = X_train.unsqueeze(1)
#flatten the X_train to 1D
X_train = torch.flatten(X_train, 1)

#X_test = X_test.unsqueeze(1)
X_test = torch.flatten(X_test, 1)

TypeError: conv2d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)


In [23]:
class MiniBatchGD:
    def __init__(self, X, y, batch_size=32):
        self.X = X
        self.y = y
        self.batch_size = batch_size

    def sample(self):
        ''' Sample a batch of data '''
        idx = np.random.choice(self.X.shape[0], self.batch_size, replace=False)
        return self.X[idx], self.y[idx]

In [8]:
class ActivationFunction:
    def __init__(self, name, lr=0.01):
        self.name = name
        self.lr = lr

    def calculate(self, x, derivative=False):
        if self.name == 'sigmoid':
            return self.sigmoid(x, derivative=derivative)
        elif self.name == 'relu':
            return self.relu(x, derivative=derivative)
        elif self.name == 'softmax':
            return self.softmax(x, derivative=derivative)

    def sigmoid(self, x, derivative=False):
        ''' 
            sigmoid activation function and its derivative 
        '''
        if not derivative:
            return 1 / (1 + np.exp (-x))
        else:
            out = self.sigmoid(x)
            return out * (1 - out)

    def relu(self, x, derivative=False):
        ''' 
            relu activation function and its derivative 
        '''
        if not derivative:
            return np.where(x > 0, x, 0)
        else:
            return np.where(x > 0, 1, 0)
    
    def softmax(self, x, derivative=False):
        ''' 
            softmax activation function and its derivative 
        '''
        if not derivative:
            exps = np.exp(x - np.max(x))
            return exps / np.sum(exps)
        else:
            out = self.softmax(x)
            return out * (1 - out)

In [9]:

class NeuralNetwork:
    def __init__(self, input_size=784, hidden_size=300, output_size=5, layers_num=3, learning_rate=0.01, test=False, activation_name='sigmoid'):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size 
        self.layers_num = layers_num
        self.lr = learning_rate
        self.activation = ActivationFunction(activation_name, lr=learning_rate)

        self.weights = []
        self.bias = []

        self.weights.append(np.random.randn(self.input_size, self.hidden_size))
        self.bias.append(np.random.randn(1, self.hidden_size))

        self.weights.append(np.random.randn(self.hidden_size, self.output_size))
        self.bias.append(np.random.randn(1, self.output_size))
        
    def forward_pass(self, X):
        ''' 
            conduct the forward pass on the network 
        '''
        #X = X / 255
        self.z1 = np.dot(X, self.weights[0]) + self.bias[0]
        self.a1 = self.activation.calculate(self.z1)

        self.z2 = np.dot(self.a1, self.weights[1]) + self.bias[1]
        self.a2 = self.activation.calculate(self.z2)

        self.outputs = np.zeros((len(self.a2), self.output_size))
        for i in range(len(self.a2)):
            self.outputs[i][np.argmax(self.a2[i])] = 1

        self.outputs = np.array(self.outputs)


    def backward_pass(self, X, y):
        '''
            conduct the backward pass on the network
        '''
        #X = X / 255
        y_mtrix = np.zeros((len(y), int(self.output_size))) 
        #change y into 1-hot encoding by assigning 1 to the index of the label
        for i in range(len(y)):
            y_mtrix[i][y[i]] = 1

        #loss, used to check the accuracy of the network
        self.loss = np.sum((self.outputs - y_mtrix)**2) / (2*y_mtrix.size)

        #accuracy, used to check the accuracy of the network
        self.accuracy = np.sum(np.argmax(self.outputs, axis=1) == y) / len(y)

        #calculate the error of the hidden layer
        self.e1 = self.a2 - y_mtrix
        dw1 = self.e1 * self.activation.calculate(self.a2, True)
        
        #calculate the error of the input layer
        self.e2 = np.dot(dw1, self.weights[1].T)
        dw2 = self.e2 * self.activation.calculate(self.a1, True)

        #update the weights
        w2_update = np.dot(self.a1.T, dw1) / len(X)
        w1_update = np.dot(X.T, dw2) / len(X)

        #update the biases
        b2_update = self.lr * np.sum(dw1, axis=0, keepdims=True) / len(X)
        b1_update = self.lr * np.sum(dw2, axis=0, keepdims=True) / len(X) 

        self.weights[1] -= self.lr * w2_update
        self.weights[0] -= self.lr * w1_update

        self.bias[1] -= self.lr * b2_update
        self.bias[0] -= self.lr * b1_update

        
    def TRAIN(self, X, y, epochs=5, testing=False):
        '''
            train the network for a given number of epochs
        '''
        for epoch in range(epochs):
            X_sample, y_sample = MiniBatchGD(X, y, batch_size=64).sample()
            self.forward_pass(X_sample)
            self.backward_pass(X_sample, y_sample)
            if testing: print(f'Epoch {epoch}, loss: {self.loss}, accuracy: {self.accuracy}')

    def TEST(self, X, y):
        '''
            test the network
        '''
        self.forward_pass(X)
        self.backward_pass(X, y)
        print(f'loss: {self.loss}, accuracy: {self.accuracy}')
    

In [10]:
nn = NeuralNetwork(activation_name='sigmoid')
nn.TRAIN(X_train, y_train, epochs=100, testing=True)
nn.TEST(X_test, y_test)


  return 1 / (1 + np.exp (-x))


Epoch 0, loss: 0.175, accuracy: 0.125
Epoch 1, loss: 0.15, accuracy: 0.25
Epoch 2, loss: 0.14375, accuracy: 0.28125
Epoch 3, loss: 0.134375, accuracy: 0.328125
Epoch 4, loss: 0.175, accuracy: 0.125
Epoch 5, loss: 0.115625, accuracy: 0.421875
Epoch 6, loss: 0.121875, accuracy: 0.390625
Epoch 7, loss: 0.071875, accuracy: 0.640625
Epoch 8, loss: 0.09375, accuracy: 0.53125
Epoch 9, loss: 0.0875, accuracy: 0.5625
Epoch 10, loss: 0.121875, accuracy: 0.390625
Epoch 11, loss: 0.125, accuracy: 0.375
Epoch 12, loss: 0.096875, accuracy: 0.515625
Epoch 13, loss: 0.08125, accuracy: 0.59375
Epoch 14, loss: 0.084375, accuracy: 0.578125
Epoch 15, loss: 0.071875, accuracy: 0.640625
Epoch 16, loss: 0.065625, accuracy: 0.671875
Epoch 17, loss: 0.06875, accuracy: 0.65625
Epoch 18, loss: 0.078125, accuracy: 0.609375
Epoch 19, loss: 0.078125, accuracy: 0.609375
Epoch 20, loss: 0.109375, accuracy: 0.453125
Epoch 21, loss: 0.0625, accuracy: 0.6875
Epoch 22, loss: 0.078125, accuracy: 0.609375
Epoch 23, loss: 0