In [36]:
import torch
from torch import nn
import numpy as np
from torch.utils.data.dataloader import DataLoader
from torchvision import datasets, transforms
from torch import optim


# Here is the parts for the dataloader
batchsize = 100
training_data = datasets.FashionMNIST(root="../fashion_mnist", train=True, transform=transforms.ToTensor(), download=True)
testing_data = datasets.FashionMNIST(root="../fashion_mnist", train=False, transform=transforms.ToTensor(), download=True)


train_dataloader = DataLoader(training_data, batch_size=batchsize)
test_dataloader = DataLoader(testing_data, batch_size=batchsize)



In [37]:

## Hyperparameters
sequence_length = 28
input_length = 28
hidden_size = 128 #this is the number of LSTM cells in each layer
num_layers = 2 #Number of LSTM layers in total.
num_classes = 10
num_epochs = 10
learning_rate = 0.01


In [38]:

## Defining the LSTM pytorch implementation
class LSTM(nn.Module):
    def __init__(self, input_length, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()

        #storing hyperparameter
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.input_length = input_length

        #The neural network.
        self.lstm = nn.LSTM(input_size=input_length, hidden_size=hidden_size, num_layers=num_layers, batch_first = True).to('cuda')
        self.output_layer = nn.Linear(hidden_size, num_classes)

    def forward(self, input):
        hidden_states = torch.zeros(self.num_layers, input.size(0), self.hidden_size)
        hidden_states = hidden_states.to('cuda')
        cell_states = torch.zeros(self.num_layers, input.size(0), self.hidden_size)
        cell_states = cell_states.to('cuda')

        out, _ = self.lstm(input, (hidden_states, cell_states))
        out = self.output_layer(out[:, -1, :])
        return out





In [39]:

## setting the model
model = LSTM(input_length, hidden_size, num_layers, num_classes)
model = model.to('cuda')
print(model)

# defining loss and optimization functions
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = learning_rate)


LSTM(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=128, out_features=10, bias=True)
)


In [40]:


# Training method
## Running the tests

def train(num_epochs, model, train_dataloader, loss_func):
    total_steps = len(train_dataloader)

    for epoch in range(num_epochs):
        for batch, (image, labels) in enumerate(train_dataloader):
            images = image.reshape(-1, sequence_length, input_length)
            images = images.to('cuda')
            labels = labels.to('cuda')
            output = model(images)
            loss = loss_func(output,labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


            if(batch+1)%100 ==0:
                print(f"epoch: {epoch+1}, batch: {batch+1}/{total_steps}, loss: {loss.item():>4f}")


train(num_epochs, model, train_dataloader, loss_func)



epoch: 1, batch: 100/600, loss: 0.894352
epoch: 1, batch: 200/600, loss: 0.663611
epoch: 1, batch: 300/600, loss: 0.502366
epoch: 1, batch: 400/600, loss: 0.615144
epoch: 1, batch: 500/600, loss: 0.663134
epoch: 1, batch: 600/600, loss: 0.398422
epoch: 2, batch: 100/600, loss: 0.430365
epoch: 2, batch: 200/600, loss: 0.345645
epoch: 2, batch: 300/600, loss: 0.384777
epoch: 2, batch: 400/600, loss: 0.408946
epoch: 2, batch: 500/600, loss: 0.500300
epoch: 2, batch: 600/600, loss: 0.255813
epoch: 3, batch: 100/600, loss: 0.301776
epoch: 3, batch: 200/600, loss: 0.312696
epoch: 3, batch: 300/600, loss: 0.300857
epoch: 3, batch: 400/600, loss: 0.354687
epoch: 3, batch: 500/600, loss: 0.469427
epoch: 3, batch: 600/600, loss: 0.218314
epoch: 4, batch: 100/600, loss: 0.375630
epoch: 4, batch: 200/600, loss: 0.310765
epoch: 4, batch: 300/600, loss: 0.265705
epoch: 4, batch: 400/600, loss: 0.277101
epoch: 4, batch: 500/600, loss: 0.406030
epoch: 4, batch: 600/600, loss: 0.203700
epoch: 5, batch:

In [41]:
test_images, test_labels = next(iter(test_dataloader))

test_images = test_images.to('cuda')
test_labels

tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 2, 3, 9, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2])

In [42]:
test_output = model(test_images.view(-1, 28, 28))
test_output

tensor([[ -4.0447,  -4.0769,  -5.7869,  -6.3180,  -6.2221,   1.5258,  -4.5671,
           4.1538,  -2.8187,  10.2990],
        [ -0.0414,  -6.9361,   7.5540,  -6.2268,   0.4350,  -6.1794,   2.0721,
          -5.5404,  -5.7443,  -6.6442],
        [  0.0800,  16.1949,  -2.5659,  -0.3206,   2.8839,  -4.6103,  -1.7340,
          -8.0594,  -3.6529,  -4.5061],
        [ -0.9564,  16.1161,  -2.9847,   1.7447,   1.9916,  -5.1914,  -1.7775,
          -9.5939,  -3.8929,  -5.3975],
        [  1.1130,  -7.1868,   0.5320,   0.1699,   1.6714,  -6.4677,   5.2790,
          -6.9009,  -3.8740,  -7.0004],
        [ -0.7160,  10.8477,  -1.5630,  -1.0714,   0.3839,  -1.0044,  -2.2697,
          -4.7273,  -1.1557,  -1.8328],
        [ -1.5107,  -4.8968,   4.5134,  -5.6628,   4.9518,  -6.9696,   0.2419,
          -6.5179,  -3.8086,  -6.3888],
        [ -1.1669,  -7.8321,  -0.3948,  -3.5445,   1.4375,  -6.0183,   8.9003,
          -7.5286,  -4.3742,  -7.0778],
        [ -2.0071,  -3.8240,  -3.5592,  -3.5480,

In [43]:
predicted = torch.max(test_output, 1)[1]
predicted
test_labels

tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 2, 3, 9, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2])

In [44]:
# percentage_correct = 0 + 1 if prediced[i] == test_labels[i] for i in range(100)

correct = [1 for i in range(100) if predicted[i] == test_labels[i]]
percentage_correct = sum(correct)/100
print(f"percentage correct: {percentage_correct}%")

percentage correct: 0.92%


In [45]:
def test(model, dataloader):
    accuracy = 0
    with torch.no_grad():
        for test_images, test_labels in dataloader:
            test_images = test_images.to('cuda')
            #we send it to the model for inference
            test_output = model(test_images.view(-1, 28, 28))

            predicted = torch.max(test_output, 1)[1]
            num_correct = [1 for i in range(batchsize) if predicted[i] == test_labels[i]]
            accuracy += sum(num_correct)
    num_batches = len(dataloader)
    size = len(dataloader.dataset)
    accuracy = accuracy / size
    
    print(f"total accuracy of model: {100*accuracy}%")

In [46]:
test(model, test_dataloader)

total accuracy of model: 88.49000000000001%
