In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data.sampler import BatchSampler
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
device = torch.device('cpu')
from collections import Counter
from sklearn.model_selection import train_test_split
import random

In [2]:
# MNIST dataset 
#load training data set.
train_dataset = torchvision.datasets.FashionMNIST(root='./data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),  
                                           download=True)
#Load testing data set.
test_dataset = torchvision.datasets.FashionMNIST(root='./data', 
                                          train=False, 
                                          transform=transforms.ToTensor())

In [3]:
'''Checking the data set classes and amount of data in each class.'''
idx2class = {v: k for k, v in train_dataset.class_to_idx.items()}
def get_class_distribution(dataset_obj):
    count_dict = {k:0 for k,v in dataset_obj.class_to_idx.items()}
    
    for element in dataset_obj:
        y_lbl = element[1]
        y_lbl = idx2class[y_lbl]
        count_dict[y_lbl] += 1
            
    return count_dict
print("Distribution of classes: \n", get_class_distribution(train_dataset))

Distribution of classes: 
 {'T-shirt/top': 6000, 'Trouser': 6000, 'Pullover': 6000, 'Dress': 6000, 'Coat': 6000, 'Sandal': 6000, 'Shirt': 6000, 'Sneaker': 6000, 'Bag': 6000, 'Ankle boot': 6000}


In [4]:
input_size = 784 # 28x28
hidden_size = 500 
total_classes = 10
epochs_count = 2
batch_size = 100
learning_rate = 0.001

In [5]:
'''This method returns the subset of give dataset; all the classes will have same no of samples given as count.'''
def get_subset(dataset, count):
    targets = np.array(dataset.targets)
    classes, class_counts = np.unique(targets, return_counts=True)
    nb_classes = len(classes)
    # Get class indices
    class_indices = [np.where(targets == i)[0] for i in range(nb_classes)]
    fullindices = []
    for labels in class_indices:
        fullindices.extend(random.sample(list(labels), count))
    sub_dataset = torch.utils.data.Subset(dataset, fullindices)
    return sub_dataset

#print(len(train_dataset))

In [35]:
'''For different configuration just uncomment below line and execute all cells.'''
#train_dataset_subset = get_subset(train_dataset, 100)
train_dataset_subset = get_subset(train_dataset, 500)
#train_dataset_subset = get_subset(train_dataset, 1000)
#train_dataset_subset = get_subset(train_dataset, 5000)

In [36]:
'''Check the subset class distribution'''
train_classes = [label for _, label in train_dataset_subset]
Counter(train_classes)

Counter({0: 500,
         1: 500,
         2: 500,
         3: 500,
         4: 500,
         5: 500,
         6: 500,
         7: 500,
         8: 500,
         9: 500})

In [37]:
# Load the data.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset_subset, batch_size=batch_size)

'''For running on the full dataset, uncomment this line and comment above line.'''
#train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [38]:
# MLP with 2 layers
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, total_classes):
        super(NeuralNet, self).__init__()
        self.input_size = input_size
        self.n_images_per_class = 1000
        self.n_classes = 10
        self.l1 = torch.nn.Linear(input_size, hidden_size) 
        self.relu = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(hidden_size, total_classes)  
    
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out

In [39]:
nnmodel = NeuralNet(input_size, hidden_size, total_classes).to(device)

In [40]:
# Using Cross Entropy Loss.
cross_entropy_loss = nn.CrossEntropyLoss()
#Using Adam optimizer
optimizer = torch.optim.Adam(nnmodel.parameters(), lr=learning_rate)  

In [41]:
# Training the NN
steps = len(train_loader)
for epoch in range(epochs_count):
    for i, (images, labels) in enumerate(train_loader):  
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = nnmodel(images)
        loss = cross_entropy_loss(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{epochs_count}], Step [{i+1}/{steps}], Loss: {loss.item():.4f}')


In [42]:
# Test the NN
with torch.no_grad():
    nn_correct = 0
    nn_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = nnmodel(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs.data, 1)
        nn_samples += labels.size(0)
        nn_correct += (predicted == labels).sum().item()

    acc = 100.0 * nn_correct / nn_samples
    print(f'Accuracy of the network on test images: {acc} %')


Accuracy of the network on test images: 24.78 %


In [43]:
# Classification Report
print(classification_report(labels, predicted, zero_division=1))

              precision    recall  f1-score   support

           0       1.00      0.00      0.00        11
           1       1.00      0.00      0.00        12
           2       1.00      0.00      0.00         6
           3       0.20      0.29      0.24         7
           4       0.11      0.25      0.15         8
           5       0.33      0.55      0.41        11
           6       0.20      0.67      0.30        12
           7       0.46      0.86      0.60         7
           8       1.00      0.00      0.00        14
           9       1.00      0.00      0.00        12

    accuracy                           0.24       100
   macro avg       0.63      0.26      0.17       100
weighted avg       0.67      0.24      0.15       100



In [44]:
# Confusion matrix
print(confusion_matrix(labels,predicted))

[[0 0 0 0 2 0 9 0 0 0]
 [0 0 0 8 3 0 1 0 0 0]
 [0 0 0 0 1 0 5 0 0 0]
 [0 0 0 2 5 0 0 0 0 0]
 [0 0 0 0 2 0 6 0 0 0]
 [0 0 0 0 0 6 1 4 0 0]
 [0 0 0 0 4 0 8 0 0 0]
 [0 0 0 0 0 1 0 6 0 0]
 [0 0 0 0 1 4 9 0 0 0]
 [0 0 0 0 0 7 2 3 0 0]]
