In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import  torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
# Seed PyTorch's global random number generator so that runs are repeatable.
torch.manual_seed(42)

<torch._C.Generator at 0x79ed4fed23d0>

In [3]:
# Colab-only step: mount Google Drive under /content/drive so the dataset path
# used later in the notebook (which starts with /content/drive) can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Path of the dataset CSV on the mounted Drive (presumably a small Fashion-MNIST
# subset, judging by the file name).
file_location = "/content/drive/MyDrive/PyTorch/Dataset/fmnist_small.csv"

In [5]:
# Load the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(file_location)  # directly read the file by path
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


## In this notebook, everything is written out step by step.

1. Separating the data into features (X) and labels (y).

In [6]:
# Per the preview above, column 0 is the class label and the remaining columns are pixels.
X = df.iloc[:, 1:].to_numpy()
y = df.iloc[:, 0].to_numpy()
print(X.shape, y.shape, sep = "\n")

(6000, 784)
(6000,)


2. Splitting the data into train and test.

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)
print(X_train.shape, X_test.shape, sep = "\n")

(4800, 784)
(1200, 784)


3. Preprocessing:
- Data scaling (pixel values are divided by 255).
- Class labeling if needed.
- Making Dataset and DataLoader objects using pytorch.
  - Making CustomDataset class. (constructor `__init__`, `__len__`, and `__getitem__`)
  - Defining train and test dataset.
  - Making train and test loader in batches.

In [8]:
# Scale the features by 1/255 (assumes raw pixels are 8-bit, i.e. 0-255) to keep
# the network's inputs small and training stable.
# NOTE: this cell overwrites its own inputs, so running it twice divides by 255 twice;
# re-run the train/test split cell first if you need to execute it again.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [9]:
# Making Dataset class.

class CustomDataset(Dataset):
  """Map-style dataset pairing each feature row with its class label.

  Args:
    feature: array-like of per-sample features; copied into a float32 tensor.
    label: array-like of integer class ids; copied into a long (int64) tensor,
      the dtype nn.CrossEntropyLoss expects for class-index targets.

  Raises:
    ValueError: if `feature` and `label` do not contain the same number of samples.
  """

  def __init__(self, feature, label):
    self.features = torch.tensor(feature, dtype = torch.float32)
    self.labels = torch.tensor(label, dtype = torch.long)

    # Fail early: a length mismatch would otherwise only surface as an
    # IndexError deep inside a DataLoader iteration.
    if self.features.shape[0] != self.labels.shape[0]:
      raise ValueError(
          f"feature and label must have the same number of samples, "
          f"got {self.features.shape[0]} and {self.labels.shape[0]}"
      )

  def __len__(self):
    """Return the number of samples."""
    return self.features.shape[0]

  def __getitem__(self, idx):
    """Return the (features, label) pair stored at position `idx`."""
    return self.features[idx], self.labels[idx]

In [10]:
# Wrap each split in a CustomDataset so it can be batched by a DataLoader.
train_dataset, test_dataset = (
    CustomDataset(X_train, y_train),
    CustomDataset(X_test, y_test),
)

In [11]:
# Batch both datasets in groups of 32; only the training data is reshuffled.
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = False)        # No shuffling for the test set: sample order does not affect evaluation.

4. Define your Model.
- Create a class.
  - define init.
    - Define each layer of the NN together with its activation function. Use the sequential container.
  - define forward propagation.
    - Define how the input flows through the layers of the NN.

In [12]:
class ANN(nn.Module):
  """Fully connected classifier: num_features -> 128 -> 64 -> 10, with ReLU between layers.

  Layers are stored as individual attributes here; the notebook redefines the
  same architecture with nn.Sequential in a later cell, and that later
  definition replaces this one once it is executed.
  """

  def __init__(self, num_features):
    super().__init__()

    # Creation order is kept as-is: it fixes both the order of the registered
    # parameters and the order in which random initial weights are drawn.
    self.linear_1 = nn.Linear(in_features = num_features, out_features = 128)
    self.relu = nn.ReLU()
    self.linear_2 = nn.Linear(in_features = 128, out_features = 64)
    self.linear_3 = nn.Linear(in_features = 64, out_features = 10)

  def forward(self, features):
    """Return the raw class scores (logits) for a batch of feature rows."""
    hidden = self.relu(self.linear_1(features))
    hidden = self.relu(self.linear_2(hidden))
    return self.linear_3(hidden)

The same model, rewritten using the `nn.Sequential` container (this redefines `ANN`).

## Softmax layer.

- PyTorch does not automatically apply softmax in layers like nn.Linear or at the end of the model.

- If you add a softmax layer explicitly (e.g., nn.Softmax(dim=1)), it will convert your model’s raw outputs (logits) into probabilities.

- However, nn.CrossEntropyLoss expects raw logits, not probabilities. This loss function internally applies log_softmax and then computes the negative log-likelihood, so applying softmax yourself would be redundant and could harm numerical stability.

- For other loss functions like nn.NLLLoss, you must apply log_softmax (not softmax) to your outputs before passing them to the loss.

In [13]:
class ANN(nn.Module):
  """Fully connected classifier built with nn.Sequential.

  With the default arguments the architecture is
  num_features -> 128 -> ReLU -> 64 -> ReLU -> 10, and the output is raw class
  scores (logits) with no softmax applied.

  Args:
    num_features: number of input features per sample.
    hidden_sizes: widths of the hidden layers, in order; each one is followed
      by a ReLU. Defaults to (128, 64).
    num_classes: number of output units (one logit per class). Defaults to 10.
  """

  def __init__(self, num_features, hidden_sizes = (128, 64), num_classes = 10):
    super().__init__()

    layers = []
    in_width = num_features
    for out_width in hidden_sizes:
      layers.append(nn.Linear(in_width, out_width))
      layers.append(nn.ReLU())
      in_width = out_width
    # Final projection to one logit per class; no activation on purpose,
    # because nn.CrossEntropyLoss expects raw logits.
    layers.append(nn.Linear(in_width, num_classes))

    self.network = nn.Sequential(*layers)

  def forward(self, features):
    """Return the logits for a batch of feature rows."""
    return self.network(features)

5. Set important parameters (learning rate and epochs).

In [19]:
# Hyperparameters: step size for the optimizer and number of passes over the training set.
learning_rate = 0.1
epochs = 100

6. Create model object, optimizer, and loss function.

In [20]:
# Build the network with one input unit per feature column.
model = ANN(num_features = X_train.shape[1])

# Multi-class classification loss; it is applied to the model's raw logits.
loss_fn = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over every model parameter.
optimizer = optim.SGD(model.parameters(), lr = learning_rate)

Method -->   What it Shows
- model.parameters() --> 	All parameter tensors
- model.named_parameters() -->	Parameter names, shapes, and requires_grad

In [16]:
# Print the name, shape and requires_grad flag of every parameter tensor in the model.
for name, param in model.named_parameters():
  print(f"Name: {name}, Shape: {param.shape}, Requires Grad: {param.requires_grad}")

Name: network.0.weight, Shape: torch.Size([128, 784]), Requires Grad: True
Name: network.0.bias, Shape: torch.Size([128]), Requires Grad: True
Name: network.2.weight, Shape: torch.Size([64, 128]), Requires Grad: True
Name: network.2.bias, Shape: torch.Size([64]), Requires Grad: True
Name: network.4.weight, Shape: torch.Size([10, 64]), Requires Grad: True
Name: network.4.bias, Shape: torch.Size([10]), Requires Grad: True


7. Training Pipeline.
- Two loops:
  - One on epochs.
  - One on train_loader
    - forward pass.
    - loss calculate.
    - clear gradient.
    - backward pass.
    - update parameter.

- Also calculate the average loss for each epoch.

In [21]:
# Make sure the model is in training mode; an earlier model.eval() call
# (e.g. from a previous evaluation run) would otherwise stay in effect.
model.train()

# Use a separate loop variable: iterating with `epochs` itself would overwrite
# the hyperparameter, so re-running this cell would train for a different
# number of epochs.
for epoch in range(epochs):

  total_epoch_loss = 0.0

  for batch_features, batch_labels in train_loader:

    # forward pass
    y_pred = model(batch_features)

    # loss calculate
    loss = loss_fn(y_pred, batch_labels)

    # clear gradient (must happen before backward() so gradients do not accumulate)
    optimizer.zero_grad()

    # backward pass
    loss.backward()

    # parameter update
    optimizer.step()

    # accumulate the batch loss as a plain Python float
    total_epoch_loss += loss.item()

  # mean of the per-batch losses for this epoch
  avg_loss = total_epoch_loss/len(train_loader)

  # report every 5th epoch to keep the output short
  if (epoch + 1) % 5 == 0:
    print(f"Epoch: {epoch + 1}, loss: {avg_loss}")

Epoch: 5, loss: 0.5459469095865885
Epoch: 10, loss: 0.4095153902967771
Epoch: 15, loss: 0.3222704704602559
Epoch: 20, loss: 0.2760940383623044
Epoch: 25, loss: 0.24114291965961457
Epoch: 30, loss: 0.20076761754850547
Epoch: 35, loss: 0.1733670210589965
Epoch: 40, loss: 0.1571024300530553
Epoch: 45, loss: 0.12653631653015812
Epoch: 50, loss: 0.11308247021709879
Epoch: 55, loss: 0.10075692878880849
Epoch: 60, loss: 0.0643804522479574
Epoch: 65, loss: 0.06033700716992219
Epoch: 70, loss: 0.06274835229230424
Epoch: 75, loss: 0.054476989873995386
Epoch: 80, loss: 0.053355338242836295
Epoch: 85, loss: 0.01973638540444275
Epoch: 90, loss: 0.012790894465676198
Epoch: 95, loss: 0.031099044116175114
Epoch: 100, loss: 0.004595219189456354


8. Evaluation code.
- Set your model in evaluation mode. This is because some layers behave differently during training and testing. For example, dropout randomly zeroes activations during training but is turned off during testing. Similarly, batch normalization uses the statistics of the current batch during training, but its stored running statistics during testing.
- Analyze the model output and write the code.

In [18]:
# Switch the model to evaluation mode before measuring test accuracy.
# NOTE(review): this cell's execution count (18) is lower than those of the cells
# that create and train the model (20, 21), so it must be re-run after training.
model.eval()

ANN(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [22]:
# Number of batches the test loader yields.
len(test_loader)

38

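**Model output analysis**
- For each image we get 10 outputs, one raw score (logit) per class. The last layer is a plain `nn.Linear`, not a softmax, so these are not probabilities.
- Each full batch in the test loader has 32 images (the last batch may be smaller).
- From one full batch we get a matrix of shape 32 × 10.
- To extract the predicted labels from the output, we take the index of the maximum value in each row (max function along `dim = 1`). Softmax is not needed for this, because it does not change which class has the largest score.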

In [23]:
# evaluation code.

# Switch to evaluation mode here so this cell does not depend on a separate
# model.eval() cell having been run after training.
model.eval()

total = 0
correct = 0

# No gradients are needed for inference.
with torch.no_grad():
  for batch_features, batch_labels in test_loader:

    outputs = model(batch_features)          # logits, one row per sample and one column per class
    predicted = outputs.argmax(dim = 1)      # index of the largest logit = predicted class per sample

    # count by actual batch size; the final batch can be smaller than the rest
    total += batch_labels.shape[0]

    correct += (predicted == batch_labels).sum().item()

# test accuracy as a fraction in [0, 1]
print(correct/total)

0.8316666666666667


## How to improve the accuracy?

- Use full dataset.
- Try different optimizer (adam, rmsprop, etc)
- Different learning rate.
- Different epochs.
- Different weight initialization.
- Use concept of regularization.
- Use the concept of dropout.
- Batch normalization.
- Play with model architecture (increasing nodes, layers etc)