In [2]:
import torch
# Report the installed PyTorch build and whether CUDA is usable; when it is,
# also show the name of the current GPU.
print(torch.__version__, torch.cuda.is_available(), sep="\n")
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())

2.9.1+cu128
True
NVIDIA GeForce RTX 5060 Ti


In [3]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader , Dataset
import torch.nn as nn 
from torch import optim as optim
from torchvision import datasets, transforms

In [4]:
# Read the CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="fmnist_small.csv")
df.head(n=5)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [5]:
# Take the 'label' column as the target and keep every other column as input.
# (`columns=` already selects the column axis, so no separate `axis` is needed.)
y = df['label']
X = df.drop(columns=['label'])

In [6]:
# Sanity check: the feature matrix and the label vector should have the same number of rows.
X.shape , y.shape

((6000, 784), (6000,))

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=23,
)

In [8]:
# Inspect which container type the split returned for the training labels.
type(y_train)

pandas.core.series.Series

In [9]:
# Replace the pandas containers with plain NumPy arrays (features first, then labels).
X_train, X_test = X_train.to_numpy(), X_test.to_numpy()
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

In [10]:
# List the distinct class ids present in the training labels.
(np.unique(y_train))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
# Rescale pixel intensities by 255 (assumes 8-bit values in 0-255, so the
# result lies in [0, 1]).
# NOTE: this cell is not idempotent - re-running it divides by 255 again.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [12]:
# Number of samples held out for testing.
len(X_test)

1200

In [13]:
class CustomDataset(Dataset):
    """In-memory dataset that pairs each feature row with its class label.

    Args:
        features: Array-like of numeric feature rows; copied into a
            ``torch.float`` tensor.
        labels: Array-like of integer class ids, one per feature row; copied
            into a ``torch.long`` tensor.

    Raises:
        ValueError: If ``features`` and ``labels`` hold a different number of
            samples.
    """

    def __init__(self , features , labels):
        self.features = torch.tensor(features , dtype = torch.float)
        self.labels = torch.tensor(labels , dtype = torch.long)
        # Fail fast: a length mismatch would otherwise only surface later as an
        # IndexError mid-epoch, or as silently ignored trailing labels.
        if len(self.features) != len(self.labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(self.features)} and {len(self.labels)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self , index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index] , self.labels[index]

In [14]:
# Wrap the train and test splits in tensor-backed datasets.
train_dataset = CustomDataset(X_train , y_train)
test_dataset = CustomDataset(X_test , y_test)

In [15]:
# make the dataloaders
# Pinned (page-locked) host memory only speeds up host-to-GPU copies, so it is
# requested only when CUDA is available. This keeps the notebook's CPU fallback
# free of the "pin_memory is set but no accelerator is found" warning.
use_pinned_memory = torch.cuda.is_available()

train_data_loader = DataLoader(
    dataset = train_dataset,
    batch_size = 32,
    shuffle = True,  # reshuffle the training samples every epoch
    pin_memory = use_pinned_memory
)

# No shuffling for the test loader: evaluation does not depend on sample order.
test_data_loader = DataLoader(
    dataset = test_dataset,
    batch_size = 32,
    pin_memory = use_pinned_memory
)

In [16]:
# Derive the network's input width (columns per sample) and output width
# (number of distinct classes) from the training data.
num_features = X_train.shape[1]
num_labels = np.unique(y_train).size
num_labels , num_features

(10, 784)

In [17]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: ``num_features`` -> 128 -> 64 -> ``num_labels``.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout(p=0.3). The
    last layer returns raw logits (no softmax is applied here).

    Args:
        num_labels: Number of output classes (width of the final layer).
        num_features: Width of each flattened input sample. Defaults to 784 so
            existing ``NeuralNetwork(num_labels=...)`` calls keep working; pass
            it explicitly for inputs of a different width. Making this a
            parameter removes the class's dependency on a notebook-level
            global of the same name.
    """

    def __init__(self , num_labels , num_features = 784):
        super().__init__()
        self.model = nn.Sequential(
            # hidden stage 1
            nn.Linear(in_features = num_features , out_features = 128),
            nn.BatchNorm1d(num_features = 128),
            nn.ReLU(),
            nn.Dropout(p = 0.3),
            # hidden stage 2
            nn.Linear(in_features = 128 , out_features = 64),
            nn.BatchNorm1d(num_features = 64),
            nn.ReLU(),
            nn.Dropout(p = 0.3),
            # output layer: one logit per class
            nn.Linear(in_features = 64 , out_features = num_labels)
        )

    def forward(self , x):
        """Return class logits of shape ``(batch, num_labels)`` for input ``x``."""
        return self.model(x)

In [18]:
# Training hyperparameters.
learning_rate = 0.01  # SGD step size
epochs = 100          # number of full passes over the training set

# Seed PyTorch's global RNG so that weight initialisation, DataLoader shuffling
# and dropout masks are repeatable under Restart & Run All. Some CUDA kernels
# are still nondeterministic, so GPU runs may differ slightly.
seed = 23  # same value as the random_state used for the train/test split
torch.manual_seed(seed);

In [19]:
# Choose the compute device: CUDA when a GPU is visible, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [20]:
# Build the network and move its parameters to the selected device
# (`Module.to` returns the module itself, so the two steps can be chained).
model = NeuralNetwork(num_labels = num_labels).to(device)
model

NeuralNetwork(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [21]:
# Plain SGD over all model parameters; weight_decay is the coefficient
# (lambda) of the L2 regularization penalty.
optimizer = optim.SGD(
    params = model.parameters(),
    lr = learning_rate,
    weight_decay = 1e-4,
)
# Multi-class loss computed from raw logits and integer class targets.
criterion = nn.CrossEntropyLoss()

In [22]:
# train loop
for epoch in range(epochs):
    # Re-enter training mode every epoch so Dropout and BatchNorm behave
    # correctly even when this cell is re-run after a `model.eval()` cell.
    model.train()
    total_batch_loss = 0
    for inputs , labels in train_data_loader:
        # move the batch onto the target device
        inputs = inputs.to(device)
        labels = labels.to(device)
        # forward pass
        outputs = model(inputs)
        # calculate loss
        loss = criterion(outputs , labels)
        # Accumulate a detached copy: adding `loss` itself would chain every
        # batch's autograd history into the running total for the whole epoch.
        total_batch_loss += loss.detach()
        # clear gradients left over from the previous step
        optimizer.zero_grad()
        # backward pass
        loss.backward()
        # update the weights
        optimizer.step()
    # Mean of the per-batch losses for this epoch.
    avg_batch_loss = total_batch_loss / len(train_data_loader)
    print(f"Epoch: {epoch + 1} , batch avg loss: {avg_batch_loss}")

Epoch: 1 , batch avg loss: 1.5219097137451172
Epoch: 2 , batch avg loss: 1.0451465845108032
Epoch: 3 , batch avg loss: 0.8827488422393799
Epoch: 4 , batch avg loss: 0.7826305031776428
Epoch: 5 , batch avg loss: 0.7238844037055969
Epoch: 6 , batch avg loss: 0.6839272975921631
Epoch: 7 , batch avg loss: 0.6470779180526733
Epoch: 8 , batch avg loss: 0.6042187213897705
Epoch: 9 , batch avg loss: 0.575336754322052
Epoch: 10 , batch avg loss: 0.5673938989639282
Epoch: 11 , batch avg loss: 0.5459654331207275
Epoch: 12 , batch avg loss: 0.524391770362854
Epoch: 13 , batch avg loss: 0.4983487129211426
Epoch: 14 , batch avg loss: 0.48236578702926636
Epoch: 15 , batch avg loss: 0.47192782163619995
Epoch: 16 , batch avg loss: 0.47890886664390564
Epoch: 17 , batch avg loss: 0.46302980184555054
Epoch: 18 , batch avg loss: 0.4497838020324707
Epoch: 19 , batch avg loss: 0.44074127078056335
Epoch: 20 , batch avg loss: 0.4439633786678314
Epoch: 21 , batch avg loss: 0.41681981086730957
Epoch: 22 , batch 

In [24]:
# Put the model in evaluation mode: Dropout is disabled and the BatchNorm
# layers normalise with their running statistics instead of batch statistics.
model.eval()

NeuralNetwork(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.3, inplace=False)
    (8): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [25]:
# Make this cell self-contained: force evaluation mode here rather than relying
# on an earlier cell having run last, so Dropout is off and BatchNorm uses its
# running statistics.
model.eval()

correct = 0
total = 0
# No gradients are needed for scoring, so skip autograd bookkeeping.
with torch.no_grad():
    for inputs , labels in test_data_loader:
        inputs , labels = inputs.to(device) , labels.to(device)

        outputs = model(inputs)
        # predicted class = index of the largest logit in each row
        preds = outputs.argmax(dim = 1)

        correct += (preds == labels).sum().item()
        total += labels.size(0)

accuracy = (correct / total) * 100
print(f"Accuracy on test data: {accuracy:.2f}%")

Accuracy on test data: 86.25%


In [26]:
# Make this cell self-contained: force evaluation mode here rather than relying
# on an earlier cell having run last. In training mode this pass would apply
# Dropout and would also update the BatchNorm running statistics, even under
# `torch.no_grad()`.
model.eval()

correct = 0
total = 0
# No gradients are needed for scoring, so skip autograd bookkeeping.
with torch.no_grad():
    for inputs , labels in train_data_loader:
        inputs , labels = inputs.to(device) , labels.to(device)

        outputs = model(inputs)
        # predicted class = index of the largest logit in each row
        preds = outputs.argmax(dim = 1)

        correct += (preds == labels).sum().item()
        total += labels.size(0)

accuracy = (correct / total) * 100
print(f"Accuracy on training data: {accuracy:.2f}%")

Accuracy on training data: 98.62%
