In [188]:
import torch
from torch import nn
from torch import optim
import pandas as pd
import numpy as np

In [189]:
# load the data
df = pd.read_csv('train.csv')

print(df.tail()) 

# creating tensor from targets_df 
df_x = df.iloc[:,1:]
df_y = df.iloc[:,0]
# normalize from 0:1
torch_x = torch.tensor(df_x.values).float() / 255
torch_y = torch.tensor(df_y.values).long()

# Convert to one-hot encoding
num_classes = 10  # Assuming you have 10 classes (0 to 9)
one_hot_encoded = torch.eye(num_classes)[torch_y]

print(one_hot_encoded.shape)

# printing out result
print(torch_x.shape)
print(torch_y.shape)

# split train, val, test set
split = 38000
train_x = torch_x[:split]
train_y = torch_y[:split]
# train_y = one_hot_encoded[:split]

val_x = torch_x[split:]
val_y = torch_y[split:]
# val_y = one_hot_encoded[split:]

# printing out result
print(train_x.shape)
print(val_x.shape)

# batching data
batch_size = 32
def train_loader():
    num_batches = train_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = train_x[batch_start:batch_end,:]
        batch_Y = train_y[batch_start:batch_end]
        yield batch_X, batch_Y

def val_loader():
    num_batches = val_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = val_x[batch_start:batch_end,:]
        batch_Y = val_y[batch_start:batch_end]
        yield batch_X, batch_Y

       label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
41995      0       0       0       0       0       0       0       0       0   
41996      1       0       0       0       0       0       0       0       0   
41997      7       0       0       0       0       0       0       0       0   
41998      6       0       0       0       0       0       0       0       0   
41999      9       0       0       0       0       0       0       0       0   

       pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  \
41995       0  ...         0         0         0         0         0   
41996       0  ...         0         0         0         0         0   
41997       0  ...         0         0         0         0         0   
41998       0  ...         0         0         0         0         0   
41999       0  ...         0         0         0         0         0   

       pixel779  pixel780  pixel781  pixel782  pixel783  
41995         0         0   

In [190]:
# from torchvision import datasets, transforms
# from torch.utils.data import random_split, DataLoader
# train_data = datasets.MNIST('data', train=True, download=False, transform=transforms.ToTensor())
# train, val = random_split(train_data, [55000, 5000])

# # Access a sample data point
# sample_data = train[2]

# print(sample_data)
# # Get the shape of the sample data
# sample_data_shape = sample_data[0].shape
# print("Shape of the sample data:", sample_data_shape)

# train = train[:42000]
# val = val[:4000]
# train_loader = DataLoader(train, batch_size=32)
# val_loader = DataLoader(val, batch_size=32)

In [216]:
# prepare network
model = nn.Sequential(
    nn.Linear(28*28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 10)
).cuda()

In [217]:
# Define my optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [218]:
# Define loss
loss = nn.CrossEntropyLoss()

In [219]:
# My Training loops
nb_epochs = 5
for epoch in range(nb_epochs):
    losses = list()
    accuracy = list()
    for batch in train_loader():
        x, y = batch

        # print(x.size())
        # print(y)

        # # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)

        # print(x.size())

        # batch b
        # x 28*28

        # 1. forward
        # print(x[0])
        l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        # 3. cleaning the gradients
        model.zero_grad()
            
        # 4. accumulate the partial derivatives of J wrt params
        J.backward()

        # 5. step in hte opposite direction of the gradient
        optimizer.step()

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, train loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' train accuracy: {torch.tensor(accuracy).mean()}')


    losses = list()
    accuracy = list()
    for batch in val_loader():
        x, y = batch
        
        # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)

        # 1. forward
        with torch.no_grad():
            l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, validation loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' val accuracy: {torch.tensor(accuracy).mean()}')
    

 Epoch 1, train loss: 0.4767756760120392,  train accuracy: 0.8518323302268982
 Epoch 1, validation loss: 0.2253873646259308,  val accuracy: 0.934499979019165
 Epoch 2, train loss: 0.18625009059906006,  train accuracy: 0.9430023431777954
 Epoch 2, validation loss: 0.1625669002532959,  val accuracy: 0.9514999985694885
 Epoch 3, train loss: 0.13127277791500092,  train accuracy: 0.9601674675941467
 Epoch 3, validation loss: 0.1419224888086319,  val accuracy: 0.956250011920929
 Epoch 4, train loss: 0.10100769996643066,  train accuracy: 0.9692765474319458
 Epoch 4, validation loss: 0.13105563819408417,  val accuracy: 0.9597499966621399
 Epoch 5, train loss: 0.08169876039028168,  train accuracy: 0.9748051762580872
 Epoch 5, validation loss: 0.12456286698579788,  val accuracy: 0.9612500071525574
