In [1]:
import torch
from torch import nn
from torch import optim
import pandas as pd
import numpy as np

In [2]:
# load the data
df = pd.read_csv('train.csv')

print(df.tail()) 

# creating tensor from targets_df 
df_x = df.iloc[:,1:]
df_y = df.iloc[:,0]
# normalize from 0:1
torch_x = torch.tensor(df_x.values).float() / 255
torch_y = torch.tensor(df_y.values).long()

# Convert to one-hot encoding
num_classes = 10  # Assuming you have 10 classes (0 to 9)
one_hot_encoded = torch.eye(num_classes)[torch_y]

print(one_hot_encoded.shape)

# printing out result
print(torch_x.shape)
print(torch_y.shape)

# split train, val, test set
split = 38000
train_x = torch_x[:split]
train_y = torch_y[:split]
# train_y = one_hot_encoded[:split]

val_x = torch_x[split:]
val_y = torch_y[split:]
# val_y = one_hot_encoded[split:]

# printing out result
print(train_x.shape)
print(val_x.shape)

# batching data
batch_size = 32
def train_loader():
    num_batches = train_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = train_x[batch_start:batch_end,:]
        batch_Y = train_y[batch_start:batch_end]
        yield batch_X, batch_Y

def val_loader():
    num_batches = val_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = val_x[batch_start:batch_end,:]
        batch_Y = val_y[batch_start:batch_end]
        yield batch_X, batch_Y

       label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
41995      0       0       0       0       0       0       0       0       0   
41996      1       0       0       0       0       0       0       0       0   
41997      7       0       0       0       0       0       0       0       0   
41998      6       0       0       0       0       0       0       0       0   
41999      9       0       0       0       0       0       0       0       0   

       pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  \
41995       0  ...         0         0         0         0         0   
41996       0  ...         0         0         0         0         0   
41997       0  ...         0         0         0         0         0   
41998       0  ...         0         0         0         0         0   
41999       0  ...         0         0         0         0         0   

       pixel779  pixel780  pixel781  pixel782  pixel783  
41995         0         0   

In [32]:
# process test set
# load the data
df_test = pd.read_csv('test.csv')

# creating tensor from targets_df 
df_test_x = df_test.iloc[:,:]

# normalize from 0:1
test_x = torch.tensor(df_test_x.values).float() / 255

# I dont have to do this; you are just testing, no need to batch
def test_loader():
    num_batches = test_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = test_x[batch_start:batch_end,:]
        yield batch_X

In [33]:
# from torchvision import datasets, transforms
# from torch.utils.data import random_split, DataLoader
# train_data = datasets.MNIST('data', train=True, download=False, transform=transforms.ToTensor())
# train, val = random_split(train_data, [55000, 5000])

# # Access a sample data point
# sample_data = train[2]

# print(sample_data)
# # Get the shape of the sample data
# sample_data_shape = sample_data[0].shape
# print("Shape of the sample data:", sample_data_shape)

# train = train[:42000]
# val = val[:4000]
# train_loader = DataLoader(train, batch_size=32)
# val_loader = DataLoader(val, batch_size=32)

In [48]:
# prepare network
model = nn.Sequential(
    nn.Linear(28*28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    # nn.Dropout(0.1),    
    nn.Linear(64, 10)
).cuda()

In [70]:
# Define my optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [71]:
# Define loss
loss = nn.CrossEntropyLoss()

In [72]:
# My Training loops
nb_epochs = 50
for epoch in range(nb_epochs):
    losses = list()
    accuracy = list()
    for batch in train_loader():
        x, y = batch

        # print(x.size())
        # print(y)

        # # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)

        # print(x.size())

        # batch b
        # x 28*28

        # 1. forward
        # print(x[0])
        l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        # 3. cleaning the gradients
        model.zero_grad()
            
        # 4. accumulate the partial derivatives of J wrt params
        J.backward()

        # 5. step in hte opposite direction of the gradient
        optimizer.step()

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, train loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' train accuracy: {torch.tensor(accuracy).mean()}')


    losses = list()
    accuracy = list()
    for batch in val_loader():
        x, y = batch
        
        # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)

        # 1. forward
        with torch.no_grad():
            l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, validation loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' val accuracy: {torch.tensor(accuracy).mean()}')
    

 Epoch 1, train loss: 0.390991747379303,  train accuracy: 0.8842670321464539
 Epoch 1, validation loss: 0.2302495688199997,  val accuracy: 0.9300000071525574
 Epoch 2, train loss: 0.17788442969322205,  train accuracy: 0.9457666277885437
 Epoch 2, validation loss: 0.17465655505657196,  val accuracy: 0.9449999928474426
 Epoch 3, train loss: 0.13184835016727448,  train accuracy: 0.9592986702919006
 Epoch 3, validation loss: 0.14148761332035065,  val accuracy: 0.9567499756813049
 Epoch 4, train loss: 0.10402491688728333,  train accuracy: 0.9681708216667175
 Epoch 4, validation loss: 0.145673006772995,  val accuracy: 0.9597499966621399
 Epoch 5, train loss: 0.0867791473865509,  train accuracy: 0.9739890694618225
 Epoch 5, validation loss: 0.13472117483615875,  val accuracy: 0.9605000019073486
 Epoch 6, train loss: 0.07387236505746841,  train accuracy: 0.9763584733009338
 Epoch 6, validation loss: 0.12762469053268433,  val accuracy: 0.9617499709129333
 Epoch 7, train loss: 0.0615948587656021

In [81]:
# evaluate on the test set
import csv
file_path = 'submission.csv'
# Open the CSV file in write mode
j = 0
with open(file_path, 'w', newline='') as txtfile:
    # Create a CSV writer object
    csv_writer = csv.writer(txtfile)
    csv_writer.writerow(['ImageId', 'Label'])
    for batch in test_loader():
        x = batch
        
        # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)
    
        # 1. forward
        with torch.no_grad():
            l = model(x.cuda())    # l:logits
    
        out = l.detach().argmax(dim=1).cpu().float()
        # print('output', out)

        
        for i in range(0, len(x)):
            csv_writer.writerow([int(j+1), int(np.asarray(out[i]))])
            j = j+1

# Define the file path
file_path = 'submission.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Display the DataFrame
print(df.head())


   ImageId  Label
0        1      2
1        2      0
2        3      9
3        4      9
4        5      3


In [69]:
# A more flexible model
class ResNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(28 * 28, 64)
        self.l2 = nn.Linear(64, 64)
        self.l3 = nn.Linear(64, 10)
        self.do = nn.Dropout(0.1)

    def forward(self, x):
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        do = self.do(h1 + h2)  # this allows for the partial gradients in the deeper layers (first ones) to update faster
        logits = self.l3(do)
        return logits

model = ResNet().cuda()