In [1]:
import torch
from torch import nn
from torch import optim
import pandas as pd
import numpy as np

In [2]:
# load the data
df = pd.read_csv('train.csv')

print(df.tail()) 

# creating tensor from targets_df 
df_x = df.iloc[:,1:]
df_y = df.iloc[:,0]
# normalize from 0:1
torch_x = torch.tensor(df_x.values).float() / 255
torch_y = torch.tensor(df_y.values).long()

# Convert to one-hot encoding
num_classes = 10  # Assuming you have 10 classes (0 to 9)
one_hot_encoded = torch.eye(num_classes)[torch_y]

print(one_hot_encoded.shape)

# printing out result
print(torch_x.shape)
print(torch_y.shape)

# split train, val, test set
split = 38000
train_x = torch_x[:split]
train_y = torch_y[:split]
# train_y = one_hot_encoded[:split]

val_x = torch_x[split:]
val_y = torch_y[split:]
# val_y = one_hot_encoded[split:]

# printing out result
print(train_x.shape)
print(val_x.shape)

# batching data
batch_size = 32
def train_loader():
    num_batches = train_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = train_x[batch_start:batch_end,:]
        batch_Y = train_y[batch_start:batch_end]
        yield batch_X, batch_Y

def val_loader():
    num_batches = val_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = val_x[batch_start:batch_end,:]
        batch_Y = val_y[batch_start:batch_end]
        yield batch_X, batch_Y

       label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
41995      0       0       0       0       0       0       0       0       0   
41996      1       0       0       0       0       0       0       0       0   
41997      7       0       0       0       0       0       0       0       0   
41998      6       0       0       0       0       0       0       0       0   
41999      9       0       0       0       0       0       0       0       0   

       pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  \
41995       0  ...         0         0         0         0         0   
41996       0  ...         0         0         0         0         0   
41997       0  ...         0         0         0         0         0   
41998       0  ...         0         0         0         0         0   
41999       0  ...         0         0         0         0         0   

       pixel779  pixel780  pixel781  pixel782  pixel783  
41995         0         0   

In [3]:
# process test set
# load the data
df_test = pd.read_csv('test.csv')

# creating tensor from targets_df 
df_test_x = df_test.iloc[:,:]

# normalize from 0:1
test_x = torch.tensor(df_test_x.values).float() / 255

# I dont have to do this; you are just testing, no need to batch
def test_loader():
    num_batches = test_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X = test_x[batch_start:batch_end,:]
        yield batch_X

In [4]:
# from torchvision import datasets, transforms
# from torch.utils.data import random_split, DataLoader
# train_data = datasets.MNIST('data', train=True, download=False, transform=transforms.ToTensor())
# train, val = random_split(train_data, [55000, 5000])

# # Access a sample data point
# sample_data = train[2]

# print(sample_data)
# # Get the shape of the sample data
# sample_data_shape = sample_data[0].shape
# print("Shape of the sample data:", sample_data_shape)

# train = train[:42000]
# val = val[:4000]
# train_loader = DataLoader(train, batch_size=32)
# val_loader = DataLoader(val, batch_size=32)

In [5]:
# prepare network
model = nn.Sequential(
    nn.Linear(28*28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    # nn.Dropout(0.1),    
    nn.Linear(64, 10)
).cuda()

In [55]:
# Define my optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [56]:
# Define loss
loss = nn.CrossEntropyLoss()

In [25]:
# My Training loops
nb_epochs = 50
for epoch in range(nb_epochs):
    losses = list()
    accuracy = list()
    for batch in train_loader():
        x, y = batch

        # print(x.size())
        # print(y)

        # # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)

        # print(x.size())

        # batch b
        # x 28*28

        # 1. forward
        # print(x[0])
        l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        # 3. cleaning the gradients
        model.zero_grad()
            
        # 4. accumulate the partial derivatives of J wrt params
        J.backward()

        # 5. step in hte opposite direction of the gradient
        optimizer.step()

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, train loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' train accuracy: {torch.tensor(accuracy).mean()}')


    losses = list()
    accuracy = list()
    for batch in val_loader():
        x, y = batch
        
        # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)

        # 1. forward
        with torch.no_grad():
            l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, validation loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' val accuracy: {torch.tensor(accuracy).mean()}')
    

torch.Size([784])


RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [32, 784]

In [9]:
# evaluate on the test set
import csv
file_path = 'submission.csv'
# Open the CSV file in write mode
j = 0
with open(file_path, 'w', newline='') as txtfile:
    # Create a CSV writer object
    csv_writer = csv.writer(txtfile)
    csv_writer.writerow(['ImageId', 'Label'])
    for batch in test_loader():
        x = batch
        
        # for torchvision dataset
        b = x.size(0)
        x = x.view(b, -1)
    
        # 1. forward
        with torch.no_grad():
            l = model(x.cuda())    # l:logits
    
        out = l.detach().argmax(dim=1).cpu().float()
        # print('output', out)

        
        for i in range(0, len(x)):
            csv_writer.writerow([int(j+1), int(np.asarray(out[i]))])
            j = j+1

# Define the file path
file_path = 'submission.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Display the DataFrame
print(df.head())


   ImageId  Label
0        1      2
1        2      0
2        3      9
3        4      9
4        5      3


In [54]:
# printing out result
print(torch_x.shape)
print(torch_y.shape)

torch_x_image = torch_x.view(-1, 1, 28, 28)

# split train, val, test set
split = 38000
train_x_image = torch_x_image[:split]
train_y = torch_y[:split]
# train_y = one_hot_encoded[:split]

val_x_image = torch_x_image[split:]
val_y = torch_y[split:]
# val_y = one_hot_encoded[split:]

# printing out result
print(train_x_image.shape)
print(val_x_image.shape)

# batching data
batch_size = 32
def train_loader():
    num_batches = train_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X_image = train_x_image[batch_start:batch_end,:]
        batch_Y = train_y[batch_start:batch_end]
        yield batch_X_image, batch_Y

def val_loader():
    num_batches = val_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X_image = val_x_image[batch_start:batch_end,:]
        batch_Y = val_y[batch_start:batch_end]
        yield batch_X_image, batch_Y
        
# Creating a CNN class
class ConvNeuralNet(nn.Module):
	#  Determine what layers and their order in CNN object 
    def __init__(self, num_classes):
        super(ConvNeuralNet, self).__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        self.fc1 = nn.Linear(1024, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)
    
    # Progresses data across layers    
    def forward(self, x):
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.max_pool1(out)
        
        out = self.conv_layer3(out)
        out = self.conv_layer4(out)
        out = self.max_pool2(out)
                
        out = out.reshape(out.size(0), -1)
        
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out

model = ConvNeuralNet(10).cuda()

torch.Size([42000, 784])
torch.Size([42000])
torch.Size([38000, 1, 28, 28])
torch.Size([4000, 1, 28, 28])


In [57]:
# My Training loops
nb_epochs = 50
for epoch in range(nb_epochs):
    losses = list()
    accuracy = list()
    for batch in train_loader():
        x, y = batch

        # print(x.size())
        # print(y)

        # # for torchvision dataset
        # b = x.size(0)
        # x = x.view(b, -1)

        # print(x.size())

        # batch b
        # x 28*28

        # 1. forward
        # print(x[0])
        l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        # 3. cleaning the gradients
        model.zero_grad()
            
        # 4. accumulate the partial derivatives of J wrt params
        J.backward()

        # 5. step in hte opposite direction of the gradient
        optimizer.step()

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, train loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' train accuracy: {torch.tensor(accuracy).mean()}')


    losses = list()
    accuracy = list()
    for batch in val_loader():
        x, y = batch
        
        # # for torchvision dataset
        # b = x.size(0)
        # x = x.view(b, -1)

        # 1. forward
        with torch.no_grad():
            l = model(x.cuda())    # l:logits

        # 2. compute the objective function
        J = loss(l, y.cuda())

        losses.append(J.item())
        accuracy.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

    print(f' Epoch {epoch +1}, validation loss: {torch.tensor(losses).mean()}', end=', ')
    print(f' val accuracy: {torch.tensor(accuracy).mean()}')
    

 Epoch 1, train loss: 0.2350294440984726,  train accuracy: 0.9244155287742615
 Epoch 1, validation loss: 0.09493966400623322,  val accuracy: 0.9670000076293945
 Epoch 2, train loss: 0.06232268735766411,  train accuracy: 0.9809130430221558
 Epoch 2, validation loss: 0.07293308526277542,  val accuracy: 0.9782500267028809
 Epoch 3, train loss: 0.041058119386434555,  train accuracy: 0.9876000285148621
 Epoch 3, validation loss: 0.07747603207826614,  val accuracy: 0.9787499904632568
 Epoch 4, train loss: 0.03091043420135975,  train accuracy: 0.9905223250389099
 Epoch 4, validation loss: 0.0742223709821701,  val accuracy: 0.981249988079071
 Epoch 5, train loss: 0.022718852385878563,  train accuracy: 0.9927864074707031
 Epoch 5, validation loss: 0.09567372500896454,  val accuracy: 0.9769999980926514
 Epoch 6, train loss: 0.017699792981147766,  train accuracy: 0.9942607283592224
 Epoch 6, validation loss: 0.0882871225476265,  val accuracy: 0.9789999723434448
 Epoch 7, train loss: 0.01640133932

In [59]:
# process test set
# load the data
df_test = pd.read_csv('test.csv')

# creating tensor from targets_df 
df_test_x = df_test.iloc[:,:]

# normalize from 0:1
test_x = torch.tensor(df_test_x.values).float() / 255

test_x_image = test_x.view(-1, 1, 28, 28)

# I dont have to do this; you are just testing, no need to batch
def test_loader():
    num_batches = test_x.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = i * batch_size
        batch_end = (i + 1) * batch_size
        batch_X_image = test_x_image[batch_start:batch_end,:]
        yield batch_X_image

# evaluate on the test set
import csv
file_path = 'submission.csv'
# Open the CSV file in write mode
j = 0
with open(file_path, 'w', newline='') as txtfile:
    # Create a CSV writer object
    csv_writer = csv.writer(txtfile)
    csv_writer.writerow(['ImageId', 'Label'])
    for batch in test_loader():
        x = batch
           
        # 1. forward
        with torch.no_grad():
            l = model(x.cuda())    # l:logits
    
        out = l.detach().argmax(dim=1).cpu().float()
        # print('output', out)

        
        for i in range(0, len(x)):
            csv_writer.writerow([int(j+1), int(np.asarray(out[i]))])
            j = j+1

# Define the file path
file_path = 'submission.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Display the DataFrame
print(df.head())

   ImageId  Label
0        1      2
1        2      0
2        3      9
3        4      0
4        5      3


In [69]:
# A more flexible model
class ResNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(28 * 28, 64)
        self.l2 = nn.Linear(64, 64)
        self.l3 = nn.Linear(64, 10)
        self.do = nn.Dropout(0.1)

    def forward(self, x):
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        do = self.do(h1 + h2)  # this allows for the partial gradients in the deeper layers (first ones) to update faster
        logits = self.l3(do)
        return logits

model = ResNet().cuda()