In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import torch 
from torch import nn
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'

In [2]:
# Load the training table; the path is relative to the notebook's working directory.
df=pd.read_csv('train.csv')

In [3]:
# Display the frame: one 'label' column followed by pixel0..pixel783.
df

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Data

In [4]:
# Separate the digit labels (targets) from the pixel columns (features),
# each as a plain NumPy array.
y=df.loc[:,'label'].to_numpy()
X=df.drop(columns='label').to_numpy()

In [5]:
# Divide the pixel values by 255 (presumably 8-bit intensities -> [0, 1]; confirm
# against the data) and view every row as one single-channel 28x28 image, giving
# the (N, C, H, W) layout that nn.Conv2d consumes.
# NOTE(review): X is rebound here, so re-running this cell on its own divides by
# 255 a second time; re-run the previous cell first.
X = (X / 255).reshape(-1, 1, 28, 28)

In [6]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader,TensorDataset
import os

# os.cpu_count() returns None when the number of CPUs cannot be determined, and
# DataLoader does not accept num_workers=None; fall back to loading in the main
# process (num_workers=0) in that case.
NUM_WORKERS=os.cpu_count() or 0
BATCH_SIZE=32

# Hold out 25% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# float32 images for the conv layers, int64 class indices for nn.CrossEntropyLoss.
train_dataset=TensorDataset(torch.tensor(X_train,dtype=torch.float32),torch.tensor(y_train,dtype=torch.long))
test_dataset=TensorDataset(torch.tensor(X_test,dtype=torch.float32),torch.tensor(y_test,dtype=torch.long))

# Shuffle only the training data; evaluation order does not matter.
train_dataloader=DataLoader(dataset=train_dataset,batch_size=BATCH_SIZE,num_workers=NUM_WORKERS,shuffle=True)
test_dataloader=DataLoader(dataset=test_dataset,batch_size=BATCH_SIZE,num_workers=NUM_WORKERS,shuffle=False)

In [7]:
# Sanity check: after the reshape the full image array should be (num_samples, 1, 28, 28).
X.shape

(42000, 1, 28, 28)

In [8]:
# Sanity check: the training split should hold 75% of the samples (test_size=0.25).
X_train.shape

(31500, 1, 28, 28)

In [9]:
# Pull a single mini-batch (images, labels) from the training loader for the shape checks below.
batch,label=next(iter(train_dataloader))

In [10]:
# Sanity check: a batch of images should be (batch_size, 1, 28, 28).
batch.shape

torch.Size([32, 1, 28, 28])

## Build the model

In [11]:
class DigitRecModel(nn.Module):
    """Two-block convolutional classifier for square, fixed-size images.

    Each block is Conv(k=2) -> ReLU -> Conv(k=2) -> ReLU -> MaxPool(2) with no
    padding; a flatten followed by a single linear layer produces the class logits.

    Args:
        hidden_unit: Number of channels produced by every convolution.
        input_shape: Number of channels in the input images (1 for grayscale).
        output_shape: Number of classes, i.e. logits returned per sample.
        image_size: Height/width of the (square) input images. Defaults to 28.

    Raises:
        ValueError: If `image_size` is too small to survive both conv blocks.
    """

    def __init__(self,hidden_unit,input_shape,output_shape,image_size=28):
        super().__init__()
        self.conv_block_1=nn.Sequential(
            nn.Conv2d(in_channels=input_shape,out_channels=hidden_unit,kernel_size=2,stride=1,padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_unit,out_channels=hidden_unit,kernel_size=2,stride=1,padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
            )
        self.conv_block_2=nn.Sequential(
            nn.Conv2d(in_channels=hidden_unit,out_channels=hidden_unit,kernel_size=2,stride=1,padding=0),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_unit,out_channels=hidden_unit,kernel_size=2,stride=1,padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
            )

        # Spatial size after the two blocks: each block applies two unpadded
        # 2x2 convolutions (-1 pixel each) and then a 2x2 max-pool (floor halving).
        # For 28x28 inputs: 28 -> 26 -> 13 -> 11 -> 5.
        side=image_size
        for _ in range(2):
            side=(side-2)//2
        if side<1:
            raise ValueError(f"image_size={image_size} is too small for two conv blocks")

        self.classifier=nn.Sequential(
            nn.Flatten(),
            # Derived from the arguments instead of hard-coding 250 -> 10, so any
            # hidden_unit / output_shape works (250 = 10 channels * 5 * 5).
            nn.Linear(in_features=hidden_unit*side*side,out_features=output_shape)
        )

    def forward(self,x):
        """Return class logits of shape (batch, output_shape) for a batch of images `x`."""
        x=self.conv_block_1(x)
        x=self.conv_block_2(x)
        x=self.classifier(x)
        return x

# Grayscale input (1 channel), 10 hidden channels, 10 classes (digits 0 to 9).
model_digit_rec=DigitRecModel(input_shape=1,output_shape=10,hidden_unit=10)

In [12]:
# Smoke test: one forward pass on the sample batch should give one row of 10 logits per image.
model_digit_rec(batch).shape

torch.Size([32, 10])

## Let's train our model

In [13]:
# Move the model to the target device before building the optimizer: the
# training loop sends every batch to `device`, so a model left on the CPU would
# raise a device-mismatch error on a CUDA machine. nn.Module.to() moves the
# parameters in place.
model_digit_rec.to(device)

# Plain SGD over all model parameters; cross-entropy expects raw logits and
# integer class indices.
optimizer=torch.optim.SGD(lr=.01,params=model_digit_rec.parameters())
loss_fn=nn.CrossEntropyLoss()

In [None]:
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score as accuracy_fn  # no longer used below; import kept in case other cells reference the name


def _run_epoch(model,dataloader,loss_fn,device,optimizer=None):
    """Run one full pass over `dataloader` and return (mean loss, accuracy).

    Args:
        model: The network to train or evaluate.
        dataloader: Yields (inputs, integer class targets) batches.
        loss_fn: Loss returning the mean over the batch (as nn.CrossEntropyLoss
            does by default); the per-sample weighting below relies on that.
        device: Device every batch is moved to; the model must already be on it.
        optimizer: When given, the model is trained (train mode, one optimizer
            step per batch). When None, the model is only evaluated (eval mode,
            no autograd tracking).

    Returns:
        Tuple of (loss averaged over samples, fraction of correct predictions).

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    is_training=optimizer is not None
    model.train(is_training)

    total_loss,n_correct,n_seen=0.0,0,0
    # inference_mode(mode=False) is a no-op, so gradients flow while training.
    with torch.inference_mode(mode=not is_training):
        for X_batch,y_batch in dataloader:
            X_batch,y_batch=X_batch.to(device),y_batch.to(device)

            logits=model(X_batch)
            loss=loss_fn(logits,y_batch)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight by batch size so a smaller final batch does not skew the
            # averages, and count hits in torch so this also works on CUDA
            # tensors (sklearn metrics cannot convert those to NumPy).
            batch_size=y_batch.size(0)
            total_loss+=loss.item()*batch_size
            n_correct+=(logits.argmax(dim=1)==y_batch).sum().item()
            n_seen+=batch_size

    if n_seen==0:
        raise ValueError("dataloader yielded no samples")
    return total_loss/n_seen,n_correct/n_seen


epochs=20
# nn.Module.to() moves the parameters in place and returns the same module, so
# the optimizer built on model_digit_rec keeps pointing at the right tensors.
model=model_digit_rec.to(device)
for epoch in tqdm(range(epochs)):
    train_loss,train_acc=_run_epoch(model,train_dataloader,loss_fn,device,optimizer=optimizer)
    test_loss,test_acc=_run_epoch(model,test_dataloader,loss_fn,device)
    print(f"Epochs :{epoch} | Train loss :{train_loss:.4} | Train accuracy :{train_acc:.3} | Test loss :{test_loss:.4} | Test accuracy :{test_acc:.3} " )


  0%|          | 0/20 [00:00<?, ?it/s]

Epochs :0 | Train loss :2.3 | Train accuracy :0.112 | Test loss :2.295 | Test accuracy :0.11 
