In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import re  
import warnings
from pathlib import Path
# Install a blanket "ignore" filter: every Python warning raised after this
# point is suppressed for the rest of the session.
warnings.filterwarnings("ignore")
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the seaborn theme FIRST: sns.set() rewrites matplotlib rcParams
# (its plotting context sets font.size, among others), so calling it after the
# manual overrides would silently discard them.
sns.set()
# Notebook-wide figure defaults, applied on top of the seaborn theme.
plt.rcParams['figure.figsize'] = [8, 5]
plt.rcParams['font.size'] = 14
plt.rcParams['font.weight'] = 'bold'

In [2]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets 
import torchvision.transforms as transforms

In [3]:
import spacy
import torchtext

In [4]:
# Load the large English pipeline by package name instead of a hard-coded,
# user- and version-specific absolute path inside site-packages. This resolves
# to the en_core_web_lg package installed in the active environment.
nlp = spacy.load('en_core_web_lg')

In [5]:
class CNN(nn.Module):
    """Small two-convolution image classifier.

    Architecture: conv(3x3) -> ReLU -> max-pool(2) -> conv(3x3) -> ReLU ->
    max-pool(2) -> flatten -> linear.  Both convolutions use stride 1 and
    padding 1, so only the two pooling steps shrink the feature map (each one
    halves height and width, rounding down).

    Args:
        in_channels: number of channels of the input images.
        out_channels: number of feature maps produced by the first convolution.
        num_classes: length of the logit vector returned by ``forward``.
        image_size: spatial size of the inputs, either one int for square
            images or a ``(height, width)`` pair.  Defaults to 28, which
            yields the original ``32*7*7`` classifier input.

    Raises:
        ValueError: if ``image_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self , in_channels , out_channels , num_classes , image_size = 28):
        super().__init__()

        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size

        # Two stride-2 poolings: floor(floor(n / 2) / 2) == n // 4 for n >= 0.
        pooled_h, pooled_w = height // 4, width // 4
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f'image_size {image_size!r} is too small for two 2x2 poolings'
            )

        conv2_channels = 32  # feature maps produced by the second convolution

        # Submodules are created in the original order (conv1, pool, conv2, fc)
        # so that seeded weight initialisation is unaffected.
        self.conv1 = nn.Conv2d(in_channels = in_channels , out_channels = out_channels
                               ,kernel_size = 3 , stride = 1 , padding = 1 )
        self.pool = nn.MaxPool2d(kernel_size = 2 , stride = 2)
        self.conv2 = nn.Conv2d(in_channels = out_channels , out_channels = conv2_channels
                               ,kernel_size = 3 , stride = 1 , padding = 1)

        self.fc = nn.Linear(conv2_channels * pooled_h * pooled_w , num_classes)

    def forward(self , x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Collapse channel and spatial dimensions into one feature vector per sample.
        x = x.reshape(x.shape[0] , -1)

        return self.fc(x)
# Smoke test: build a throwaway network and push one random batch of
# 64 single-channel 28x28 images through it; the cell displays the output shape.
model = CNN(in_channels=1, out_channels=16, num_classes=10)
model(torch.randn(64, 1, 28, 28)).shape

torch.Size([64, 10])

In [6]:
# --- Run configuration -------------------------------------------------------
# Fix the RNG seed so weight initialisation and DataLoader shuffling are
# repeatable across "Restart & Run All".
seed = 42
torch.manual_seed(seed)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model shape
in_channels = 1      # channels of the input images
out_channels = 16    # feature maps of the first convolution
num_classes = 10     # size of the logit vector

# Optimisation
learning_rate = 0.001
batch_size = 64
num_epochs = 11


In [7]:
# Store MNIST under "<home>/data" rather than a machine-specific absolute path,
# and define the location and transform once so both splits stay in sync.
data_root = str(Path.home() / 'data')
to_tensor = transforms.ToTensor()

# download=True only fetches the files when they are not already present under
# data_root, so the notebook also runs on a machine that has no local copy yet.
train = datasets.MNIST(root = data_root , train = True
                       , transform = to_tensor , download = True )
test = datasets.MNIST(root = data_root , train = False
                       , transform = to_tensor , download = True )


In [8]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset = train , batch_size = batch_size , shuffle = True)
# Evaluation needs no shuffling; a fixed order keeps the batch composition (and
# therefore the batch-averaged eval loss) identical from run to run.
test_loader = DataLoader(dataset = test , batch_size = batch_size , shuffle = False)

In [9]:
from tqdm.auto import tqdm

# Build the network from the configured sizes and move it to the chosen device.
model = CNN(in_channels=in_channels, out_channels=out_channels, num_classes=num_classes)
model = model.to(device)

# Cross-entropy loss on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [10]:
def train_model(model , train , n_epochs , device , loss_fn = None , opt = None):
    """Train ``model`` for ``n_epochs`` passes over the batches in ``train``.

    Args:
        model: the network to optimise; it is switched to training mode.
        train: iterable yielding ``(X, y)`` batches (e.g. a DataLoader).
        n_epochs: number of full passes over ``train``.
        device: device each batch is moved to before the forward pass.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar
            loss tensor.  Defaults to the notebook-level ``criterion``.
        opt: optimizer updating ``model``'s parameters.  Defaults to the
            notebook-level ``optimizer``.

    Returns:
        None.  The batch-averaged loss is printed for every 10th epoch
        (epochs 0, 10, 20, ...).

    Raises:
        ZeroDivisionError: if ``train`` yields no batches.
    """
    # Fall back to the notebook globals so existing four-argument calls still work.
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt

    for epoch in tqdm(range(n_epochs)):
        model.train()  # once per epoch is enough; nothing in the loop leaves train mode
        running_loss = 0.0
        n_batches = 0

        for X , y in train:
            X , y = X.to(device) , y.to(device)

            loss = loss_fn(model(X) , y)

            opt.zero_grad()
            loss.backward()
            opt.step()

            # .item() detaches the value: summing the loss tensor itself would
            # keep autograd history alive for the whole epoch.
            running_loss += loss.item()
            n_batches += 1  # counted by hand so loaders without len() also work

        train_loss = running_loss / n_batches
        if epoch % 10 == 0:
            print(f' loss for train = {train_loss} ')

In [11]:
def test_model(model , test , device):
    
    n_correct = 0
    n_samples = 0
    eval_loss = 0
    
    model.eval()
    with torch.inference_mode():
        for X,y in test:
            X ,y = X.to(device) , y.to(device)
            
            y_pred = model(X)
            y_labels = y_pred.argmax(1)
            
            n_correct += (y_labels == y).sum()
            n_samples += len(y)
            eval_loss += criterion(y_pred , y)
        
        eval_loss /= len(test)
    
    return f'eval_loss = {eval_loss} , acc = { float(n_correct)/float(n_samples)*100 }'
            

In [12]:
train_model(model , train_loader , num_epochs , device)

  0%|          | 0/11 [00:00<?, ?it/s]

 loss for train = 0.26799699664115906 
 loss for train = 0.02060873620212078 


In [13]:
test_model(model , test_loader , device)

'eval_loss = 0.05227799713611603 , acc = 98.53'