In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import re  
import warnings
from pathlib import Path
warnings.filterwarnings("ignore")
%matplotlib inline
plt.rcParams['figure.figsize'] = [8,5]
plt.rcParams['font.size'] =14
plt.rcParams['font.weight']= 'bold'
sns.set()

In [2]:
import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets 
import torchvision.transforms as transforms

In [3]:
import spacy
import torchtext

In [4]:
# Load the large English pipeline by its installed package name rather than
# an absolute site-packages path, so the cell is not tied to one user
# account or one installed model version.
nlp = spacy.load('en_core_web_lg')

In [5]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the hidden state of every LSTM layer.
        num_layers: number of stacked LSTM layers.
        n_classes: length of the output vector produced per sequence.
    """

    def __init__(self , input_size , hidden_size , num_layers , n_classes):
        super().__init__()
        # Kept as attributes so existing code that reads them keeps working.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size , hidden_size , num_layers , batch_first = True)
        self.fc = nn.Linear(hidden_size , n_classes)

    def forward(self , x):
        """Return one ``n_classes``-sized vector per sequence in ``x``.

        Args:
            x: tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, n_classes)`` with raw (un-normalised)
            scores computed from the top layer's output at the final step.
        """
        # When no initial state is passed, nn.LSTM starts from zero hidden and
        # cell states on the input's own device and dtype. That matches the
        # explicit zero tensors used before, without relying on a
        # notebook-level `device` global that may disagree with where the
        # model actually lives.
        out , _ = self.lstm(x)
        # Only the last time step feeds the classifier head.
        out = self.fc(out[:,-1 ,:])

        return out
# Smoke test: push a random batch of 64 sequences (28 steps x 28 features)
# through a throwaway network and display the shape of what comes back.
model = LSTM(input_size=28, hidden_size=128, num_layers=3, n_classes=10).to(device)
model(torch.randn(64, 28, 28).to(device)).shape

torch.Size([64, 10])

In [6]:
# Sequence geometry fed to the LSTM
seq_length = 28        # time steps per sample
input_size = 28        # features per time step

# Network capacity
n_layers = 3           # stacked LSTM layers
hidden_size = 128      # hidden units per layer
num_classes = 10       # size of the output vector

# Optimisation settings
num_epochs = 11
batch_size = 64
learning_rate = 1e-3


In [7]:
# Keep the dataset location in one place and derive it from the user's home
# directory instead of hard-coding one account's absolute path.
# NOTE(review): presumably this resolves to the previously hard-coded
# C:\Users\besho\data on the original machine -- confirm.
MNIST_ROOT = str(Path.home() / 'data')

# download=True only fetches the files when they are missing from MNIST_ROOT,
# so the cell also works on a machine that has never seen the dataset.
train = datasets.MNIST(root = MNIST_ROOT , train = True
                       , transform = transforms.ToTensor() , download = True )
test = datasets.MNIST(root = MNIST_ROOT , train = False
                       , transform = transforms.ToTensor() , download = True )


In [8]:
# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(dataset= train ,batch_size = batch_size , shuffle = True)
# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition (and any per-batch statistics) identical from run to run.
test_loader = DataLoader(dataset= test , batch_size = batch_size , shuffle = False)

In [10]:
from tqdm.auto import tqdm

# Fresh network sized from the hyperparameters, placed on the selected device.
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=n_layers,
    n_classes=num_classes,
).to(device)

# Multi-class objective, and Adam over every parameter of the network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
def train_model(model , train , n_epochs , device):
    """Train ``model`` on the batches yielded by ``train`` for ``n_epochs`` epochs.

    The loss function and the optimiser are the notebook-level ``criterion``
    and ``optimizer`` objects; ``optimizer`` must have been built over
    ``model``'s parameters.

    Args:
        model: network to optimise; it is called as ``model(X.squeeze(1))``.
        train: iterable of ``(X, y)`` batches, re-iterated once per epoch.
        n_epochs: number of passes over ``train``.
        device: device every batch is moved to before the forward pass.

    Prints the mean per-sample training loss for epochs 0, 10, 20, ...
    Raises ``ZeroDivisionError`` if ``train`` yields no samples.
    """
    for epoch in tqdm(range(n_epochs)):
        model.train()
        loss_sum = 0.0
        n_seen = 0
        for X,y in train:
            X ,y = X.to(device) , y.to(device)

            # squeeze(1) drops a size-1 dimension at index 1
            # (presumably the image channel axis -- confirm for other data).
            y_pred = model(X.squeeze(1))
            loss = criterion(y_pred , y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # .item() yields a plain float. Adding the loss tensor itself
            # would keep every batch's autograd graph alive until the end
            # of the epoch.
            # Weighting by batch size gives a per-sample mean even when the
            # last batch is smaller (assumes `criterion` returns a batch mean).
            loss_sum += loss.item() * len(y)
            n_seen += len(y)

        train_loss = loss_sum / n_seen
        if epoch % 10 == 0:
            print(f' loss for train = {train_loss} ')

In [12]:
def test_model(model , test , device):
    """Evaluate ``model`` on the batches in ``test`` and summarise the result.

    The loss comes from the notebook-level ``criterion``. Each batch loss is
    weighted by its batch size before averaging, so a smaller final batch
    does not skew the reported value (assumes ``criterion`` returns a
    per-batch mean, which is the default for ``nn.CrossEntropyLoss()``).

    Args:
        model: network to evaluate; it is called as ``model(X.squeeze(1))``.
        test: iterable of ``(X, y)`` batches.
        device: device every batch is moved to before the forward pass.

    Returns:
        str: ``'eval_loss = <mean loss> , acc = <accuracy in percent>'``.

    Raises:
        ZeroDivisionError: if ``test`` yields no samples.
    """
    n_correct = 0
    n_samples = 0
    loss_sum = 0

    model.eval()
    # inference_mode disables autograd tracking for everything computed inside.
    with torch.inference_mode():
        for X,y in test:
            X ,y = X.to(device) , y.to(device)

            y_pred = model(X.squeeze(1))
            # Predicted class = index of the largest score along dim 1.
            y_labels = y_pred.argmax(1)

            n_correct += (y_labels == y).sum()
            n_samples += len(y)
            loss_sum += criterion(y_pred , y) * len(y)

    # Convert the accumulated tensors to Python numbers once, after the loop.
    eval_loss = float(loss_sum) / n_samples
    accuracy = float(n_correct) / n_samples * 100

    return f'eval_loss = {eval_loss} , acc = { accuracy }'
            

In [13]:
# Fit the network on the training batches for the configured number of epochs.
train_model(model, train=train_loader, n_epochs=num_epochs, device=device)

  0%|          | 0/11 [00:00<?, ?it/s]

 loss for train = 0.47844573855400085 
 loss for train = 0.021126551553606987 


In [14]:
# Report loss and accuracy on the held-out test batches.
test_model(model, test=test_loader, device=device)

'eval_loss = 0.0391162745654583 , acc = 98.89'