In [1]:
import torch
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
from letters_dataset import LettersDataset
from text_encoder import TextEncoder
import torch.nn as nn
from train_collections import DS_ARABIC_LETTERS, DS_HARAKAT
import numpy as np
from tqdm import tqdm

# autoreload notebook
%load_ext autoreload

In [6]:
import matplotlib.pyplot as plt


In [7]:



dim_vocab = len(DS_ARABIC_LETTERS)
dim_out = len(DS_HARAKAT) + 2
embedding_dim = 64
n_epochs = 2
batch_size = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [8]:

dataset = LettersDataset(device=device)
loader = data.DataLoader(dataset, shuffle=True, batch_size=batch_size)

# load val data
# da = LettersDataset('clean_out/X_val.csv', 'clean_out/y_val.csv')


w = 495


In [9]:
class CharModel(nn.Module):
    def __init__(self):
        super().__init__()

        # embedding and LSTM layers
        self.embedding = nn.Embedding(dim_vocab, embedding_dim)

        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=256,
                            num_layers=1, batch_first=True )
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(256, dim_out)

    def forward(self, x):
        # pass thru embedding layer
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = self.linear(self.dropout(x))
        return x



model = CharModel().to(device)

optimizer = optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()
num_batches = len(loader)
print("Number of batches:", num_batches)
best_model = None
best_loss = np.inf
# Training loop
losses = []
for epoch in range(n_epochs):
    model.train()
    for i, (X_batch,y_batch) in tqdm(enumerate(loader)):
        y_pred = ''
        y_pred = model(X_batch)
        y_pred = y_pred.transpose(1, 2) 
        # print(y_pred.shape)
        # print(y_batch.shape)
        loss = loss_fn(y_pred, y_batch)
        losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print("Epoch %d, batch %d: Loss = %.4f" % (epoch, i, loss))
        
    # Validation
    model.eval()
    loss = 0
    with torch.no_grad():
        for (X_batch,y_batch) in loader:
            y_pred = model(X_batch)
            y_pred = y_pred.transpose(1, 2) 
            
            loss += loss_fn(y_pred, y_batch)
        if loss < best_loss:
            best_loss = loss
            best_model = model.state_dict()
        print("Epoch %d: Cross-entropy: %.4f" % (epoch, loss))


Number of batches: 1295


3it [00:01,  3.29it/s]

Epoch 0, batch 0: Loss = 2.7165


103it [00:06, 18.23it/s]

Epoch 0, batch 100: Loss = 0.1564


203it [00:12, 18.40it/s]

Epoch 0, batch 200: Loss = 0.1511


303it [00:17, 18.13it/s]

Epoch 0, batch 300: Loss = 0.1147


403it [00:23, 17.97it/s]

Epoch 0, batch 400: Loss = 0.1037


503it [00:28, 18.22it/s]

Epoch 0, batch 500: Loss = 0.1060


603it [00:34, 18.08it/s]

Epoch 0, batch 600: Loss = 0.0773


703it [00:39, 18.12it/s]

Epoch 0, batch 700: Loss = 0.0844


803it [00:45, 18.09it/s]

Epoch 0, batch 800: Loss = 0.0764


903it [00:51, 18.07it/s]

Epoch 0, batch 900: Loss = 0.0954


1003it [00:56, 17.87it/s]

Epoch 0, batch 1000: Loss = 0.0901


1103it [01:02, 17.89it/s]

Epoch 0, batch 1100: Loss = 0.0950


1203it [01:07, 18.00it/s]

Epoch 0, batch 1200: Loss = 0.0744


1295it [01:12, 17.77it/s]


Epoch 0: Cross-entropy: 91.7820


2it [00:00, 17.34it/s]

Epoch 1, batch 0: Loss = 0.0760


104it [00:05, 17.92it/s]

Epoch 1, batch 100: Loss = 0.0927


204it [00:11, 17.62it/s]

Epoch 1, batch 200: Loss = 0.0833


304it [00:17, 17.84it/s]

Epoch 1, batch 300: Loss = 0.0781


404it [00:22, 18.15it/s]

Epoch 1, batch 400: Loss = 0.0614


504it [00:28, 17.98it/s]

Epoch 1, batch 500: Loss = 0.0712


604it [00:33, 17.82it/s]

Epoch 1, batch 600: Loss = 0.0557


704it [00:39, 17.94it/s]

Epoch 1, batch 700: Loss = 0.0700


804it [00:44, 18.09it/s]

Epoch 1, batch 800: Loss = 0.0528


904it [00:50, 18.18it/s]

Epoch 1, batch 900: Loss = 0.0664


1004it [00:55, 18.25it/s]

Epoch 1, batch 1000: Loss = 0.0522


1104it [01:01, 18.18it/s]

Epoch 1, batch 1100: Loss = 0.0632


1204it [01:07, 18.07it/s]

Epoch 1, batch 1200: Loss = 0.0763


1295it [01:12, 17.96it/s]


Epoch 1: Cross-entropy: 78.2119


In [10]:
# Plotting
plt.plot(losses)
plt.xlabel('Batch number')
plt.ylabel('Loss')
plt.title('Batch vs Loss during Training')
plt.show()

: 

In [None]:
val_dataset = LettersDataset('clean_out/X_val.csv', 'clean_out/y_val.csv', device=device)   

val_loader = data.DataLoader(val_dataset, shuffle=True, batch_size=batch_size)

# evaluaate accuracy on validation set


model.eval()
correct = 0
total = 0

with torch.no_grad():
    for (X_batch,y_batch) in val_loader:
        is_padding = X_batch == val_dataset.char_encoder.get_pad_token()
        y_pred = model(X_batch)
        y_pred = y_pred.transpose(1, 2) 
        _, predicted = torch.max(y_pred.data, 1)
        # Count only non-padding characters
        total += torch.sum(~is_padding).item()
        
        # Count correct predictions
        correct += torch.sum((predicted == y_batch) & (~is_padding)).item()
print("Accuracy: %.2f%%" % (100 * correct / total))



w = 500
Accuracy: 86.98%


In [None]:
print('DER of the network on the validation set: %d %%' % (100 * (1 - correct / total)))


DER of the network on the validation set: 13 %
