In [1]:
import torch
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
from letters_dataset import LettersDataset
from text_encoder import TextEncoder
import torch.nn as nn
from train_collections import DS_ARABIC_LETTERS, DS_HARAKAT
import numpy as np
from tqdm import tqdm

In [9]:

dim_vocab = len(DS_ARABIC_LETTERS)
dim_out = len(DS_HARAKAT) + 2
embedding_dim = 64
n_epochs = 1
batch_size = 64


In [3]:

dataset = LettersDataset()
loader = data.DataLoader(dataset, shuffle=True, batch_size=batch_size)

# load val data
# da = LettersDataset('clean_out/X_val.csv', 'clean_out/y_val.csv')


w = 495


In [14]:




class CharModel(nn.Module):
    def __init__(self):
        super().__init__()

        # embedding and LSTM layers
        self.embedding = nn.Embedding(dim_vocab, embedding_dim)

        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=256,
                            num_layers=1, batch_first=True )
        self.dropout = nn.Dropout(0.2)
        self.linear = nn.Linear(256, dim_out)

    def forward(self, x):
        # pass thru embedding layer
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = self.linear(self.dropout(x))
        return x



model = CharModel()

optimizer = optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()
num_batches = len(loader)
print("Number of batches:", num_batches)
best_model = None
best_loss = np.inf
for epoch in range(n_epochs):
    model.train()
    for i, batch in tqdm(enumerate(loader)):
        X_batch = batch["input"]
        y_batch = batch["output"]
        y_pred = ''
        y_pred = model(X_batch)
        print(X_batch)
        y_pred = y_pred.transpose(1, 2) 
        # print(y_pred.shape)
        # print(y_batch.shape)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print("Epoch %d, batch %d: Loss = %.4f" % (epoch, i, loss))
        
    # Validation
    model.eval()
    loss = 0
    with torch.no_grad():
        for batch in loader:
            X_batch = batch["input"]
            y_batch = batch["output"]
            y_pred = model(X_batch)
            loss += loss_fn(y_pred, y_batch)
        if loss < best_loss:
            best_loss = loss
            best_model = model.state_dict()
        print("Epoch %d: Cross-entropy: %.4f" % (epoch, loss))


Number of batches: 1295


0it [00:00, ?it/s]

tensor([[58, 31, 27,  ..., 59, 59, 59],
        [58, 19,  0,  ..., 59, 59, 59],
        [58, 20, 26,  ..., 59, 59, 59],
        ...,
        [58,  3, 23,  ..., 59, 59, 59],
        [58, 19, 32,  ..., 59, 59, 59],
        [58, 23, 11,  ..., 59, 59, 59]])


1it [00:00,  1.08it/s]

Epoch 0, batch 0: Loss = 2.8541
tensor([[58, 20, 26,  ..., 59, 59, 59],
        [58, 20, 26,  ..., 59, 59, 59],
        [58, 12,  9,  ..., 59, 59, 59],
        ...,
        [19, 31, 12,  ..., 59, 59, 59],
        [58, 18, 27,  ..., 59, 59, 59],
        [58, 26, 22,  ..., 59, 59, 59]])


2it [00:01,  1.11it/s]

tensor([[58, 20, 26,  ..., 59, 59, 59],
        [58,  1, 24,  ..., 59, 59, 59],
        [32,  8,  0,  ..., 59, 59, 59],
        ...,
        [58, 22, 31,  ..., 59, 59, 59],
        [58, 26, 11,  ..., 59, 59, 59],
        [26, 20, 26,  ..., 59, 59, 59]])


3it [00:02,  1.02s/it]

tensor([[58, 22, 22,  ..., 59, 59, 59],
        [58, 22, 20,  ..., 59, 59, 59],
        [58, 19, 27,  ..., 59, 59, 59],
        ...,
        [58, 31, 24,  ..., 59, 59, 59],
        [58, 31, 27,  ..., 59, 59, 59],
        [58, 23, 24,  ..., 59, 59, 59]])


4it [00:03,  1.02it/s]

tensor([[58, 19,  8,  ..., 59, 59, 59],
        [58,  9, 26,  ..., 59, 59, 59],
        [58, 27,  2,  ..., 59, 59, 59],
        ...,
        [58, 56, 36,  ..., 59, 59, 59],
        [58, 20, 26,  ..., 59, 59, 59],
        [58, 19, 25,  ..., 59, 59, 59]])


5it [00:04,  1.11it/s]

tensor([[58, 36,  5,  ..., 59, 59, 59],
        [58, 26, 20,  ..., 59, 59, 59],
        [58, 56, 36,  ..., 59, 59, 59],
        ...,
        [58, 31, 26,  ..., 59, 59, 59],
        [58, 20, 26,  ..., 59, 59, 59],
        [58, 30, 27,  ..., 59, 59, 59]])


6it [00:05,  1.18it/s]

tensor([[58, 14,  5,  ..., 59, 59, 59],
        [58, 31, 27,  ..., 59, 59, 59],
        [58, 26, 22,  ..., 59, 59, 59],
        ...,
        [58, 25,  8,  ..., 59, 59, 59],
        [56, 36, 26,  ..., 59, 59, 59],
        [58, 19, 27,  ..., 59, 59, 59]])


7it [00:06,  1.22it/s]

tensor([[58, 23,  0,  ..., 36, 23, 24],
        [58, 26, 32,  ..., 59, 59, 59],
        [58, 19, 22,  ..., 59, 59, 59],
        ...,
        [58, 25, 26,  ..., 59, 59, 59],
        [58, 20, 26,  ..., 59, 59, 59],
        [58, 31,  5,  ..., 59, 59, 59]])


8it [00:06,  1.22it/s]

tensor([[ 3, 23, 36,  ..., 59, 59, 59],
        [20,  0, 22,  ..., 59, 59, 59],
        [58, 20, 22,  ..., 59, 59, 59],
        ...,
        [58, 23, 17,  ..., 59, 59, 59],
        [58, 20, 26,  ..., 59, 59, 59],
        [58, 22, 26,  ..., 59, 59, 59]])


9it [00:07,  1.24it/s]

tensor([[26, 32, 24,  ..., 59, 59, 59],
        [26, 19, 27,  ..., 59, 59, 59],
        [20,  0, 22,  ..., 59, 59, 59],
        ...,
        [58, 31, 27,  ..., 59, 59, 59],
        [58, 20, 26,  ..., 59, 59, 59],
        [58, 56, 36,  ..., 59, 59, 59]])


10it [00:08,  1.18it/s]

tensor([[58,  1, 25,  ..., 59, 59, 59],
        [26, 17,  1,  ..., 59, 59, 59],
        [58, 37, 36,  ..., 59, 59, 59],
        ...,
        [58,  0, 22,  ..., 59, 59, 59],
        [26, 20,  7,  ..., 59, 59, 59],
        [22, 26, 36,  ..., 59, 59, 59]])


11it [00:09,  1.09it/s]

tensor([[20,  0, 22,  ..., 59, 59, 59],
        [58, 31, 26,  ..., 59, 59, 59],
        [58, 37, 36,  ..., 59, 59, 59],
        ...,
        [58, 36, 26,  ..., 59, 59, 59],
        [58, 22, 31,  ..., 59, 59, 59],
        [31,  2, 20,  ..., 59, 59, 59]])
