### Imports

In [28]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from tqdm import tqdm
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from TorchCRF import CRF
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split
import gensim
import numpy as np
from utilities import *

%run updatePreprocessing.ipynb

[['ق', 'و', 'ل', 'ه'], ['ل', 'ع', 'د', 'م'], ['م', 'ا'], ['ت', 'ت', 'ع', 'ل', 'ق'], ['إ', 'ل', 'خ'], ['أ', 'ي'], ['ا', 'ل', 'و', 'ص', 'ي', 'ة'], ['ق', 'و', 'ل', 'ه'], ['م', 'ا'], ['م', 'ر'], ['أ', 'ي'], ['ق', 'ب', 'ي', 'ل'], ['ق', 'و', 'ل'], ['ا', 'ل', 'م', 'ت', 'ن'], ['ل', 'غ', 'ت'], ['و', 'ل', 'و'], ['ا', 'ق', 'ت', 'ص', 'ر'], ['ع', 'ل', 'ى'], ['أ', 'و', 'ص', 'ي', 'ت'], ['ل', 'ه'], ['ب', 'ش', 'ا', 'ة'], ['أ', 'و'], ['أ', 'ع', 'ط', 'و', 'ه'], ['ش', 'ا', 'ة'], ['و', 'ل', 'ا'], ['غ', 'ن', 'م'], ['ل', 'ه'], ['ع', 'ن', 'د'], ['ا', 'ل', 'م', 'و', 'ت'], ['ه', 'ل'], ['ت', 'ب', 'ط', 'ل'], ['ا', 'ل', 'و', 'ص', 'ي', 'ة'], ['أ', 'و'], ['ي', 'ش', 'ت', 'ر', 'ى'], ['ل', 'ه'], ['ش', 'ا', 'ة'], ['و', 'ي', 'ؤ', 'خ', 'ذ'], ['م', 'ن'], ['ق', 'و', 'ل', 'ه'], ['ا', 'ل', 'آ', 'ت', 'ي'], ['ك', 'م', 'ا'], ['ل', 'و'], ['ل', 'م'], ['ي', 'ق', 'ل'], ['م', 'ن'], ['م', 'ا', 'ل', 'ي'], ['و', 'ل', 'ا'], ['م', 'ن'], ['غ', 'ن', 'م', 'ي'], ['أ', 'ن', 'ه', 'ا'], ['ل', 'ا'], ['ت', 'ب', 'ط', 'ل'], ['و', 'ع', 'ب', 'ا', 'ر',

### Constants

In [5]:
EMBEDDING_DIM = 300
HIDDEN_SIZE = 512
NUM_LAYERS = 1
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
BATCH_SIZE = 256
VOCAB_SIZE = len(basic_arabic_letters) + 1
LABELS_SIZE = len(DIACRITICS)
WINDOW_SIZE_BEFORE = 7
WINDOW_SIZE_AFTER = 3

TRAIN_PATH = "../dataset/train.txt"
VAL_PATH = "../dataset/val.txt"
LSTM_PATH="./models/lstm.pth"
RNN_PATH="./models/rnn.pth"
CNN_PATH = "./models/cnn.pth"
CRF_Val_PATH="./models/crf_val.pth"
CRF_PATH="./models/crf.pth"
CNN_val_PATH="./models/cnn_val.pth"

### Model building

### RNN

In [6]:
class RNN(nn.Module):
    def __init__(self, vocab_size, n_classes, embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS):
        """
        The constructor of our RNN model
        Inputs:
        - vacab_size: the number of unique characters
        - embedding_dim: the embedding dimension
        - n_classes: the number of final classes (diacritics)
        """
        super(RNN, self).__init__()

        # (1) Create an embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # (2) Create an LSTM layer with hidden size = hidden_size and batch_first = True
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
        
        # (3) Create a linear layer with number of neorons = n_classes
        self.linear = nn.Linear(hidden_size * 2, n_classes)

    def forward(self, sentences):
        """
        This function does the forward pass of our model
        Inputs:
        - sentences: tensor of shape (batch_size, max_length)

        Returns:
        - final_output: tensor of shape (batch_size, max_length, n_classes)
        """

        final_output = None
        
        embeddings = self.embedding(sentences)
        lstm_out, _ = self.lstm(embeddings)
        output = self.linear(lstm_out)
        # final_output = F.softmax(output, dim=1)
        return output

### CNN

In [7]:
class CNN(nn.Module):
    def __init__(self, vocab_size, n_classes, embedding_dim=EMBEDDING_DIM):
        super(CNN, self).__init__()

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # Convolutional layers
        self.conv1 = nn.Conv1d(embedding_dim, 256, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1)

        # Max pooling layers
        # self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)

        # Fully connected layers
        self.fc1 = nn.Linear(128, 256)
        self.fc2 = nn.Linear(256, n_classes)

    def forward(self, x):
        print("1", x.shape)
        x = self.embedding(x)
        x = x.permute(0, 2, 1)
        print("2", x.shape)
        # Convolutional layers with ReLU activation and max pooling
        x = F.relu(self.conv1(x))
        # x = self.pool(x)
        print("3", x.shape)
        x = F.relu(self.conv2(x))
        # x = self.pool(x)
        print("4", x.shape)
        x = F.relu(self.conv3(x))
        # x = self.pool(x)
        print("5", x.shape)

        # Fully connected layers with ReLU activation
        x = x.view(-1, 128)
        x = F.relu(self.fc1(x))
        print("6", x.shape)
        x = self.fc2(x)
        print("7", x.shape)

        return x

In [8]:
class RNN_CNN(nn.Module):
    def __init__(self, vocab_size, n_classes, embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS):
        super(RNN_CNN, self).__init__()

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # CNN layer
        self.conv1d = nn.Conv1d(in_channels=embedding_dim, out_channels=64, kernel_size=3, padding=1)  # Adjust parameters as needed

        # LSTM layer
        self.lstm = nn.LSTM(64, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

        # Linear layer
        self.linear = nn.Linear(hidden_size * 2, n_classes)

    def forward(self, sentences):
        embeddings = self.embedding(sentences)

        # Pass embeddings through CNN layer
        conv_out = self.conv1d(embeddings.permute(0, 2, 1))  # Reshape for CNN
        conv_out = torch.relu(conv_out)
        conv_out = conv_out.permute(0, 2, 1)  # Reshape back for LSTM

        # Pass CNN output through LSTM layer
        lstm_out, _ = self.lstm(conv_out)

        # Final output layer
        output = self.linear(lstm_out)
        return output


### CRF

In [9]:
class LSTM_CRF(nn.Module):
    def __init__(self, vocab_size, n_classes, embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS):
        super(LSTM_CRF, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # BiLSTM layer
        self.bilstm = nn.LSTM(embedding_dim, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

        # CRF layer
        self.crf = CRF(n_classes)

        # Linear layer
        self.linear = nn.Linear(hidden_size * 2, n_classes)
        # Create a CRF layer
        self.crf = CRF(n_classes, batch_first=True)

    def forward(self, sentences):
        # Input x is a sequence of indices
        embedded = self.embedding(sentences)

        # BiLSTM layer
        lstm_out, _ = self.bilstm(embedded)

        # Linear layer for classification
        linear_out = self.linear(lstm_out)
        if labels is not None:
            # Calculate the negative log-likelihood loss using the CRF layer
            loss = self.crf(output, labels)
            return -loss  # Return negative log-likelihood as we usually minimize it during training
        else:
            # If labels are not provided, return the raw output
            return output


In [10]:
class RNN_CRF(nn.Module):
    def __init__(self, vocab_size, n_classes, embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, dropout=0.5):
        super(RNN_CRF, self).__init__()

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # LSTM layer
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

        # Linear layer
        self.linear = nn.Linear(hidden_size * 2, n_classes)
        self.dropout = nn.Dropout(dropout)  # Apply dropout before the linear layer

        # CRF layer
        self.crf = CRF(n_classes)  # Place the CRF layer after the linear layer

    def forward(self, word):
        embeddings = self.embedding(word)
        lstm_out, _ = self.lstm(embeddings)
        dropout_out = self.dropout(lstm_out)  # Apply dropout
        output = self.linear(dropout_out)
        return output  # Return raw output for CRF loss calculation

    def predict(self, word):
        output = self.forward(word)
        predictions = self.crf.decode(output)
        return predictions


In [11]:
class RNN_CRF_Pre_Trained(nn.Module):
    def __init__(self, vocab_size, n_classes, embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, dropout=0.5, pretrained_embedding=None, freeze_embedding=False):
        super(RNN_CRF_Pre_Trained, self).__init__()

        # Embedding layer
        if pretrained_embedding is not None:
            self.embedding = nn.Embedding.from_pretrained(pretrained_embedding, freeze=freeze_embedding)
        else:
            self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # LSTM layer
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True).double()

        # Linear layer
        self.linear = nn.Linear(hidden_size * 2, n_classes).float()
        self.dropout = nn.Dropout(dropout)  # Apply dropout before the linear layer

        # CRF layer
        self.crf = CRF(n_classes)  # Place the CRF layer after the linear layer

    def forward(self, sentences):
        embeddings = self.embedding(sentences)
        lstm_out, _ = self.lstm(embeddings)
        dropout_out = self.dropout(lstm_out)  # Apply dropout
        output = self.linear(dropout_out.float())
        return output  # Return raw output for CRF loss calculation

    def predict(self, sentences):
        output = self.forward(sentences)
        predictions = self.crf.decode(output)
        return predictions


In [12]:
class RNN_CRF_MultiLayer(nn.Module):
    def __init__(self, vocab_size, word_vocab_size, n_classes, embedding_dim=EMBEDDING_DIM, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, dropout=0.5):
        super(RNN_CRF_MultiLayer, self).__init__()

        # Word embedding layer
        self.word_embedding = nn.Embedding(word_vocab_size, embedding_dim)

        # Word LSTM layer
        self.word_lstm = nn.LSTM(embedding_dim, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

        # Character embedding layer (already exists)
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # Character LSTM layer (already exists)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)

        # Linear layer after concatenation
        self.linear = nn.Linear(hidden_size * 4, n_classes)  # Combined output from both LSTMs
        self.dropout = nn.Dropout(dropout)

        # CRF layer
        self.crf = CRF(n_classes)

    def forward(self, sentences, words):
        # Word layer processing
        word_embeddings = self.word_embedding(words).float()
        word_lstm_out, _ = self.word_lstm(word_embeddings)

        # Character layer processing
        embeddings = self.embedding(sentences)
        lstm_out, _ = self.lstm(embeddings)

        # Concatenate outputs
        combined_output = torch.cat([lstm_out, word_lstm_out], dim=2)

        # Linear transformation and CRF
        dropout_out = self.dropout(combined_output)
        output = self.linear(dropout_out)
        return output


### Train

In [13]:

def train(model, path,val_path, train_dataset, train_labels, val_dataset,val_labels,batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, learning_rate=LEARNING_RATE):
    """
    This function implements the training logic
    Inputs:
    - model: the model to be trained
    - train_dataset: the training set
    - batch_size: integer represents the number of examples per step
    - epochs: integer represents the total number of epochs (full training pass)
    - learning_rate: the learning rate to be used by the optimizer
    """

    # (1) create the dataloader of the training set (make the shuffle=True)
    tensor_train_dataset = TensorDataset(train_dataset, train_labels)
    train_dataloader = DataLoader(tensor_train_dataset, batch_size=batch_size, shuffle=True)
    tensor_val_dataset = TensorDataset(val_dataset, val_labels)
    val_dataloader = DataLoader(tensor_val_dataset, batch_size=batch_size, shuffle=False)

    # (2) make the criterion cross entropy loss
    criterion = torch.nn.CrossEntropyLoss()

    # (3) create the optimizer (Adam)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # (4) create a learning rate scheduler (optional but recommended)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)  # Adjust parameters as needed

    # GPU configuration
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()
    
    best_accuracy = 0.0
    best_accuracy_val=0.0
    for epoch_num in range(epochs):
        total_acc_train = 0
        total_loss_train = 0
        
        for train_input, train_label in tqdm(train_dataloader):
            # Zero your gradients
            optimizer.zero_grad()

            # Move the train input to the device
            train_label = train_label.to(device)

            # Move the train label to the device
            train_input = train_input.to(device)
            print(train_input)
            # Do the forward pass
            output = model(train_input).float()

            # Loss calculation
            batch_loss = criterion(output.view(-1, output.shape[-1]), train_label.view(-1))

            # Append the batch loss to the total_loss_train
            total_loss_train += batch_loss.item()
            
            # Calculate the batch accuracy (just add the number of correct predictions)
            # Compare predicted diacritic with true diacritic and count correct predictions
            correct_predictions = (output.argmax(dim=2) == train_label)

            # Calculate accuracy for the current batch
            acc = correct_predictions.sum().item()
            total_acc_train += acc

            # Do the backward pass
            batch_loss.backward()

            # Update the weights with your optimizer
            optimizer.step()     
        # Step the learning rate scheduler
        scheduler.step()
        # Calculate the epoch loss
        epoch_loss = total_loss_train / len(train_dataset)

        # Calculate the accuracy
        epoch_acc = total_acc_train / (len(train_dataset) * len(train_dataset[0]))

        print(f'Epochs: {epoch_num + 1} | Train Loss: {epoch_loss} \
            | Train Accuracy: {epoch_acc}\n')
        
        if epoch_acc > best_accuracy:
            best_accuracy = epoch_acc
            torch.save(model.state_dict(), path)
            print(f'Saved the best model with accuracy: {best_accuracy} to {path}\n')
        # Validation
        model.eval()  # Set the model to evaluation mode
        total_acc_val = 0
        total_loss_val = 0

        with torch.no_grad():
            for val_input, val_label in tqdm(val_dataloader):
                val_label = val_label.to(device)
                val_input = val_input.to(device)

                output = model(val_input)
                batch_loss = criterion(output.view(-1, output.shape[-1]), val_label.view(-1))
                total_loss_val += batch_loss.item()

                correct_predictions = (output.argmax(dim=2) == val_label)
                acc = correct_predictions.sum().item()
                total_acc_val += acc

        epoch_loss_val = total_loss_val / len(val_dataloader)
        epoch_acc_val = total_acc_val / (len(val_dataset) * len(val_dataset[0]))

        print(f'Epochs: {epoch_num + 1} | Validation Loss: {epoch_loss_val} | Validation Accuracy: {epoch_acc_val}')

        if epoch_acc_val > best_accuracy_val:
            best_accuracy_val = epoch_acc_val
            torch.save(model.state_dict(), val_path)
            print(f'Saved the best model with validation accuracy: {best_accuracy_val} to {val_path}')
        model.train()


In [14]:
t_model = gensim.models.Word2Vec.load('models/full_grams_cbow_300_twitter.mdl')
embedding_dim = t_model.vector_size


In [30]:
aravec_embeddings_train = []
corpus=  readFile(TRAIN_PATH)

x_train = []
y_train = []

for sentence in corpus[:100]:
	# Clean each sentence in the corpus
	# Get the char list for each word in the sentence and its corresponding diacritics
	char_list, diacritics_list = separate_words_and_diacritics(sentence.strip())
	words = [''.join(sublist) for sublist in char_list]
	windows=get_all_windows(' '.join(words), WINDOW_SIZE_BEFORE, WINDOW_SIZE_AFTER)
	# print(windows)
	for window in windows:
		embeddings = [t_model.wv[clean_str(word)] if clean_str(word) in t_model.wv else np.zeros(embedding_dim) for word in window]
		# print(np.mean(embeddings, axis=0))
		aravec_embeddings_train.append(np.mean(embeddings, axis=0))
	# if(char_list)
	# print(char_list)
	x_train.append(char_list)
	y_train.append(diacritics_list)

X_train_padded = [torch.tensor([char_to_index[char] for char in word]) for sentence in x_train for word in sentence]
X_train_padded = pad_sequence(X_train_padded, batch_first=True)

y_train_padded = [torch.tensor([diacritic_to_index[char] for char in word]) for sentence in y_train for word in sentence]
y_train_padded = pad_sequence(y_train_padded, batch_first=True)

In [27]:
aravec_embeddings_train = []
corpus=  readFile(TRAIN_PATH)

x_train = []
y_train = []

for sentence in corpus[:5]:
	# Clean each sentence in the corpus
	# Get the char list for each word in the sentence and its corresponding diacritics
    char_list, diacritics_list = separate_words_and_diacritics(sentence.strip())
    # print(char_list)
    joined_lists = [''.join(sublist) for sublist in char_list if sublist != []]
    # print(joined_lists)
    for ele in joined_lists:
        ele_cleaned=clean_str(ele)
        if ele_cleaned in t_model.wv:
            aravec_embeddings_train.append(t_model.wv[ele_cleaned])
            # print(t_model.wv[ele_cleaned])
        else:
            aravec_embeddings_train.append(np.zeros(embedding_dim))
    x_train.append(char_list)
    y_train.append(diacritics_list)

X_train_padded = [torch.tensor([char_to_index[char] for char in word]) for sentence in x_train for word in sentence]
X_train_padded = pad_sequence(X_train_padded, batch_first=True)

y_train_padded = [torch.tensor([diacritic_to_index[char] for char in word]) for sentence in y_train for word in sentence]
y_train_padded = pad_sequence(y_train_padded, batch_first=True)

In [29]:
aravec_embeddings_val_test = []

valid_corpus = readFile(VAL_PATH)

X_val = []
y_val = []

for sentence in valid_corpus[:100]:
	# Clean each sentence in the corpus
	# Get the char list for each word in the sentence and its corresponding diacritics
	char_list, diacritics_list = separate_words_and_diacritics(sentence.strip())
	words = [''.join(sublist) for sublist in char_list]
	for ele in words:
		if ele in t_model.wv:
			aravec_embeddings_val_test.append(t_model.wv[ele])
		else:
			aravec_embeddings_val_test.append(np.zeros(embedding_dim))
	X_val.append(char_list)
	y_val.append(diacritics_list)

X_val_padded = [torch.tensor([char_to_index[char] for char in word]) for sentence in X_val for word in sentence ]
X_val_padded = pad_sequence(X_val_padded, batch_first=True)

y_val_padded = [torch.tensor([diacritic_to_index[char] for char in word]) for sentence in y_val for word in sentence ]
# print(y_val_padded)
y_val_padded = pad_sequence(y_val_padded, batch_first=True)
# print(y_val_padded)

In [None]:
# print(type(y_val_padded))
# print(type(aravec_embeddings_val_test))
aravec_embeddings_val_test=np.array(aravec_embeddings_val_test)
# print(type(aravec_embeddings_val_test))

# Create an index array
# indices = list(range(len(X_val_padded)))

# Split the indices into validation and test sets
indices_val, indices_test = train_test_split(indices, test_size=0.5, random_state=42)
# Use the indices to get the corresponding data for validation and test sets
x_val = X_val_padded[indices_val]
y_val = y_val_padded[indices_val]
aravec_embeddings_val=aravec_embeddings_val_test[indices_val].tolist()

x_test = X_val_padded[indices_test]
y_test = y_val_padded[indices_test]
aravec_embeddings_test=aravec_embeddings_val_test[indices_test].tolist()

In [None]:
def run_RNN():
    model=RNN(VOCAB_SIZE, LABELS_SIZE)
    print(model)
    train(model, LSTM_PATH, X_train_padded, y_train_padded)
    
def run_CNN():
    model=CNN(VOCAB_SIZE, LABELS_SIZE)
    print(model)
    train(model, CNN_PATH, X_train_padded, y_train_padded)
    
def run_CNN_eslam():
    model=RNN_CNN(VOCAB_SIZE, LABELS_SIZE)
    print(model)
    train(model,CNN_PATH,CNN_val_PATH, X_train_padded, y_train_padded,X_val,y_val)
    
def run_CRF():
    model=LSTM_CRF(VOCAB_SIZE, LABELS_SIZE)
    print(model)
    train(model, CNN_PATH, X_train_padded, y_train_padded)
def run_CRF_eslam():
    model=RNN_CRF(VOCAB_SIZE, LABELS_SIZE)
    print(model)
    train(model,CRF_PATH,CRF_Val_PATH, X_train_padded, y_train_padded,x_val,y_val)
def run_CRF_Pre_Trained():
    model=RNN_CRF_Pre_Trained(VOCAB_SIZE, LABELS_SIZE,pretrained_embedding=torch.tensor(aravec_embeddings_train), freeze_embedding=True)
    print(model)
    train(model,CRF_PATH,CRF_Val_PATH, X_train_padded, y_train_padded,x_val,y_val)


In [None]:
# run_RNN()
# run_CNN()
# run_CRF()
# run_CRF_eslam()
# run_CNN_eslam()
run_CRF_Pre_Trained()

RNN_CRF_Pre_Trained(
  (embedding): Embedding(4498, 300)
  (lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=1024, out_features=15, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (crf): CRF()
)


  0%|          | 0/18 [00:00<?, ?it/s]

tensor([[30, 30,  8,  ...,  0,  0,  0],
        [36, 12,  8,  ...,  0,  0,  0],
        [25, 30, 36,  ...,  0,  0,  0],
        ...,
        [ 7, 30, 14,  ...,  0,  0,  0],
        [29, 32, 10,  ...,  0,  0,  0],
        [20,  7,  1,  ...,  0,  0,  0]])


  6%|▌         | 1/18 [00:00<00:14,  1.19it/s]

tensor([[ 8, 25, 22,  ...,  0,  0,  0],
        [27, 10, 28,  ...,  0,  0,  0],
        [31, 32, 33,  ...,  0,  0,  0],
        ...,
        [ 7, 30, 28,  ...,  0,  0,  0],
        [ 7, 30, 31,  ...,  0,  0,  0],
        [30, 31,  0,  ...,  0,  0,  0]])


 11%|█         | 2/18 [00:01<00:13,  1.15it/s]

tensor([[25, 30, 31,  ...,  0,  0,  0],
        [25, 30, 35,  ...,  0,  0,  0],
        [ 7, 30,  3,  ...,  0,  0,  0],
        ...,
        [34, 10, 17,  ...,  0,  0,  0],
        [34, 30, 17,  ...,  0,  0,  0],
        [36, 13, 10,  ...,  0,  0,  0]])


 17%|█▋        | 3/18 [00:02<00:12,  1.16it/s]

tensor([[34, 36, 27,  ...,  0,  0,  0],
        [25, 17, 27,  ...,  0,  0,  0],
        [26, 36, 17,  ...,  0,  0,  0],
        ...,
        [30, 30, 20,  ...,  0,  0,  0],
        [ 5, 25,  7,  ...,  0,  0,  0],
        [15, 36, 32,  ...,  0,  0,  0]])


 22%|██▏       | 4/18 [00:03<00:11,  1.18it/s]

tensor([[28,  7, 30,  ...,  0,  0,  0],
        [ 7, 30, 27,  ...,  0,  0,  0],
        [25, 31, 30,  ...,  0,  0,  0],
        ...,
        [ 8, 32,  0,  ...,  0,  0,  0],
        [17,  3, 10,  ...,  0,  0,  0],
        [ 8, 30, 26,  ...,  0,  0,  0]])


 28%|██▊       | 5/18 [00:04<00:10,  1.18it/s]

tensor([[36, 12, 15,  ...,  0,  0,  0],
        [33, 16,  7,  ...,  0,  0,  0],
        [ 7, 30, 34,  ...,  0,  0,  0],
        ...,
        [18, 36,  7,  ...,  0,  0,  0],
        [ 7, 30, 31,  ...,  0,  0,  0],
        [ 3, 14,  8,  ...,  0,  0,  0]])


 33%|███▎      | 6/18 [00:05<00:10,  1.19it/s]

tensor([[27, 36,  0,  ...,  0,  0,  0],
        [30, 30, 34,  ...,  0,  0,  0],
        [ 7, 30, 32,  ...,  0,  0,  0],
        ...,
        [12, 25, 30,  ...,  0,  0,  0],
        [25, 30, 36,  ...,  0,  0,  0],
        [31, 25,  0,  ...,  0,  0,  0]])


 39%|███▉      | 7/18 [00:05<00:09,  1.20it/s]

tensor([[29,  7, 32,  ...,  0,  0,  0],
        [ 7, 30, 31,  ...,  0,  0,  0],
        [27, 36,  0,  ...,  0,  0,  0],
        ...,
        [34, 31, 32,  ...,  0,  0,  0],
        [34, 30, 30,  ...,  0,  0,  0],
        [ 8, 14, 30,  ...,  0,  0,  0]])


 44%|████▍     | 8/18 [00:06<00:08,  1.20it/s]

tensor([[29, 31,  7,  ...,  0,  0,  0],
        [31, 19, 30,  ...,  0,  0,  0],
        [34, 25,  0,  ...,  0,  0,  0],
        ...,
        [34,  7, 30,  ...,  0,  0,  0],
        [36, 28, 34,  ...,  0,  0,  0],
        [ 3, 32, 33,  ...,  0,  0,  0]])


 50%|█████     | 9/18 [00:07<00:07,  1.19it/s]

tensor([[36, 32, 27,  ...,  0,  0,  0],
        [ 8, 36, 32,  ...,  0,  0,  0],
        [ 3, 17, 19,  ...,  0,  0,  0],
        ...,
        [30,  7,  8,  ...,  0,  0,  0],
        [ 8, 30, 26,  ...,  0,  0,  0],
        [ 8, 36, 32,  ...,  0,  0,  0]])


 56%|█████▌    | 10/18 [00:08<00:06,  1.18it/s]

tensor([[31, 19, 25,  ...,  0,  0,  0],
        [ 5, 13, 15,  ...,  0,  0,  0],
        [31, 11, 30,  ...,  0,  0,  0],
        ...,
        [27, 29, 19,  ...,  0,  0,  0],
        [ 8, 33,  0,  ...,  0,  0,  0],
        [34, 28, 25,  ...,  0,  0,  0]])


 61%|██████    | 11/18 [00:09<00:05,  1.18it/s]

tensor([[ 7, 30, 32,  ...,  0,  0,  0],
        [ 3, 31,  7,  ...,  0,  0,  0],
        [ 7, 19, 31,  ...,  0,  0,  0],
        ...,
        [31, 32,  0,  ...,  0,  0,  0],
        [30, 30,  8,  ...,  0,  0,  0],
        [36, 29, 32,  ...,  0,  0,  0]])


 67%|██████▋   | 12/18 [00:10<00:05,  1.13it/s]

tensor([[14, 30, 36,  ...,  0,  0,  0],
        [30, 33,  0,  ...,  0,  0,  0],
        [34, 25, 30,  ...,  0,  0,  0],
        ...,
        [ 8,  7, 30,  ...,  0,  0,  0],
        [34,  7, 13,  ...,  0,  0,  0],
        [ 8, 20, 17,  ...,  0,  0,  0]])


 72%|███████▏  | 13/18 [00:11<00:04,  1.14it/s]

tensor([[31, 30, 29,  ...,  0,  0,  0],
        [36, 21, 34,  ...,  0,  0,  0],
        [ 8,  3, 25,  ...,  0,  0,  0],
        ...,
        [27, 30,  7,  ...,  0,  0,  0],
        [34, 12,  8,  ...,  0,  0,  0],
        [ 8, 32,  0,  ...,  0,  0,  0]])


 78%|███████▊  | 14/18 [00:11<00:03,  1.16it/s]

tensor([[27,  7,  6,  ...,  0,  0,  0],
        [19,  7, 30,  ...,  0,  0,  0],
        [ 8, 36, 32,  ...,  0,  0,  0],
        ...,
        [34, 30, 34,  ...,  0,  0,  0],
        [ 7, 30, 27,  ...,  0,  0,  0],
        [ 5, 19, 13,  ...,  0,  0,  0]])


 83%|████████▎ | 15/18 [00:12<00:02,  1.16it/s]

tensor([[30,  7,  0,  ...,  0,  0,  0],
        [ 7, 30, 28,  ...,  0,  0,  0],
        [10, 29, 11,  ...,  0,  0,  0],
        ...,
        [31, 34, 12,  ...,  0,  0,  0],
        [31, 25,  0,  ...,  0,  0,  0],
        [34,  8, 15,  ...,  0,  0,  0]])


 89%|████████▉ | 16/18 [00:13<00:01,  1.12it/s]

tensor([[ 3, 32,  0,  ...,  0,  0,  0],
        [26, 36, 17,  ...,  0,  0,  0],
        [36, 12, 15,  ...,  0,  0,  0],
        ...,
        [36, 19, 10,  ...,  0,  0,  0],
        [10, 19, 28,  ...,  0,  0,  0],
        [24, 33,  7,  ...,  0,  0,  0]])


 94%|█████████▍| 17/18 [00:14<00:00,  1.14it/s]

tensor([[10, 25,  7,  ...,  0,  0,  0],
        [29,  7, 32,  ...,  0,  0,  0],
        [25, 30, 31,  ...,  0,  0,  0],
        ...,
        [31, 10,  7,  ...,  0,  0,  0],
        [31, 32, 33,  ...,  0,  0,  0],
        [31, 11, 30,  ...,  0,  0,  0]])


100%|██████████| 18/18 [00:15<00:00,  1.18it/s]


Epochs: 1 | Train Loss: 0.003586470219441232             | Train Accuracy: 0.7199967662395408

Saved the best model with accuracy: 0.7199967662395408 to ./models/crf.pth



100%|██████████| 1/1 [00:00<00:00,  9.06it/s]


Epochs: 1 | Validation Loss: 0.812017560005188 | Validation Accuracy: 0.7060185185185185
Saved the best model with validation accuracy: 0.7060185185185185 to ./models/crf_val.pth


  0%|          | 0/18 [00:00<?, ?it/s]

tensor([[ 7, 30,  8,  ...,  0,  0,  0],
        [34,  7, 30,  ...,  0,  0,  0],
        [34, 30, 34,  ...,  0,  0,  0],
        ...,
        [25, 30, 35,  ...,  0,  0,  0],
        [ 7, 30, 16,  ...,  0,  0,  0],
        [ 5, 13, 15,  ...,  0,  0,  0]])


  6%|▌         | 1/18 [00:00<00:14,  1.18it/s]

tensor([[13, 31, 30,  ...,  0,  0,  0],
        [16, 30, 29,  ...,  0,  0,  0],
        [30, 13, 31,  ...,  0,  0,  0],
        ...,
        [ 7,  8, 32,  ...,  0,  0,  0],
        [ 7, 30, 22,  ...,  0,  0,  0],
        [ 5, 30,  7,  ...,  0,  0,  0]])


 11%|█         | 2/18 [00:01<00:13,  1.18it/s]

tensor([[31, 18,  7,  ...,  0,  0,  0],
        [36, 10, 25,  ...,  0,  0,  0],
        [36, 10, 21,  ...,  0,  0,  0],
        ...,
        [27, 28, 23,  ...,  0,  0,  0],
        [ 7, 30, 31,  ...,  0,  0,  0],
        [34, 30,  7,  ...,  0,  0,  0]])


 17%|█▋        | 3/18 [00:02<00:12,  1.18it/s]

tensor([[34, 30, 30,  ...,  0,  0,  0],
        [34,  7, 30,  ...,  0,  0,  0],
        [36, 25, 30,  ...,  0,  0,  0],
        ...,
        [ 8, 33,  0,  ...,  0,  0,  0],
        [ 3, 32,  0,  ...,  0,  0,  0],
        [31, 17,  0,  ...,  0,  0,  0]])


 22%|██▏       | 4/18 [00:03<00:11,  1.18it/s]

tensor([[30,  7,  0,  ...,  0,  0,  0],
        [34, 30,  7,  ...,  0,  0,  0],
        [30, 31,  0,  ...,  0,  0,  0],
        ...,
        [25, 30, 35,  ...,  0,  0,  0],
        [17, 13, 31,  ...,  0,  0,  0],
        [34, 36, 12,  ...,  0,  0,  0]])


 28%|██▊       | 5/18 [00:04<00:10,  1.19it/s]

tensor([[ 8, 25, 15,  ...,  0,  0,  0],
        [16,  7,  0,  ...,  0,  0,  0],
        [10, 32, 10,  ...,  0,  0,  0],
        ...,
        [ 5, 16,  7,  ...,  0,  0,  0],
        [34, 30, 26,  ...,  0,  0,  0],
        [34, 30,  7,  ...,  0,  0,  0]])


 33%|███▎      | 6/18 [00:05<00:10,  1.17it/s]

tensor([[ 3, 36,  0,  ...,  0,  0,  0],
        [34, 30,  7,  ...,  0,  0,  0],
        [28,  7, 30,  ...,  0,  0,  0],
        ...,
        [33, 16,  7,  ...,  0,  0,  0],
        [27,  5, 16,  ...,  0,  0,  0],
        [27, 36,  0,  ...,  0,  0,  0]])


 39%|███▉      | 7/18 [00:05<00:09,  1.16it/s]

tensor([[33, 30,  0,  ...,  0,  0,  0],
        [13, 22, 17,  ...,  0,  0,  0],
        [ 3, 36,  0,  ...,  0,  0,  0],
        ...,
        [34, 30, 34,  ...,  0,  0,  0],
        [ 7, 30, 34,  ...,  0,  0,  0],
        [27, 30, 36,  ...,  0,  0,  0]])


 44%|████▍     | 8/18 [00:07<00:09,  1.08it/s]

tensor([[36, 28, 34,  ...,  0,  0,  0],
        [ 8, 36, 32,  ...,  0,  0,  0],
        [14, 36,  7,  ...,  0,  0,  0],
        ...,
        [27, 36,  0,  ...,  0,  0,  0],
        [ 7, 19, 10,  ...,  0,  0,  0],
        [13, 10, 35,  ...,  0,  0,  0]])


 50%|█████     | 9/18 [00:07<00:08,  1.08it/s]

tensor([[25, 30, 35,  ...,  0,  0,  0],
        [ 7, 30, 19,  ...,  0,  0,  0],
        [27, 30,  7,  ...,  0,  0,  0],
        ...,
        [ 7, 30, 30,  ...,  0,  0,  0],
        [25,  8, 15,  ...,  0,  0,  0],
        [13, 15, 11,  ...,  0,  0,  0]])


 50%|█████     | 9/18 [00:08<00:08,  1.02it/s]


KeyboardInterrupt: 