## Multilayer Perceptron Models

### Imports and Utils

In [1]:
"""
Importing the necessary libraries
"""
import os
from time import time
import pickle

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

import numpy as np

# Silence every warning for the remainder of the session
import warnings
warnings.filterwarnings(action='ignore')

# Export CUDA_LAUNCH_BLOCKING=1 into this process's environment
os.environ.update(CUDA_LAUNCH_BLOCKING='1')

# Use the GPU when PyTorch reports one as available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Function to load the data
def load_text_data(file_path):
    """
    Read a text file and return its full contents as one string.

    file_path: str: The path to the file

    Returns: str: The UTF-8 decoded contents of the file
    """
    # Read the raw bytes first so that no newline translation takes place
    with open(file_path, mode='rb') as handle:
        raw_bytes = handle.read()

    # Decode once, after the file has been closed
    return str(raw_bytes, encoding='utf-8')

### Dataset Loading and Preprocessing

In [None]:
# Read the whole corpus into memory as a single string
file_path = '../Datasets/Corpus/Shakespeare.txt'
file_data = load_text_data(file_path)

# Vocabulary: every distinct character of the corpus, in sorted order
chars = sorted(set(file_data))

# Lookup tables: character -> index and index -> character
encodings = dict(zip(chars, range(len(chars))))
decodings = dict(enumerate(chars))

In [None]:
block_size = 10

# Encode every character of the corpus as its integer id
dataset = [encodings[char] for char in file_data]

if len(dataset) <= block_size:
    raise ValueError(
        f"Corpus has {len(dataset)} characters; more than block_size={block_size} are required"
    )

# Keep the encoded corpus as one 1-D tensor and slice windows out of it,
# instead of building one Python list per character of the corpus
encoded = torch.tensor(dataset, dtype=torch.long)

# Row i of input_seq is encoded[i:i+block_size]; the final window is dropped
# because there is no character after it to use as a target
input_seq = encoded.unfold(0, block_size, 1)[:-1].contiguous().to(device)

# target_seq[i] is the character that follows window i
target_seq = encoded[block_size:].to(device)

### Model Creation and Training

In [None]:
class MLP(nn.Module):
    """
    A character-level Multi-Layer Perceptron.

    A block of ``block_size`` character ids is embedded, flattened and passed
    through one hidden layer (16 units, SiLU) that outputs one logit per
    vocabulary entry.
    """

    def __init__(self, block_size: int, vocab_size: int, emb_dim: int, learning_rate: float = 0.01, random_state: int = None):
        """
        Constructor for Multi-Layer Perceptron.

        block_size: int: number of character ids in one input block
        vocab_size: int: number of distinct characters (size of the output layer)
        emb_dim: int: embedding dimension of the characters
        learning_rate: float: learning rate of the optimizer used by fit
        random_state: int: random state for reproducibility (None leaves the global seed untouched)
        """

        super().__init__()
        if random_state is not None:
            # Seeds the global torch RNG so the layer initialisation below is reproducible
            torch.manual_seed(random_state)
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.lr = learning_rate
        # (batch, block_size) ids -> (batch, block_size * emb_dim) features
        self.embeddings = nn.Sequential(
            nn.Embedding(vocab_size, emb_dim),
            nn.Flatten()
        )
        self.layers = nn.Sequential(
            nn.Linear(block_size * emb_dim, 16),
            nn.SiLU(),
            nn.Linear(16, vocab_size)
        )

    def _model_device(self) -> torch.device:
        """
        Returns: torch.device: The device the model parameters currently live on.
        """

        return next(self.parameters()).device

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: torch.Tensor: The input tensor of character ids, one block per row

        Returns: torch.Tensor: The logits, one row of vocab_size values per block
        """

        x = self.embeddings(x)
        x = self.layers(x)
        return x

    def fit(self, X: torch.Tensor, y: torch.Tensor, epochs: int = 1000, batch_size: int = 4096, print_cost: bool = False):
        """
        Train the model with Adam and cross-entropy loss.

        X: torch.Tensor: The input tensor (reshaped to rows of block_size ids)
        y: torch.Tensor: The target tensor; integer class ids (one per block) or
            floating-point class probabilities (vocab_size values per block)
        epochs: int: The number of epochs
        batch_size: int: The batch size while applying mini-batch gradient descent
        print_cost: bool: Whether to print the cost or not

        Returns: list: The loss of every processed mini-batch, in order

        Raises: ValueError: If X and y do not describe the same number of samples
        """

        # Train on whatever device the parameters are on, so inputs and weights always match
        model_device = self._model_device()
        X = X.reshape(-1, self.block_size).to(model_device)
        if y.is_floating_point():
            # Floating-point targets are class probabilities: one row of vocab_size per sample
            y = y.reshape(-1, self.vocab_size).to(model_device)
        else:
            # Integer targets are class ids: CrossEntropyLoss needs shape (N,) and dtype long
            y = y.reshape(-1).long().to(model_device)

        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X describes {X.shape[0]} samples but y describes {y.shape[0]}"
            )

        dataset = TensorDataset(X, y)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)

        losses = []
        for epoch in range(epochs):
            for batch_X, batch_y in dataloader:
                # Forward pass
                optimizer.zero_grad()
                loss = criterion(self(batch_X), batch_y)
                losses.append(loss.item())

                # Backward pass
                loss.backward()
                optimizer.step()

            # Print the cost (loss of the last mini-batch of the epoch)
            if print_cost and losses and (epoch + 1) % 100 == 0:
                print(f'Loss at epoch {epoch+1}: {losses[-1]:.3f}')
                print("\n------------------------------------------------------------\n")

        return losses

    def predict(self, X: torch.Tensor, decodings: dict, context_len: int):
        """
        Generate characters one at a time by sampling from the model output.

        X: torch.Tensor: The input tensor holding one block of block_size ids
        decodings: dict: The dictionary containing decoding of the characters
        context_len: int: The number of characters to generate

        Yields: The decoded value of each sampled id
        """

        model_device = self._model_device()
        context = X.reshape(1, self.block_size).to(model_device)

        for _ in range(context_len):
            # no_grad is scoped to the computation only, so the disabled-grad
            # state never leaks into the caller while the generator is suspended
            with torch.no_grad():
                logits = self(context)
                id_pred = torch.distributions.Categorical(logits=logits).sample().item()
                next_id = torch.tensor([[id_pred]], dtype=context.dtype, device=model_device)
                # Slide the window: drop the oldest id, append the sampled one
                context = torch.cat((context[:, 1:], next_id), dim=1)
            yield decodings[id_pred]

    def save_model(self, path):
        """
        Save the model parameters.

        path: str: The path where the model parameters should be saved.
        """

        torch.save(self.state_dict(), path)

    def load_model(self, path):
        """
        Load the model parameters.

        path: str: The path from where the model parameters should be loaded.
        """

        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be loaded onto the device this model currently lives on
        self.load_state_dict(torch.load(path, map_location=self._model_device()))

In [None]:
# Build the network, then move its parameters to the selected device
model = MLP(
    block_size=block_size,
    vocab_size=len(chars),
    emb_dim=32,
    learning_rate=0.01,
    random_state=42,
)
model = model.to(device)

In [None]:
# Training the model; keep the per-batch loss history that fit returns so it
# can be inspected or plotted later instead of being echoed as the cell output
losses = model.fit(input_seq, target_seq, epochs=1000, batch_size=4096, print_cost=True)

In [None]:
# Sample 100 characters, seeded with the first block of the corpus, and
# print them back to back without separators or a trailing newline
print(*model.predict(input_seq[0], decodings, 100), sep='', end='')

### Testing and Plotting

### Generating and Saving Models

## Convolutional Neural Network Models

### Imports and Utils

### Dataset Loading and Preprocessing

### Model Creation and Training

### Testing and Plotting