In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import requests
import os



device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

with open('/home/csalitre/school/ecgr-5106/intro-to-deeplearning/Datasets/E2F.txt', 'r') as file:
    text = file.read()


# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 40  # Maximum length of input sequences
X = []
y = []

for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the Transformer model
class CharTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, nhead):
        super(CharTransformer, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        encoder_layers = nn.TransformerEncoderLayer(hidden_size, nhead, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        transformer_output = self.transformer_encoder(embedded)
        output = self.fc(transformer_output[:, -1, :])  # Get the output of the last Transformer block
        return output

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


Using device: cuda:0


In [5]:
# Hyperparameters
hidden_size = 32
num_layers = 1
nhead = 2
learning_rate = 0.001
epochs = 15

# Model, loss, and optimizer
model = CharTransformer(len(chars), hidden_size, len(chars), num_layers, nhead).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        model.train()
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    total_val_loss = 0
    total_val_accuracy = 0
    num_batches = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)        
            val_output = model(inputs)
            val_loss = criterion(val_output, labels)
            total_val_loss += val_loss.item()

            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == labels).float().mean()
            total_val_accuracy += val_accuracy.item()
            num_batches += 1
    
    average_val_loss = total_val_loss / num_batches
    average_val_accuracy = total_val_accuracy / num_batches

    if (epoch+1) % 1 == 0:
     print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {average_val_loss}, Validation Accuracy: {average_val_accuracy}')

Epoch 1, Loss: 2.29953932762146, Validation Loss: 2.486211024224758, Validation Accuracy: 0.3369140625
Epoch 2, Loss: 2.0030689239501953, Validation Loss: 2.2578173503279686, Validation Accuracy: 0.36328125
Epoch 3, Loss: 1.7614461183547974, Validation Loss: 2.1955532394349575, Validation Accuracy: 0.3759765625
Epoch 4, Loss: 2.4393246173858643, Validation Loss: 2.1527660712599754, Validation Accuracy: 0.388671875
Epoch 5, Loss: 1.109269142150879, Validation Loss: 2.1218010783195496, Validation Accuracy: 0.4013671875
Epoch 6, Loss: 2.048635721206665, Validation Loss: 2.078886866569519, Validation Accuracy: 0.3974609375
Epoch 7, Loss: 2.1765213012695312, Validation Loss: 2.0673487707972527, Validation Accuracy: 0.4033203125
Epoch 8, Loss: 1.9105520248413086, Validation Loss: 2.0692314356565475, Validation Accuracy: 0.3974609375
Epoch 9, Loss: 2.3229010105133057, Validation Loss: 2.031998623162508, Validation Accuracy: 0.4140625
Epoch 10, Loss: 1.8472232818603516, Validation Loss: 2.0536

In [6]:
# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
    model.to(device)
    model.eval()
    with torch.no_grad():
        # Ensure the input is not shorter than expected
        if len(initial_str) < max_length:
            initial_str = (' ' * (max_length - len(initial_str))) + initial_str
        
        # Convert characters to indices, handling characters not in the dictionary
        initial_indices = [char_to_ix.get(c, char_to_ix[' ']) for c in initial_str[-max_length:]]
        
        initial_input = torch.tensor(initial_indices, dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        last_timestep_pred = prediction.squeeze(0)[-1]
        predicted_index = torch.argmax(last_timestep_pred, dim=0).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, max_length)
print(f"Predicted next character: '{predicted_char}'")


Predicted next character: '
'
