<a href="https://colab.research.google.com/github/Scaglione-Nick/ECGR4106/blob/main/5_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import time
import os
import numpy as np

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Location of the Tiny Shakespeare corpus inside the Colab runtime.
file_path = '/content/sample_data/tiny-shakespeare.txt'

# Decode explicitly as UTF-8 so the vocabulary does not depend on the
# platform's default text encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    text = f.read()

# Character-level vocabulary: every distinct character gets an integer id,
# assigned in sorted order so the mapping is reproducible across runs.
chars = sorted(set(text))
char_to_idx = {ch: idx for idx, ch in enumerate(chars)}
idx_to_char = dict(enumerate(chars))
def prepare_sequences(text, seq_length, char_map=None):
    """Turn raw text into (input window, next character) training pairs.

    Each sample is a run of ``seq_length`` consecutive characters, encoded as
    integer ids, paired with the id of the character that follows it.

    Args:
        text: The corpus as a string.
        seq_length: Number of characters in each input window (must be >= 1).
        char_map: Optional mapping from character to integer id. Defaults to
            the module-level ``char_to_idx``.

    Returns:
        A tuple ``(sequences, targets)`` of int64 NumPy arrays with shapes
        ``(num_samples, seq_length)`` and ``(num_samples,)``, where
        ``num_samples = max(len(text) - seq_length, 0)``.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
        KeyError: If ``text`` contains a character missing from the mapping.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be at least 1")

    mapping = char_to_idx if char_map is None else char_map

    # Encode the corpus exactly once; every window below is a slice of this
    # array rather than a fresh per-character lookup.
    encoded = np.fromiter((mapping[c] for c in text), dtype=np.int64, count=len(text))

    num_samples = max(len(text) - seq_length, 0)
    if num_samples == 0:
        # Keep the 2-D layout even when the text is too short for one sample.
        sequences = np.empty((0, seq_length), dtype=np.int64)
        targets = np.empty((0,), dtype=np.int64)
    else:
        # The last character can only ever be a target, so it is excluded from
        # the windows. copy() materialises the strided view into an owned,
        # writable array.
        windows = np.lib.stride_tricks.sliding_window_view(encoded[:-1], seq_length)
        sequences = windows.copy()
        targets = encoded[seq_length:]

    # Print shapes to debug
    print(f"Input shape (X_train): {sequences.shape}")
    print(f"Target shape (y_train): {targets.shape}")

    return sequences, targets
# Context-window sizes to compare; each gets its own (inputs, targets) pair.
seq_lengths = [20, 30]
data = {}
for length in seq_lengths:
    data[length] = prepare_sequences(text, length)

Input shape (X_train): (1115374, 20)
Target shape (y_train): (1115374,)
Input shape (X_train): (1115364, 30)
Target shape (y_train): (1115364,)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model
import torch
from torch.optim import Adam
# Shared loss for next-character classification: train() applies it to the
# model's raw output scores and the integer target ids.
criterion = nn.CrossEntropyLoss()

# Define Transformer Model
class TransformerModel(nn.Module):
    """Character-level next-token classifier built on a Transformer encoder.

    Token ids are embedded, summed with a learned positional encoding, passed
    through a stack of encoder layers, and the representation of the final
    position is projected to vocabulary-sized scores.

    Args:
        vocab_size: Number of distinct token ids.
        embed_size: Embedding width (the encoder's ``d_model``).
        num_heads: Attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        seq_length: Longest input sequence the positional encoding covers.
        hidden_size: Width of each encoder layer's feed-forward sublayer.
    """

    def __init__(self, vocab_size, embed_size, num_heads, num_layers, seq_length, hidden_size):
        super().__init__()

        # Token embedding plus one learned position vector per time step.
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.positional_encoding = nn.Parameter(torch.zeros(1, seq_length, embed_size))

        # batch_first=True is required because forward() feeds tensors laid
        # out as (batch, seq, embed); with the default layout the encoder
        # would attend across the batch dimension instead of across time.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_size,
            nhead=num_heads,
            dim_feedforward=hidden_size,
            batch_first=True,
        )
        self.transformer_layers = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Output layer
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, x):
        """Return next-token scores of shape (batch, vocab_size).

        Args:
            x: Integer token ids of shape (batch, seq) with
                ``seq <= seq_length``.

        Raises:
            ValueError: If the input is longer than the positional encoding.
        """
        steps = x.size(1)
        max_steps = self.positional_encoding.size(1)
        if steps > max_steps:
            raise ValueError(
                f"input sequence length {steps} exceeds the model's maximum of {max_steps}"
            )

        # Slice the positional table so shorter-than-maximum inputs still work.
        x = self.embedding(x) + self.positional_encoding[:, :steps, :]

        x = self.transformer_layers(x)

        # Predict the next character from the last position's representation.
        return self.fc(x[:, -1, :])

In [None]:
def train(model, train_loader, val_loader, epochs=10, lr=0.001,
          checkpoint_path='best_transformer_model.pth'):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Uses the module-level ``device``, ``criterion`` and ``evaluate``.

    Args:
        model: Network mapping a batch of inputs to per-class scores.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimizer.
        checkpoint_path: File that receives the best ``state_dict``. Pass a
            distinct path per run to keep one run from overwriting another.

    Returns:
        The best validation accuracy observed (0 if no epoch improved on 0).

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        # Fail early with a clear message instead of dividing by zero below.
        raise ValueError("train_loader contains no batches")

    model.to(device)
    optimizer = Adam(model.parameters(), lr=lr)
    best_val_acc = 0

    for epoch in range(epochs):
        # evaluate() switches the model to eval mode, so re-enable training
        # behaviour at the start of every epoch.
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            optimizer.zero_grad()
            output = model(inputs)

            # Loss, backward pass and parameter update
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / num_batches
        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {avg_train_loss:.4f}")

        # Validation
        val_accuracy = evaluate(model, val_loader)
        print(f"Validation Accuracy: {val_accuracy:.4f}")

        # Keep only the weights that score best on the validation data.
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            torch.save(model.state_dict(), checkpoint_path)

    return best_val_acc

In [None]:
def evaluate(model, val_loader):
    """Return the top-1 accuracy of ``model`` over ``val_loader``.

    The model is switched to eval mode and left in it. Batches are moved to
    whichever device the model's parameters are on, so the function does not
    depend on a module-level device setting.

    Args:
        model: Network mapping a batch of inputs to per-class scores.
        val_loader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        Fraction of samples whose highest-scoring class equals the target, or
        ``0.0`` when ``val_loader`` yields no samples.
    """
    model.eval()

    # Follow the model's own device; a parameter-free module runs on the CPU.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(eval_device), targets.to(eval_device)

            # The predicted class is the index of the largest score per row.
            predicted = model(inputs).argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    if total == 0:
        # Nothing to score: report 0.0 rather than dividing by zero.
        return 0.0
    return correct / total

In [None]:
class ShakespeareDataset(Dataset):
    """Map-style dataset of encoded character windows and their next-character ids."""

    def __init__(self, sequences, targets):
        # Both inputs are copied into 64-bit integer tensors.
        as_long = {"dtype": torch.long}
        self.sequences = torch.tensor(sequences, **as_long)
        self.targets = torch.tensor(targets, **as_long)

    def __len__(self):
        # One sample per row of the sequence tensor.
        return self.sequences.size(0)

    def __getitem__(self, idx):
        window = self.sequences[idx]
        label = self.targets[idx]
        return window, label

# Prepare data for both sequence lengths (20, 30)
seq_lengths = [20, 30]
train_data = {}
for length in seq_lengths:
    sequences, targets = data[length]
    dataset = ShakespeareDataset(sequences, targets)
    train_data[length] = DataLoader(dataset, batch_size=32, shuffle=True)

# For simplicity, let's use a subset of the data for validation here.
val_data = train_data[20]  # You can define separate validation data if you like.

In [None]:
# Model hyperparameters
embed_size = 4
hidden_size = 32
num_heads = 2
num_layers = 2

# Train transformer models for different configurations
for seq_length in seq_lengths:
    print(f"Training model for sequence length: {seq_length}")
    model = TransformerModel(vocab_size=len(chars), embed_size=embed_size, num_heads=num_heads, num_layers=num_layers,
                             seq_length=seq_length, hidden_size=hidden_size)

    start_time = time.time()
    train(model, train_data[seq_length], val_data, epochs=10)
    end_time = time.time()

    print(f"Training time for sequence length {seq_length}: {end_time - start_time:.2f} seconds")


Training model for sequence length: 20




Epoch [1/10], Train Loss: 2.8337
Validation Accuracy: 0.2421


KeyboardInterrupt: 