# Problem 1
1) Train and validate rnn.RNN, rnn.LSTM and rnn.GRU for learning the above sequence. Use sequence lengths of 10, 20, and 30 for your training. Feel free to adjust other network parameters. Report and compare training loss, validation accuracy, execution time for training, and computational and model size complexities across the three models over the various sequence lengths.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import time

# Load the training corpus. The context manager closes the file handle as
# soon as the text has been read instead of leaving it open for the session.
with open("text.txt", "r") as f:
    text = f.read()

# Unique characters in the text, sorted so index assignment is deterministic
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables between characters and their integer class indices
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = dict(enumerate(chars))

# Length of sequences for training
sequence_lengths = [10, 20, 30]

# Function to generate input-output pairs
def generate_sequences(text, sequence_length):
    """Slide a fixed-width window over ``text`` to build next-character samples.

    Args:
        text: Source string (any sliceable sequence works).
        sequence_length: Number of characters in each input window.

    Returns:
        A ``(sequences, next_chars)`` pair of equal-length lists where
        ``sequences[k]`` is ``text[k:k + sequence_length]`` and
        ``next_chars[k]`` is the character immediately after that window.
        Both lists are empty when ``text`` is not longer than
        ``sequence_length``.
    """
    window_starts = range(len(text) - sequence_length)
    sequences = [text[start:start + sequence_length] for start in window_starts]
    next_chars = [text[start + sequence_length] for start in window_starts]
    return sequences, next_chars

# Build the raw (window, next character) samples for every sequence length
sequences_data = {}
for length in sequence_lengths:
    windows, followers = generate_sequences(text, length)
    sequences_data[length] = {'sequences': windows, 'next_chars': followers}

# One-hot encode the samples and attach them as tensors under 'X' and 'y'
for length, entry in sequences_data.items():
    sample_count = len(entry['sequences'])
    X = np.zeros((sample_count, length, vocab_size), dtype=np.float32)
    y = np.zeros((sample_count, vocab_size), dtype=np.float32)
    samples = zip(entry['sequences'], entry['next_chars'])
    for row, (window, follower) in enumerate(samples):
        # Mark the target class for this sample
        y[row, char_to_int[follower]] = 1
        # Mark the active character at every time step of the window
        for step, symbol in enumerate(window):
            X[row, step, char_to_int[symbol]] = 1
    entry['X'] = torch.from_numpy(X)
    entry['y'] = torch.from_numpy(y)

# Define RNN, LSTM, and GRU models
class CharRNN(nn.Module):
    """Vanilla-RNN next-character classifier.

    A single batch-first ``nn.RNN`` layer reads the input sequence and a
    linear layer maps the hidden state of the final time step to one logit
    per output class.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of output classes. When omitted, the module-level
            ``vocab_size`` is used, which is what the two-argument form of
            this constructor has always relied on.
    """

    def __init__(self, input_size, hidden_size, output_size=None):
        super().__init__()
        if output_size is None:
            # Backward-compatible default: fall back to the notebook global
            output_size = vocab_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one row of class logits per batch element of ``x``."""
        out, _ = self.rnn(x)
        # Only the last time step's hidden state feeds the classifier
        return self.fc(out[:, -1, :])

class CharLSTM(nn.Module):
    """LSTM next-character classifier.

    A single batch-first ``nn.LSTM`` layer reads the input sequence and a
    linear layer maps the hidden state of the final time step to one logit
    per output class.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of output classes. When omitted, the module-level
            ``vocab_size`` is used, which is what the two-argument form of
            this constructor has always relied on.
    """

    def __init__(self, input_size, hidden_size, output_size=None):
        super().__init__()
        if output_size is None:
            # Backward-compatible default: fall back to the notebook global
            output_size = vocab_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one row of class logits per batch element of ``x``."""
        out, _ = self.lstm(x)
        # Only the last time step's hidden state feeds the classifier
        return self.fc(out[:, -1, :])

class CharGRU(nn.Module):
    """GRU next-character classifier.

    A single batch-first ``nn.GRU`` layer reads the input sequence and a
    linear layer maps the hidden state of the final time step to one logit
    per output class.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of output classes. When omitted, the module-level
            ``vocab_size`` is used, which is what the two-argument form of
            this constructor has always relied on.
    """

    def __init__(self, input_size, hidden_size, output_size=None):
        super().__init__()
        if output_size is None:
            # Backward-compatible default: fall back to the notebook global
            output_size = vocab_size
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one row of class logits per batch element of ``x``."""
        out, _ = self.gru(x)
        # Only the last time step's hidden state feeds the classifier
        return self.fc(out[:, -1, :])

# Define training parameters
epochs = 50
batch_size = 128
hidden_size = 128

# Train and evaluate models for each sequence length.
# results[model_type][length] collects loss, accuracy, execution time and
# parameter count for every (model, sequence length) combination.
results = {}
for length, data in sequences_data.items():
    X = data['X']
    y = data['y']
    input_size = X.shape[-1]
    # y is one-hot but CrossEntropyLoss expects class indices; convert once
    # per sequence length instead of once per mini-batch.
    targets = torch.argmax(y, dim=1)

    for model_type, Model in [('RNN', CharRNN), ('LSTM', CharLSTM), ('GRU', CharGRU)]:
        model = Model(input_size, hidden_size)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())

        print(f"Training {model_type} model with sequence length {length}...")
        start_time = time.time()
        for epoch in range(epochs):
            running_loss = 0.0
            for i in range(0, len(X), batch_size):
                inputs = X[i:i+batch_size]
                labels = targets[i:i+batch_size]

                optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Weight by batch size so a short final batch is not over-counted
                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / len(X)
        end_time = time.time()
        execution_time = end_time - start_time

        # Evaluate model. The same X/y used for training is scored here, so
        # this is training accuracy (as the report line below states).
        with torch.no_grad():
            outputs = model(X)
            _, predicted = torch.max(outputs, 1)
            total = y.size(0)
            correct = (predicted == targets).sum().item()
            accuracy = correct / total

        # Model complexity: total number of parameters
        model_size = sum(p.numel() for p in model.parameters())

        results.setdefault(model_type, {}).setdefault(length, {})
        results[model_type][length]['loss'] = epoch_loss
        results[model_type][length]['accuracy'] = accuracy
        # No trailing comma here: it used to turn the value into a 1-tuple
        results[model_type][length]['execution_time'] = execution_time
        results[model_type][length]['model_size'] = model_size

        print(f"Model {model_type} with sequence length {length} - Training Loss: {epoch_loss:.4f}, Training Accuracy: {accuracy:.4f}, Execution Time: {execution_time:.2f} seconds, Model Size: {model_size} parameters\n")

        # Predict next character for the first stored sequence
        test_sequence = sequences_data[length]['sequences'][0]
        X_test = torch.unsqueeze(sequences_data[length]['X'][0], 0)
        with torch.no_grad():
            outputs = model(X_test)
            _, predicted_index = torch.max(outputs, 1)
            predicted_char = int_to_char[int(predicted_index)]
            print(f"Next predicted character for sequence '{test_sequence}': {predicted_char}\n")

# Tabulate the results: one column per model type, one row per sequence length
results_df = pd.DataFrame(results).rename_axis('Sequence Length')

# Show the comparison table
print(results_df)


Training RNN model with sequence length 10...
Model RNN with sequence length 10 - Training Loss: 0.9861, Training Accuracy: 0.7156, Execution Time: 27.37 seconds, Model Size: 28205 parameters

Next predicted character for sequence 'Next chara': c

Training LSTM model with sequence length 10...
Model LSTM with sequence length 10 - Training Loss: 1.3095, Training Accuracy: 0.6109, Execution Time: 36.08 seconds, Model Size: 95405 parameters

Next predicted character for sequence 'Next chara': c

Training GRU model with sequence length 10...
Model GRU with sequence length 10 - Training Loss: 1.0053, Training Accuracy: 0.7308, Execution Time: 57.57 seconds, Model Size: 73005 parameters

Next predicted character for sequence 'Next chara': c

Training RNN model with sequence length 20...
Model RNN with sequence length 20 - Training Loss: 0.9853, Training Accuracy: 0.7305, Execution Time: 51.87 seconds, Model Size: 28205 parameters

Next predicted character for sequence 'Next character predi':

# Problem 2
2) Build the model for rnn.LSTM and rnn.GRU for the tiny Shakespeare dataset.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests
import time

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being used as training text.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

# Step 2: Prepare the dataset
sequence_lengths = [20, 30]

# Create a character mapping to integers (sorted for a deterministic order)
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Define dataset class
class CharDataset(Dataset):
    """Pairs each input sequence with its target for use with a DataLoader.

    Args:
        sequence: Indexable, sized collection of input sequences.
        target: Indexable, sized collection of targets, aligned with
            ``sequence`` by position.

    Raises:
        ValueError: If ``sequence`` and ``target`` differ in length. Without
            this check the mismatch would only show up later as an
            out-of-range index in ``__getitem__``.
    """

    def __init__(self, sequence, target):
        if len(sequence) != len(target):
            raise ValueError(
                f"sequence and target must have the same length, "
                f"got {len(sequence)} and {len(target)}"
            )
        self.sequence = sequence
        self.target = target

    def __len__(self):
        """Return the number of (sequence, target) samples."""
        return len(self.sequence)

    def __getitem__(self, idx):
        """Return the ``(sequence, target)`` pair stored at position ``idx``."""
        return self.sequence[idx], self.target[idx]

# Define LSTM and GRU models
class CharRNN(nn.Module):
    """Embedding + stacked LSTM/GRU + linear next-character classifier.

    Args:
        input_size: Number of distinct token ids (rows of the embedding table).
        hidden_size: Embedding dimension and recurrent hidden width (the same
            value is used for both).
        output_size: Number of output classes.
        num_layers: Number of stacked recurrent layers.
        rnn_type: ``'lstm'`` (default) or ``'gru'``.

    Raises:
        ValueError: If ``rnn_type`` is neither ``'lstm'`` nor ``'gru'``.
            Previously this left ``self.rnn`` undefined and the failure only
            appeared on the first forward pass.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, rnn_type='lstm'):
        super().__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        if rnn_type == 'lstm':
            self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        elif rnn_type == 'gru':
            self.rnn = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        else:
            raise ValueError(
                f"Unsupported rnn_type {rnn_type!r}; expected 'lstm' or 'gru'"
            )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return one row of class logits per batch element of token ids ``x``."""
        embed = self.embedding(x)
        out, _ = self.rnn(embed)
        # Only the last time step's hidden state feeds the classifier
        return self.fc(out[:, -1, :])

# Define training function
def train_model(model, train_loader, test_loader, criterion, optimizer, epochs):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    Losses are averaged per sample (each batch loss is weighted by its batch
    size), so a short final batch does not skew the reported value. This
    assumes ``criterion`` returns the mean loss over the batch, which is the
    default reduction of the PyTorch loss modules.

    Args:
        model: Module called as ``model(inputs)`` to produce class logits.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, test_losses, accuracies, execution_time)``. The first
        three are lists with one entry per epoch; ``execution_time`` is the
        wall-clock time in seconds for the whole loop, evaluation passes
        included. An epoch with no samples reports ``nan`` instead of raising
        ``ZeroDivisionError``.
    """
    train_losses = []
    test_losses = []
    accuracies = []
    start_time = time.time()

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        seen = 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            batch_count = targets.size(0)
            running_loss += loss.item() * batch_count
            seen += batch_count

        train_loss = running_loss / seen if seen else float("nan")
        train_losses.append(train_loss)

        # ---- evaluation pass (no gradient tracking) ----
        model.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in test_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                batch_count = targets.size(0)
                test_loss += loss.item() * batch_count
                _, predicted = torch.max(outputs, 1)
                total += batch_count
                correct += (predicted == targets).sum().item()

        test_loss = test_loss / total if total else float("nan")
        test_losses.append(test_loss)

        accuracy = correct / total if total else float("nan")
        accuracies.append(accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}, Accuracy: {accuracy:.4f}")

    end_time = time.time()
    execution_time = end_time - start_time
    return train_losses, test_losses, accuracies, execution_time

# Step 3: Create data loaders
batch_size = 128
train_loaders = []
test_loaders = []
# Convert the corpus once; every sequence length below slices this tensor.
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
for seq_length in sequence_lengths:
    # unfold() exposes every window of `seq_length` consecutive ids as a view
    # of encoded_tensor, avoiding millions of Python list slices. The final
    # window has no following character to predict, so it is dropped.
    # NOTE: unfold() requires the corpus to hold at least `seq_length` ids.
    sequences = encoded_tensor.unfold(0, seq_length, 1)[:-1]
    # targets[k] is the id that follows window k
    targets = encoded_tensor[seq_length:]

    dataset = CharDataset(sequences, targets)
    # 80/20 random train/test split
    train_size = int(len(dataset) * 0.8)
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

    # Kept in the same order as sequence_lengths; later cells index by position
    train_loaders.append(train_loader)
    test_loaders.append(test_loader)

# Step 4: Define hyperparameters
# The model reads and predicts characters from the same vocabulary, so the
# input and output sizes are both the number of distinct characters.
input_size = output_size = len(chars)
hidden_size, num_layers = 256, 2
epochs, learning_rate = 1, 0.001

# Step 5: Initialize and train LSTM models
# One entry per sequence length is appended to each of these lists.
lstm_train_losses, lstm_test_losses = [], []
lstm_accuracies, lstm_execution_times = [], []
for i, seq_length in enumerate(sequence_lengths):
    print(f"\nTraining LSTM for sequence length {seq_length}")
    lstm_model = CharRNN(input_size, hidden_size, output_size, num_layers, rnn_type='lstm')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate)
    lstm_run = train_model(lstm_model, train_loaders[i], test_loaders[i], criterion, optimizer, epochs)
    # lstm_run holds (train losses, test losses, accuracies, execution time)
    lstm_histories = (lstm_train_losses, lstm_test_losses, lstm_accuracies, lstm_execution_times)
    for history, value in zip(lstm_histories, lstm_run):
        history.append(value)

# Step 6: Initialize and train GRU models
# One entry per sequence length is appended to each of these lists.
gru_train_losses, gru_test_losses = [], []
gru_accuracies, gru_execution_times = [], []
for i, seq_length in enumerate(sequence_lengths):
    print(f"\nTraining GRU for sequence length {seq_length}")
    gru_model = CharRNN(input_size, hidden_size, output_size, num_layers, rnn_type='gru')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(gru_model.parameters(), lr=learning_rate)
    gru_run = train_model(gru_model, train_loaders[i], test_loaders[i], criterion, optimizer, epochs)
    # gru_run holds (train losses, test losses, accuracies, execution time)
    gru_histories = (gru_train_losses, gru_test_losses, gru_accuracies, gru_execution_times)
    for history, value in zip(gru_histories, gru_run):
        history.append(value)

# Print results: the same report layout for each model family, LSTM first
report_sections = (
    ("\nLSTM Model Results:", lstm_train_losses, lstm_test_losses, lstm_accuracies, lstm_execution_times),
    ("\n\nGRU Model Results:", gru_train_losses, gru_test_losses, gru_accuracies, gru_execution_times),
)
for heading, train_hist, test_hist, acc_hist, time_hist in report_sections:
    print(heading)
    for i, seq_length in enumerate(sequence_lengths):
        print(f"\nResults for sequence length {seq_length}:")
        print("Train Loss:", train_hist[i])
        print("Test Loss:", test_hist[i])
        print("Accuracy:", acc_hist[i])
        print("Execution Time:", time_hist[i])



Training LSTM for sequence length 20
Epoch 1/1, Train Loss: 1.676694, Test Loss: 1.490935, Accuracy: 0.5449

Training LSTM for sequence length 30
Epoch 1/1, Train Loss: 1.663291, Test Loss: 1.469452, Accuracy: 0.5517

Training GRU for sequence length 20
Epoch 1/1, Train Loss: 1.640184, Test Loss: 1.496257, Accuracy: 0.5427

Training GRU for sequence length 30
Epoch 1/1, Train Loss: 1.632545, Test Loss: 1.484360, Accuracy: 0.5477

LSTM Model Results:

Results for sequence length 20:
Train Loss: [1.676693968265411]
Test Loss: [1.490935357356988]
Accuracy: [0.5449198699988793]
Execution Time: 1706.71955037117

Results for sequence length 30:
Train Loss: [1.6632906275785866]
Test Loss: [1.4694518446580336]
Accuracy: [0.5517476341825321]
Execution Time: 4389.573019742966


GRU Model Results:

Results for sequence length 20:
Train Loss: [1.6401844349012795]
Test Loss: [1.4962571951690515]
Accuracy: [0.54270088535246]
Execution Time: 17468.664876699448

Results for sequence length 30:
Train 