<a href="https://colab.research.google.com/github/TimotheeeNiven/IntroML_TNiven/blob/main/Homework3_TNiven.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time
from torch.utils.data import Dataset, DataLoader
import requests

In [3]:
# Given Text
text = """Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."""


# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
maxlen_values = [10, 20, 30]  # Maximum length of input sequences
X = []
y = []
for maxlen in maxlen_values:
    for i in range(len(text) - maxlen):
        sequence = text[i:i + maxlen]
        label = text[i + maxlen]
        X.append([char_to_ix[char] for char in sequence])
        y.append(char_to_ix[label])

# Pad sequences to ensure fixed length
maxlen = max(maxlen_values)
X = np.array([x + [0] * (maxlen - len(x)) for x in X])  # Padding with zeros
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Define the RNN, LSTM, and GRU models
class CharModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type='RNN'):
        super(CharModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == 'RNN':
            self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        elif model_type == 'LSTM':
            self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        elif model_type == 'GRU':
            self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        else:
            raise ValueError("Invalid model type. Choose among 'RNN', 'LSTM', 'GRU'.")
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100


In [4]:
# Train and evaluate function
def train_evaluate(model_type, X_train, y_train, X_val, y_val):
    model = CharModel(len(chars), hidden_size, len(chars), model_type)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()

        if (epoch+1) % 10 == 0:
            print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

    end_time = time.time()
    execution_time = end_time - start_time

    return loss.item(), val_loss.item(), val_accuracy.item(), execution_time


In [None]:
# Train and evaluate models for different sequence lengths
results = {}
for maxlen in maxlen_values:
    print(f"\nTraining models for sequence length: {maxlen}")
    results[maxlen] = {}
    for model_type in ['RNN', 'LSTM', 'GRU']:
        print(f"\nTraining {model_type} model...")
        loss, val_loss, val_accuracy, execution_time = train_evaluate(model_type, X_train, y_train, X_val, y_val)
        results[maxlen][model_type] = {
            'loss': loss,
            'val_loss': val_loss,
            'val_accuracy': val_accuracy,
            'execution_time': execution_time
        }

# Print and compare results
for maxlen, models_data in results.items():
    print(f"\nResults for sequence length: {maxlen}")
    for model_type, data in models_data.items():
        print(f"\n{model_type} Model:")
        print(f"Training Loss: {data['loss']}")
        print(f"Validation Loss: {data['val_loss']}")
        print(f"Validation Accuracy: {data['val_accuracy']}")
        print(f"Execution Time: {data['execution_time']} seconds")



Training models for sequence length: 10

Training RNN model...
Epoch 10, Loss: 2.9129414558410645, Validation Loss: 2.909475803375244, Validation Accuracy: 0.2044975459575653
Epoch 20, Loss: 2.788285732269287, Validation Loss: 2.814077138900757, Validation Accuracy: 0.22909346222877502
Epoch 30, Loss: 2.6973841190338135, Validation Loss: 2.756165027618408, Validation Accuracy: 0.23682361841201782
Epoch 40, Loss: 2.6224026679992676, Validation Loss: 2.7148773670196533, Validation Accuracy: 0.25017568469047546
Epoch 50, Loss: 2.5546953678131104, Validation Loss: 2.686769485473633, Validation Accuracy: 0.2543921172618866
Epoch 60, Loss: 2.49066424369812, Validation Loss: 2.6685307025909424, Validation Accuracy: 0.26282501220703125
Epoch 70, Loss: 2.4297573566436768, Validation Loss: 2.6530256271362305, Validation Accuracy: 0.26914969086647034
Epoch 80, Loss: 2.3712873458862305, Validation Loss: 2.644559144973755, Validation Accuracy: 0.2803935408592224
Epoch 90, Loss: 2.32064151763916, V

Problem 2

In [5]:
# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 20
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    device_name = torch.cuda.get_device_name(0)
    print(device_name)

print(f"Using device: {device}")

NVIDIA A100-SXM4-40GB
Using device: cuda:0


In [7]:
# Train and evaluate function
def train_evaluate2(model_type, train_loader, val_loader, device):
    model = CharModel(len(chars), hidden_size, len(chars), model_type).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device, non_blocking=True), targets.to(device, non_blocking=True)  # Move data to device
            optimizer.zero_grad()
            output = model(inputs)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * inputs.size(0)

        epoch_train_loss = train_loss / len(train_loader.dataset)

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device, non_blocking=True), targets.to(device, non_blocking=True)  # Move data to device
                val_output = model(inputs)
                loss = criterion(val_output, targets)
                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(val_output, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        epoch_val_loss = val_loss / len(val_loader.dataset)
        epoch_val_accuracy = correct / total

        if (epoch+1) % 10 == 0:
            print(f'Epoch {epoch+1}, Train Loss: {epoch_train_loss}, Validation Loss: {epoch_val_loss}, Validation Accuracy: {epoch_val_accuracy}')

    end_time = time.time()
    execution_time = end_time - start_time

    return epoch_train_loss, epoch_val_loss, epoch_val_accuracy, execution_time


In [None]:
# Hyperparameters
hidden_size = 128
learning_rate = 0.0005
epochs = 100
# Train and evaluate the LSTM model
print("Training and evaluating LSTM model...")
train_loss, val_loss, val_accuracy, execution_time = train_evaluate2('LSTM', train_loader, train_loader, device)

# Train and evaluate the GRU model
print("Training and evaluating GRU model...")
train_loss, val_loss, val_accuracy, execution_time = train_evaluate2('GRU', train_loader, train_loader, device)

Training and evaluating LSTM model...
Epoch 10, Train Loss: 1.4109611365043617, Validation Loss: 1.396414201729656, Validation Accuracy: 0.5721176421804799
Epoch 20, Train Loss: 1.350513165075595, Validation Loss: 1.3374089255055213, Validation Accuracy: 0.5872717553196855
Epoch 30, Train Loss: 1.3226823735595084, Validation Loss: 1.3071825022536858, Validation Accuracy: 0.5948241564766967
Epoch 40, Train Loss: 1.3048663755977816, Validation Loss: 1.290861330392085, Validation Accuracy: 0.5991029912618976
Epoch 50, Train Loss: 1.292621596908453, Validation Loss: 1.277245602721109, Validation Accuracy: 0.603527517121503
Epoch 60, Train Loss: 1.2839527627860696, Validation Loss: 1.2708290662367745, Validation Accuracy: 0.6046089931738128
Epoch 70, Train Loss: 1.2770805845022826, Validation Loss: 1.2627250504150982, Validation Accuracy: 0.6070790172352541
Epoch 80, Train Loss: 1.2721755948655573, Validation Loss: 1.2578885117500354, Validation Accuracy: 0.6085628247930346
Epoch 90, Train 

In [None]:
#This is for Sequence Length 30
# Create sequences and targets
sequence_length = 30;
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# Hyperparameters
print("Evaluating for Sequence Length 30")
hidden_size = 128
learning_rate = 0.0005
epochs = 100
# Train and evaluate the LSTM model
print("Training and evaluating LSTM model...")
train_loss, val_loss, val_accuracy, execution_time = train_evaluate2('LSTM', train_loader, train_loader, device)

# Train and evaluate the GRU model
print("Training and evaluating GRU model...")
train_loss, val_loss, val_accuracy, execution_time = train_evaluate2('GRU', train_loader, train_loader, device)

Evaluating for Sequence Length 30
Training and evaluating LSTM model...
Epoch 10, Train Loss: 1.4059924463451103, Validation Loss: 1.3916189729705737, Validation Accuracy: 0.5735180563291572
Epoch 20, Train Loss: 1.3464286461756092, Validation Loss: 1.3326051385636923, Validation Accuracy: 0.5895554253040768
Epoch 30, Train Loss: 1.3171198329919274, Validation Loss: 1.3045299024550492, Validation Accuracy: 0.596733576826394
Epoch 40, Train Loss: 1.2991325035313928, Validation Loss: 1.2868888624251587, Validation Accuracy: 0.6012354713876975
Epoch 50, Train Loss: 1.2865187881583107, Validation Loss: 1.2746312235356538, Validation Accuracy: 0.6037951744442116
Epoch 60, Train Loss: 1.2779357782344112, Validation Loss: 1.2621053675526352, Validation Accuracy: 0.6078039563326314
Epoch 70, Train Loss: 1.2707720231837865, Validation Loss: 1.2557206259084277, Validation Accuracy: 0.6099590828552569
Epoch 80, Train Loss: 1.2650661514629618, Validation Loss: 1.253538179849629, Validation Accurac

In [8]:
#This is for Sequence Length 30
# Create sequences and targets
sequence_length = 50;
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [9]:
# Hyperparameters
print("Evaluating for Sequence Length 50")
hidden_size = 128
learning_rate = 0.0005
epochs = 100
# Train and evaluate the LSTM model
print("Training and evaluating LSTM model...")
train_loss, val_loss, val_accuracy, execution_time = train_evaluate2('LSTM', train_loader, train_loader, device)

# Train and evaluate the GRU model
print("Training and evaluating GRU model...")
train_loss, val_loss, val_accuracy, execution_time = train_evaluate2('GRU', train_loader, train_loader, device)

Evaluating for Sequence Length 50
Training and evaluating LSTM model...
Epoch 10, Train Loss: 1.397958844171168, Validation Loss: 1.3794347694712326, Validation Accuracy: 0.5785783530862122
Epoch 20, Train Loss: 1.3352014689072613, Validation Loss: 1.320722958589154, Validation Accuracy: 0.5932913059314673
Epoch 30, Train Loss: 1.3057113839622665, Validation Loss: 1.2921570134337252, Validation Accuracy: 0.601083746602785
Epoch 40, Train Loss: 1.2876403753938708, Validation Loss: 1.2716826912913959, Validation Accuracy: 0.6066324843798157
Epoch 50, Train Loss: 1.2753978400326467, Validation Loss: 1.2597750826992988, Validation Accuracy: 0.6100305399120226
Epoch 60, Train Loss: 1.2662330212565958, Validation Loss: 1.2526395235100123, Validation Accuracy: 0.611923454092068
Epoch 70, Train Loss: 1.2592829140245803, Validation Loss: 1.2485970693239663, Validation Accuracy: 0.6128536605867025
Epoch 80, Train Loss: 1.2533658096536537, Validation Loss: 1.2396157487546107, Validation Accuracy: