<a href="https://colab.research.google.com/github/SelenaNahra/DL/blob/main/HW3P2GRU30.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**GRU - 30 Sequences**

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import requests
import time

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 30
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Defining the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharRNN(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
start_time = time.time()

for epoch in range(epochs):
    model.train()
    for sequences, targets in train_loader:  # Iterate over batches
        sequences, targets = sequences.to(device), targets.to(device)  # Move data to device
        optimizer.zero_grad()
        output = model(sequences)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_loss = 0
        correct = 0
        total = 0
        for sequences, targets in test_loader:
            sequences, targets = sequences.to(device), targets.to(device)  # Move data to device
            val_output = model(sequences)
            val_loss += criterion(val_output, targets).item()
            _, predicted = torch.max(val_output.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        val_loss /= len(test_loader)
        val_accuracy = correct / total

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

end_time = time.time()
training_time = end_time - start_time

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-sequence_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Counting model complexity
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")
print(f"Training time: {training_time} seconds")
print(f"Number of trainable parameters in the model: {count_parameters(model)}")

# Compute computational complexity
embedding_complexity = sequences.shape[1] * hidden_size
rnn_complexity = sequences.shape[1] * hidden_size * hidden_size
linear_complexity = hidden_size * len(chars)

total_complexity = embedding_complexity + rnn_complexity + linear_complexity
print(f"Total computational complexity: {total_complexity}")

Epoch 10, Loss: 2.476332426071167, Validation Loss: 1.8488357415229373, Validation Accuracy: 0.46166501548820343
Epoch 20, Loss: 5.015228748321533, Validation Loss: 1.8463738201954416, Validation Accuracy: 0.46163363562600584
Epoch 30, Loss: 4.003642559051514, Validation Loss: 1.8344574705048395, Validation Accuracy: 0.4642740268880591
Epoch 40, Loss: 1.4753845930099487, Validation Loss: 1.8383937730778241, Validation Accuracy: 0.46582060581065393
Epoch 50, Loss: 2.3481836318969727, Validation Loss: 1.8447738989153197, Validation Accuracy: 0.4643950634993926
Epoch 60, Loss: 1.3075231313705444, Validation Loss: 1.8469849214838348, Validation Accuracy: 0.46296503835067443
Epoch 70, Loss: 1.456678867340088, Validation Loss: 1.8648766013171163, Validation Accuracy: 0.4568594137345174
Epoch 80, Loss: 1.6012907028198242, Validation Loss: 1.8752124498988867, Validation Accuracy: 0.4550886929390827
Epoch 90, Loss: 1.3872917890548706, Validation Loss: 1.8959803076496222, Validation Accuracy: 0.

**GRU Adjusted Hyperparameters- 30 Sequences**

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import requests
import time

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 30
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Defining the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, fc_hidden_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, fc_hidden_size)
        self.fc2 = nn.Linear(fc_hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        output = self.fc2(output)
        return output

# Hyperparameters with adjustable FCNN, hidden layers, and hidden states
hidden_size = 256  # Increase the hidden size
num_layers = 3  # Increase the number of hidden layers
fc_hidden_size = 512  # Increase the hidden size of the fully connected layer

learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharRNN(len(chars), hidden_size, len(chars), num_layers, fc_hidden_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
start_time = time.time()

for epoch in range(epochs):
    model.train()
    for sequences, targets in train_loader:  # Iterate over batches
        sequences, targets = sequences.to(device), targets.to(device)  # Move data to device
        optimizer.zero_grad()
        output = model(sequences)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_loss = 0
        correct = 0
        total = 0
        for sequences, targets in test_loader:
            sequences, targets = sequences.to(device), targets.to(device)  # Move data to device
            val_output = model(sequences)
            val_loss += criterion(val_output, targets).item()
            _, predicted = torch.max(val_output.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        val_loss /= len(test_loader)
        val_accuracy = correct / total

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

end_time = time.time()
training_time = end_time - start_time

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-sequence_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")
print(f"Training time: {training_time} seconds")
print(f"Number of trainable parameters in the model: {count_parameters(model)}")

# Compute computational complexity
embedding_complexity = sequences.shape[1] * hidden_size
rnn_complexity = sequences.shape[1] * hidden_size * hidden_size * num_layers
linear_complexity = fc_hidden_size * hidden_size + fc_hidden_size * len(chars)

total_complexity = embedding_complexity + rnn_complexity + linear_complexity
print(f"Total computational complexity: {total_complexity}")

Epoch 10, Loss: 3.4925758838653564, Validation Loss: 2.597985153969717, Validation Accuracy: 0.2719692656663962
Epoch 20, Loss: 1.1913331747055054, Validation Loss: 2.727972574091751, Validation Accuracy: 0.23195545852702926
Epoch 30, Loss: 0.6273004412651062, Validation Loss: 2.7101681964259274, Validation Accuracy: 0.2580590210379562
Epoch 40, Loss: 2.2036917209625244, Validation Loss: 2.8645099682569914, Validation Accuracy: 0.2061791431504485
Epoch 50, Loss: 2.128560781478882, Validation Loss: 2.7398984344677206, Validation Accuracy: 0.2555127693624957
Epoch 60, Loss: 4.226850986480713, Validation Loss: 2.564301550012174, Validation Accuracy: 0.2829163547358936
Epoch 70, Loss: 3.2446682453155518, Validation Loss: 2.4981800935354577, Validation Accuracy: 0.3023494551111071
Epoch 80, Loss: 2.836771011352539, Validation Loss: 2.5314204576572195, Validation Accuracy: 0.29149202279074565
Epoch 90, Loss: 1.8073350191116333, Validation Loss: 2.535394457381003, Validation Accuracy: 0.28903

NameError: name 'count_parameters' is not defined

In [2]:
# Define the count_parameters function
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Call the function with your model
parameters_count = count_parameters(model)

# Output the parameters count
print(f"Number of trainable parameters in the model: {parameters_count}")

Number of trainable parameters in the model: 1365825


In [3]:
# Compute computational complexity
embedding_complexity = sequences.shape[1] * hidden_size
rnn_complexity = sequences.shape[1] * hidden_size * hidden_size * num_layers
linear_complexity = fc_hidden_size * hidden_size + fc_hidden_size * len(chars)

total_complexity = embedding_complexity + rnn_complexity + linear_complexity
print(f"Total computational complexity: {total_complexity}")

Total computational complexity: 6070272
