<a href="https://colab.research.google.com/github/BrockH3/Intro_to_deep_Learning/blob/main/Homework_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import requests


In [2]:
torch.cuda.is_available()

True

In [3]:
#select gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda:0


In [4]:
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

In [8]:
chars = sorted(list(set(text)))

ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}
chars = sorted(list(set(text)))

# Preparing the dataset
max_length = 30
X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)

# Convert to pytorch tensors
X_train = torch.tensor(X_train, dtype=torch.long).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
X_val = torch.tensor(X_val, dtype=torch.long).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)


In [9]:
# Defining the LSTM model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output


In [10]:
hidden_size = 128
learning_rate = 0.01
epochs = 100

# Model, loss, and optimizer
model = RNNModel(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        #The use of the underscore _ is a common Python convention to indicate that the actual maximum values returned by torch.max are not needed and can be disregarded.
        #What we are interested in is the indices of these maximum values, which are captured by the variable predicted. These indices represent the model's predictions for each example in the validation set.
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


Epoch 10, Loss: 2.225863218307495, Validation Loss: 2.4574451446533203, Validation Accuracy: 0.3411017060279846
Epoch 20, Loss: 1.6268959045410156, Validation Loss: 2.133866548538208, Validation Accuracy: 0.4300847351551056
Epoch 30, Loss: 1.1361559629440308, Validation Loss: 2.015730381011963, Validation Accuracy: 0.4682203531265259
Epoch 40, Loss: 0.7343453168869019, Validation Loss: 2.045931339263916, Validation Accuracy: 0.4830508530139923
Epoch 50, Loss: 0.4360255300998688, Validation Loss: 2.146634340286255, Validation Accuracy: 0.49788135290145874
Epoch 60, Loss: 0.23831747472286224, Validation Loss: 2.2760772705078125, Validation Accuracy: 0.49152541160583496
Epoch 70, Loss: 0.14199554920196533, Validation Loss: 2.4347126483917236, Validation Accuracy: 0.4894067943096161
Epoch 80, Loss: 0.07668814808130264, Validation Loss: 2.5792384147644043, Validation Accuracy: 0.48516950011253357
Epoch 90, Loss: 0.0454573929309845, Validation Loss: 2.6731324195861816, Validation Accuracy: 0

In [11]:
# Defining the RNN model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output


hidden_size = 128
learning_rate = 0.01
epochs = 100

# Model, loss, and optimizer
model = RNNModel(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        #The use of the underscore _ is a common Python convention to indicate that the actual maximum values returned by torch.max are not needed and can be disregarded.
        #What we are interested in is the indices of these maximum values, which are captured by the variable predicted. These indices represent the model's predictions for each example in the validation set.
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


Epoch 10, Loss: 1.9092665910720825, Validation Loss: 2.2649757862091064, Validation Accuracy: 0.40042373538017273
Epoch 20, Loss: 1.319973111152649, Validation Loss: 2.069046974182129, Validation Accuracy: 0.47457626461982727
Epoch 30, Loss: 0.8501211404800415, Validation Loss: 2.098813533782959, Validation Accuracy: 0.49788135290145874
Epoch 40, Loss: 0.4903351962566376, Validation Loss: 2.266392946243286, Validation Accuracy: 0.5
Epoch 50, Loss: 0.26094070076942444, Validation Loss: 2.5374670028686523, Validation Accuracy: 0.4957627058029175
Epoch 60, Loss: 0.1291925013065338, Validation Loss: 2.8426644802093506, Validation Accuracy: 0.48516950011253357
Epoch 70, Loss: 0.09086263924837112, Validation Loss: 3.0460381507873535, Validation Accuracy: 0.4788135588169098
Epoch 80, Loss: 0.05952737480401993, Validation Loss: 3.1959269046783447, Validation Accuracy: 0.4830508530139923
Epoch 90, Loss: 0.038359384983778, Validation Loss: 3.3519158363342285, Validation Accuracy: 0.4703389704227

In [12]:
# Defining the GRU model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output


hidden_size = 128
learning_rate = 0.01
epochs = 100

model = RNNModel(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


Epoch 10, Loss: 2.0185775756835938, Validation Loss: 2.3338141441345215, Validation Accuracy: 0.38771185278892517
Epoch 20, Loss: 1.3454917669296265, Validation Loss: 2.083854913711548, Validation Accuracy: 0.4618644118309021
Epoch 30, Loss: 0.8087945580482483, Validation Loss: 2.0465424060821533, Validation Accuracy: 0.5042372941970825
Epoch 40, Loss: 0.4036268889904022, Validation Loss: 2.1761839389801025, Validation Accuracy: 0.5169491767883301
Epoch 50, Loss: 0.18009166419506073, Validation Loss: 2.3765764236450195, Validation Accuracy: 0.5275423526763916
Epoch 60, Loss: 0.07571472972631454, Validation Loss: 2.60168194770813, Validation Accuracy: 0.5190678238868713
Epoch 70, Loss: 0.039570536464452744, Validation Loss: 2.804356813430786, Validation Accuracy: 0.5127118825912476
Epoch 80, Loss: 0.023731088265776634, Validation Loss: 2.929187774658203, Validation Accuracy: 0.508474588394165
Epoch 90, Loss: 0.03242560848593712, Validation Loss: 3.0149919986724854, Validation Accuracy: 

In [25]:
# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 30
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [26]:
#LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output


In [27]:
hidden_size = 64
learning_rate = 0.005
epochs = 50

model = LSTMModel(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    for seq, labels in train_loader:
      seq = seq.to(device)
      labels = labels.to(device)
      optimizer.zero_grad()
      y_pred = model(seq)
      loss = criterion(y_pred, labels)
      loss.backward()
      optimizer.step()

# Predict future values
    with torch.no_grad():
        for seq, labels in test_loader:
          seq = seq.to(device)
          labels = labels.to(device)
          val_output = model(seq)
          val_loss = criterion(val_output, labels)
          _, predicted = torch.max(val_output, 1)
          val_accuracy = (predicted == labels).float().mean()
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

Epoch 10, Loss: 2.679677963256836, Validation Loss: 1.6076472997665405, Validation Accuracy: 0.5051546096801758
Epoch 20, Loss: 1.4840112924575806, Validation Loss: 1.6756868362426758, Validation Accuracy: 0.4639175236225128
Epoch 30, Loss: 1.7531142234802246, Validation Loss: 1.65836763381958, Validation Accuracy: 0.42268040776252747
Epoch 40, Loss: 1.2279257774353027, Validation Loss: 1.6670217514038086, Validation Accuracy: 0.44329896569252014
Epoch 50, Loss: 2.200448989868164, Validation Loss: 1.7713879346847534, Validation Accuracy: 0.4536082446575165


In [28]:
#GRU model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output


In [29]:
hidden_size = 64
learning_rate = 0.005
epochs = 50

model = GRUModel(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    for seq, labels in train_loader:
      seq = seq.to(device)
      labels = labels.to(device)
      optimizer.zero_grad()
      y_pred = model(seq)
      loss = criterion(y_pred, labels)
      loss.backward()
      optimizer.step()

# Predict future values
    with torch.no_grad():
        for seq, labels in test_loader:
            seq = seq.to(device)
            labels = labels.to(device)
            val_output = model(seq)
            val_loss = criterion(val_output, labels)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == labels).float().mean()
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

Epoch 10, Loss: 4.2449445724487305, Validation Loss: 1.9027674198150635, Validation Accuracy: 0.39175257086753845
Epoch 20, Loss: 1.9570497274398804, Validation Loss: 1.9007139205932617, Validation Accuracy: 0.4639175236225128
Epoch 30, Loss: 2.086622476577759, Validation Loss: 1.8944710493087769, Validation Accuracy: 0.39175257086753845
Epoch 40, Loss: 1.2223998308181763, Validation Loss: 1.9960131645202637, Validation Accuracy: 0.3711340129375458
Epoch 50, Loss: 1.7271795272827148, Validation Loss: 1.9356003999710083, Validation Accuracy: 0.3814432919025421
