1A. 10 RNN

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split

# Sample text
text =("Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting "
       "the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, "
       "including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. "
       "At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and "
       "predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of "
       "text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent "
       "Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for"
       " sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character."
       " LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character "
       "prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the "
       "probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters "
       "to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, "
       "the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. "
       "This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and "
       "enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in "
       "enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use"
       " of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future "
       "of text-based technology.")

chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)} 
chars = sorted(list(set(text)))




# Preparing the dataset
max_length = 10  # Maximum length of input sequences
X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])  # Get the output of the last RNN cell
        return output

class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        output = self.fc(output[:, -1, :])
        return output

class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.gru(embedded)
        output = self.fc(output[:, -1, :])
        return output
# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharRNN(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.538947820663452, Validation Loss: 2.482860803604126, Validation Accuracy: 0.32773110270500183
Epoch 20, Loss: 2.029510259628296, Validation Loss: 2.1493940353393555, Validation Accuracy: 0.4264705777168274
Epoch 30, Loss: 1.6401114463806152, Validation Loss: 1.9770076274871826, Validation Accuracy: 0.47478991746902466
Epoch 40, Loss: 1.2970045804977417, Validation Loss: 1.8995535373687744, Validation Accuracy: 0.49159663915634155
Epoch 50, Loss: 0.9919137954711914, Validation Loss: 1.8426530361175537, Validation Accuracy: 0.5189075469970703
Epoch 60, Loss: 0.7165911793708801, Validation Loss: 1.8662142753601074, Validation Accuracy: 0.5126050710678101
Epoch 70, Loss: 0.4867504835128784, Validation Loss: 1.9188909530639648, Validation Accuracy: 0.5147058963775635
Epoch 80, Loss: 0.30940911173820496, Validation Loss: 2.0147786140441895, Validation Accuracy: 0.506302535533905
Epoch 90, Loss: 0.18833713233470917, Validation Loss: 2.1203010082244873, Validation Accuracy: 0

20 RNN

In [6]:
max_length = 20
# Model, loss, and optimizer
model = CharRNN(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.5436341762542725, Validation Loss: 2.4962899684906006, Validation Accuracy: 0.32983192801475525
Epoch 20, Loss: 2.044827461242676, Validation Loss: 2.1697237491607666, Validation Accuracy: 0.42016807198524475
Epoch 30, Loss: 1.6352908611297607, Validation Loss: 1.9800901412963867, Validation Accuracy: 0.47478991746902466
Epoch 40, Loss: 1.2673521041870117, Validation Loss: 1.897784948348999, Validation Accuracy: 0.5105041861534119
Epoch 50, Loss: 0.9275962710380554, Validation Loss: 1.9047890901565552, Validation Accuracy: 0.5252100825309753
Epoch 60, Loss: 0.6253383159637451, Validation Loss: 1.9581834077835083, Validation Accuracy: 0.5231092572212219
Epoch 70, Loss: 0.38950324058532715, Validation Loss: 2.0432615280151367, Validation Accuracy: 0.506302535533905
Epoch 80, Loss: 0.22826512157917023, Validation Loss: 2.129739999771118, Validation Accuracy: 0.4978991448879242
Epoch 90, Loss: 0.1372307986021042, Validation Loss: 2.242154359817505, Validation Accuracy: 0.

30 RNN

In [7]:
max_length = 30
# Model, loss, and optimizer
model = CharRNN(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.488182544708252, Validation Loss: 2.4525768756866455, Validation Accuracy: 0.3403361439704895
Epoch 20, Loss: 1.9660340547561646, Validation Loss: 2.118163585662842, Validation Accuracy: 0.4264705777168274
Epoch 30, Loss: 1.568363070487976, Validation Loss: 1.974371075630188, Validation Accuracy: 0.4831932783126831
Epoch 40, Loss: 1.20578134059906, Validation Loss: 1.8996363878250122, Validation Accuracy: 0.5
Epoch 50, Loss: 0.8849911689758301, Validation Loss: 1.8792941570281982, Validation Accuracy: 0.5231092572212219
Epoch 60, Loss: 0.604091465473175, Validation Loss: 1.941532015800476, Validation Accuracy: 0.5315126180648804
Epoch 70, Loss: 0.38006526231765747, Validation Loss: 2.0621864795684814, Validation Accuracy: 0.5084033608436584
Epoch 80, Loss: 0.22453683614730835, Validation Loss: 2.1934454441070557, Validation Accuracy: 0.506302535533905
Epoch 90, Loss: 0.13568560779094696, Validation Loss: 2.3115172386169434, Validation Accuracy: 0.506302535533905
Epoch

1O LSTM

In [8]:
max_length = 10
# Model, loss, and optimizer
model = CharLSTM(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.532536029815674, Validation Loss: 2.4758641719818115, Validation Accuracy: 0.31512606143951416
Epoch 20, Loss: 2.0218567848205566, Validation Loss: 2.1617367267608643, Validation Accuracy: 0.4327731132507324
Epoch 30, Loss: 1.6215181350708008, Validation Loss: 2.008790969848633, Validation Accuracy: 0.45798319578170776
Epoch 40, Loss: 1.2695313692092896, Validation Loss: 1.9236398935317993, Validation Accuracy: 0.48739495873451233
Epoch 50, Loss: 0.9612050652503967, Validation Loss: 1.9155203104019165, Validation Accuracy: 0.48739495873451233
Epoch 60, Loss: 0.6820419430732727, Validation Loss: 1.928607702255249, Validation Accuracy: 0.5042017102241516
Epoch 70, Loss: 0.4530436396598816, Validation Loss: 1.9936124086380005, Validation Accuracy: 0.5189075469970703
Epoch 80, Loss: 0.28237447142601013, Validation Loss: 2.087482213973999, Validation Accuracy: 0.5189075469970703
Epoch 90, Loss: 0.17366564273834229, Validation Loss: 2.1961309909820557, Validation Accuracy: 

20 LSTM

In [9]:
max_length = 20
# Model, loss, and optimizer
model = CharLSTM(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.5772600173950195, Validation Loss: 2.5070526599884033, Validation Accuracy: 0.325630247592926
Epoch 20, Loss: 2.0863804817199707, Validation Loss: 2.199014902114868, Validation Accuracy: 0.4285714328289032
Epoch 30, Loss: 1.6980786323547363, Validation Loss: 2.032346248626709, Validation Accuracy: 0.4432772994041443
Epoch 40, Loss: 1.3486571311950684, Validation Loss: 1.936607003211975, Validation Accuracy: 0.4852941036224365
Epoch 50, Loss: 1.019872784614563, Validation Loss: 1.9033173322677612, Validation Accuracy: 0.4810924232006073
Epoch 60, Loss: 0.7257246375083923, Validation Loss: 1.9383385181427002, Validation Accuracy: 0.4957983195781708
Epoch 70, Loss: 0.48039859533309937, Validation Loss: 2.0262200832366943, Validation Accuracy: 0.4957983195781708
Epoch 80, Loss: 0.2987724840641022, Validation Loss: 2.120030164718628, Validation Accuracy: 0.4957983195781708
Epoch 90, Loss: 0.1826467365026474, Validation Loss: 2.2285513877868652, Validation Accuracy: 0.49159

30 LSTM

In [10]:
max_length = 30
# Model, loss, and optimizer
model = CharLSTM(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.542482852935791, Validation Loss: 2.4931578636169434, Validation Accuracy: 0.3445378243923187
Epoch 20, Loss: 2.0392627716064453, Validation Loss: 2.1558632850646973, Validation Accuracy: 0.41806721687316895
Epoch 30, Loss: 1.65337073802948, Validation Loss: 1.9960606098175049, Validation Accuracy: 0.4600840210914612
Epoch 40, Loss: 1.3040956258773804, Validation Loss: 1.9099016189575195, Validation Accuracy: 0.5084033608436584
Epoch 50, Loss: 0.9864965081214905, Validation Loss: 1.8779065608978271, Validation Accuracy: 0.5252100825309753
Epoch 60, Loss: 0.7067068219184875, Validation Loss: 1.9204643964767456, Validation Accuracy: 0.5105041861534119
Epoch 70, Loss: 0.4737590551376343, Validation Loss: 1.9726073741912842, Validation Accuracy: 0.5252100825309753
Epoch 80, Loss: 0.3022885322570801, Validation Loss: 2.0723044872283936, Validation Accuracy: 0.5147058963775635
Epoch 90, Loss: 0.19074365496635437, Validation Loss: 2.1873555183410645, Validation Accuracy: 0.5

10 GRU

In [11]:
max_length = 10
# Model, loss, and optimizer
model = CharGRU(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.3777806758880615, Validation Loss: 2.3752284049987793, Validation Accuracy: 0.3781512677669525
Epoch 20, Loss: 1.8709954023361206, Validation Loss: 2.1087470054626465, Validation Accuracy: 0.43487393856048584
Epoch 30, Loss: 1.4659206867218018, Validation Loss: 1.9687341451644897, Validation Accuracy: 0.48949578404426575
Epoch 40, Loss: 1.1053141355514526, Validation Loss: 1.8806748390197754, Validation Accuracy: 0.5168067216873169
Epoch 50, Loss: 0.7785309553146362, Validation Loss: 1.8826634883880615, Validation Accuracy: 0.5315126180648804
Epoch 60, Loss: 0.5035293698310852, Validation Loss: 1.9526100158691406, Validation Accuracy: 0.5105041861534119
Epoch 70, Loss: 0.2990594506263733, Validation Loss: 2.073343515396118, Validation Accuracy: 0.5147058963775635
Epoch 80, Loss: 0.17187370359897614, Validation Loss: 2.2228550910949707, Validation Accuracy: 0.48949578404426575
Epoch 90, Loss: 0.10461628437042236, Validation Loss: 2.3675670623779297, Validation Accuracy

20 GRU

In [12]:
max_length = 20
# Model, loss, and optimizer
model = CharGRU(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.4077155590057373, Validation Loss: 2.391218900680542, Validation Accuracy: 0.3403361439704895
Epoch 20, Loss: 1.8928576707839966, Validation Loss: 2.0913000106811523, Validation Accuracy: 0.45378151535987854
Epoch 30, Loss: 1.4780752658843994, Validation Loss: 1.9101999998092651, Validation Accuracy: 0.49159663915634155
Epoch 40, Loss: 1.1139681339263916, Validation Loss: 1.850730299949646, Validation Accuracy: 0.529411792755127
Epoch 50, Loss: 0.7867543697357178, Validation Loss: 1.8516132831573486, Validation Accuracy: 0.5462185144424438
Epoch 60, Loss: 0.5065733194351196, Validation Loss: 1.9372183084487915, Validation Accuracy: 0.5357142686843872
Epoch 70, Loss: 0.2967461049556732, Validation Loss: 2.0838730335235596, Validation Accuracy: 0.5399159789085388
Epoch 80, Loss: 0.16768525540828705, Validation Loss: 2.2396023273468018, Validation Accuracy: 0.5525209903717041
Epoch 90, Loss: 0.10197245329618454, Validation Loss: 2.371074914932251, Validation Accuracy: 0.

30 GRU

In [13]:
max_length = 30
# Model, loss, and optimizer
model = CharGRU(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Predicting the next character
test_str = "Next character prediction is "
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.3838353157043457, Validation Loss: 2.3537120819091797, Validation Accuracy: 0.3214285671710968
Epoch 20, Loss: 1.849308729171753, Validation Loss: 2.054267406463623, Validation Accuracy: 0.4432772994041443
Epoch 30, Loss: 1.4371122121810913, Validation Loss: 1.8946009874343872, Validation Accuracy: 0.48949578404426575
Epoch 40, Loss: 1.069371223449707, Validation Loss: 1.8227763175964355, Validation Accuracy: 0.5231092572212219
Epoch 50, Loss: 0.7419759035110474, Validation Loss: 1.8512152433395386, Validation Accuracy: 0.529411792755127
Epoch 60, Loss: 0.47285255789756775, Validation Loss: 1.9473810195922852, Validation Accuracy: 0.5462185144424438
Epoch 70, Loss: 0.2747008204460144, Validation Loss: 2.0933473110198975, Validation Accuracy: 0.5357142686843872
Epoch 80, Loss: 0.15243537724018097, Validation Loss: 2.2739641666412354, Validation Accuracy: 0.5273109078407288
Epoch 90, Loss: 0.09358999878168106, Validation Loss: 2.410339832305908, Validation Accuracy: 0.5

2 20

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import time
import os

dataset_path = 'tiny_shakespeare.txt'

# Load and preprocess the dataset
def load_dataset(dataset_path):
    with open(dataset_path, 'r', encoding='utf-8') as file:
        text = file.read()
    
    # Create a mapping for char to index and index to char
    chars = sorted(list(set(text)))
    char_to_idx = {ch: idx for idx, ch in enumerate(chars)}
    idx_to_char = {idx: ch for idx, ch in enumerate(chars)}
    
    # Convert all text to integers
    data = [char_to_idx[ch] for ch in text]
    return data, char_to_idx, idx_to_char

data, char_to_idx, idx_to_char = load_dataset(dataset_path)
vocab_size = len(char_to_idx)  # Update vocab_size based on the actual dataset

# Now you can define your CharDataset class and proceed with the rest of the setup
class CharDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, index):
        return (torch.tensor(self.data[index:index+self.seq_length]), 
                torch.tensor(self.data[index+1:index+self.seq_length+1]))

# Example: Creating the dataset with a sequence length
seq_length = 30  # Or any other sequence length you want to experiment with
dataset = CharDataset(data, seq_length)
data_loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Load and preprocess the dataset
def load_dataset(dataset_path):
    with open(dataset_path, 'r', encoding='utf-8') as file:
        text = file.read()
    
    # Create a mapping for char to index and index to char
    chars = sorted(list(set(text)))
    char_to_idx = {ch: idx for idx, ch in enumerate(chars)}
    idx_to_char = {idx: ch for idx, ch in enumerate(chars)}
    
    # Convert all text to integers
    data = [char_to_idx[ch] for ch in text]
    return data, char_to_idx, idx_to_char

data, char_to_idx, idx_to_char = load_dataset(dataset_path)
vocab_size = len(char_to_idx)  # Update vocab_size based on the actu

# Example: Creating the dataset with a sequence length
seq_length = 30  # Or any other sequence length you want to experiment with
dataset = CharDataset(data, seq_length)
data_loader = DataLoader(dataset, batch_size=64, shuffle=True)

# Assuming the dataset is already tokenized and available as a list of integers
class CharDataset(Dataset):
    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, index):
        return (torch.tensor(self.data[index:index+self.seq_length]), 
                torch.tensor(self.data[index+1:index+self.seq_length+1]))

# Model Definitions
class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers, rnn_type="LSTM"):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embed = nn.Embedding(vocab_size, embed_dim)
        if rnn_type == "LSTM":
            self.rnn = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True)
        else:  # Default to GRU
            self.rnn = nn.GRU(embed_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        x = self.embed(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out.reshape(-1, self.hidden_dim))
        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        if isinstance(self.rnn, nn.LSTM):
            return (weight.new(self.num_layers, batch_size, self.hidden_dim).zero_(),
                    weight.new(self.num_layers, batch_size, self.hidden_dim).zero_())
        else:
            return weight.new(self.num_layers, batch_size, self.hidden_dim).zero_()

# Assuming `data` is your dataset loaded and processed into a list of tokenized integer values
vocab_size = 65  # Example vocab size, adjust based on your dataset
seq_length = 30  # Starting with sequence length of 30
batch_size = 64
dataset = CharDataset(data, seq_length)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize models
embed_dim = 256
hidden_dim = 512
num_layers = 2
lr = 0.001
epochs = 10

models = {
    "LSTM": RNNModel(vocab_size, embed_dim, hidden_dim, num_layers, "LSTM"),
    "GRU": RNNModel(vocab_size, embed_dim, hidden_dim, num_layers, "GRU")
}

# Training Function
def train_model(model, epochs, lr):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()

    for epoch in range(epochs):
        start_time = time.time()
        hidden = model.init_hidden(batch_size)
        total_loss = 0

        for x, y in data_loader:
            optimizer.zero_grad()
            x, y = x.to(device), y.to(device)
            y_pred, hidden = model(x, hidden)
            loss = criterion(y_pred, y.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f'Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}, Time: {time.time() - start_time}s')

# Training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for name, model in models.items():
    print(f"Training {name} model")
    model.to(device)
    train_model(model, epochs, lr)

# Note: You need to adjust this code to add validation, adjust hyperparameters, and change sequence lengths.


Training LSTM model


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.