In [22]:

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Input text
file_path = "trainable_text.txt"

# Open the file and read its contents
with open(file_path, 'r') as file:
    text = file.read()

In [23]:

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
X = []
y = []
max_length = 30  # Maximum length of input sequences
for i in range(len(text) - max_length - 1):
    sequence = text[i:i + max_length]
    label_sequence = text[i+1:i + max_length + 1]  # Shift by one for the next character sequence
    X.append([char_to_ix[char] for char in sequence])
    y.append([char_to_ix[char] for char in label_sequence])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)
X_val = torch.tensor(X_val, dtype=torch.long).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)


In [24]:
# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction[-1][-1],dim=0).item()         
        return ix_to_char[predicted_index]        
    
# Positional Encoding
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.encoding = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        self.encoding[:, 0::2] = torch.sin(position * div_term)
        self.encoding[:, 1::2] = torch.cos(position * div_term)
        self.encoding = self.encoding.unsqueeze(0)

    def forward(self, x):
        return x + self.encoding[:, :x.size(1)].detach().to(device)
    
# Defining the Transformer model
class CharTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, nhead):
        super(CharTransformer, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.pos_encoder = PositionalEncoding(hidden_size)
        encoder_layers = nn.TransformerEncoderLayer(hidden_size, nhead, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=2)  # Softmax layer over the feature dimension

    def forward(self, x):
        embedded = self.embedding(x)
        embedded = self.pos_encoder(embedded)
        transformer_output = self.transformer_encoder(embedded)
        output = self.fc(transformer_output)
        return self.softmax(output)  # Apply softmax to the linear layer output


In [25]:
# Hyperparameters
hidden_size = 48
num_layers = 4
nhead = 2
learning_rate = 0.001
epochs = 1000
# Model, loss, and optimizer
model = CharTransformer(len(chars), hidden_size, len(chars), num_layers, nhead).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Training loop
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output.transpose(1, 2), y_train)  # Reshape output to match the CrossEntropyLoss expectations
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output.transpose(1, 2), y_val)  # Same transpose for validation
        _, predicted = torch.max(val_output, 2)  # Adjust dimension for prediction
        val_accuracy = (predicted == y_val).float().mean()  # Calculate accuracy

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

Epoch 10, Loss: 3.75653338432312, Validation Loss: 3.754070281982422, Validation Accuracy: 0.10656780004501343
Epoch 20, Loss: 3.7456068992614746, Validation Loss: 3.7440004348754883, Validation Accuracy: 0.10656780004501343
Epoch 30, Loss: 3.713531732559204, Validation Loss: 3.7029104232788086, Validation Accuracy: 0.18975989520549774
Epoch 40, Loss: 3.658271312713623, Validation Loss: 3.6496307849884033, Validation Accuracy: 0.2254943549633026
Epoch 50, Loss: 3.615905284881592, Validation Loss: 3.6135361194610596, Validation Accuracy: 0.251694917678833
Epoch 60, Loss: 3.5933468341827393, Validation Loss: 3.5926027297973633, Validation Accuracy: 0.2624293863773346
Epoch 70, Loss: 3.584261655807495, Validation Loss: 3.586662769317627, Validation Accuracy: 0.26320621371269226
Epoch 80, Loss: 3.5760574340820312, Validation Loss: 3.5788660049438477, Validation Accuracy: 0.27005651593208313
Epoch 90, Loss: 3.57084584236145, Validation Loss: 3.575500965118408, Validation Accuracy: 0.2718220

In [26]:
# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next char"

prediction_range = 2 * max_length
# print(f"Predicted next character: '{predicted_char}'")

for i in range(prediction_range):
    predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
    for j in predicted_char:
        test_str = test_str + j
    
print(sum(p.numel() for p in model.parameters()))
print(f"Predicted string: '{test_str}'")

837581
Predicted string: 'This is a simple example to demonstrate how to predict the next characte te t cacacte t t t t t t t cr t te ct t cct t cct t ccc'
