In [4]:
#importing libraries
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch
import numpy as np
import os

In [5]:
# Context-window length: the generation cell trims its `context` list of
# character ids to at most this many trailing entries.
block_size = 10

In [12]:
# Character -> integer id lookup. Ids are simply the positions of the
# characters in the ordered vocabulary string below (51 symbols: punctuation,
# digits, lowercase letters, then '|' and '~').
stoi = {
    symbol: index
    for index, symbol in enumerate(
        " &'(),-./"
        "0123456789"
        ":;[]"
        "abcdefghijklmnopqrstuvwxyz"
        "|~"
    )
}

In [11]:
# Integer id -> character lookup. Each id is the position of its character in
# the ordered vocabulary string below (51 symbols: punctuation, digits,
# lowercase letters, then '|' and '~').
itos = dict(
    enumerate(
        " &'(),-./"
        "0123456789"
        ":;[]"
        "abcdefghijklmnopqrstuvwxyz"
        "|~"
    )
)

In [15]:
class LSTM(nn.Module):
    """Single LSTM cell unrolled over a sequence, with a linear read-out per step.

    All gates act on the column vector obtained by stacking the previous hidden
    state on top of the current input vector, so every gate matrix has
    ``input_size`` columns where ``input_size`` must equal ``hidden_size`` plus
    the per-step feature count.

    Parameter names (``wf``, ``bf``, ``wi``, ``bi``, ``wc``, ``bc``, ``wo``,
    ``bo``, ``wy``, ``by``) are the ``state_dict`` keys, so they must not be
    renamed or reordered without invalidating saved checkpoints.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the gate and read-out parameters.

        Args:
            input_size: Column count of each gate weight matrix, i.e.
                ``hidden_size`` + number of features per time step.
            hidden_size: Length of the hidden and cell state vectors.
            output_size: Number of logits produced at every time step.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Forget Gate
        self.wf = nn.Parameter(torch.empty(hidden_size, input_size))
        self.bf = nn.Parameter(torch.empty(hidden_size, 1))

        # Input Gate
        self.wi = nn.Parameter(torch.empty(hidden_size, input_size))
        self.bi = nn.Parameter(torch.empty(hidden_size, 1))

        # Candidate Gate
        self.wc = nn.Parameter(torch.empty(hidden_size, input_size))
        self.bc = nn.Parameter(torch.empty(hidden_size, 1))

        # Output Gate
        self.wo = nn.Parameter(torch.empty(hidden_size, input_size))
        self.bo = nn.Parameter(torch.empty(hidden_size, 1))

        # Final (read-out) layer: hidden state -> logits
        self.wy = nn.Parameter(torch.empty(output_size, hidden_size))
        self.by = nn.Parameter(torch.empty(output_size, 1))

        # The tensors above are uninitialised memory until this call.
        self.init_parameters()

    def init_parameters(self):
        """Xavier-uniform initialise every weight matrix and zero every bias."""
        # Order is kept fixed so a given RNG seed always yields the same weights.
        for weight in (self.wf, self.wi, self.wc, self.wo, self.wy):
            nn.init.xavier_uniform_(weight)

        for bias in (self.bf, self.bi, self.bc, self.bo, self.by):
            nn.init.zeros_(bias)

    def forward(self, X):
        """Run the cell over one sequence, starting from zero hidden/cell state.

        Args:
            X: Tensor whose first dimension is time; ``X[t]`` is a 1-D feature
                vector with ``input_size - hidden_size`` entries.

        Returns:
            Tensor of shape ``(seq_length, output_size, 1)`` holding the
            read-out logits for every time step.

        Raises:
            RuntimeError: If ``X`` has no time steps (nothing to stack).
        """
        # Allocate the initial states next to the parameters so the model also
        # works after being moved to another device or dtype.
        hidden_state = self.wf.new_zeros((self.hidden_size, 1))
        cell_state = self.wf.new_zeros((self.hidden_size, 1))

        outputs = []
        for x_t in X:
            # Column vector [h_{t-1}; x_t] shared by all four gates.
            concat_input = torch.cat((hidden_state, x_t.unsqueeze(1)), dim=0)
            forget_gate = torch.sigmoid(torch.matmul(self.wf, concat_input) + self.bf)
            input_gate = torch.sigmoid(torch.matmul(self.wi, concat_input) + self.bi)
            candidate_gate = torch.tanh(torch.matmul(self.wc, concat_input) + self.bc)
            output_gate = torch.sigmoid(torch.matmul(self.wo, concat_input) + self.bo)

            cell_state = forget_gate * cell_state + input_gate * candidate_gate
            hidden_state = output_gate * torch.tanh(cell_state)

            outputs.append(torch.matmul(self.wy, hidden_state) + self.by)
        return torch.stack(outputs)

    def predict(self, X):
        """Apply ``forward`` to every sequence in ``X`` and stack the results.

        Each sequence is processed independently from a fresh zero state. All
        sequences must yield equally shaped outputs so they can be stacked,
        giving ``(num_sequences, seq_length, output_size, 1)``.
        """
        return torch.stack([self.forward(sequence) for sequence in X])

    def fit(self, X, y, epochs, lr, model_path):
        """Full-batch training with Adam and cross-entropy loss.

        Args:
            X: Batch of input sequences accepted by ``predict``.
            y: Integer class targets, one per (sequence, time step); flattened
                with ``reshape(-1)`` to line up with the flattened logits.
            epochs: Number of optimisation steps (one per epoch).
            lr: Adam learning rate.
            model_path: File the checkpoint dict is written to. It is
                overwritten on every 100th epoch and after the final epoch.
        """
        optimizer = optim.Adam(self.parameters(), lr=lr)
        criterion = torch.nn.CrossEntropyLoss()
        target = y.reshape(-1)  # loop-invariant

        for epoch in range(epochs):
            optimizer.zero_grad()

            # (sequences, steps, output_size, 1) -> (sequences * steps, output_size)
            prediction = self.predict(X).reshape(-1, self.output_size)
            loss = criterion(prediction, target)
            loss.backward()
            optimizer.step()

            # CrossEntropyLoss already averages over all targets, so the value
            # is reported as is instead of being divided by len(X) again.
            current_loss = loss.item()
            print(f"Epoch {epoch+1}/{epochs}, Loss: {current_loss:.4f}")

            # Periodic checkpoint, plus one after the last epoch so the final
            # weights are never lost when epochs - 1 is not a multiple of 100.
            if epoch % 100 == 0 or epoch == epochs - 1:
                torch.save({
                            'epoch': epoch,
                            'model_state_dict': self.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            # detached: keep the value, drop the autograd graph
                            'loss': loss.detach(),
                            }, model_path)

    def train(self, X=True, y=None, epochs=None, lr=None, model_path=None, *, mode=None):
        """Train the model, or switch training/eval mode like ``nn.Module.train``.

        This name collides with ``nn.Module.train(mode)``, which ``eval()``
        relies on. Both call forms are therefore supported:

        * ``train(X, y, epochs, lr, model_path)`` runs ``fit`` (original usage).
        * ``train()``, ``train(True/False)`` or ``train(mode=...)`` only toggles
          the module's training flag and returns ``self``.
        """
        if mode is not None:
            return super().train(mode)
        if isinstance(X, bool) and y is None:
            return super().train(X)
        return self.fit(X, y, epochs, lr, model_path)

In [13]:
# Width of each one-hot character vector built by `convert_to_X`; also passed
# to `LSTM` as its output size. Equals the 51 entries of `stoi` / `itos`.
input_size = 51
# Hidden/cell state length passed to `LSTM`.
hidden_size = 50

In [17]:
# Checkpoint location. Set the LSTM_CHECKPOINT_PATH environment variable to run
# this notebook on another machine; the original local path stays the default.
model_path = os.environ.get(
    "LSTM_CHECKPOINT_PATH",
    "/Users/nimitt/Documents/DigitalSystems/Project/NextWordPredictor-Python/LargeModelStates/model_10_50_5000.pt",
)

# The gate matrices act on [hidden state; one-hot input], hence
# input_size + hidden_size columns; the read-out emits one logit per character.
model_read = LSTM(input_size + hidden_size, hidden_size, input_size)

# map_location remaps every stored tensor onto the CPU while loading.
checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
model_read.load_state_dict(checkpoint['model_state_dict'])

# Testing

def convert_to_X(prompt):
    """One-hot encode `prompt` character by character.

    Row r of the result has a single 1 in column `stoi[prompt[r]]` and zeros
    elsewhere. Returns a float32 tensor of shape (len(prompt), input_size).
    A character missing from `stoi` raises KeyError.
    """
    row_ids = np.arange(len(prompt))
    col_ids = np.array([stoi[symbol] for symbol in prompt], dtype=np.intp)

    one_hot = np.zeros((len(prompt), input_size))
    one_hot[row_ids, col_ids] = 1
    return torch.tensor(one_hot, dtype=torch.float32)
        
prompt = "basi"
max_len = 50

# Ids of the most recent characters, capped at block_size entries.
# Bookkeeping only: the model below is fed the whole text generated so far.
context = [stoi[ch] for ch in prompt][-block_size:]

generated_text = prompt
# Inference only, so skip building the autograd graph on every step.
with torch.no_grad():
    for step in range(max_len):
        x = convert_to_X(generated_text)
        # Logits for the character following the last one, shape (input_size, 1).
        y_pred = model_read(x)[-1]
        # Greedy decoding. To sample instead of taking the arg-max:
        # ix = torch.distributions.categorical.Categorical(logits=y_pred.squeeze()).sample()
        ix = torch.argmax(y_pred)
        next_id = ix.item()
        ch = itos[next_id]
        generated_text += ch
        # Store plain ints (not 0-d tensors) and keep the window within block_size.
        context = (context + [next_id])[-block_size:]

# '|' is rendered as a line break in the printed text. The original assigned to
# a misspelled name, so the replacement never reached the printed text.
generated_text = generated_text.replace('|','\n')
print("|",generated_text,"|")

| basic proble advisory 5 8 (version 1.0, november  2022 |
