In [2]:
import torch
import mmap
import random
import pickle
import re
import os

In [7]:
class smaLLLanguageModel:
    def __init__(self,vocabulary_file,train_opFile,valid_opFile):
        if torch.cuda.is_available():
            self.device='cuda'
            
        self.vocab_file=vocabulary_file
        self.train_outputFile=train_opFile
        self.valid_outputFile=valid_opFile
        
        self.block_size=8
        self.batch_size=32

        self.iters=100
        self.learning_rate=3e-4
        self.eval_iters=100
        self.dropout=0.2

        self.n_embd=50
        self.n_layer=1
        self.n_head=1
        
        self.vocab=[]
        self.word_to_int={}
        self.int_to_word={}
        
    def _strip_special_characters(self,word):
        pattern=r'[^a-zA-Z0-9\s]+'
        stripped_word=re.sub(pattern,'',word)
        return stripped_word
    
    def _get_words(self,text):
        words=text.split()
        list_of_words=[]
        for word in words:
            word=self._strip_special_characters(word)
            word=word.strip("~`!@#$%^&*()1234567890_-+={[]}\|'';:""/?.><,")
            if word:
                list_of_words.append(word)
        return list_of_words
    
    def _update_the_vocab(self,text):
        new_words=self._get_words(text)
        for word in new_words:
            if word not in self.vocab:
                self.vocab.append(word)
        self.vocab=sorted(self.vocab)
        new_word_indices={word:i for i,word in enumerate(self.vocab)}
        self.word_to_int.update(new_word_indices)
        self.int_to_word={i:word for word,i in self.word_to_int.items()}
            
    def _encoder(self,text):
        self._update_the_vocab(text)
        encoded_text=[]
        for word in self._get_words(text):
            encoded_text.append(self.word_to_int[word])
        return encoded_text
    
    def _decoder(self,arr):
        decoded_text=""
        for int_val in arr:
            word=self.int_to_word[int_val]
            decoded_text=decoded_text+word+" "
        return decoded_text
        
    def _get_vocabulary(self):
        with open(self.vocab_file,"r",encoding="utf-8") as f:
            text=f.read()
            self.vocab=sorted(set(self._get_words(text)))
        vocab_size=len(self.vocab)
        return vocab_size
    
    def load(self):
        vocab_size=self._get_vocabulary()
        model=SLM(vocab_size)
        print("Loading Model...\n")
        with open("D:/LLM_Dataset/Model/Model_01.pkl","rb") as f:
            model=pickle.load(f)
        print("Model successfully loaded!")
        self.model=model.to(self.device)
        
    def talk(self):
        rep=0
        while True:
            prompt=input("Prompt: ")
            context=torch.tensor(self._encoder(prompt),dtype=torch.long,device=self.device)
            generated_chars=self._decoder(self.model.generate(context.unsqueeze(0),max_new_tokens=5)[0].tolist())
            print(f'Completion: {generated_chars}\n')
            if rep==4:
                repeat=int(input("More? 1:YES  2:NO"))
                print(f"\n")
                if repeat==2:
                    break
    
class Head(torch.nn.Module):
    def __init__(self,head_size):
        super(Head,self).__init__()
        self.obj_smaLLLanguageModel=smaLLLanguageModel("D:/LLM_Dataset/vocab.txt","D:/LLM_Dataset/output_train.txt","D:/LLM_Dataset/output_valid.txt")
        self.key=torch.nn.Linear(self.obj_smaLLLanguageModel.n_embd,head_size,bias=False)
        self.query=torch.nn.Linear(self.obj_smaLLLanguageModel.n_embd,head_size,bias=False)
        self.value=torch.nn.Linear(self.obj_smaLLLanguageModel.n_embd,head_size,bias=False)
        self.register_buffer('tril',torch.tril(torch.ones(self.obj_smaLLLanguageModel.block_size,self.obj_smaLLLanguageModel.block_size)))
        self.dropout=torch.nn.Dropout(self.obj_smaLLLanguageModel.dropout)
        
    def forward(self,x):
        B,T,C=x.shape
        k=self.key(x)
        q=self.query(x)
        wei=q@k.transpose(-2,-1)*k.shape[-1]**-0.5
        wei=wei.masked_fill(self.tril[:T,:T]==0,float('-inf'))
        wei=torch.nn.functional.softmax(wei,dim=-1)
        wei=self.dropout(wei)
        v=self.value(x)
        out=wei@v
        return out

class MultiHeadAttention(torch.nn.Module):
    def __init__(self,head_size):
        super(MultiHeadAttention,self).__init__()
        self.obj_smaLLLanguageModel=smaLLLanguageModel("D:/LLM_Dataset/vocab.txt","D:/LLM_Dataset/output_train.txt","D:/LLM_Dataset/output_valid.txt")
        self.heads=torch.nn.ModuleList([Head(head_size) for _ in range(self.obj_smaLLLanguageModel.n_head)])
        self.proj=torch.nn.Linear(head_size*self.obj_smaLLLanguageModel.n_head,self.obj_smaLLLanguageModel.n_embd)
        self.dropout=torch.nn.Dropout(self.obj_smaLLLanguageModel.dropout)
        
    def forward(self,x):
        out=torch.cat([h(x) for h in self.heads],dim=-1)
        out=self.dropout(self.proj(out))
        return out

class FeedForward(torch.nn.Module):
    def __init__(self):
        super(FeedForward,self).__init__()
        self.obj_smaLLLanguageModel=smaLLLanguageModel("D:/LLM_Dataset/vocab.txt","D:/LLM_Dataset/output_train.txt","D:/LLM_Dataset/output_valid.txt")
        self.net=torch.nn.Sequential(
            torch.nn.Linear(self.obj_smaLLLanguageModel.n_embd,4*self.obj_smaLLLanguageModel.n_embd),
            torch.nn.ReLU(),
            torch.nn.Linear(4*self.obj_smaLLLanguageModel.n_embd,self.obj_smaLLLanguageModel.n_embd),
            torch.nn.Dropout(self.obj_smaLLLanguageModel.dropout),
        )
        
    def forward(self,x):
        return self.net(x)

class Block(torch.nn.Module):
    def __init__(self):
        super(Block,self).__init__()
        self.obj_smaLLLanguageModel=smaLLLanguageModel("D:/LLM_Dataset/vocab.txt","D:/LLM_Dataset/output_train.txt","D:/LLM_Dataset/output_valid.txt")
        head_size=self.obj_smaLLLanguageModel.n_embd//self.obj_smaLLLanguageModel.n_head
        self.aa=MultiHeadAttention(head_size)
        self.ffwd=FeedForward()
        self.ln1=torch.nn.LayerNorm(self.obj_smaLLLanguageModel.n_embd)
        self.ln2=torch.nn.LayerNorm(self.obj_smaLLLanguageModel.n_embd)
        
    def forward(self,x):
        y=self.aa(x)
        x=self.ln1(x+y)
        y=self.ffwd(x)
        x=self.ln2(x+y)
        return x

class SLM(torch.nn.Module):
    def __init__(self,vocab_size):
        super(SLM,self).__init__()
        self.obj_smaLLLanguageModel=smaLLLanguageModel("D:/LLM_Dataset/vocab.txt","D:/LLM_Dataset/output_train.txt","D:/LLM_Dataset/output_valid.txt")
        self.embedding_Table=torch.nn.Embedding(vocab_size,self.obj_smaLLLanguageModel.n_embd)
        self.pos_embedding_Table=torch.nn.Embedding(self.obj_smaLLLanguageModel.block_size,self.obj_smaLLLanguageModel.n_embd)
        
        self.blocks=torch.nn.Sequential(*[Block() for _ in range(self.obj_smaLLLanguageModel.n_layer)])
        self.ln_f=torch.nn.LayerNorm(self.obj_smaLLLanguageModel.n_embd)
        self.lm_head=torch.nn.Linear(self.obj_smaLLLanguageModel.n_embd,vocab_size)
        
        self.apply(self._init_weights)
        
    def _init_weights(self,module):
        if isinstance(module,torch.nn.Linear):
            torch.nn.init.normal_(module.weight,mean=0.0,std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module,torch.nn.Embedding):
            torch.nn.init.normal_(module.weight,mean=0.0,std=0.02)
        
    def forward(self,index,targets=None):
        B,T=index.shape
            
        token_embeddings=self.embedding_Table(index)
        pos_embeddings=self.pos_embedding_Table(torch.arange(T,device=self.obj_smaLLLanguageModel.device))
        x=token_embeddings+pos_embeddings
        x=self.blocks(x)
        x=self.ln_f(x)
        logits=self.lm_head(x)
        
        if targets is None:
            loss=None
        else:
            B,T,C=logits.shape
            logits=logits.view(B*T,C)
            targets=targets.view(B*T)
            loss=torch.nn.functional.cross_entropy(logits,targets)
        return logits,loss
    
    def generate(self,index,max_new_tokens):
        for _ in range(max_new_tokens):
            logits,loss=self.forward(index)
            logits=logits[:,-1,:]
            probs=torch.nn.functional.softmax(logits,dim=-1)
            index_next=torch.multinomial(probs,num_samples=1)
            index=torch.cat((index,index_next),dim=1)
        return index

In [8]:
chatbot=smaLLLanguageModel("D:/LLM_Dataset/vocab.txt","D:/LLM_Dataset/output_train.txt","D:/LLM_Dataset/output_valid.txt")

In [9]:
chatbot.load()

Loading Model...

Model successfully loaded!


In [10]:
chatbot.talk()

Prompt: drinking
Completion: drinking fluctuations waqf Emit saturator STELLION 

Prompt: maybe
Completion: maybe carnosit GRENE Twinling Subaqueous espleitrevenue 

Prompt: Game playing
Completion: Game playing vented Ditches WHITTRET unlabored CONFORTATION 

Prompt: cat ball
Completion: cat ball Majestatic CIRCUMVOLUTION circumventio executorius vettilei 

Prompt: samarth singh
Completion: samarth singh KANTTRY dhara PEPTONE Boldface wannish 

Prompt: is it okay
Completion: is it okay Benefited Nourice UNAMBITION Uveous Psychian 

Prompt: demogorgon
Completion: demogorgon desert Renewed CURIALISTIC furbelow mase 

Prompt: stranger things
Completion: stranger things MANTCHOO celerity Gurt BEECHNUT AFFAIR 

Prompt: netflix
Completion: netflix presribed panda creyera medlar Rheocrat 



KeyboardInterrupt: Interrupted by user