<a href="https://colab.research.google.com/github/Sathvik21S21Rao/Pytorch_practice/blob/main/generate_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.4.0


In [1]:
# Mount Google Drive at /content/drive (Colab-only API). The model checkpoint
# is later written to and read from /content/drive/MyDrive/Models.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Training the model
The text file is tokenized with tiktoken's GPT-2 encoding.
A PyTorch model is then trained to generate the next token from the preceding tokens.

In [16]:
import tiktoken
import torch
import torch.nn as nn
import copy

In [5]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device="cuda"
else:
  device="cpu"

In [6]:
# tiktoken's "gpt2" encoding: used to tokenize the corpus, to size the model's
# vocabulary (enc.n_vocab) and to decode sampled token ids back to text.
enc=tiktoken.get_encoding("gpt2")

In [7]:
# Read the corpus through a context manager so the file handle is closed
# promptly, and decode it explicitly as UTF-8 rather than relying on the
# platform default encoding.
with open("Hamming.txt",encoding="utf-8") as corpus_file:
  corpus_text=corpus_file.read()
# Lower-case the text and drop newlines before tokenizing.
# NOTE(review): newlines are removed without inserting a space, so the last
# word of a line is fused with the first word of the next one - confirm that
# this is intended.
vector=enc.encode(corpus_text.lower().replace("\n",""))

In [72]:
# Number of tokens in the encoded corpus.
len(vector)

508

In [14]:
import random
def generate_batch(batch_size,vector_size):
  """Sample a random batch of (input, target) token windows from ``vector``.

  Every row is a contiguous slice of ``vector_size`` tokens taken from the
  module-level token list ``vector``; the matching target row is the same
  slice shifted one position to the right, i.e. the next token at every
  position.

  Args:
    batch_size: number of rows to sample.
    vector_size: number of tokens per row.

  Returns:
    A tuple ``(input_vectors, target_vectors)`` of tensors built from the
    sampled slices and moved to the module-level ``device``.

  Raises:
    ValueError: from ``random.randrange`` when
      ``len(vector) - vector_size - 2`` is not positive.
  """
  # Draw one start offset per row, in order, from
  # [0, len(vector) - vector_size - 2).
  starts=[]
  for _ in range(batch_size):
    starts.append(random.randrange(0,len(vector)-vector_size-2))
  input_rows=[vector[start:start+vector_size] for start in starts]
  input_vectors=torch.tensor(input_rows).to(device)
  # Targets are the inputs shifted by one token.
  target_rows=[vector[start+1:start+vector_size+1] for start in starts]
  target_vectors=torch.tensor(target_rows).to(device)
  return input_vectors,target_vectors

In [9]:
class SelfAttention(nn.Module):
    """Single-head causal scaled dot-product self-attention.

    Queries, keys and values are linear projections of the input. A
    lower-triangular mask prevents every position from attending to later
    positions, so the output at position ``t`` depends only on inputs
    ``0..t``.

    Args:
        in_dim: size of the last dimension of the input.
        out_dim: size of the query/key/value projections and of the output.
    """
    def __init__(self, in_dim, out_dim):
        super(SelfAttention, self).__init__()
        self.query = nn.Linear(in_dim, out_dim)
        self.key = nn.Linear(in_dim, out_dim)
        self.value = nn.Linear(in_dim, out_dim)
        self.out_dim=out_dim

    def forward(self, x):
        """Apply masked attention to ``x`` of shape ``(..., T, in_dim)``.

        Returns:
            A tensor of shape ``(..., T, out_dim)``.
        """
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)

        # Transpose the last two axes (rather than fixed axes 1 and 2) so the
        # layer also works for inputs without exactly one batch dimension.
        attention_scores = torch.matmul(q, k.transpose(-2,-1))/(self.out_dim)**0.5
        seq_len=attention_scores.shape[-1]
        # Build the mask on the input's own device instead of a notebook
        # global, so the module follows wherever the model has been moved.
        causal_mask=torch.tril(torch.ones(seq_len,seq_len,device=x.device))
        # -inf scores become exactly zero weight after the softmax.
        attention_scores=attention_scores.masked_fill(causal_mask==0,float("-inf"))
        attention_weights = torch.nn.functional.softmax(attention_scores, dim=-1)
        return torch.matmul(attention_weights, v)





In [10]:
class TokenModel(nn.Module):
  """Next-token model: embedding -> self-attention -> two-layer MLP head.

  Args:
    out_features: accepted but not read by this class; the width of the
      output logits is always ``enc.n_vocab``.
    hidden: width of the token embeddings, of the attention projections and
      of the hidden layer of the MLP head.
  """
  def __init__(self,out_features:int,hidden:int):
    super().__init__()
    vocab_size=enc.n_vocab
    # The embedding table has one more row than the tokenizer vocabulary.
    self.embedding=nn.Embedding(vocab_size+1,hidden)
    self.self_attention=SelfAttention(hidden,hidden)
    # Sub-module order is fixed so the state_dict keys stay
    # ``layer.0.*`` and ``layer.2.*``.
    self.layer=nn.Sequential(
      nn.Linear(hidden,hidden),
      nn.GELU(),
      nn.Linear(hidden,vocab_size),
    )
  def forward(self,x):
    """Map token ids ``x`` to one vector of ``enc.n_vocab`` logits per position."""
    embedded=self.embedding(x)
    attended=self.self_attention(embedded)
    return self.layer(attended)


In [76]:
# Build the model with a hidden width of 500 and move it to the selected
# device. TokenModel.__init__ does not read its first argument (out_features).
model=TokenModel(500,500).to(device)

In [77]:
# Cross-entropy objective, optimized with Adam at a learning rate of 1e-3.
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=1e-3)


In [58]:
def train(epochs:int,model,loss_fn,optimizer,batch_size:int=4,vector_size:int=10):
  """Run ``epochs`` optimization steps, each on a freshly sampled random batch.

  Every step draws one batch with ``generate_batch(batch_size, vector_size)``,
  computes the loss over all positions of all rows, and applies one
  optimizer update.

  Args:
    epochs: number of optimizer steps (one random batch per step).
    model: module mapping a ``(B, T)`` batch of token ids to ``(B, T, C)``
      logits.
    loss_fn: callable taking logits of shape ``(B*T, C)`` and targets of
      shape ``(B*T,)`` and returning a scalar loss.
    optimizer: optimizer holding ``model``'s parameters.
    batch_size: rows per sampled batch; the default of 4 matches the value
      that used to be hard-coded.
    vector_size: tokens per row; the default of 10 matches the value that
      used to be hard-coded.

  Returns:
    None. ``model`` is updated in place and left in training mode.
  """
  # Training mode is loop-invariant, so it is set once up front.
  model.train()
  for _ in range(epochs):
    x,y=generate_batch(batch_size,vector_size)
    y_logit=model(x)
    B,T,C=y_logit.shape
    # Merge the batch and time axes so every position is one classification
    # example; reshape (unlike view) also accepts non-contiguous tensors.
    loss=loss_fn(y_logit.reshape(B*T,C),y.reshape(B*T))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


In [83]:
train(10000,model,loss_fn,optimizer)
# Seeded generator that generate() uses for reproducible sampling. It is
# created on `device` rather than a hard-coded "cuda": torch.multinomial needs
# the generator on the same device as the probabilities, and a CUDA generator
# cannot be created on a CPU-only runtime.
g=torch.Generator(device=device)
g.manual_seed(2147483647)

<torch._C.Generator at 0x7df1d88a2d50>

In [80]:
def generate(model,tokens,generator=None,batch_size=4,context_size=10):
  """Sample ``tokens`` new tokens for each of ``batch_size`` random prompts.

  Prompts are drawn with ``generate_batch(batch_size, context_size)``. At
  every step the model sees only the most recent ``context_size`` tokens of
  each row, and the next token is sampled from the softmax of the logits at
  the last position.

  Args:
    model: module mapping a ``(B, T)`` batch of token ids to ``(B, T, C)``
      logits.
    tokens: number of tokens to sample per row.
    generator: ``torch.Generator`` used for sampling; when ``None`` the
      notebook-level generator ``g`` is used, as before.
    batch_size: number of prompts (default 4, the previous fixed value).
    context_size: prompt length and sliding-window width (default 10, the
      previous fixed value).

  Returns:
    A ``(batch_size, context_size + tokens)`` tensor holding each prompt
    followed by its sampled continuation.
  """
  if generator is None:
    generator=g
  x=generate_batch(batch_size,context_size)[0]
  # clone() yields an independent copy on the same device as x, so neither
  # copy.deepcopy nor repeated .to(device) transfers are needed.
  result=x.clone()
  for _ in range(tokens):
    # Only the logits at the last position predict the next token.
    y_logit=model(x)[:,-1,:]
    y_pred=torch.softmax(y_logit,dim=1)
    ans=torch.multinomial(y_pred,num_samples=1,generator=generator)
    result=torch.cat([result,ans],dim=1)
    # Slide the window: append the sampled token and drop the oldest one.
    x=torch.cat([x,ans],dim=1)[:,1:]
  return result


In [90]:
# Create the checkpoint directory on the mounted Drive; -p makes this a no-op
# when the directory already exists.
!mkdir -p '/content/drive/MyDrive/Models'

# Calculating Loss

In [23]:
# Mean cross-entropy over 100 random batches (4 rows of 8 tokens each),
# computed with gradient tracking disabled.
avg=0
with torch.inference_mode():
  for _ in range(100):
    inputs,targets=generate_batch(4,8)
    logits=model(inputs)
    # Merge the batch and time axes so each position is one example.
    flat_logits=logits.flatten(0,1)
    flat_targets=targets.flatten()
    avg+=nn.functional.cross_entropy(flat_logits,flat_targets).item()
  print(avg/100)



1.2006141656637193


In [91]:
# Persist only the learned parameters (the state_dict) to the checkpoint file
# on the mounted Drive.
torch.save(model.state_dict(),"/content/drive/MyDrive/Models/model.pth")

# Loading the torch model

In [19]:
# Rebuild the architecture with the same constructor arguments that were used
# for training, then restore the saved parameters.
model=TokenModel(500,500)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written from a GPU session also loads on a CPU-only runtime.
model.load_state_dict(torch.load("/content/drive/MyDrive/Models/model.pth",map_location=device))
model=model.to(device)

In [20]:
# Same sampler as in the training section; presumably repeated here so this
# section also has it in scope - confirm before removing either copy.
def generate(model,tokens,generator=None,batch_size=4,context_size=10):
  """Sample ``tokens`` new tokens for each of ``batch_size`` random prompts.

  Prompts are drawn with ``generate_batch(batch_size, context_size)``. At
  every step the model sees only the most recent ``context_size`` tokens of
  each row, and the next token is sampled from the softmax of the logits at
  the last position.

  Args:
    model: module mapping a ``(B, T)`` batch of token ids to ``(B, T, C)``
      logits.
    tokens: number of tokens to sample per row.
    generator: ``torch.Generator`` used for sampling; when ``None`` the
      notebook-level generator ``g`` is used, as before.
    batch_size: number of prompts (default 4, the previous fixed value).
    context_size: prompt length and sliding-window width (default 10, the
      previous fixed value).

  Returns:
    A ``(batch_size, context_size + tokens)`` tensor holding each prompt
    followed by its sampled continuation.
  """
  if generator is None:
    generator=g
  x=generate_batch(batch_size,context_size)[0]
  # clone() yields an independent copy on the same device as x, so neither
  # copy.deepcopy nor repeated .to(device) transfers are needed.
  result=x.clone()
  for _ in range(tokens):
    # Only the logits at the last position predict the next token.
    y_logit=model(x)[:,-1,:]
    y_pred=torch.softmax(y_logit,dim=1)
    ans=torch.multinomial(y_pred,num_samples=1,generator=generator)
    result=torch.cat([result,ans],dim=1)
    # Slide the window: append the sampled token and drop the oldest one.
    x=torch.cat([x,ans],dim=1)[:,1:]
  return result

In [22]:
model.eval()
# Re-seed a fresh generator so the printed samples are reproducible. It is
# created on `device` rather than a hard-coded "cuda", so the cell also runs
# on a CPU-only runtime.
g=torch.Generator(device=device)
g.manual_seed(2147483647)
with torch.inference_mode():
  y=generate(model,50)
  # Each row is one prompt followed by its sampled continuation; decode the
  # token ids back to text.
  for j in y:
    print(enc.decode(j.tolist()))

 for errors and correct them if necessary. if a vital component in data integrity in the of single-correcting codes are various typesstate drives and efficient solution to the parity will pinpoint be flipped implementation realm early early parity checks information is carefully to suit to be chosen chosen chosen calculated chosen to cosmic rays or
 of linear error-correcting code, specifically designed to the original information but also designed to create suit applications design and cache memory, hamming(7, allowing it to check for single-fi. in the problem realm erroneous bitbit error correction of error correction of error occurs during transmission, and information
, where data integrity is paramount.in summary, every 4 bits to prevent and correct them if solid bits if correct them if hamming(15, is codes. this code, every 4 bits to create cosmic makes where data are used in the of hamming codes can detect and correction ofming codes
 error correction and double-bit error detect