<a href="https://colab.research.google.com/github/Stupid-Creations/HoLLMes/blob/main/Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Note: Run this notebook on the T4 GPU runtime; it will take much longer otherwise.**

# **Load Needed Files**
Download the pretrained model weights (`state.txt`) from the repo.

In [None]:
# -O pins the output name: without it, re-running this cell leaves the fresh
# download in state.txt.1, state.txt.2, ... while the loader keeps reading the
# older state.txt.
!wget -O state.txt https://raw.githubusercontent.com/Stupid-Creations/HoLLMes/main/state.txt

--2024-09-06 16:39:09--  https://raw.githubusercontent.com/Stupid-Creations/HoLLMes/main/state.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 52710306 (50M) [application/octet-stream]
Saving to: ‘state.txt.2’


2024-09-06 16:39:10 (282 MB/s) - ‘state.txt.2’ saved [52710306/52710306]



# Load Stuff
Run this cell to download the training text, import the required libraries, and define the model classes and helper functions.

In [None]:
# -O pins the output name: without it, re-running this cell leaves the fresh
# download in YAY.txt.1, YAY.txt.2, ... while the code below keeps reading the
# older YAY.txt.
!wget -O YAY.txt https://raw.githubusercontent.com/Stupid-Creations/HoLLMes/main/YAY.txt

import torch

# Model hyperparameters, read as module-level globals by the classes and
# helpers defined further down.
embed_size = 384   # width of the token and position embeddings
block_size = 256   # number of tokens in one context window
dropout = 0.2      # probability handed to every nn.Dropout layer
n_layer = 6        # how many transformer blocks are stacked
n_head = 6         # attention heads inside each transformer block

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(device)

# Read the whole training corpus. The context manager closes the file handle
# right away; the explicit encoding keeps decoding independent of the
# platform locale (assumes the corpus is UTF-8 -- TODO confirm).
with open("YAY.txt", "r", encoding="utf-8") as corpus_file:
    text = corpus_file.read()
preprocessed = list(text)  # character-level tokens

# The vocabulary is every distinct character in the corpus, sorted so that a
# character's token id is its position in this list.
vocab = sorted(set(preprocessed))
vocab_size = len(vocab)

# Character -> token id table. A dict lookup replaces the linear
# vocab.index() scan that used to run once per encoded character.
_char_to_id = {ch: token_id for token_id, ch in enumerate(vocab)}


def encode(x):
    """Map an iterable of characters (e.g. a str) to a list of token ids.

    Raises:
        ValueError: if a character is not part of the vocabulary (the same
            exception type the previous vocab.index() lookup raised).
    """
    try:
        return [_char_to_id[ch] for ch in x]
    except KeyError as err:
        raise ValueError(f"{err.args[0]!r} is not in the vocabulary") from None


def decode(x):
    """Map an iterable of token ids back to the string they spell."""
    return ''.join([vocab[i] for i in x])


# Round-trip sanity check of the tokenizer.
print(decode(encode("Rubber Ducks are nice")))

tokenized = torch.tensor(encode(preprocessed))
# First 90% of the corpus is used for training, the remainder for validation.
n_train = int(len(tokenized) * 0.9)
train_data = tokenized[:n_train]
val_data = tokenized[n_train:]

def get_batch(split, batch_size=32):
    """Sample a random batch of context windows and their next-token targets.

    Args:
        split: 'train' selects train_data; any other value selects val_data.
        batch_size: number of windows to draw. Defaults to 32, the value that
            was previously hard-coded.

    Returns:
        A tuple (x, y) of tensors moved to `device`, each of shape
        (batch_size, block_size). y holds the same windows as x shifted one
        position to the right, i.e. the token that follows each input token.

    Raises:
        ValueError: if the selected split is too short to cut a single window.
    """
    data = train_data if split == 'train' else val_data
    # Each window needs block_size inputs plus one more token for the last
    # target, so valid start offsets are 0 .. len(data) - block_size - 1.
    n_starts = len(data) - block_size
    if n_starts <= 0:
        raise ValueError(
            "split has %d tokens; more than block_size=%d are required"
            % (len(data), block_size)
        )
    ix = torch.randint(n_starts, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    return x.to(device), y.to(device)

# Draw one training batch up front; the training cell later feeds it through
# a freshly built model before the optimisation loop starts.
xb,yb = get_batch('train')

import torch
import torch.nn as nn
from torch.nn import functional as F

class trans_block(nn.Module):
    """One transformer block: multi-head attention, then a feed-forward layer.

    Both sub-layers are applied to a layer-normalised copy of their input and
    added back onto that input (residual connections).
    """

    def __init__(self,embed_size,heads):
        """Build the block.

        Args:
            embed_size: size of the last dimension of the block's input.
            heads: number of attention heads; each head gets
                embed_size // heads output features.
        """
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order random initial weights are drawn.
        self.attention = Heads(heads, embed_size // heads)
        self.ff_layer = FF_Layer(embed_size)
        self.lnorm1 = nn.LayerNorm(embed_size)
        self.lnorm2 = nn.LayerNorm(embed_size)

    def forward(self,x):
        """Return x after the attention and feed-forward residual updates."""
        attended = self.attention(self.lnorm1(x))
        x = x + attended
        transformed = self.ff_layer(self.lnorm2(x))
        return x + transformed

class Head(nn.Module):
    """A single causally-masked self-attention head.

    Reads the module-level globals embed_size, block_size and dropout.
    """

    def __init__(self,headsize):
        """Create bias-free key/query/value projections of width `headsize`."""
        super().__init__()
        # Creation order (key, query, value) is kept so parameter
        # initialisation consumes random numbers in the same sequence.
        self.key = nn.Linear(embed_size, headsize, bias=False)
        self.query = nn.Linear(embed_size, headsize, bias=False)
        self.value = nn.Linear(embed_size, headsize, bias=False)
        # Lower-triangular ones matrix; registered as a buffer so it follows
        # the module across devices without being a trainable parameter.
        lower_triangle = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer('tril', lower_triangle)
        self.dropout = nn.Dropout(dropout)

    def forward(self,x):
        """Attend over x, a 3-D tensor unpacked as (batches, time, channels)."""
        _, seq_len, width = x.shape
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)

        # NOTE(review): the scale uses the width of the input x, not the
        # head's output width. Presumably the published weights were trained
        # with this factor, so changing it would alter a loaded model's
        # outputs -- confirm before touching.
        scores = q @ k.transpose(-2,-1) * width**-0.5
        # Positions above the diagonal are in the future; -inf there becomes
        # a weight of zero after the softmax.
        is_future = self.tril[:seq_len, :seq_len] == 0
        scores = scores.masked_fill(is_future, float('-inf'))
        weights = self.dropout(F.softmax(scores, dim=-1))
        return weights @ v

class Heads(nn.Module):
    """Several attention heads run side by side, then projected and dropped out.

    Reads the module-level globals embed_size and dropout.
    """

    def __init__(self,n_head,head_size):
        """Create `n_head` Head modules, each producing `head_size` features."""
        super().__init__()
        self.heads = nn.ModuleList([Head(head_size) for _ in range(n_head)])
        self.projection = nn.Linear(embed_size, embed_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self,x):
        """Concatenate every head's output on the last axis and project it."""
        per_head = [head(x) for head in self.heads]
        joined = torch.cat(per_head, dim=-1)
        projected = self.projection(joined)
        return self.dropout(projected)

class FF_Layer(nn.Module):
    """Position-wise feed-forward network: Linear -> ReLU -> Linear -> Dropout.

    The hidden layer is four times as wide as the input. Reads the
    module-level global `dropout`.
    """

    def __init__(self,embed_size):
        """Build the network for inputs whose last dimension is `embed_size`."""
        super().__init__()
        hidden_size = 4 * embed_size
        # The position of each layer inside the Sequential fixes its
        # state_dict key (net.0, net.2), so the order must not change.
        self.net = nn.Sequential(
            nn.Linear(embed_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, embed_size),
            nn.Dropout(dropout),
        )

    def forward(self,x):
        """Apply the feed-forward network to x."""
        return self.net(x)
class BigramLM(nn.Module):
    """Character-level language model built from stacked transformer blocks.

    Despite the name, forward() runs token plus position embeddings through
    n_layer trans_block modules, a final LayerNorm and a linear head. Reads
    the module-level globals vocab_size, embed_size, block_size, n_head and
    n_layer.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order random initial weights are drawn.
        self.embedding_table = nn.Embedding(vocab_size,embed_size)
        self.position_embedding_table = nn.Embedding(block_size,embed_size)
        self.lm_head = nn.Linear(embed_size,vocab_size)
        self.blocks = nn.Sequential(*[trans_block(embed_size,heads = n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(embed_size)

    def forward(self,idx,targets=None):
        """Compute next-token logits and, when targets are given, the loss.

        Args:
            idx: 2-D integer tensor of token ids, (batch, time). `time` must
                not exceed block_size because positions index an embedding
                table with block_size rows.
            targets: optional tensor with as many elements as idx holding the
                expected next token for every position.

        Returns:
            (logits, loss). Without targets, logits has shape
            (batch, time, vocab_size) and loss is None. With targets, logits
            is flattened to (batch*time, vocab_size) and loss is the
            cross-entropy against the flattened targets.
        """
        _, Time = idx.shape

        token_embed = self.embedding_table(idx)
        # Build the positions on the input's own device rather than on the
        # global `device`, so the model also works when the two differ.
        positions = torch.arange(Time, device=idx.device)
        position_embed = self.position_embedding_table(positions)
        added = token_embed + position_embed
        added = self.blocks(added)
        added = self.ln_f(added)
        logits = self.lm_head(added)

        if targets is None:
            loss = None
        else:
            Batch, Time, Channel = logits.shape
            logits = logits.view(Batch*Time,Channel)
            targets = targets.view(Batch*Time)
            loss = F.cross_entropy(logits,targets)
        return logits,loss

    @torch.no_grad()  # sampling never back-propagates; skip autograd bookkeeping
    def generate(self, idx, max_tokens):
        """Autoregressively sample `max_tokens` new tokens after `idx`.

        Args:
            idx: 2-D integer tensor (batch, time) holding the prompt tokens.
            max_tokens: number of tokens to append to every row.

        Returns:
            idx with max_tokens sampled token ids appended along dimension 1.
        """
        # Dropout must be inactive while sampling. Remember the current mode
        # and restore it afterwards so calling generate() in the middle of
        # training does not silently disable dropout for later steps.
        was_training = self.training
        self.eval()
        try:
            for _ in range(max_tokens):
                # Only the most recent block_size tokens fit the position table.
                idx_condition = idx[:, -block_size:]
                logits, _ = self(idx_condition)
                # Keep the prediction for the last position only.
                logits = logits[:, -1, :]
                probs = F.softmax(logits, dim=-1)
                idx_next = torch.multinomial(probs, num_samples=1)
                idx = torch.cat((idx, idx_next), dim=1)
        finally:
            self.train(was_training)
        return idx

--2024-09-06 15:18:04--  https://raw.githubusercontent.com/Stupid-Creations/HoLLMes/main/YAY.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 452395 (442K) [text/plain]
Saving to: ‘YAY.txt.1’


2024-09-06 15:18:04 (25.3 MB/s) - ‘YAY.txt.1’ saved [452395/452395]

cuda
Rubber Ducks are nice


# **Load Model and Train**
You don't need to run these cells; they are only here to show what the training process looked like. ***TRAINING TAKES ABOUT 15 MINUTES.***

In [None]:
# Build the network and move its parameters to the selected device.
modelsomething = BigramLM()
model = modelsomething.to(device)
# One forward pass on the batch drawn earlier, before any training.
out = model(xb, yb)

# Sample 100 tokens from the still-untrained model, starting from token id 0.
idx = torch.zeros((1, 1), dtype=torch.long, device=device)
untrained_ids = model.generate(idx, max_tokens=100)[0].tolist()
print(decode(untrained_ids))

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

for step in range(5000):
    xb, yb = get_batch('train')
    _, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
    # Report the loss of the current batch once every 100 steps.
    if step % 100 == 0:
        print(loss.item())
# Loss of the very last batch.
print(loss.item())

4.548182487487793
2.493067741394043
2.4365293979644775
2.393076181411743
2.27532958984375
2.141753911972046
2.0261852741241455
1.9384242296218872
1.8738847970962524
1.8161969184875488
1.7369664907455444
1.6583244800567627
1.6205623149871826
1.6492551565170288
1.5520501136779785
1.49257493019104
1.4790692329406738
1.4678559303283691
1.4291702508926392
1.3939497470855713
1.3945571184158325
1.3230855464935303
1.3718379735946655
1.3699769973754883
1.3409188985824585
1.3041119575500488
1.2792139053344727
1.3102176189422607
1.2740859985351562
1.214362382888794
1.2315821647644043
1.2021310329437256
1.1914782524108887
1.1705621480941772
1.1942352056503296
1.162575602531433
1.1559720039367676
1.1428824663162231
1.1546624898910522
1.0970523357391357
1.0774110555648804
1.0557125806808472
1.099472999572754
1.0959663391113281
1.0614824295043945
1.0712947845458984
1.0422184467315674
1.0067641735076904
1.02080500125885
1.037742018699646
1.0178428888320923


***Don't run these unless you've run the training cell above. The pretrained model is loaded in the cell below these two.***

In [None]:
#DON'T RUN THIS ONE UNLESS YOU'VE RUN THE ONE ABOVE, THIS IS JUST A RANDOM PIECE OF THE MODEL'S OUTPUT
# Sample 5000 tokens from the trained model and print them as text.
sample_ids = model.generate(idx, max_tokens=5000)[0].tolist()
print(decode(sample_ids))


everyt down to a thick.  He has surely a great little messocuse about
him.  Then a hurted man was with a bunding and the glarle maid quainting
insuitely as a week, and told to ascert of his depression Don't accy if he
vame for my visiolence.  Cosion as t that we
lost unlessoned me---I may.  But that I was always for a very cupable burgan
in the country-sense of our two house establishme of my chair of lame.  And
Johnson tropiece for that Throne gave to the settion as soubtled-biod
had suspects to spurposit.

"Who forgetably?" he asked, Massaultate.  Now could that they is matcht from
The Professon who occurred.  He understand the heavy hand the lest outbreak
of his sworst pine-as Army and box burgained by the corner, but the Man Domy
hand."

"We remarked that you to have a rich, and the wild brass all might of a
price.  We lead to Pray, cleased me now in danger and they say the
windows of it."

"What?"

"What do you get a moment?"

"They asked well dogs completely.  The matter out of 

In [None]:
# Persist the trained weights as a state_dict.
# NOTE(review): the relative path assumes Google Drive is already mounted at
# ./drive; no mount step is visible in this notebook -- confirm.
torch.save(model.state_dict(),"drive/MyDrive/state.txt")

# **Loading the model**
Run these two to load and run the model!


In [None]:
# Rebuild the architecture, then restore the downloaded weights into it.
modelsomething2 = BigramLM()
model2 = modelsomething2.to(device)
# This copy is only used for text generation, so switch dropout off.
model2.eval()
# map_location lets a checkpoint that was saved from a GPU load on a CPU-only
# runtime (device falls back to "cpu" when CUDA is unavailable).
# weights_only=True limits unpickling to tensors and plain containers, which
# is all a state_dict holds, and avoids the torch.load FutureWarning.
model2.load_state_dict(torch.load("state.txt", map_location=device, weights_only=True))

  model2.load_state_dict(torch.load("state.txt"))


<All keys matched successfully>

In [None]:
# Interactive prompt loop: read a prompt, let the model continue it, repeat
# until the user types EXIT.
known_chars = set(vocab)  # built once; used to validate every prompt
while True:
  prompt = input("Type your prompt/context! (type EXIT to stop)")
  print("\n")
  # Check for EXIT on the raw text, before anything is encoded, so leaving the
  # loop never depends on the vocabulary.
  if prompt == "EXIT":
    break
  # An empty prompt would produce a zero-length context that the model cannot
  # embed; ask again instead of crashing.
  if not prompt:
    print("Please type at least one character.")
    continue
  # encode() raises on characters the model has never seen; report them and
  # ask again rather than ending the session with a traceback.
  unknown = sorted(set(prompt) - known_chars)
  if unknown:
    print("These characters are not in the model's vocabulary: " + repr(unknown))
    continue
  contextc = torch.tensor([encode(prompt)], dtype=torch.long)
  context = contextc.to(device)
  print(decode(model2.generate(context,max_tokens = 1000)[0].tolist()))
  #CHANGE MAX_TOKENS FOR DIFFERENT LENGTH
  print("\n")

Type your prompt/context! (type EXIT to stop) Hello there


Hello there was nothing in this way.  I could not promise to
reach the trap in Alger her drive.

"I feet the lady cobe at Prine myster"--for the exocky of burderer.  He was
marry short that we made for all the casuace, here heading to his and,
returned by sial way on his inappointment affairation."

Holmes had been his noce for a such.

"I reached that in your mestes," said he, "then, our conformon--of couplease, wave
also unfluence that you from you sladed her very obscuring about that you
when you calking?"

"I swear heard, Holmes, and they were thought my wish to matter in a
mere of askinal an extraordinary.  But why closet were
when you to mean?"

"He clust that could be in the estace of ourselves."

"And that they took explain we come if that problem with whom her, they
don't brut here and a clerate of the same.  It was one in the geounds of
thoughts over the beach."

He looked his skin occurning in her bewildern by the s