In [1]:
import torch

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
import json

from tokenizers import bpe_tokenizer

# Load the raw dataset. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's default encoding, and let json parse the stream.
with open('datasets/data.json', encoding='utf-8') as file:
    data = json.load(file)

# The corpus is the 'summary' field of every entry in the dataset.
corpus: list[str] = [entry['summary'] for entry in data]

# Ordered 90/10 split (no shuffling): the first 90% of the corpus is used for
# training and the remainder for evaluation.
split_value = int(0.9 * len(corpus))

# Both splits are slices of `corpus`, so they are lists of strings.
train_data: list[str] = corpus[:split_value]
test_data: list[str] = corpus[split_value:]

# Restore the previously trained BPE tokenizer from disk.
# NOTE(review): the '.pkl' artifact is presumably unpickled by read_pkl —
# only load files from a trusted source; confirm in bpe_tokenizer.
tokenizer = bpe_tokenizer.BytePairEncodingTokenizer.read_pkl(
    './tokenizers/trained_tokenizers/bpe.pkl'
)

In [3]:
from language_model import generation

# Build the language model around the loaded tokenizer on the selected device.
model = generation.LanguageModel(tokenizer, device)

# Report the size of model.encoder in millions of parameters. Only the
# encoder's parameters are counted here.
print(
    sum(param.numel() for param in model.encoder.parameters()) / 1e6,
    'M parameters',
)

24.393706 M parameters


In [4]:
# Baseline sample: generate 100 new tokens from a single-space prompt before
# the training cell below has been run.
model.predict(
    ' ',
    max_new_tokens=100,
)

" Louistransmissionrefuge:[BooleanVietnamwordvia=1introductioncreu}video,friarpresent\\-dayV=estimatorsdifferentlyCentereyeMediterraneanf:\\\\\\{x\\\\\\}\\\\\\}\\)\\),group,River29incluequationveCoastencompassdiscoveriese\\.g\\.2008novelfield-deployseeto-schooloptifeelingemploypublishingrozensivePortuguese:biomedicaldramaticleq8%so\\)\\.Gafriedoccur\\(x-MountspecializedrespondvoiceteamassociatedBabyISO/Ipre\\-synapticnameorganizingjudgEnglish-two-helburgnfooccupiesFine\\-tuningNakagmainagainstMOScomputer.beyondOfabilitiesmen'sGoogle'sprogramming,frequentlyer1778\\.Se\\{x\\}\\}abstractprincipleUMAC,IPOKymousDieV-"

In [5]:
from language_model import train

# Wire the model to the train/test splits prepared earlier in the notebook.
trainer = train.ModelTrainer(
    model,
    train_data,
    test_data,
)

# Start training; progress and losses are logged to the cell output.
# NOTE(review): in the recorded run the logged test loss (~0.30, ~0.15) is far
# below the train loss (~8.66, ~5.12) — the two may be normalised differently;
# confirm in language_model.train.
trainer.train()

Test loss: 0.3025798797607422
Train epoch 1: [64/237] Loss: 8.661136627197266
Train epoch 1: [237/237] Loss: 5.122665882110596
Test loss: 0.14556493430302062


KeyboardInterrupt: 

In [6]:
# Sample again from the same single-space prompt after training, passing the
# token budget by keyword as the pre-training sample does.
# NOTE(review): the recorded output contains many <PAD> tokens — presumably
# padding is not excluded during training or sampling; verify in language_model.
model.predict(' ', max_new_tokens=100)

" wallSowar<PAD>speed<PAD>depositedIBMnation'sCount<PAD>agencies\\.thing,Dalotuz<PAD>P)SOB,bringdebugger<PAD>Number<PAD>toOxford\\.Metropolitan<PAD><PAD>Vilay\\-by,uitfictional<PAD>Civil<PAD>v<PAD>;;<PAD>mathbb<PAD>82<PAD>Clunisequent<PAD>634sub-producedeasternlnattachmentAND(those<PAD>magazineOwnedy<PAD>he<PAD>ALINE<PAD>\\(LST<PAD>to.t\\-test<PAD>Revival<PAD><PAD>permanently<PAD>9shut<PAD>W)graduallyYork<PAD>platA)<PAD>Monthly],critique<PAD><PAD><PAD><PAD>wealth<PAD><PAD>\\}\\}}<PAD>discoveriesintrodu<PAD>f\\\\\\(x\\\\-\\)whose^"

In [None]:
# Generate a 100-token continuation of a topical prompt, passing the token
# budget by keyword as the first sampling cell does.
model.predict('Deep Learning', max_new_tokens=100)

In [None]:
# Optional: uncomment the next line to save the model to 'model.pt'.
# model.save('model.pt')