In [1]:
import transformers
import torch, math, einops
from torch import nn

from torch.distributions.categorical import Categorical

from src.encoder import GPT2Encoder
from src.decoder import GPT2Decoder
from src.GPT2 import GPT2_Block, GPT2
from src import BlockGenerator
from src import Tokenizer, random_graph_maker

In [2]:
from datasets import load_dataset

# Both splits come from the same dataset/configuration pair; only the slice of
# the "train" split differs: the first 80% is used for training, the remaining
# 20% is held out for validation.
WIKI_DATASET = ("wikipedia", "20220301.simple")

train = load_dataset(*WIKI_DATASET, split="train[:80%]")
valid = load_dataset(*WIKI_DATASET, split="train[80%:]")

Found cached dataset wikipedia (/Users/francescosacco/.cache/huggingface/datasets/wikipedia/20220301.simple/2.0.0/aa542ed919df55cc5d3347f42dd4521d05ca68751f50dbc32bae2a7f1e167559)
Found cached dataset wikipedia (/Users/francescosacco/.cache/huggingface/datasets/wikipedia/20220301.simple/2.0.0/aa542ed919df55cc5d3347f42dd4521d05ca68751f50dbc32bae2a7f1e167559)


In [3]:
# --- Device -----------------------------------------------------------------
# Use CUDA when it is available, otherwise fall back to the CPU.
# (Apple-silicon alternative, currently disabled:
#  device = 'mps' if torch.backends.mps.is_available() else 'cpu')
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Attention dimensions ---------------------------------------------------
heads = 12
dK = dV = 64               # per-head key / value width
d_Embedding = heads * dK   # 12 * 64 = 768

# --- Model assembly ---------------------------------------------------------
# The pieces are built in this order and handed to GPT2; construction order is
# kept fixed because the constructors may consume random state.
tokenizer = Tokenizer('gpt2', device=device)
encoder = GPT2Encoder()
decoder = GPT2Decoder()
block_generator = BlockGenerator(GPT2_Block)
model = GPT2(tokenizer, encoder, block_generator, decoder)

# --- Optional weight transfer from the Hugging Face checkpoint --------------
# NOTE(review): `pretrained` starts as a boolean switch and, when enabled, is
# rebound to the loaded Hugging Face model (the training cell's commented-out
# baseline refers to it under that name).
pretrained = True
if pretrained:
    pretrained = (
        transformers.GPT2LMHeadModel
        .from_pretrained('gpt2')
        .to(device)
    )
    model.load_from_original(pretrained)

# Graph builder later called with the sequence length to produce `edge_index`.
# NOTE(review): the meaning of the two 50s is defined in src — confirm there.
graph_maker = random_graph_maker(50, 50)


In [4]:
from src.decoder import Loss

# Optimisation hyper-parameters.
lr = 1e-9     # NOTE(review): extremely small step size — confirm this is intended
gamma = 0.99  # multiplicative decay factor handed to ExponentialLR

# Objectives: `loss_function` wraps the project decoder and is the one used for
# training; `ce_loss` is only referenced by the commented-out baseline check in
# the training cell.
loss_function = Loss(decoder)
ce_loss = nn.CrossEntropyLoss()

# Adam over every model parameter, with an exponential learning-rate schedule.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

In [5]:
# Train on the training split one page at a time (one optimizer step per page).
n_epochs = 2
model.train()
losses = []  # per-step loss values, in iteration order
for _ in range(n_epochs):
    for page in train:
        text = page['text']
        nodes = tokenizer(text).to(device)
        # Next-token setup: `target` is `nodes` shifted by one position; the
        # last two tokens of the page are dropped from both sequences.
        nodes, target = nodes[:-3], nodes[1:-2]

        if isinstance(encoder, GPT2Encoder):
            # Cap the sequence at 1023 tokens — presumably to stay inside
            # GPT-2's 1024-position limit reported by the tokenizer warning.
            nodes = nodes[:1023]
            target = target[:1023]

        if len(nodes) == 0:
            # A page with three or fewer tokens leaves nothing after the
            # slicing above; skip it rather than feed an empty sequence to the
            # model and optimizer.
            continue

        edge_index = graph_maker(len(nodes))

        optimizer.zero_grad()  # reset gradients accumulated by the previous step

        prediction = model(nodes, edge_index)
        loss = loss_function(prediction, target)
        # Baseline check against the reference Hugging Face model:
        #prediction=pretrained(nodes)[0]
        #loss=ce_loss(prediction,target)

        loss_value = loss.item()  # read the scalar once and reuse it
        print(loss_value)
        losses.append(loss_value)
        loss.backward()

        optimizer.step()

    # Advance the learning-rate schedule once per epoch; without this call the
    # ExponentialLR scheduler never decays the optimizer's learning rate.
    scheduler.step()


Token indices sequence length is longer than the specified maximum sequence length for this model (3814 > 1024). Running this sequence through the model will result in indexing errors


2.54327654838562
2.1318130493164062
3.1408321857452393
3.305172920227051
3.1729109287261963
3.1030735969543457
3.188854694366455


KeyboardInterrupt: 

In [8]:
from src.graph_initialization import linear_unidirectional_graph_maker
# Sample a continuation of a fixed prompt from the model.
text = "Legolas and Gimli advanced on the orcs, raising their weapons with a harrowing war cry. "

# Echo the prompt without a trailing newline before generation starts.
print(text, end='')
gpt2_graph_maker = linear_unidirectional_graph_maker(100)

# The training cell leaves the model in train mode. Switch to eval mode so that
# train-only behaviour (e.g. dropout, if the blocks use it) does not perturb
# the sampled continuation.
model.eval()

# NOTE(review): 20 is presumably the number of tokens to generate — confirm
# against GPT2.generate. The returned string is displayed as the cell output.
model.generate(text, 20, gpt2_graph_maker, temperature=1)


Legolas and Gimli advanced on the orcs, raising their weapons with a harrowing war cry.   After rescuing Tok'ra senses, Chronus climbed to Promethean aid.   A threat

"Legolas and Gimli advanced on the orcs, raising their weapons with a harrowing war cry. \xa0 After rescuing Tok'ra senses, Chronus climbed to Promethean aid. \xa0 A threat"