In [1]:
import os
import sys
from pathlib import Path
sys.path.insert(1, os.path.realpath(os.path.pardir))

import safetensors
import torch
import torch.nn.functional as F
from accelerate import notebook_launcher
from einops import rearrange
from einops.layers.torch import Rearrange
from simple_parsing import ArgumentParser
import einops

from models import brainformer
from utils.data_utils import BrainDataset, get_tokenizer
from utils.train_utils import TrainConfig, run_train_model, count_parameters

from torch import nn
from models.brainformer import Encoder, CrossBlock, build_complex_rope_cache, Config


In [2]:
from transformers import GPT2Tokenizer
from models.gpt2_model import GPT

import tiktoken

from contextlib import nullcontext


In [3]:
# Runtime configuration shared by every cell below.
# Prefer the GPU, but fall back to the CPU so the notebook still runs end to end
# on machines where CUDA is not available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

device_type = 'cuda' if 'cuda' in device else 'cpu'  # for later use in torch.autocast
dtype = 'float32'
# Map the dtype name onto the corresponding torch dtype object.
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
# On CPU no autocast is used; on CUDA the cells run inside torch.amp.autocast.
# NOTE(review): with dtype == 'float32' autocast presumably has no mixed-precision effect — confirm.
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)


In [6]:
# Load the pretrained GPT-2 weights and the matching Hugging Face tokenizer,
# then switch the model to eval mode on the configured device.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT.from_pretrained('gpt2', {'dropout': 0.0})
model.eval().to(device)

print('initing completed')

# Prompt consisting only of the <|endoftext|> marker, tokenized and moved to the device.
start = '<|endoftext|>'
input_ids = tokenizer(start, return_tensors="pt")['input_ids'].to(device)

# Random stand-in for the prefix embeddings, created with shape (1, 16, n_embd).
prefix = torch.randn(1, 16, model.config.n_embd, dtype=ptdtype, device=device)

max_new_tokens = 5
temperature = 1.0  # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions

# Decode with beam search, without gradient tracking, inside the autocast/null context.
with torch.no_grad(), ctx:
    y = model.generate_beam_search(
        input_ids,
        max_new_tokens,
        prefix=prefix,
        temperature=temperature,
        beam_width=2,
    )
    print(tokenizer.decode(y[0].tolist()))
    print('---------------')

loading weights from pretrained gpt: gpt2
forcing vocab_size=50257, block_size=1024, bias=True
overriding dropout rate to 0.0
number of parameters: 123.65M
initing completed
tensor([0., 0.], device='cuda:0')
Starting generation
beams.shape, prefix.shape torch.Size([2, 1]) torch.Size([2, 16, 768])
-----
new token
logits.shape torch.Size([2, 50257])
[(tensor(-1.7620, device='cuda:0'), 0, tensor(464, device='cuda:0')), (tensor(-1.7620, device='cuda:0'), 1, tensor(464, device='cuda:0')), (tensor(-2.9174, device='cuda:0'), 0, tensor(32, device='cuda:0')), (tensor(-2.9174, device='cuda:0'), 1, tensor(32, device='cuda:0'))]
beam_scores tensor([-1.7620, -1.7620], device='cuda:0')
-----
new token
logits.shape torch.Size([2, 50257])
[(tensor(-5.4096, device='cuda:0'), 0, tensor(366, device='cuda:0')), (tensor(-5.4096, device='cuda:0'), 1, tensor(366, device='cuda:0')), (tensor(-5.4636, device='cuda:0'), 0, tensor(717, device='cuda:0')), (tensor(-5.4636, device='cuda:0'), 1, tensor(717, device='c

In [5]:
# Scratch experiment: how to append a single token id to an existing beam.
beam = torch.tensor([37, 38])
word_idx = torch.tensor(192)

# torch.stack requires every input tensor to have the same size, so stacking the
# 2-element beam with the 1-element token fails. Catch and print the error so the
# demonstration stays visible without aborting a full "Restart & Run All".
try:
    torch.stack([beam, word_idx.reshape(1)], dim=1)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: stack expects each tensor to be equal size, but got [2] at entry 0 and [1] at entry 1

In [None]:
# Show both shapes side by side: `beam` holds two elements, while the reshaped
# `word_idx` holds one, which is why the two tensors cannot be stacked.
beam.shape, word_idx.reshape(1).shape

In [None]:
# Appending works with concatenation instead of stacking: join the beam and the
# one-element token tensor along dim 0 (torch.cat's default dimension).
result = torch.cat([beam, word_idx.reshape(1)])
result

In [None]:
# Inspect the shape of the current `prefix` tensor (created with torch.randn in an earlier cell).
prefix.shape

# Old code

In [None]:
# Sanity-check the loss computation on a short prompt combined with a random prefix.
start = '<|endoftext|>i love you so much <|endoftext|>'

# The tokenizer loaded earlier in this notebook is bound to `tokenizer`;
# `gpt2_tokenizer` is never defined and raised NameError on a fresh kernel.
input_ids = tokenizer(start, return_tensors="pt")['input_ids']
input_ids = input_ids.to(device)

# Random stand-in for the prefix embeddings, created with shape (1, 32, n_embd).
prefix = torch.randn(1, 32, model.config.n_embd, dtype=ptdtype, device=device)

print('Input shapes', input_ids.shape, prefix.shape)

# Call the module itself rather than .forward() so that any registered hooks run.
# NOTE(review): the (loss, logits) unpacking order is kept from the original cell —
# confirm it matches what GPT.forward returns.
loss, logits = model(idx=input_ids, targets=input_ids, prefix=prefix)

print(loss)

In [None]:
# NOTE(review): the model is put into train mode before sampling — confirm this is intended.
model.train().to(device)

# tiktoken's GPT-2 encoding, wrapped in two small helpers.
enc = tiktoken.get_encoding("gpt2")


def encode(s):
    """Encode text with the GPT-2 tiktoken encoding, allowing the <|endoftext|> special token."""
    return enc.encode(s, allowed_special={"<|endoftext|>"})


def decode(l):
    """Decode a list of token ids back into text with the GPT-2 tiktoken encoding."""
    return enc.decode(l)


# Prompt ids as a long tensor with a leading batch dimension of 1.
start = "Russian is the best"
start_ids = encode(start)
x = torch.tensor(start_ids, dtype=torch.long, device=device).unsqueeze(0)

# Random stand-in for the prefix embeddings, created with shape (1, 16, n_embd).
prefix = torch.randn(1, 16, model.config.n_embd, dtype=ptdtype, device=device)

max_new_tokens = 15
temperature = 1.0  # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
top_k = 20

# Draw three samples from the same prompt and prefix, without gradient tracking.
with torch.no_grad(), ctx:
    for k in range(3):
        y = model.generate(x, max_new_tokens, prefix=prefix, temperature=temperature, top_k=top_k)
        print(decode(y[0].tolist()))
        print('---------------')

### loss calculation 

## Let's add context vectors to the model

- `forward` should take into account both the token indices (`idx`) and the context vectors. I did something similar before, actually.

N — a fixed number of `brain_tokens`.

- Add them at the beginning of the sentence.

## Cut our model: remove layers.

This is an approach that reduces the number of layers, which allows tuning the model with fewer GPU resources — similar in spirit to distillation.


So we can distill the model for our task.

In [None]:
# Keep only a subset of the transformer blocks to shrink the model.
# The selection is written as two slices so either range can be tuned independently.
# Note that this replaces model.transformer.h in place, so re-running the cell
# operates on the already-cut model.
all_blocks = list(model.transformer.h.children())
cut_blocks = [*all_blocks[:4], *all_blocks[4:8]]
model.transformer.h = nn.Sequential(*cut_blocks)
count_parameters(model)


In [None]:
# Sampling settings for generation without a prefix.
max_new_tokens = 15
temperature = 1.0  # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
top_k = 10

# Draw three samples from the prompt tensor `x`, without gradient tracking.
with torch.no_grad(), ctx:
    for k in range(3):
        y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
        tokens = y[0].tolist()
        print(decode(tokens))
        print('---------------')

In [None]:
# Scratch check: joining two lists keeps every element, including the duplicate 50.
l1, l2 = [20, 30, 50], [50, 60]

[*l1, *l2]

In [None]:
# Count the transformer blocks among the first ten entries of model.transformer.h.
# `amodel` is not defined anywhere in this notebook; the model object is bound to `model`.
len(list(model.transformer.h[:10].children()))

In [None]:
# Compare the tiktoken encoding of a prompt with the Hugging Face tokenizer's ids.
start = '<|endoftext|>i love you so much'
print(encode(start))

# The Hugging Face tokenizer is bound to `tokenizer`; `gpt2_tokenizer` is never
# defined and raised NameError on a fresh kernel.
input_ids = tokenizer(start, return_tensors="pt")['input_ids']
input_ids


In [None]:
project_name = 'brainformer'

train_config = TrainConfig(exp_name='brainformer_simple',
                           mixed_precision=False,
                           batch_size=16)

# Dataset root. It can be overridden with the BRAIN_DATA_DIR environment variable so the
# notebook is not tied to one machine; the default is the original local path.
data_path = Path(os.environ.get('BRAIN_DATA_DIR',
                                r"D:\Work\brain-to-text-competition\data\competitionData"))

# Later cells read train_dataset.targets, so the train split has to be loaded
# here as well; otherwise they fail with NameError.
train_dataset = BrainDataset(data_path / 'train')

test_dataset = BrainDataset(data_path / 'test')

# submit_dataset = BrainDataset(data_path / 'competitionHoldOut')



In [None]:
# Display the targets of the test split.
# NOTE(review): this shows the whole collection — consider displaying only a small sample.
test_dataset.targets

In [None]:
def get_unique_words(lines):
    """Return the set of distinct lower-cased words found in ``lines``.

    Each line is lower-cased, every '.' character is removed, and the result is
    split on whitespace. Other punctuation is left attached to the words.

    Args:
        lines: iterable of strings.

    Returns:
        A set of word strings; empty when ``lines`` is empty.
    """
    return {
        word
        for line in lines
        for word in line.lower().replace('.', '').split()
    }

In [None]:
# Vocabulary overlap between the splits; requires both train_dataset and
# test_dataset to be loaded.
train_set = get_unique_words(train_dataset.targets)
test_set = get_unique_words(test_dataset.targets)

intersection = train_set & test_set

# Print, in order: train vocabulary size, test vocabulary size, shared word count.
for vocab in (train_set, test_set, intersection):
    print(len(vocab))


In [None]:
# Display the targets of the train split (requires train_dataset to be loaded).
# NOTE(review): this shows the whole collection — consider displaying only a small sample.
train_dataset.targets

In [None]:
from models import gpt2_model
