In [1]:
import gc
import torch
import pandas as pd
from torch.utils.data import Dataset, random_split
from transformers import AutoTokenizer, TrainingArguments, Trainer, GPT2LMHeadModel, GPTNeoForCausalLM, AutoModelForCausalLM
torch.manual_seed(42)
texts = pd.read_csv('data.csv')
texts = texts.dropna()
texts = texts.dropna(subset=['Quest Description'])
class TextDataset(Dataset):
    def __init__(self, txt_list, tokenizer, max_length):
        self.labels = []
        self.input_ids = []
        self.attn_masks = []        
        for title, descrp in zip(txt_list['Quest Title'], txt_list['Quest Description']):
            encodings_dict = tokenizer('<|startoftext|>' + 'Title: ' + title + 'Description: ' + descrp + '<|endoftext|>', truncation=True, max_length=max_length, padding="max_length")
            self.input_ids.append(torch.tensor(encodings_dict['input_ids']))
            self.attn_masks.append(torch.tensor(encodings_dict['attention_mask']))
    def __len__(self): return len(self.input_ids)
    def __getitem__(self, idx): return self.input_ids[idx], self.attn_masks[idx]

In [7]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "EleutherAI/gpt-neo-2.7B"
tokenizer = AutoTokenizer.from_pretrained(model_name, bos_token='<|startoftext|>', eos_token='<|endoftext|>', pad_token='<|pad|>')

max_length = max([len(tokenizer.encode('Title: ' + title + 'Description: ' + descr)) for title, descr in zip(texts['Quest Description'], texts['Quest Title'])])
max_length2 = max([len(tokenizer.encode('<|startoftext|>' + 'Title: ' + title + 'Description: ' + descr + '<|endoftext|>')) for title, descr in zip(texts['Quest Description'], texts['Quest Title'])])
print(max_length)
print(max_length2)
dataset = TextDataset(texts, tokenizer, max_length=max_length)
train_size = int(0.9 * len(dataset))
train_dataset, val_dataset = random_split(dataset, [train_size, len(dataset) - train_size])
gc.collect()
print(train_size)

# Ensure pad_token_id is set
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
    
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     torch_dtype=torch.float16,
#     attn_implementation="flash_attention_2"
# ).cuda()
from peft import LoraConfig, get_peft_model, PeftConfig
from transformers import LlamaTokenizer, LlamaForCausalLM

import torch
from transformers import BitsAndBytesConfig

config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=config
)

from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

from peft import LoraConfig

config = LoraConfig(
    r=16,
    lora_alpha=8,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# config = LoraConfig(
#     r=16,
#     lora_alpha=32,
#     target_modules=["q_proj", "v_proj"],
#     lora_dropout=0.05,
#     bias="none",
#     task_type="CAUSAL_LM",
# )

# model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, config)

peft_config = PeftConfig.from_pretrained('iproskurina/opt-2.7b-gptq-4bit')

# to initiate with random weights
peft_config.init_lora_weights = False

model.add_adapter(peft_config)
model.enable_adapters()

model.resize_token_embeddings(len(tokenizer))
# model.half()

173
175
6096


ValueError: Can't find 'adapter_config.json' at 'iproskurina/opt-2.7b-gptq-4bit'

In [None]:
# Initialize model with desired settings

# model.half()
# model.bfloat16()
# 
from transformers import EarlyStoppingCallback
torch.cuda.empty_cache()

#training .. 

training_args = TrainingArguments(output_dir='./results',
                                  num_train_epochs=1,
                                  load_best_model_at_end=True,
                                  # max_steps=10, 
                                  # logging_steps=10,
                                  overwrite_output_dir=True,
                                  eval_strategy="epoch",
                                  save_strategy="epoch",
                                  # save_steps=100,
                                  # eval_steps=100,
                                  per_device_train_batch_size=4,
                                  per_device_eval_batch_size=4,
                                  warmup_steps=10,
                                  weight_decay=0.05,
                                  # fp16=True,
                                  logging_dir='./logs',
                                  report_to = 'none')

model.enable_adapters()
trainer = Trainer(model=model,
        args=training_args,
        train_dataset = train_dataset, 
        eval_dataset = val_dataset,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
        data_collator = lambda data: {'input_ids': torch.stack([f[0] for f in data]),
                                      'attention_mask': torch.stack([f[1] for f in data]),
                                      'labels': torch.stack([f[0] for f in data])})

trainer.train()
# model.save_pretrained(output_path)
# tokenizer.save_pretrained(output_path)



# Prepare the input text
# input_text = "<|startoftext|> Title: Sharptalon's Claw Description: "
input_text = "Title: Sharptalon's Claw \nDescription:"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.cuda()
# input_ids = {key: value.half() for key, value in input_ids.items()}

# Ensure the model is in eval mode
model.eval()

# Generate text
try:
    sample_outputs = model.generate(
        input_ids=input_ids,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=True,
        top_k=50,
        max_length=300,
        top_p=0.95,
        temperature=0.7,
        num_return_sequences=20
    )
    # Decode and print generated texts
    generated_texts = [tokenizer.decode(output, skip_special_tokens=True) for output in sample_outputs]
    for i, text in enumerate(generated_texts):
        print(f"Generated text {i+1}:\n{text}\n")

except RuntimeError as e:
    print("RuntimeError during generation:", e)

    # Additional Debugging: Check logits
    with torch.no_grad():
        outputs = model(input_ids=input_ids)
        logits = outputs.logits
        assert not torch.isnan(logits).any(), "logits contain NaNs"
        assert not torch.isinf(logits).any(), "logits contain Infs"
        print("Logits sample:", logits[0, -1, :10])
