In [None]:
!pip install wandb

In [None]:
import os
from transformers import AutoTokenizer,AutoModelForCausalLM

In [None]:
# Forward the Hugging Face token from the `hf_api_key` environment variable.
# os.environ only accepts strings, so assigning the None that os.getenv
# returns for a missing variable would raise TypeError; only set HF_TOKEN
# when a value is actually present.
hf_token = os.getenv("hf_api_key")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token

# Turn off Weights & Biases logging for this run.
os.environ["WANDB_DISABLED"] = "true"

In [None]:
# Hub identifier of the base checkpoint that gets fine-tuned below.
hf_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Fetch the matching tokenizer and the causal-LM weights for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(hf_model)
model = AutoModelForCausalLM.from_pretrained(hf_model)

In [None]:
from peft import LoraConfig,get_peft_model,TaskType

In [None]:
# LoRA adapter settings for causal language modelling: r=4 with
# lora_alpha=8, 10% adapter dropout, no bias terms trained, adapters attached
# to the four attention projection modules named below.
configuration = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=4,
    lora_alpha=8,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Wrap the base model with the adapter configuration defined above.
lora_model = get_peft_model(model, configuration)

In [None]:
# Report the trainable-parameter summary of the LoRA-wrapped model.
lora_model.print_trainable_parameters()

In [None]:
# Read the raw Q&A file into `lk`, one entry per line with the trailing
# newline kept, for ad-hoc inspection.
with open("Q&A outputs.txt", "r") as fi:
    lk = fi.readlines()

In [None]:
# lk

In [None]:
import re

def load_qa_pairs(file_name):
    """Parse a text file of alternating question/answer lines into dicts.

    Blank lines are ignored. Lines that contain the word fragment "here"
    (case-insensitive) are treated as filler text and skipped, but only when
    they do not start with a ``Q<n>:`` / ``A<n>:`` marker. A plain substring
    test would also discard real entries containing "where", "there", etc.,
    and a single dropped line shifts every following question/answer pairing.

    The remaining lines are paired positionally (1st with 2nd, 3rd with 4th,
    ...); a trailing unpaired line is ignored. A leading ``Q<n>:`` marker is
    removed from the question and a leading ``A<n>:`` marker from the answer.

    Args:
        file_name: path of the text file to read (decoded as UTF-8).

    Returns:
        A list of ``{"question": str, "answer": str}`` dicts in file order.
    """
    marker = re.compile(r'^[QA]\d+:\s*')

    lines = []
    # Explicit encoding so parsing does not depend on the platform default.
    with open(file_name, "r", encoding="utf-8") as f:
        for raw in f:
            text = raw.strip()
            if not text:
                continue
            # Skip filler such as "Here are the pairs:", never a marked entry.
            if 'here' in text.lower() and not marker.match(text):
                continue
            lines.append(text)

    pairs = []
    # zip() stops at the shorter slice, so an unpaired last line is dropped.
    for q_line, a_line in zip(lines[0::2], lines[1::2]):
        q = re.sub(r'^Q\d+:\s*', '', q_line).strip()
        a = re.sub(r'^A\d+:\s*', '', a_line).strip()
        pairs.append({"question": q, "answer": a})

    return pairs

In [None]:
# Parse the raw text file into a list of {"question", "answer"} dicts.
q_a_data=load_qa_pairs("Q&A outputs.txt")

In [None]:
# q_a_data

In [None]:
!pip install datasets

In [None]:
from datasets import Dataset
# Convert the parsed Q&A records into a `datasets.Dataset`. This rebinds
# `q_a_data`, so the previous object is no longer reachable under that name.
q_a_data=Dataset.from_list(q_a_data)
# Bare expression: the notebook displays the dataset as the cell output.
q_a_data

In [None]:
def tokenize_function(example, max_length=512):
    """Turn one Q&A record into a fixed-length causal-LM training example.

    A decoder-only model learns to predict each token from the tokens before
    it in the *same* sequence, so the answer has to be part of ``input_ids``
    and ``labels`` must be position-aligned with ``input_ids``. Encoding the
    question as the input and the separately padded answer as the labels
    pairs unrelated positions and never shows the model the answer text.

    This builds a single sequence ``question + answer + EOS`` and sets the
    labels to -100 (the conventional "ignore" value for the loss) on the
    question tokens and on the padding, so only the answer tokens and the
    closing EOS are scored. Because labels and inputs stay aligned, the
    example is valid whether a data collator keeps these labels or rebuilds
    them from ``input_ids``.

    Relies on the notebook-level ``tokenizer``.

    Args:
        example: mapping with string values under "question" and "answer".
        max_length: exact length of every returned list; longer sequences are
            truncated on the right, shorter ones are padded on the right.
            If the question alone fills ``max_length`` no answer token
            survives and every label is -100.

    Returns:
        dict with "input_ids", "attention_mask" and "labels", each a list of
        ``max_length`` ints.

    Raises:
        ValueError: if padding is needed but the tokenizer defines neither a
            pad token id nor an EOS token id.
    """
    # Question keeps the tokenizer's default special tokens; the answer is
    # appended raw so no extra special tokens land in the middle.
    prompt_ids = list(tokenizer(example['question'])['input_ids'])
    answer_ids = list(tokenizer(example['answer'], add_special_tokens=False)['input_ids'])

    # Close the answer with EOS so the model can learn where to stop.
    if tokenizer.eos_token_id is not None:
        answer_ids.append(tokenizer.eos_token_id)

    input_ids = (prompt_ids + answer_ids)[:max_length]
    labels = ([-100] * len(prompt_ids) + answer_ids)[:max_length]
    attention_mask = [1] * len(input_ids)

    pad_len = max_length - len(input_ids)
    if pad_len > 0:
        pad_id = tokenizer.pad_token_id
        if pad_id is None:
            pad_id = tokenizer.eos_token_id
        if pad_id is None:
            raise ValueError("tokenizer defines neither pad_token_id nor eos_token_id")
        input_ids = input_ids + [pad_id] * pad_len
        attention_mask = attention_mask + [0] * pad_len
        # Padding is masked by position, so a real EOS that shares the pad
        # id is still kept as a training target.
        labels = labels + [-100] * pad_len

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }


In [None]:
# Run tokenize_function over every record and drop the raw text columns so
# only the function's outputs remain in the dataset.
tokenized_q_a_data=q_a_data.map(tokenize_function,remove_columns=['question','answer'])

In [None]:
# Bare expression: display the tokenized dataset as the cell output.
tokenized_q_a_data

In [None]:
# Bare expression: display the token id the tokenizer uses for padding.
tokenizer.pad_token_id

In [None]:
# Sanity check: print the first tokenized record in full.
print(tokenized_q_a_data[0])

In [None]:
import torch

In [None]:
from transformers import DataCollatorForLanguageModeling

# Batch collator for language modelling with masked-LM disabled (mlm=False).
# NOTE(review): per the Transformers documentation, with mlm=False this
# collator builds `labels` from `input_ids` (padding set to -100), so a
# precomputed `labels` column in the dataset may be replaced — confirm that
# this matches the intended training target.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
from transformers import Trainer,TrainingArguments

In [None]:
# Training hyper-parameters. One example per device per step with gradients
# accumulated over 4 steps, 3 epochs at a learning rate of 1e-3, a log entry
# every 10 steps, a checkpoint at the end of each epoch, fp16 switched off.
training_arguments = TrainingArguments(
    output_dir=r"./llama3-lora-qa",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=0.001,
    num_train_epochs=3,
    logging_steps=10,
    save_strategy="epoch",
    fp16=False,
    label_names=["labels"],
)

# training_arguments

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the training arguments,
# the tokenized dataset and the batch collator.
trainer = Trainer(
    model=lora_model,
    args=training_arguments,
    train_dataset=tokenized_q_a_data,
    data_collator=data_collator,
)

In [None]:
# Run the fine-tuning loop; the returned training summary is displayed as
# the cell output.
trainer.train()

In [None]:
# Report the trainable-parameter summary of the LoRA-wrapped model again,
# this time after training.
lora_model.print_trainable_parameters()

In [None]:
# Show the concrete wrapper class of the fine-tuned model.
print(type(lora_model))

In [None]:
# Persist the fine-tuned model and the tokenizer side by side in the same
# directory so they can be reloaded together later.
lora_model.save_pretrained(save_directory="./llama3-lora-qa")
tokenizer.save_pretrained(save_directory="./llama3-lora-qa")

In [None]:
from peft import PeftModel

# Reload everything from disk for inference. Note that `tokenizer` and
# `model` are rebound here, replacing the objects used during training.
tokenizer = AutoTokenizer.from_pretrained("./llama3-lora-qa")

# Fresh copy of the base checkpoint with the saved adapter loaded on top.
base_model = AutoModelForCausalLM.from_pretrained(hf_model)
model = PeftModel.from_pretrained(base_model, "./llama3-lora-qa")

# Put the model in evaluation mode; as the last expression of the cell this
# also displays the model.
model.eval()


In [None]:
# Prompt for a quick qualitative check of the fine-tuned model
# (typo "hear attacks" corrected to "heart attacks").
question = "what leads to heart attacks?"

# Place the encoded prompt on whatever device the model lives on instead of
# hard-coding "cpu", so the cell keeps working if the model is moved.
inputs = tokenizer(question, return_tensors="pt").to(model.device)

# Pass an explicit padding id to generate(); fall back to EOS when the
# tokenizer has no dedicated pad token.
generation_pad_id = tokenizer.pad_token_id
if generation_pad_id is None:
    generation_pad_id = tokenizer.eos_token_id

# No gradients are needed for inference.
with torch.no_grad():
    output = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,  # sampling, so the output differs between runs
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=generation_pad_id,
    )

# Decode the first (only) generated sequence back to text without special
# tokens.
response = tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)


In [None]:
# Bare expression: display the decoded generation as the cell output.
response

In [None]:
import shutil
import zipfile

# Zip the contents of the output directory into "llama3-lora-qa.zip" in the
# current working directory.
shutil.make_archive(
    base_name="llama3-lora-qa",
    format='zip',
    root_dir="llama3-lora-qa",
)