In [4]:
from datasets import Dataset, DatasetDict
from huggingface_hub import notebook_login

import pandas as pd
from peft import AutoPeftModelForCausalLM, get_peft_model, LoraConfig
from random import randrange
from sklearn.model_selection import train_test_split


from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
import torch
from trl import SFTTrainer
import wandb

In [None]:
# Load the cleaned master dataset from the working directory and preview the first rows.
# Later cells read the 'context' and 'question' columns of this frame.
df=pd.read_csv('final_master_cleaned.csv')
df.head(3)

In [9]:
#Data split 70% train-20% test-10% valid
# First hold out 20% of all rows as the test split (fixed seed for reproducibility).
train, test = train_test_split(df, test_size=0.2,random_state=42)
# 0.125 of the remaining 80% equals 10% of the full data, leaving 70% for training.
train, valid = train_test_split(train, test_size=0.125,random_state=42)
# NOTE(review): `valid` is not referenced by any later cell in this notebook — confirm whether it should be used for evaluation.

In [10]:
# Convert the pandas train/test splits into Hugging Face Datasets in one pass.
raw_train, raw_test = (Dataset.from_pandas(split_df) for split_df in (train, test))

In [11]:
# Drop the '__index_level_0__' column (presumably the pandas index carried over
# by from_pandas) from both splits.
raw_train, raw_test = (
    split.remove_columns('__index_level_0__') for split in (raw_train, raw_test)
)

In [12]:
# Bundle the two splits under the 'train' and 'test' keys and display the summary.
ds_dict = dict(train=raw_train, test=raw_test)
dataset = DatasetDict(ds_dict)
dataset

In [None]:
# Interactive Hugging Face Hub login; presumably needed to download the base
# checkpoint and for the push_to_hub calls later in the notebook — confirm.
notebook_login()

In [8]:
#To instruct tune our model, we need to convert our structured examples into a collection of tasks described via instructions.
def format_instruction(sample):
    """Render one training example as an instruction-style prompt.

    Args:
        sample: Mapping with a 'context' entry (the source passage) and a
            'question' entry (the target question).

    Returns:
        A string made of '### Instruction:', '### Input:' and '### Response:'
        sections, terminated by a newline.
    """
    # The instruction sentence keeps its trailing space so the rendered text
    # stays exactly as before.
    sections = (
        "### Instruction:",
        "Use the Input below to create a question, which could be asked in exam. ",
        "",
        "### Input:",
        f"{sample['context']}",
        "",
        "### Response:",
        f"{sample['question']}",
        "",
    )
    return "\n".join(sections)

In [None]:
# Example of a formatted prompt, drawn at random from the training split.
# The index range must come from the split itself: len(dataset) on a
# DatasetDict is the number of splits, not the number of rows.
print(format_instruction(dataset['train'][randrange(len(dataset['train']))]))

In [None]:
# Base checkpoint to fine-tune.
model_id = "meta-llama/Llama-2-7b-hf"

# Load the base model in float16 and let device_map="auto" place it on the available device(s).
# use_cache=False: presumably because the KV cache conflicts with the gradient
# checkpointing enabled in TrainingArguments — confirm.
model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=False, device_map="auto",torch_dtype=torch.float16)
# NOTE(review): pretraining_tp = 1 presumably turns off the tensor-parallel
# emulation path of the Llama implementation — confirm.
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse EOS as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [15]:
# LoRA config
peft_config = LoraConfig(
        lora_alpha=16,  # LoRA scaling factor
        lora_dropout=0.1,  # dropout applied inside the LoRA layers
        r=64,  # rank of the low-rank update matrices
        bias="none",  # no bias terms are trained
        task_type="CAUSAL_LM",  # adapters are set up for causal language modelling
)

# Wrap the base model with the LoRA adapters.
# NOTE(review): the same peft_config is also passed to SFTTrainer in a later cell
# although the model is already wrapped here — confirm this is intended.
model = get_peft_model(model, peft_config)

In [16]:
# Hyperparameters for the fine-tuning run.
# NOTE(review): with lr_scheduler_type="constant" the warmup_ratio may have no effect — confirm.
args = TrainingArguments(
    output_dir="llama-7-master",  # the merge cell later loads the adapter from this directory
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_checkpointing=True,
    optim="adamw_hf",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=2e-4,
    bf16=False,
    fp16=True,  # float16 mixed precision; bf16 and tf32 are switched off
    tf32=False,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    disable_tqdm=False,  # False keeps the tqdm progress bar ENABLED during training
)



In [17]:
max_seq_length = 512 # max sequence length for model and packing of the dataset

# Supervised fine-tuning trainer: rows are turned into prompt text by format_instruction.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],  # NOTE(review): the held-out test split doubles as the eval set — confirm
    peft_config=peft_config,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    packing=True,  # presumably concatenates formatted samples into max_seq_length chunks — confirm against TRL docs
    formatting_func=format_instruction,  # builds the instruction/input/response text for each row
    args=args,
)

Start training our model by calling the `train()` method on our `Trainer` instance.

In [None]:
# train
trainer.train() # a tqdm progress bar is shown, since the training arguments set disable_tqdm=False

In [19]:
# save model
# Called without a path; presumably writes to the trainer's output_dir
# ("llama-7-master"), which the merge cell loads from — confirm.
trainer.save_model()

In [None]:
#merge adapter weights into the base model.

# Local directory holding the trained adapter (same value as output_dir in TrainingArguments).
# NOTE: this rebinds the notebook-level `model_id` and `model` names used by earlier cells.
model_id = "llama-7-master"
model = AutoPeftModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
)

# Merge LoRA and base model
merged_model = model.merge_and_unload()


# Persist the merged weights (safe_serialization=True) together with the tokenizer in "merged_model".
merged_model.save_pretrained("merged_model",safe_serialization=True)
tokenizer.save_pretrained("merged_model")

In [None]:
# push merged model to the hub
# Model and tokenizer go to the same repository id so they can be loaded together.
merged_model.push_to_hub("prnv13/llama-7-master")
tokenizer.push_to_hub("prnv13/llama-7-master")

Evaluation

In [None]:
# Reload the merged model and its tokenizer from the Hub for evaluation;
# this rebinds the notebook-level `tokenizer` and `model` names.
tokenizer = AutoTokenizer.from_pretrained("prnv13/llama-7-master")
model = AutoModelForCausalLM.from_pretrained("prnv13/llama-7-master", device_map="auto",torch_dtype=torch.float16)

In [None]:
# NOTE(review): the model was loaded with device_map="auto"; an extra .to('cuda:0')
# is presumably redundant on a single-GPU machine — confirm before running multi-GPU.
model.to('cuda:0')
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

In [None]:
# Evaluation data; later cells read its 'context' and "Question 1" columns.
test_data = pd.read_csv('test-master.csv')

In [None]:
def prompt_gen(sample):
    """Build the inference prompt for one evaluation example.

    The text reproduces the template used for fine-tuning (see
    ``format_instruction``) up to and including the '### Response:' header,
    so the model is queried with the same layout it was trained on: no
    leading indentation on any line and the same instruction sentence.

    Args:
        sample: Mapping (e.g. a DataFrame row) with a 'context' entry.

    Returns:
        The prompt string, ending right after '### Response:' and a newline.
    """
    # Explicit "\n"-terminated pieces keep source indentation out of the
    # prompt and make the trailing space of the instruction line visible.
    prompt = (
        "### Instruction:\n"
        "Use the Input below to create a question, which could be asked in exam. \n"
        "\n"
        "### Input:\n"
        f"{sample['context']}\n"
        "\n"
        "### Response:\n"
    )

    return prompt


In [None]:
# Build one inference prompt per row of the test frame, in row order.
prompts = [prompt_gen(row) for _, row in test_data.iterrows()]

In [None]:
def stream_inference(model, tokenizer, text_stream):
    """Generate one sampled continuation for each prompt in ``text_stream``.

    Args:
        model: Causal LM exposing ``generate`` and a ``device`` attribute.
        tokenizer: Tokenizer matching ``model``; called on each prompt and
            used to decode the generated token ids.
        text_stream: Iterable of prompt strings, processed one at a time.

    Returns:
        List holding the generated text (prompt excluded) for every prompt,
        in input order. An empty iterable yields an empty list.
    """
    generated_texts = []
    # Follow the model's own placement instead of hard-coding CUDA.
    device = model.device

    for text in text_stream:
        encoded = tokenizer(text, return_tensors="pt", max_length=512, padding=True, truncation=True)
        input_ids = encoded["input_ids"].to(device)
        attention_mask = encoded["attention_mask"].to(device)

        # Perform inference on the input
        with torch.no_grad():
            output = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=50,
                do_sample=True,
                top_p=0.9,
                temperature=0.9,
            )

        # Strip the prompt by token count rather than by len(text): the decoded
        # prompt need not have the same length as the input string (truncation
        # at max_length, tokenizer normalisation), which corrupted the slice.
        new_token_ids = output[0][input_ids.shape[1]:]
        generated_texts.append(tokenizer.decode(new_token_ids, skip_special_tokens=True))

        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return generated_texts

In [None]:
# Generate one question per evaluation prompt with the fine-tuned model.
predicted = stream_inference(model, tokenizer, prompts)
print("Streaming Inference Results:")
# NOTE(review): this prints every generated text; consider showing only a slice for large test sets.
print(predicted)

In [None]:
# Pair each prediction with a 1-based id; every prediction is wrapped in a one-item list.
model_results = [(idx, [text]) for idx, text in enumerate(predicted, start=1)]

In [None]:
# Pair each reference question from the "Question 1" column with a 1-based id.
references = list(enumerate(test_data["Question 1"], start=1))

In [None]:
# Project-local evaluation helper.
# NOTE(review): consider moving this import to the import cell at the top of the notebook.
from src.evaluation.eval_main import Metrics


# save_to_file=True: presumably also writes the scores to disk — confirm in Metrics.
metrics = Metrics(save_to_file=True)

# Single-row frame of the metric scores, labelled "llama".
result = pd.DataFrame(
    metrics.evaluate(model_output=model_results, references=references),
    index=["llama"]
)
result

In [None]:
# Persist the metric table next to the notebook.
result.to_csv('result.csv')