In [None]:
!pip install bitsandbytes
!pip install datasets
!pip install peft

In [None]:
import os

from huggingface_hub import login

# Never hardcode the access token in the notebook: it ends up in version
# control and in shared copies. Read it from the HF_TOKEN environment variable
# instead. When the variable is not set, `token=None` makes `login()` fall
# back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
import pandas as pd
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset, Dataset
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Restrict this process to GPUs 0 and 1.
# NOTE(review): this only has an effect if CUDA has not been initialised yet;
# torch and bitsandbytes are already imported at this point - confirm the
# variable is still honoured, or move this assignment above the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# 4-bit NF4 quantisation with double quantisation enabled; bfloat16 is used
# as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Directory holding the fine-tuned adapter; its config records which base
# model the adapter was trained on.
PEFT_MODEL = "/kaggle/input/llm-project-fine-tuned-4000/pytorch/default/1/trained-model"

config = PeftConfig.from_pretrained(PEFT_MODEL)

# Load the quantised base model and let `device_map="auto"` place it on the
# visible devices.
# NOTE(security): trust_remote_code=True executes Python code shipped with the
# model repository - only keep it for base models you trust.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# The tokenizer comes from the same base model; EOS is reused as the padding
# token.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned adapter weights on top of the quantised base model and
# make inference mode explicit.
model = PeftModel.from_pretrained(model, PEFT_MODEL)
model.eval()

# Generation settings shared by every `model.generate` call below.
# Note: this is the model's own GenerationConfig object, so the assignments
# also change the model's defaults.
generation_config = model.generation_config
generation_config.max_new_tokens = 150
# temperature / top_p are sampling parameters: they are only applied when
# sampling is enabled, so turn it on explicitly instead of relying on whatever
# default the base model ships with.
generation_config.do_sample = True
generation_config.temperature = 0.5
generation_config.top_p = 0.8
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

import numpy as np


In [None]:
import pandas as pd

# Read the evaluation CSV and keep only its first three rows.
df = pd.read_csv(
    "/kaggle/input/llm-project-amod-english/llm_project_Amod_English.csv"
).head(3)

In [None]:
from transformers import pipeline
# Accumulates one {"ground_truth": ..., "prediction": ...} record per evaluated sample.
my_answers=[]

def post_process(predicted_text):
    """Return the text that follows the first "[/INST]" marker.

    Args:
        predicted_text: Decoded model output, normally the prompt followed by
            the generated answer.

    Returns:
        The whitespace-stripped text after the first "[/INST]" marker, or
        ``predicted_text`` unchanged when the marker does not occur.
    """
    _, marker, answer = predicted_text.partition("[/INST]")
    # An empty separator means the marker was not found: hand the input back
    # untouched (it is deliberately not stripped in that case).
    if not marker:
        return predicted_text
    return answer.strip()

def generate_prediction(user_query):
    """Generate the model's answer for a single user query.

    Wraps the query in the instruction prompt, runs `model.generate` with the
    shared `generation_config`, decodes the first returned sequence and keeps
    only the text after the "[/INST]" marker (see `post_process`).

    Args:
        user_query: Text inserted into the prompt as the user's message.

    Returns:
        The post-processed generated text.
    """
    # NOTE(review): the prompt starts with a literal "<s>" and the tokenizer is
    # called with its default special-token handling, which may prepend a
    # second BOS token. Left unchanged so the prompt format stays as it was -
    # confirm against the format used for fine-tuning.
    prompt=f"<s> [INST] <<SYS>> You are a helpful assistant, who always provides empathetic responses to the user's query and helps them solve their mental problems. <</SYS>> {user_query} [/INST]".strip()

    # Send the inputs to the device the model reports instead of a hardcoded
    # "cuda" string.
    encoding = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )

    # The decoded sequence is passed through `post_process`, which drops
    # everything up to and including the "[/INST]" marker.
    decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return post_process(decoded_text)


for index, row in df.iterrows():
    print("Currently on sample",index)
    my_answers.append(
        {
            "ground_truth": row['Response'],
            "prediction": generate_prediction(row['Context']),
        }
    )



In [None]:
# Collect the (ground truth, prediction) records into their own DataFrame.
# A dedicated name is used on purpose: rebinding `df` here would overwrite the
# evaluation data, so re-running the inference cell afterwards would fail on
# the missing 'Context' / 'Response' columns.
# Passing `columns` keeps the CSV header stable even when no sample was
# processed.
predictions_df = pd.DataFrame(my_answers, columns=["ground_truth", "prediction"])

# Save the DataFrame to a CSV file
predictions_df.to_csv("LLAMA2_7b_5epoch_finetuned_predictions.csv", index=False)