# Result test

In [None]:
import os

# Point the Hugging Face libraries at the mirror endpoint.
# The variable is set on the kernel process itself: a `!VAR=value` shell escape
# only assigns it inside a short-lived subshell, so the kernel would never see it.
# This cell has to run before transformers / peft are imported in the next cell.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, TextStreamer
from peft import PeftModel, PeftConfig
import torch

In [3]:
# Loading PEFT model

# Adapter checkpoints. Declared here, ahead of first use, so that this cell
# runs on a fresh kernel (Restart & Run All) without relying on a later cell.
PEFT_MODEL_LIST = ["adapter_nbertagnolli/checkpoint-660",
                   "adapter_mentalLLama_Final/checkpoint-960",
                   "adapter_mentalLLama_dreaddit/checkpoint-1120",
                   "adapter_mentalLLama_DR/checkpoint-320"]

# The first adapter's config is only used to look up the base model identifier
# (config.base_model_name_or_path).
# NOTE(review): assumes every adapter in the list was trained on the same base model - confirm.
config = PeftConfig.from_pretrained(PEFT_MODEL_LIST[0])

# 4-bit NF4 quantization, bfloat16 compute dtype, double quantization disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

# Quantized base model; device placement is delegated to device_map="auto".
peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)
# Reuse EOS as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [6]:
# Adapter checkpoints, applied to the base model in the order listed.
PEFT_MODEL_LIST = [
    "adapter_nbertagnolli/checkpoint-660",
    "adapter_mentalLLama_Final/checkpoint-960",
    "adapter_mentalLLama_dreaddit/checkpoint-1120",
    "adapter_mentalLLama_DR/checkpoint-320",
]

# Start from the quantized base model, then for every checkpoint: attach the
# adapter to the model built so far and fold it in via merge_and_unload().
# NOTE(review): `model` starts as the very same object as `peft_base_model`;
# re-running this cell without reloading the base model presumably stacks the
# adapters a second time - confirm before re-executing.
model = peft_base_model
for adapter_path in PEFT_MODEL_LIST:
    adapted = PeftModel.from_pretrained(model, adapter_path)
    model = adapted.merge_and_unload()

In [None]:
from transformers import GenerationConfig


def generate_answer(post, question, include_instructions=False):
  """Sample one answer from the notebook-level `model` and print it.

  The prompt has the form
  `<s>[INST]Consider this post: "<post>" Question: <question>[/INST]`.
  It is tokenized with the notebook-level `tokenizer`, moved to the device
  reported by `model.device`, and passed to `model.generate` with sampling
  enabled. The decoded sequence is printed between two dashed lines.

  Args:
    post: Text of the post the question refers to.
    question: Question to ask about the post.
    include_instructions: When True, the answer-format instructions are
      inserted right after `[INST]`. Defaults to False, which produces the
      prompt without those instructions.

  Returns:
    None. The decoded text is only printed.
  """
  prompt_open = "<s>[INST]"
  instructions = "You will get a Post and a Question And you have to answer the question based on the post, If you think the post is meaningless, just say \"No\". The anwser should following this format: Yes/No, Reasoning: (Your Reasoning)\n"
  post_part = f"Consider this post: \"{post}\" "
  question_part = f"Question: {question}[/INST]"

  final_prompt = prompt_open + (instructions if include_instructions else "") + post_part + question_part

  # NOTE(review): the prompt already starts with a literal "<s>"; the tokenizer
  # may add its own BOS token on top of it - confirm against the training format.
  # Inputs follow the model's own device instead of a hard-coded "cuda:0",
  # because the model is loaded with device_map="auto".
  encoding = tokenizer(final_prompt, return_tensors="pt", padding=True).to(model.device)

  generation_config = GenerationConfig(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.4,
    top_p=0.6,
    repetition_penalty=1.3,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
  )

  # The attention mask is a model input, so it goes to generate() directly
  # rather than into the GenerationConfig.
  outputs = model.generate(
    input_ids=encoding.input_ids,
    attention_mask=encoding.attention_mask,
    generation_config=generation_config,
  )
  # NOTE(review): outputs[0] is decoded in full, which likely includes the
  # prompt text ahead of the generated answer - confirm.
  text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

  dashline = "-" * 49  # same 49-character separator as "-".join of 50 empty strings
  print(dashline)
  print(f'MODEL RESPONSE:\n{text_output}')
  print(dashline)

In [None]:
# Sample post about a difficult year, paired with the depression question.
# The post is written as adjacent string literals; the resulting string is one
# single line of text ending in a trailing space.
query = (
    "How to avoid a relapse? "
    "I\'ve been having a particularly rough year; I attempted suicide, the love of my life left me, "
    "I failed my year at university, I\'ve been physically assaulted, I\'ve been sexually assaulted "
    "and today my grandfather died. "
    "I feel like I\'m running on empty and doing the bare minimum to survive for myself "
    "while trying to be a rock to others. "
    "My emotional resilience has been wiped out. "
    "I feel \"okay\" but I\'ve felt like this in the past and it has turned out that "
    "I\'ve just been lying to myself and making things worse. "
    "Any advice on how to manage/process my emotions? "
    "Or just how to better understand myself? "
)
question = "Does the poster suffers from depression?"
generate_answer(post=query, question=question)

In [None]:
# Same post and question as the cell before it, defined again so this cell is
# self-contained. generate_answer samples (do_sample=True), so the printed
# answer can differ from one run to the next.
query = (
    "How to avoid a relapse? "
    "I\'ve been having a particularly rough year; I attempted suicide, the love of my life left me, "
    "I failed my year at university, I\'ve been physically assaulted, I\'ve been sexually assaulted "
    "and today my grandfather died. "
    "I feel like I\'m running on empty and doing the bare minimum to survive for myself "
    "while trying to be a rock to others. "
    "My emotional resilience has been wiped out. "
    "I feel \"okay\" but I\'ve felt like this in the past and it has turned out that "
    "I\'ve just been lying to myself and making things worse. "
    "Any advice on how to manage/process my emotions? "
    "Or just how to better understand myself? "
)
question = "Does the poster suffers from depression?"
generate_answer(post=query, question=question)

In [None]:
# Sample post asking whether a free trial that requires credit card details is
# safe, paired with the same depression question.
query = (
    "Ancestry.com - is it safe? "
    "Hello!: ) I'm a new user so if this post ends up in a weird place/thread, pls bear w me. "
    "Has anyone used Ancestry.com lately? "
    "They offer a two week trial but still require your credit card information. "
    "Is this suspicious? "
    "It doesn't cost anything for the first two weeks, and then you can end the trial "
    "so you don't have to pay anything even then, but I just feel a little uncomfortable "
    "dialing my credit card information on there online.... "
    "Am I being too doubtfull? Share your experiences? Thank you! "
    "If my English isn't perfect, that's because I'm not a native speaker."
)
question = "Does the poster suffers from depression?"
generate_answer(post=query, question=question)

In [None]:
# Very short, non-English post (appears to be Turkish) paired with the same
# depression question.
query = "Dini ticaret haline getirenler"
question = "Does the poster suffers from depression?"
generate_answer(post=query, question=question)

# Result Generation

In [None]:
# Ask PyTorch to release the CUDA memory it is caching but not currently using.
# NOTE(review): memory held by objects that are still referenced in this kernel
# (e.g. `model`) is presumably not released by this call - confirm whether the
# model should be deleted first, since the scripts launched below load their own.
import torch
torch.cuda.empty_cache()

In [None]:
# The evaluation data provided by MentalLLaMA lives in
# MentalLLaMA_dataset/test_data/test_complete/SAD.csv.
# MentalLLaMA_dataset/src/IMHI.py is run on it to generate the response data.
!python MentalLLaMA_dataset/src/IMHI.py \
    --model_path "mistralai/Mixtral-8x7B-Instruct-v0.1" \
    --adapter_path mistral7b_mentalLLama_Final/checkpoint-960 \
    --batch_size 8 \
    --model_output_path SAD \
    --test_dataset IMHI-completion \
    --test_data_path MentalLLaMA_dataset/test_data/test_complete/SAD.csv \
    --cuda

# Correctness Evaluation

In [None]:
# Check the correctness of the generated responses with the model trained by
# MentalLLaMA, via MentalLLaMA_dataset/src/label_inference.py.
# NOTE(review): --data_path points at model_output/Irf, while the generation
# step uses --model_output_path SAD - confirm which directory is intended.
!python MentalLLaMA_dataset/src/label_inference.py \
    --model_path Tianlin668 \
    --data_path MentalLLaMA_dataset/model_output/Irf \
    --data_output_path MentalLLaMA_dataset/model_output/result/ \
    --cuda --calculate

# BartScore: Explanation Quality Evaluation

In [None]:
# Rate the quality of the generated responses with the bart_score method,
# by running MentalLLaMA_dataset/src/score.py on the "SAD" generation directory.
!python MentalLLaMA_dataset/src/score.py --gen_dir_name SAD \
    --score_method bart_score \
    --cuda