# Dowloading Necessary Libraries

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.9/116.9 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m83.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import torch
from datasets import load_dataset, DatasetDict, Dataset, concatenate_datasets
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForQuestionAnswering,
    AutoModelForCausalLM,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# Defining Q-LoRa Parameters

In [None]:
# Base Model
model_name = "NousResearch/Llama-2-7b-chat-hf"

# Your Fine-tuned model name
new_model2 = "Llama-2-7b-chat-finetune"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

# Data Loading and Transformation

In [None]:
from datasets import load_dataset, DatasetDict

# Load the dataset
dataset = load_dataset("ShenLab/MentalChat16K")

# Split the dataset into train and test
train_test_split = dataset['train'].train_test_split(test_size=0.2)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']


train_valid_split = train_dataset.train_test_split(test_size=0.1)
train_dataset = train_valid_split["train"]
validation_dataset = train_valid_split["test"]

# Combine into a DatasetDict
split_dataset = DatasetDict({
    "train": train_dataset,
    "validation": validation_dataset,
    "test": test_dataset
})

print(split_dataset)
dataset = split_dataset


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/3.58k [00:00<?, ?B/s]

Interview_Data_6K.csv:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

Synthetic_Data_10K.csv:   0%|          | 0.00/32.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16084 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 11580
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 1287
    })
    test: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 3217
    })
})


In [None]:
#function to transform prompt
def transform_example(example):

    instruction = example['instruction'].strip()
    input_text = example['input'].strip() if example['input'] else ""
    output_text = example['output'].strip()


    if input_text:
        formatted_text = f"<s>[INST] {instruction} {input_text} [/INST] {output_text} </s>" #for cases when ther's no INST
    else:
        formatted_text = f"<s>[INST] {instruction} [/INST] {output_text} </s>"

    return {'text': formatted_text}

# transformer function call
transformed_dataset = DatasetDict({
    split: data.map(transform_example, remove_columns=data.column_names)
    for split, data in dataset.items()
})

print(transformed_dataset['train'][0]['text'])

Map:   0%|          | 0/11580 [00:00<?, ? examples/s]

Map:   0%|          | 0/1287 [00:00<?, ? examples/s]

Map:   0%|          | 0/3217 [00:00<?, ? examples/s]

<s>[INST] You are a helpful mental health counselling assistant, please answer the mental health questions based on the patient's description. 
The assistant gives helpful, comprehensive, and appropriate answers to the user's questions. Can you explain more about exposure therapy and whether it could be beneficial for someone with anxiety like me? I've heard mixed opinions about it, and I'm not sure if it would be a suitable approach for my specific situation. [/INST] Exposure therapy is a type of treatment that can be beneficial for individuals with anxiety. It involves gradually exposing yourself to the things or situations that trigger your anxiety in a controlled and safe environment. The goal is to help you become less fearful and anxious over time.

One way exposure therapy works is by helping you confront your fears instead of avoiding them. By facing your anxieties, you have the opportunity to learn that they are not as threatening as they may seem. This process can lead to a r

In [None]:
transformed_dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 11580
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 1287
    })
    test: Dataset({
        features: ['text'],
        num_rows: 3217
    })
})

# Training Pre-trained model for 1 epoch

In [None]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=transformed_dataset["train"],         # train split
    eval_dataset=transformed_dataset["validation"],     # validation split
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train model
trainer.train()



config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]



Map:   0%|          | 0/11580 [00:00<?, ? examples/s]

Map:   0%|          | 0/1287 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  return fn(*args, **kwargs)


Step,Training Loss
25,1.4402
50,1.346
75,1.0475
100,0.899
125,0.9518
150,0.8549
175,0.9188
200,0.8288
225,0.9022
250,0.8226


Step,Training Loss
25,1.4402
50,1.346
75,1.0475
100,0.899
125,0.9518
150,0.8549
175,0.9188
200,0.8288
225,0.9022
250,0.8226


TrainOutput(global_step=2895, training_loss=0.7776352969681867, metrics={'train_runtime': 26722.0525, 'train_samples_per_second': 0.433, 'train_steps_per_second': 0.108, 'total_flos': 1.4908046459731968e+17, 'train_loss': 0.7776352969681867, 'epoch': 1.0})

# Saving Model to Goggle Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
output_dir = "/content/drive/MyDrive/llama-2-7b-chat-finetuned"


In [None]:
!pip install peft




In [None]:
from peft import PeftModel

# PeftModel
peft_model = PeftModel(model, peft_config)

# output directory path
adapter_output_dir = "/content/drive/MyDrive/llama-2-7b-chat-adapter"

# Save the LoRA adapters
peft_model.save_pretrained(adapter_output_dir)
tokenizer.save_pretrained(adapter_output_dir)  # Save tokenizer config as usual


('/content/drive/MyDrive/llama-2-7b-chat-adapter/tokenizer_config.json',
 '/content/drive/MyDrive/llama-2-7b-chat-adapter/special_tokens_map.json',
 '/content/drive/MyDrive/llama-2-7b-chat-adapter/tokenizer.model',
 '/content/drive/MyDrive/llama-2-7b-chat-adapter/added_tokens.json',
 '/content/drive/MyDrive/llama-2-7b-chat-adapter/tokenizer.json')

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

# Load the base model
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the adapter
peft_config = PeftConfig.from_pretrained(adapter_output_dir)
model = PeftModel.from_pretrained(model, adapter_output_dir)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  adapters_weights = torch.load(


In [None]:
import os

adapter_output_dir = "/content/drive/MyDrive/llama-2-7b-chat-adapter"
print("Saved files:", os.listdir(adapter_output_dir))


Saved files: ['README.md', 'adapter_model.bin', 'adapter_config.json', 'tokenizer_config.json', 'special_tokens_map.json', 'added_tokens.json', 'tokenizer.model', 'tokenizer.json']


# Hugging Face Logins & saving Trained model to Hugging Face

In [None]:
!pip install huggingface_hub
from huggingface_hub import HfApi
from huggingface_hub import notebook_login

notebook_login()




VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
repo_id= "DiatWork/llama2-Chat-Mental-Health"
model.push_to_hub(repo_id)
tokenizer.push_to_hub(repo_id)

adapter_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/DiatWork/llama2-Chat-Mental-Health/commit/5893f9cc4fcdd7e863340cd9a3b698333fabcf88', commit_message='Upload tokenizer', commit_description='', oid='5893f9cc4fcdd7e863340cd9a3b698333fabcf88', pr_url=None, pr_revision=None, pr_num=None)

# Load Model from Hugging Face

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_name = "NousResearch/Llama-2-7b-chat-hf"  # Base model
repo_id = "DiatWork/llama2-Chat-Mental-Health"  # Repository with LoRA adapter

# Load the base model and LoRA adapter from Hugging Face
base_model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map="auto", offload_folder="offload")
model = PeftModel.from_pretrained(base_model, repo_id)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)





config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/453 [00:00<?, ?B/s]

adapter_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

  adapters_weights = torch.load(


tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

# Inference

In [None]:
import torch
import textwrap

# Input with Prompt
input_text = "<s>[INST] You are a helpful mental health assistant. I'm very stressed because I have a Job Interview, though I have practiced for it but this is my first interview so I'm anxious. How to deal with this stress? [/INST] "

inputs = tokenizer(input_text, return_tensors="pt").to(model.device). # Tokenize the input

# Generate response
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=400)

decoded_output = tokenizer.decode(output[0], skip_special_tokens=True).strip() # Tokenize the output

# Presentation
if "[INST]" in decoded_output and "[/INST]" in decoded_output:
    question, model_answer = decoded_output.split("[/INST]", 1)
else:
    question, model_answer = decoded_output, ""  # Handle cases where split fails


question = "\n".join(textwrap.wrap(question, width=80))
model_answer = "\n".join(textwrap.wrap(model_answer.strip(), width=80))

print("Question:\n", question)
print("\nModel Answer:\n", model_answer)
print("\n" + "=" * 80 + "\n")




Question:
 [INST] You are a helpful mental health assistant. I'm very stressed because I
have a Job Interview, though I have practiced for it but this is my first
interview so I'm anxious. How to deal with this stress?

Model Answer:
 Hello there! 😊 I understand how you're feeling, and it's completely normal to
feel anxious before a job interview. Here are some tips that may help you manage
your stress and perform your best during the interview:  1. Practice relaxation
techniques: Deep breathing, progressive muscle relaxation, and visualization can
help calm your nerves and reduce stress. You can try these techniques before the
interview to help you relax and focus. 2. Get enough sleep: Lack of sleep can
exacerbate stress and anxiety. Make sure you get enough sleep the night before
the interview to feel rested and refreshed. 3. Stay hydrated: Drink plenty of
water throughout the day to stay hydrated and alert. Avoid caffeine and alcohol
as they can increase anxiety. 4. Exercise: Exerci

In [None]:
# Input with Prompt
input_text = "<s>[INST] You are a helpful mental health assistant. I'm very stressed because I have a group project in last semester of my college and I want to get first honours, I'm scared. Please help me cope with stress. [/INST]"

inputs = tokenizer(input_text, return_tensors="pt").to(model.device). # Tokenize the input

# Generate response
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=200)

decoded_output = tokenizer.decode(output[0], skip_special_tokens=True).strip() # Tokenize the output

# Presentation
if "[INST]" in decoded_output and "[/INST]" in decoded_output:
    question, model_answer = decoded_output.split("[/INST]", 1)
else:
    question, model_answer = decoded_output, ""  # Handle cases where split fails


question = "\n".join(textwrap.wrap(question, width=80))
model_answer = "\n".join(textwrap.wrap(model_answer.strip(), width=80))

print("Question:\n", question)
print("\nModel Answer:\n", model_answer)
print("\n" + "=" * 80 + "\n")



Question:
 [INST] You are a helpful mental health assistant. I'm very stressed because I
have a group project in last semester of my college and I want to get first
honours, I'm scared. Please help me cope with stress.

Model Answer:
 Of course, I'm here to help! It's completely normal to feel stressed during the
final stretch of a semester, especially when it comes to a significant project
like the one you're working on. Here are some strategies that may help you cope
with stress and achieve your goal of getting first honors:  1. Break down the
project into smaller tasks: Sometimes, feeling overwhelmed by a big project can
be a major source of stress. To combat this, try breaking down the project into
smaller, more manageable tasks. This will help you focus on one task at a time
and make progress gradually. 2. Create a schedule: Once you have broken down the
project into smaller tasks, create a schedule that outlines when you will work
on each task. Be realistic about how long each ta

# Evaluation

In [None]:
!pip install evaluate bert_score transformers


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score, evaluate
Successfully installed bert_score-0.3.13 evaluate-0.4.3


In [None]:
dataset['test']

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 3217
})

# Testing on 50% Test Data

In [None]:
import textwrap
import torch
import evaluate

# 50% test data evaluation
test_data = dataset['test']
sample_size = int(0.5* len(test_data))

prompts = []
references = []

# Build prompts and references
    sample = test_data[i]
    instruction = sample['instruction']
    input_text = sample['input']
    output_text = sample['output']

    if input_text:
        prompt = f"<s>[INST] {instruction} {input_text} [/INST]"
    else:
        prompt = f"<s>[INST] {instruction} [/INST]"

    prompts.append(prompt)
    references.append(output_text)  #reference answer





In [None]:
# Generate responses for each prompt and format them
generated_texts = []
for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=True,
            top_p=0.9,
            top_k=50,
            temperature=0.7,
            use_cache=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode the response
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

    # Presentation
    try:
        question, model_answer = decoded_output.split("[/INST]", 1)
    except ValueError:
        question, model_answer = decoded_output, ""


    question = "\n".join(textwrap.wrap(question, width=80))
    model_answer = "\n".join(textwrap.wrap(model_answer, width=80))

    print("Question:\n", question)
    print("\nModel Answer:\n", model_answer)
    print("\n" + "="*80 + "\n")

    # Append decoded output to generated_texts for evaluation
    generated_texts.append(decoded_output)




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
overwhelming emotions, I have been practicing deep breathing exercises,
grounding techniques, and journaling. While they occasionally provide temporary
relief, underlying trauma-related issues persist.  In our counseling sessions, I
would like to learn more about different therapeutic approaches to trauma
healing. How can we work together to process and reframe my experiences? Are
there specific strategies you recommend for managing panic attacks and reducing
hypervigilance?

Model Answer:
   Thank you for sharing your experiences and goals with me. It takes a lot of
courage to seek help and work towards healing from traumatic events. I'm here to
support you every step of the way.  Firstly, it's important to understand that
healing from trauma is a complex and ongoing process. It's not something that
can be fixed overnight, but with the right tools and support, you can learn to
manage your symptoms and reg


Question:
 [I

KeyboardInterrupt: 

In [None]:

!pip install rouge_score
# Initialize evaluation metrics
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

# Calculate Scores
bleu_score = bleu.compute(predictions=generated_texts, references=[[ref] for ref in references])
rouge_score = rouge.compute(predictions=generated_texts, references=references)
bert_score = bertscore.compute(predictions=generated_texts, references=references, lang="en")

# Print Results
print("BLEU Score:", bleu_score)
print("ROUGE Score:", rouge_score)
print("BERTScore (Precision, Recall, F1):", bert_score["precision"], bert_score["recall"], bert_score["f1"])


from huggingface_hub import Repository

repo = Repository(local_dir="./evaluation_results", clone_from="DiatWork/llama2-Chat-Mental-Health")

# Save metrics to JSON file
evaluation_metrics = {
    "ROUGE": rouge_score,
    "BERTScore": bert_score,
    "BLEU": bleu_score,
}

import json

output_file = "evaluation_metrics.json"

# Save the metrics to the file
with open(output_file, "w") as json_file:
    json.dump(evaluation_metrics, json_file, indent=4)



tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/DiatWork/llama2-Chat-Mental-Health into local empty directory.


BLEU Score: {'bleu': 0.0668386227741915, 'precisions': [0.4917284839463828, 0.14870607353239895, 0.06641476173765434, 0.0383716831569542], 'brevity_penalty': 0.5720646438333925, 'length_ratio': 0.6416412545579591, 'translation_length': 247234, 'reference_length': 385315}
ROUGE Score: {'rouge1': 0.37550647846754004, 'rouge2': 0.11139243663537664, 'rougeL': 0.17264640439026438, 'rougeLsum': 0.2975981866428563}
BERTScore: {'precision': [0.8524824380874634, 0.8520378470420837, 0.851635217666626, 0.8400753736495972, 0.8597930669784546, 0.8389649391174316, 0.8346267342567444, 0.8541004657745361, 0.8425173163414001, 0.8515846729278564, 0.8299710750579834, 0.8455315828323364, 0.8561450839042664, 0.8353731632232666, 0.8438079357147217, 0.8452134132385254, 0.8566950559616089, 0.8620609045028687, 0.8577095866203308, 0.8491437435150146, 0.872461199760437, 0.8468649387359619, 0.8443640470504761, 0.871571958065033, 0.8475203514099121, 0.8588418960571289, 0.8314037919044495, 0.8331429958343506, 0.860

Download file adapter_model.bin:   0%|          | 8.00k/128M [00:00<?, ?B/s]

Download file tokenizer.model:   2%|1         | 8.00k/488k [00:00<?, ?B/s]

Clean file tokenizer.model:   0%|          | 1.00k/488k [00:00<?, ?B/s]

Clean file adapter_model.bin:   0%|          | 1.00k/128M [00:00<?, ?B/s]

# Evaluation Results: BERT Avg.

In [None]:
import json
import numpy as np

file_path = '/content/evaluation_metrics.json'

with open(file_path, 'r') as file:
    gemma_data = json.load(file)

# Calculate Avg. BERTScore
bert_score = gemma_data["BERTScore"]

precision_avg = np.mean(bert_score["precision"])
recall_avg = np.mean(bert_score["recall"])
f1_avg = np.mean(bert_score["f1"])

precision_avg, recall_avg, f1_avg


(0.8533380799187945, 0.8477766775557036, 0.8504841806043998)