## Install required libraries

In [1]:
!pip install trl==0.6.0 transformers==4.32.0 accelerate==0.12.0 peft==0.5.0 -Uqqq
!pip install datasets==2.13.1 bitsandbytes==0.41.1 einops==0.7.0 wandb==0.15.8 -Uqqq

!pip install -q  sentencepiece accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 tensorboard
import os, torch, logging
import pandas as pd
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM,BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

## Importing libraries

In [2]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer
import warnings
warnings.filterwarnings("ignore")


## Model Training

In [3]:
model_name = "ybelkada/falcon-7b-sharded-bf16" # sharded falcon-7b model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,            # load model in 4-bit precision
    bnb_4bit_quant_type="nf4",    # pre-trained model should be quantized in 4-bit NF format
    bnb_4bit_use_double_quant=True, # Using double quantization as mentioned in QLoRA paper
    bnb_4bit_compute_dtype=torch.bfloat16, # During computation, pre-trained model should be loaded in BF16 format
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config, # Use bitsandbytes config
    device_map="auto",  # Specifying device_map="auto" so that HF Accelerate will determine which GPU to put each layer of the model on
    trust_remote_code=True, # Set trust_remote_code=True to use falcon-7b model with custom code
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) # Set trust_remote_code=True
tokenizer.pad_token = tokenizer.eos_token # Setting pad_token same as eos_token

## Inference Pipeline

In [5]:
# Loading original model
model_name = "ybelkada/falcon-7b-sharded-bf16"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [6]:
# Loading PEFT model
PEFT_MODEL = "heliosbrahma/falcon-7b-sharded-bf16-finetuned-mental-health-conversational"

config = PeftConfig.from_pretrained(PEFT_MODEL)
peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

peft_model = PeftModel.from_pretrained(peft_base_model, PEFT_MODEL)

peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
peft_tokenizer.pad_token = peft_tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [7]:
import torch

def generate_peft_response(query, peft_model, peft_tokenizer):
    system_prompt = """Answer the following question truthfully.
    If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
    If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""

    user_prompt = f"""<HUMAN>: {query}
    <ASSISTANT>: """

    final_prompt = system_prompt + "\n" + user_prompt

    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    peft_encoding = peft_tokenizer(final_prompt, return_tensors="pt").to(device)
    peft_outputs = peft_model.generate(
        input_ids=peft_encoding.input_ids,
        max_length=256,
        temperature=0.4,
        top_p=0.6,
        repetition_penalty=1.3,
        num_return_sequences=1,
        pad_token_id=peft_tokenizer.eos_token_id,
        eos_token_id=peft_tokenizer.eos_token_id,
        attention_mask=peft_encoding.attention_mask,
    )
    peft_text_output = peft_tokenizer.decode(peft_outputs[0], skip_special_tokens=True)

    return peft_text_output


In [8]:
# Now, you can use this function to get the PEFT model's response
query = "I feel like traveling up country, what should I do?"
peft_response = generate_peft_response(query, peft_model, peft_tokenizer)
print(peft_response)


Answer the following question truthfully.
    If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
    If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.
<HUMAN>: I feel like traveling up country, what should I do?
    <ASSISTANT>:  It's great that you feel like exploring new places and experiencing different cultures. Taking trips can be beneficial for your mental health, as it allows you to explore new environments, experiences, and perspectives. 

If you're feeling overwhelmed or unsure how to approach planning such an adventure, reach out to a travel therapist. They can help you navigate through the decision-making process and provide personalized recommendations based on your specific needs and desires. 

Additionally, consider reaching out to a psychiatric counselor who can assist you in dealing with any emotional challenges that may arise during the preparation phase. Remember, seeking professional 