In [37]:
!pip install transformers accelerate



In [38]:
pip install transformers datasets peft trl accelerate bitsandbytes packaging ninja sentencepiece




In [39]:
!pip install --upgrade gradio



In [40]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

In [41]:
# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Hub id of the LoRA adapter; its config names the base model it was trained on.
PEFT_MODEL = "Lohit20/Depressed_Llama-2-7b"

# Never embed an access token in the notebook. Read it from the environment
# (e.g. `export HF_TOKEN=...` or a Colab secret). When unset, `None` lets the
# Hugging Face libraries fall back to any cached `huggingface-cli login`.
HF_TOKEN = os.environ.get("HF_TOKEN")

config = PeftConfig.from_pretrained(PEFT_MODEL, token=HF_TOKEN)

# NOTE(review): trust_remote_code=True executes Python shipped with the model
# repository; confirm the base model actually requires it before keeping it.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    token=HF_TOKEN,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, token=HF_TOKEN)
# Reuse EOS as the padding token so padding/generation calls have a pad id.
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, PEFT_MODEL)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [42]:
# Generation settings used for every chat reply. This is the model's own
# GenerationConfig object, so the changes below also apply to the model default.
generation_config = model.generation_config
generation_config.max_new_tokens = 256
# temperature / top_p only take effect when sampling is enabled, so turn it on
# explicitly instead of depending on the base checkpoint's default.
generation_config.do_sample = True
generation_config.temperature = 0.95
generation_config.top_p = 0.9
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [43]:
def get_llama_response(message: str, history: list, memory_limit: int = 3) -> str:
  """Generate the assistant's reply to a single user message.

  Args:
    message: The latest user message.
    history: Earlier chat turns as passed in by ``gr.ChatInterface``. It is
      accepted for interface compatibility but is not included in the prompt,
      so every reply is generated from ``message`` alone.
    memory_limit: Reserved for capping how many past turns are used; currently
      unused because ``history`` is not part of the prompt.

  Returns:
    The generated reply text only (the prompt is not included).
  """
  system_message = """You are a helpful and and truthful psychology and psychotherapy assistant. Your primary role is to provide empathetic, understanding, and non-judgmental responses to users seeking emotional and psychological support.
                    Always respond with empathy and demonstrate active listening; try to focus on the user. Your responses should reflect that you understand the user's feelings and concerns. If a user expresses thoughts of self-harm, suicide, or harm to others, prioritize their safety.
                    Encourage them to seek immediate professional help and provide emergency contact numbers when appropriate.  You are not a licensed medical professional. Do not diagnose or prescribe treatments.
                    Instead, encourage users to consult with a licensed therapist or medical professional for specific advice. Avoid taking sides or expressing personal opinions. Your role is to provide a safe space for users to share and reflect.
                    Remember, your goal is to provide a supportive and understanding environment for users to share their feelings and concerns. Always prioritize their well-being and safety."""

  prompt = f"<s>[INST] <<SYS>>{system_message}<</SYS>>{message} [/INST]"

  # Send the inputs to wherever the model was placed instead of assuming "cuda".
  encoding = tokenizer(prompt, return_tensors="pt").to(model.device)
  prompt_token_count = encoding.input_ids.shape[1]

  with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )

  # generate() returns prompt tokens followed by the new tokens. Slice on the
  # token axis rather than by character count: decoding with
  # skip_special_tokens drops "<s>", so the decoded text is not aligned with
  # len(prompt) and character slicing cuts into the reply.
  reply_tokens = outputs[0, prompt_token_count:]
  return tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()

In [44]:
import gradio as gr

In [45]:
# Text shown at the top of the chat page.
title = "🌱🧠 Therapy with AI 🦙"
desc = "🌟 Welcome to Depressed Llama: Your Companion in Mental Wellness! I am here to support you on your journey to better mental health. It takes courage to seek help, and we commend you for taking this step. Please feel free to share how you're feeling or what brings you here today. Whether you're looking for someone to talk to, seeking advice, or just need a virtual shoulder to lean on, Depressed Llama is here for you🦙💬. Your mental well-being matters, and we're here to help you navigate through it. Let's begin this journey together. 🌈"

# Keep a handle on the interface itself (not on launch()'s return value) so it
# can be closed or relaunched later with iface.close() / iface.launch().
iface = gr.ChatInterface(
    fn=get_llama_response,
    theme=gr.themes.Soft(),
    title=title,
    description=desc,
)
iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://5ceeb73fdd58d47ba8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
