<a href="https://colab.research.google.com/github/RiazMahmood6/AI-Therapist/blob/main/Finetuned_AI_Therapist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Dataset**

In [None]:
import pandas as pd
import requests
import json

In [None]:
# Mount Google Drive at /content/drive so the dataset CSVs stored there can be read.
# The google.colab module is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the train and validation splits of the therapist fine-tuning dataset
# from the mounted Drive folder.
train_df = pd.read_csv('/content/drive/My Drive/BD Calling/dataset/finetune_therapist_combined_train.csv')
valid_df = pd.read_csv('/content/drive/My Drive/BD Calling/dataset/finetune_therapist_combined_valid.csv')

In [None]:
# Preview the first rows of the training split (columns: prompt, completion).
print(train_df.head())

                                              prompt             completion
0  do you think it is well I hope that through ou...  Questions on Emotions
1  it it seems like I've used to this question wh...  Questions on Emotions
2  I love that analogy good thinking good thinkin...  Questions on Emotions
3  okay so we've talked about where you are in te...  Questions on Emotions
4  Hello, how are you feeling today? What brings ...  Questions on Emotions


In [None]:
# Preview the first rows of the validation split (columns: prompt, completion).
print(valid_df.head())

                                              prompt             completion
0       What is the feeling? Are you feeling it now?  Questions on Emotions
1  these are great ideas Nathan you've told me qu...  Questions on Emotions
2  What was it like for you to hear me say that t...  Questions on Emotions
3  well marijuana is a drug that affects learning...        Psychoeducation
4  okay well I will show you what I normally go t...        Psychoeducation


In [None]:
# Load the therapist prompt definitions (intent, definition, positive examples)
# from the same Drive folder as the train/validation splits.
prompts_df = pd.read_csv('/content/drive/My Drive/BD Calling/dataset/therapist_prompts.csv')

In [None]:
# Preview the first rows of the therapist prompt definitions.
print(prompts_df.head())

                        intent  \
0         Makes Needs Explicit   
1      Makes Emotions Explicit   
2        Makes Values Explicit   
3  Makes Consequences Explicit   
4      Makes Conflict Explicit   

                                          definition  \
0  Identifies an implied or background need for t...   
1  Identifies an implied or background emotion fo...   
2  Identifies an implied or background value or s...   
3  Identifies consequences the client experience ...   
4  Identifies an implied or background emotional ...   

                                  positive example 1  \
0  So, having a dependable and efficient means of...   
1  So, you are relieved and excited about finishi...   
2  Being financially secure is a top priority for...   
3  Whenever you overspend on luxury items, you st...   
4  You're striving to improve your health, but yo...   

                                  positive example 2  \
0  It sounds like you've realized that maintainin...   
1  So it 

# **Model**

In [None]:
import pandas as pd
import requests
import json
import os
import time
from tqdm import tqdm

In [None]:
# The OpenAI client is an optional dependency. OPENAI_AVAILABLE records whether
# the import succeeded so the OpenAI code path can be switched off without it.
try:
    from openai import OpenAI
except ImportError:
    OPENAI_AVAILABLE = False
    print("OpenAI package not installed. Only DeepSeek R1 will be available.")
else:
    OPENAI_AVAILABLE = True

# Load the datasets
def load_datasets(base_path="/content/drive/My Drive/BD Calling/dataset/"):
    """Load the train, validation and therapist-prompt CSV files.

    Args:
        base_path: Directory that contains the three CSV files. A trailing
            path separator is optional.

    Returns:
        Tuple ``(train_df, valid_df, prompts_df)`` of pandas DataFrames.

    Raises:
        Whatever ``pd.read_csv`` raises when a file is missing or unreadable.
    """
    # Join path components instead of concatenating strings so that a
    # base_path without a trailing slash still points at the right files.
    train_df = pd.read_csv(os.path.join(base_path, 'finetune_therapist_combined_train.csv'))
    valid_df = pd.read_csv(os.path.join(base_path, 'finetune_therapist_combined_valid.csv'))
    prompts_df = pd.read_csv(os.path.join(base_path, 'therapist_prompts.csv'))

    print(f"Training examples: {len(train_df)}")
    print(f"Validation examples: {len(valid_df)}")
    print(f"Therapy prompt types: {len(prompts_df)}")

    return train_df, valid_df, prompts_df

# Create a therapist system prompt from the prompts dataset
def create_therapist_system_prompt(prompts_df):
    """
    Assemble the therapist system prompt from the prompt-definition table.

    Every row of ``prompts_df`` contributes one ``## <intent>`` section holding
    its definition followed by the non-blank string values found in the three
    ``positive example`` columns. A fixed list of general guidelines is
    appended after the last section.
    """
    example_columns = ('positive example 1', 'positive example 2', 'positive example 3')

    closing_guidelines = """
Additionally:
- Practice active listening and reflection
- Validate the client's feelings without judgment
- Ask open-ended questions to encourage exploration
- Maintain professional boundaries
- Focus on the client's needs and goals
- Provide a safe and supportive environment
- Respect confidentiality and privacy
"""

    # Collect the pieces and join once at the end.
    pieces = ["You are an empathetic and skilled AI therapist. Your responses should follow these guidelines:\n\n"]

    for _, record in prompts_df.iterrows():
        heading = f"## {record['intent']}\n"
        definition_text = f"{record['definition']}\n\n"
        candidates = [record[column] for column in example_columns]

        # Missing cells (NaN/None) and whitespace-only strings are not listed.
        bullets = "".join(
            f"- {candidate}\n"
            for candidate in candidates
            if isinstance(candidate, str) and candidate.strip()
        )

        pieces.extend((heading, definition_text, "Positive examples:\n", bullets, "\n"))

    pieces.append(closing_guidelines)
    return "".join(pieces)

# DeepSeek R1 API Implementation
def therapist_deepseek(prompt, api_key, system_message, timeout=60):
    """
    Get a therapeutic response from DeepSeek R1 via OpenRouter.

    Args:
        prompt: The user message to send.
        api_key: OpenRouter API key, sent as a Bearer token.
        system_message: Optional system prompt; omitted from the request when
            empty or None.
        timeout: Seconds to wait for the HTTP request before giving up, so an
            unresponsive endpoint cannot block the session forever.

    Returns:
        The model's reply text. On any failure (non-200 status, network error,
        malformed or empty reply) a string starting with ``"Error:"`` is
        returned instead of raising, so callers can test the prefix.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        # App-attribution headers sent along with the OpenRouter request.
        "HTTP-Referer": "https://localhost:8888",
        "X-Title": "AI Therapist Application"
    }

    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})

    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": "deepseek/deepseek-r1",
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 800
    }

    # Defined before the try block so the except branch can tell whether a
    # response object exists without inspecting locals().
    response = None
    try:
        # Never echo any part of the API key: notebook output is saved and
        # shared together with the file.
        print("Sending request to OpenRouter...")
        # json= lets requests serialise the payload itself.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)

        if response.status_code != 200:
            print(f"Error status code: {response.status_code}")
            print(f"Error response: {response.text}")
            return f"Error: Unable to get response. Status code: {response.status_code}. Please check your API key."

        content = response.json()["choices"][0]["message"]["content"]
        # The API can return a null/blank content field; report that as an
        # error string rather than handing None to callers that expect text.
        if not isinstance(content, str) or not content.strip():
            return "Error: DeepSeek R1 returned an empty response."
        return content
    except Exception as e:
        print(f"Exception: {str(e)}")
        if response is not None and hasattr(response, 'text'):
            print(f"Response: {response.text}")
        return f"Error: Unable to get response from DeepSeek R1. {str(e)}"

# OpenAI API Implementation - will be used when you have credits
def therapist_openai(prompt, api_key, system_message, model="gpt-3.5-turbo"):
    """
    Get a therapeutic response from an OpenAI chat model.

    Args:
        prompt: The user message to send.
        api_key: OpenAI API key.
        system_message: Optional system prompt; omitted from the request when
            empty or None (same rule as ``therapist_deepseek``).
        model: Name of the OpenAI chat model to query.

    Returns:
        The model's reply text. If the ``openai`` package or the key is
        missing, a fixed notice string is returned. Any failure during the
        request yields a string starting with ``"Error:"`` instead of raising.
    """
    if not OPENAI_AVAILABLE or not api_key:
        return "OpenAI API key not available or package not installed."

    # Only include a system turn when there is actual content for it;
    # a None/empty system message would otherwise be sent to the API.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": prompt})

    try:
        client = OpenAI(api_key=api_key)

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.7,
            max_tokens=800
        )

        content = response.choices[0].message.content
        # message.content may be None; hand callers a string in every case.
        if not isinstance(content, str) or not content.strip():
            return "Error: OpenAI returned an empty response."
        return content
    except Exception as e:
        return f"Error: Unable to get response from OpenAI. {str(e)}"

# Prepare data for OpenAI fine-tuning (for future use)
def prepare_openai_finetune_data(train_df, valid_df, output_dir="."):
    """
    Write the train/validation frames as chat-format JSONL files.

    Each usable row becomes one line of the form
    ``{"messages": [{"role": "user", ...}, {"role": "assistant", ...}]}``
    built from the ``prompt`` and ``completion`` columns.

    Args:
        train_df: DataFrame with ``prompt`` and ``completion`` columns.
        valid_df: DataFrame with ``prompt`` and ``completion`` columns.
        output_dir: Directory for the output files; created if missing.

    Returns:
        Tuple ``(train_file_path, valid_file_path)``.

    Raises:
        KeyError: if a frame lacks the ``prompt`` or ``completion`` column.
    """
    output_dir = output_dir or "."
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    train_file_path = os.path.join(output_dir, "train_data.jsonl")
    valid_file_path = os.path.join(output_dir, "valid_data.jsonl")

    def write_jsonl(df, path):
        """Write one JSON object per usable row; return how many rows were skipped."""
        # Rows with a missing prompt or completion would serialise as NaN,
        # which is not valid message content, so they are left out.
        usable = df.dropna(subset=["prompt", "completion"])
        with open(path, "w", encoding="utf-8") as f:
            for prompt, completion in zip(usable["prompt"], usable["completion"]):
                record = {
                    "messages": [
                        # str() keeps json.dumps from failing on numeric cells.
                        {"role": "user", "content": str(prompt)},
                        {"role": "assistant", "content": str(completion)}
                    ]
                }
                f.write(json.dumps(record) + "\n")
        return len(df) - len(usable)

    skipped_train = write_jsonl(train_df, train_file_path)
    skipped_valid = write_jsonl(valid_df, valid_file_path)

    print("Data prepared for fine-tuning:")
    print(f"- Training file: {train_file_path}")
    print(f"- Validation file: {valid_file_path}")
    if skipped_train or skipped_valid:
        print(f"- Skipped rows with missing prompt/completion: {skipped_train} train, {skipped_valid} valid")

    return train_file_path, valid_file_path

# Interactive AI Therapist Interface
# Upper bound on follow-up requests issued when a reply looks cut off.
_MAX_CONTINUATIONS = 2


def _ends_with_complete_sentence(text):
    """Return True if text ends with '.', '?' or '!', ignoring trailing quotes, brackets and markdown markers."""
    trimmed = text.strip().rstrip("\"'”’)]*_`")
    return trimmed.rstrip().endswith(('.', '?', '!'))


def _clean_response(text):
    """Drop ``**...**`` label spans and collapse runs of blank lines into single newlines."""
    import re

    without_labels = re.sub(r"\*\*(.*?)\*\*", "", text)
    return re.sub(r"\n{2,}", "\n", without_labels).strip()


def run_interactive_therapist():
    """
    Run an interactive console session with the AI therapist.

    Builds the system prompt from the datasets (falling back to a built-in
    prompt if loading fails), asks for the API keys without echoing them,
    verifies the DeepSeek connection, and then loops over user input.
    'exit' ends the session; 'switch' toggles between DeepSeek R1 and OpenAI
    when an OpenAI key is available. Each user turn sends exactly one request
    to the selected backend, plus at most ``_MAX_CONTINUATIONS`` follow-up
    requests when the reply appears to be cut off mid-sentence.
    """
    import getpass

    # Load datasets
    try:
        _, _, prompts_df = load_datasets()
        system_prompt = create_therapist_system_prompt(prompts_df)
        print("System prompt created successfully.")
    except Exception as e:
        print(f"Error loading datasets: {str(e)}")
        print("Using a default system prompt instead.")
        system_prompt = """
        You are an empathetic and skilled AI therapist. Your responses should:
        - Practice active listening and reflection
        - Validate the client's feelings without judgment
        - Ask open-ended questions to encourage exploration
        - Maintain professional boundaries
        - Focus on the client's needs and goals
        - Provide a safe and supportive environment
        """

    print("\n" + "="*50)
    print("AI Therapist Interactive Session")
    print("="*50)
    print("Type 'exit' to end the session.")
    print("Type 'switch' to toggle between DeepSeek R1 and OpenAI (if available).")
    print("First, enter your OpenRouter API key:")

    # getpass keeps both secrets out of the saved notebook output.
    openrouter_api_key = getpass.getpass("OpenRouter API key: ").strip()

    print("Enter your OpenAI API key (or leave blank to skip):")
    openai_api_key = getpass.getpass("OpenAI API key: ").strip()

    # OpenAI is usable only if the package imported and a key was supplied.
    openai_available = OPENAI_AVAILABLE and openai_api_key != ""

    using_openai = False
    current_model = "DeepSeek R1"

    def ask(text):
        """Send text to the currently selected backend; always return a string."""
        if using_openai and openai_available:
            reply = therapist_openai(text, openai_api_key, system_prompt)
        else:
            reply = therapist_deepseek(text, openrouter_api_key, system_prompt)
        return reply if isinstance(reply, str) else "Error: The model returned no text."

    # Test DeepSeek R1 with a sample prompt
    print("\nTesting connection to DeepSeek R1...")
    test_prompt = "Hello, can you hear me?"
    response = therapist_deepseek(test_prompt, openrouter_api_key, "You are a helpful assistant.")

    if not isinstance(response, str) or response.startswith("Error:"):
        print(f"Connection test failed: {response}")
        print("Please check your API key and try again.")
        return
    else:
        print("Connection successful! You can now start your therapy session.")

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted input ends the session like 'exit' does.
            print("\nThank you for using AI Therapist. Goodbye!")
            break

        command = user_input.lower()

        if command == 'exit':
            print("\nThank you for using AI Therapist. Goodbye!")
            break

        if command == 'switch':
            if not openai_available:
                print("\nOpenAI is not available. Please add your API key first.")
                continue

            using_openai = not using_openai
            current_model = "OpenAI GPT" if using_openai else "DeepSeek R1"
            print(f"\nSwitched to {current_model}")
            continue

        if not user_input:
            # Nothing to send; do not spend an API call on an empty message.
            continue

        print(f"\nThinking... ({current_model})")

        # One request per turn, routed to whichever backend is selected.
        response = ask(user_input)

        # The backends are single-turn, so a continuation request has to carry
        # the original message and the partial reply itself. The loop is
        # bounded and stops on errors, so it cannot spin on API calls.
        for _ in range(_MAX_CONTINUATIONS):
            if response.startswith("Error:") or _ends_with_complete_sentence(response):
                break
            continuation_prompt = (
                f"The client said:\n{user_input}\n\n"
                f"Your reply so far was cut off:\n{response}\n\n"
                "Continue the reply from exactly where it stopped, without repeating earlier text."
            )
            next_response = ask(continuation_prompt)
            if next_response.startswith("Error:"):
                break
            response += " " + next_response

        response = _clean_response(response)

        print(f"\nAI Therapist: {response}")

# Entry point: start the interactive session when this code is executed as the
# main module (which is the case when the cell is run in a notebook kernel).
if __name__ == "__main__":
    run_interactive_therapist()

Training examples: 372
Validation examples: 97
Therapy prompt types: 13
System prompt created successfully.

AI Therapist Interactive Session
Type 'exit' to end the session.
Type 'switch' to toggle between DeepSeek R1 and OpenAI (if available).
First, enter your OpenRouter API key:
OpenRouter API key: ··········
Enter your OpenAI API key (or leave blank to skip):
OpenAI API key: 

Testing connection to DeepSeek R1...
Sending request to OpenRouter with API key: sk-or...dc7aa
Connection successful! You can now start your therapy session.

You: my result of exam is so bad. i am scard how can i go home?

Thinking... (DeepSeek R1)
Sending request to OpenRouter with API key: sk-or...dc7aa
Sending request to OpenRouter with API key: sk-or...dc7aa
Sending request to OpenRouter with API key: sk-or...dc7aa

AI Therapist:   
"It sounds like you're feeling really scared and anxious about sharing your exam results with your family. That’s a heavy weight to carry."  
  
"Part of you might want to be