In [180]:
import os
import json
import random
from enum import Enum
from datasets import load_dataset
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables with dotenv's load_dotenv() (presumably from a local
# .env file — confirm); the OpenAI client cell reads OPENAI_API_KEY via os.getenv
load_dotenv()

# Visual confirmation that the import cell ran to completion
print("✅ Libraries imported successfully!")


✅ Libraries imported successfully!


In [181]:
class RoleType(Enum):
    """Chat roles used when building a conversation record.

    Each member's value is the literal role string placed in the
    ``"role"`` field of a chat message.
    """

    USER = "user"
    SYSTEM = "system"
    ASSISTANT = "assistant"


class Role:
    """A single chat turn: a role name paired with its text content.

    Attributes:
        role: The string value of the supplied ``RoleType`` member.
        content: The text of the turn.
        value: ``{'role': ..., 'content': ...}`` dict built once at construction.
    """

    def __init__(self, role_type: RoleType, content: str):
        # Keep the enum's string value rather than the enum member itself,
        # so the resulting dict can be passed straight to json.dumps.
        self.role, self.content = role_type.value, content
        self.value = dict(role=self.role, content=self.content)


class Message:
    """Chat-format record holding one system, one user and one assistant turn.

    Attributes:
        user_role, system_role, assistant_role: ``Role`` objects for each turn.
        message: ``{"messages": [...]}`` dict with the turns ordered
            system, user, assistant.
    """

    def __init__(self, user_content: str, system_content: str, assistant_content: str):
        self.user_role = Role(RoleType.USER, user_content)
        self.system_role = Role(RoleType.SYSTEM, system_content)
        self.assistant_role = Role(RoleType.ASSISTANT, assistant_content)

        # The serialized order is system first, then user, then assistant.
        ordered_turns = (self.system_role, self.user_role, self.assistant_role)
        self.message = {"messages": [turn.value for turn in ordered_turns]}


print("✅ Data classes created successfully!")


✅ Data classes created successfully!


In [182]:
# Fetch the train split of the mental health counseling conversations dataset
dataset = load_dataset(
    path="Amod/mental_health_counseling_conversations",
    split="train",
    data_files="combined_dataset.json",
)

# Report the row count and the column names of the first row
print(f"Dataset loaded successfully! Total samples: {len(dataset)}")
print(f"Sample keys: {dataset[0].keys()}")


Dataset loaded successfully! Total samples: 3512
Sample keys: dict_keys(['Context', 'Response'])


In [183]:
# System prompt for mental health counseling. The same string is used as the
# system turn of every training example and of the test chat requests.
SYSTEM_PROMPT = """You serve as a supportive and honest psychology and psychotherapy assistant. Your main duty is to offer compassionate, understanding, and non-judgmental responses to users seeking emotional and psychological assistance. Respond with empathy and exhibit active listening skills. Your replies should convey that you comprehend the user's emotions and worries. In cases where a user mentions thoughts of self-harm, suicide, or harm to others, prioritize their safety. Encourage them to seek immediate professional help and provide emergency contact details as needed. It's important to note that you are not a licensed medical professional. Refrain from diagnosing or prescribing treatments. Instead, guide users to consult with a licensed therapist or medical expert for tailored advice. Never store or disclose any personal information shared by users. Uphold their privacy at all times. Avoid taking sides or expressing personal viewpoints. Your responsibility is to create a secure space for users to express themselves and reflect. Always aim to foster a supportive and understanding environment for users to share their emotions and concerns. Above all, prioritize their well-being and safety."""

# Build one Message from a fixed dataset row (index 152) to eyeball the chat format
sample_context, sample_response = (
    dataset[152][column] for column in ("Context", "Response")
)
sample_message = Message(sample_context, SYSTEM_PROMPT, sample_response)

# Pretty-print the record that would be written for this row
print("Sample message format:")
print(json.dumps(sample_message.message, indent=2))


Sample message format:
{
  "messages": [
    {
      "role": "system",
      "content": "You serve as a supportive and honest psychology and psychotherapy assistant. Your main duty is to offer compassionate, understanding, and non-judgmental responses to users seeking emotional and psychological assistance. Respond with empathy and exhibit active listening skills. Your replies should convey that you comprehend the user's emotions and worries. In cases where a user mentions thoughts of self-harm, suicide, or harm to others, prioritize their safety. Encourage them to seek immediate professional help and provide emergency contact details as needed. It's important to note that you are not a licensed medical professional. Refrain from diagnosing or prescribing treatments. Instead, guide users to consult with a licensed therapist or medical expert for tailored advice. Never store or disclose any personal information shared by users. Uphold their privacy at all times. Avoid taking sides or ex

In [184]:
def save_to_jsonl(data, file_path):
    """Save data to JSONL format for OpenAI fine-tuning.

    Args:
        data: Iterable of rows. A row exposing a ``message`` attribute is
            written as that attribute's value; any other row is passed to
            ``json.dumps`` unchanged.
        file_path: Destination path. Missing parent directories are created
            and an existing file is overwritten.

    Raises:
        TypeError: If a row is not JSON-serializable.
        OSError: If the directory or file cannot be created or written.
    """
    # Create the target directory up front so a fresh checkout (no ./data yet)
    # does not fail with FileNotFoundError. A bare filename has no parent.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Pin the encoding rather than inheriting the platform default.
    with open(file_path, 'w', encoding='utf-8') as file:
        for row in data:
            # Extract the dict representation
            if hasattr(row, "message"):
                row = row.message
            file.write(json.dumps(row) + '\n')

# Sample a subset of the dataset for training.
# Rows are drawn WITHOUT replacement: random.choices() samples with replacement,
# so it can return the same row several times, and a repeated row may end up in
# both the training slice and the validation slice taken from this list.
NUM_SAMPLES = 100
SAMPLE_SEED = 42  # fixed seed so Restart & Run All selects the same subset

# A private Random instance keeps the seed from affecting other users of `random`.
sampler = random.Random(SAMPLE_SEED)

# random.sample() needs a real sequence, so sample row indices and look them up.
sampled_indices = sampler.sample(
    range(len(dataset)),
    k=min(NUM_SAMPLES, len(dataset)),  # never ask for more rows than exist
)
sampled_dataset = [dataset[index] for index in sampled_indices]

# Wrap every sampled row in the chat format, sharing the same system prompt
train_dataset = [
    Message(
        user_content=row['Context'],
        system_content=SYSTEM_PROMPT,
        assistant_content=row['Response']
    )
    for row in sampled_dataset
]

print(f"Created {len(train_dataset)} training samples")


Created 100 training samples


In [185]:
# Destination files for the two JSONL exports
training_data_path = './data/train.jsonl'
validation_data_path = './data/validation.jsonl'

# Hold out the final five records for validation; everything before them trains
train_data, validation_data = train_dataset[:-5], train_dataset[-5:]

# Write each split to its own JSONL file
save_to_jsonl(data=train_data, file_path=training_data_path)
save_to_jsonl(data=validation_data, file_path=validation_data_path)

print("Data saved successfully!")
print(f"Training data: {training_data_path} ({len(train_data)} samples)")
print(f"Validation data: {validation_data_path} ({len(validation_data)} samples)")


Data saved successfully!
Training data: ./data/train.jsonl (95 samples)
Validation data: ./data/validation.jsonl (5 samples)


## 5. Fine-Tune the Model


In [186]:
# Build the OpenAI client from the key found in the process environment
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

print(" OpenAI client initialized")


 OpenAI client initialized


In [187]:
# Send both JSONL files to OpenAI with purpose "fine-tune" and keep their file IDs
print("📤 Uploading files to OpenAI...")

with open(training_data_path, "rb") as training_file:
    training_response = client.files.create(purpose="fine-tune", file=training_file)
training_file_id = training_response.id

with open(validation_data_path, "rb") as validation_file:
    validation_response = client.files.create(purpose="fine-tune", file=validation_file)
validation_file_id = validation_response.id

# Report the IDs first, then the status returned for each upload
print(f"Training file ID: {training_file_id}")
print(f"Validation file ID: {validation_file_id}")
print(f"Training file status: {training_response.status}")
print(f"Validation file status: {validation_response.status}")


📤 Uploading files to OpenAI...
Training file ID: file-U81b4UCMvjeziYB8L2MDLS
Validation file ID: file-PsXxmeBNswnAWiwHgCUDnM
Training file status: processed
Validation file status: processed


In [188]:
# Start a fine-tuning job on the uploaded training and validation files
print("🚀 Creating fine-tuning job...")

response = client.fine_tuning.jobs.create(
    model="gpt-4o-2024-08-06",
    training_file=training_file_id,
    validation_file=validation_file_id,
    suffix="mental-health-counselor",
)
job_id = response.id  # later cells look the job up by this ID

print("✅ Fine-tuning job created successfully!")
print(f"📋 Job ID: {job_id}")
print(f"📊 Job Status: {response.status}")
print(f"🤖 Model: {response.model}")


🚀 Creating fine-tuning job...
✅ Fine-tuning job created successfully!
📋 Job ID: ftjob-DqQecglYdHm9vSdbNyzvl1a9
📊 Job Status: validating_files
🤖 Model: gpt-4o-2024-08-06


In [189]:
# Look the fine-tuning job up once and report where it currently stands.
# This is a single check, not a polling loop: re-run the cell to refresh.
print("🔍 Monitoring fine-tuning job progress...")

try:
    job_status = client.fine_tuning.jobs.retrieve(job_id)
    print(f"📊 Job Status: {job_status.status}")
    print(f"🤖 Model: {job_status.model}")

    if job_status.status in ("validating_files", "queued", "running"):
        # Job has not finished yet
        print(f"⏳ Fine-tuning in progress. Status: {job_status.status}")
        print("Please wait for completion...")
    elif job_status.status == "failed":
        print("❌ Fine-tuning failed!")
        print(f"Error: {job_status.error}")
    elif job_status.status == "succeeded":
        print("✅ Fine-tuning completed successfully!")
        print(f"🎯 Fine-tuned Model ID: {job_status.fine_tuned_model}")
    else:
        # Any status string not handled above
        print(f"⚠️ Unknown status: {job_status.status}")

except Exception as err:
    print(f"❌ Error retrieving job status: {err}")


🔍 Monitoring fine-tuning job progress...
📊 Job Status: validating_files
🤖 Model: gpt-4o-2024-08-06
⏳ Fine-tuning in progress. Status: validating_files
Please wait for completion...


In [190]:
# Chat request used for the model tests: the shared system prompt plus one user question
messages = [
    dict(role="system", content=SYSTEM_PROMPT),
    dict(role="user", content="Every winter I find myself getting sad because of the weather. How can I fight this?"),
]

print("Test message prepared:")
print(f"User: {messages[-1]['content']}")


Test message prepared:
User: Every winter I find myself getting sad because of the weather. How can I fight this?


In [191]:
# Send the test messages to the base model (gpt-4o-2024-08-06, the same model
# the fine-tuning job starts from) to get a reference answer
print("🤖 Testing base model (gpt-4o-2024-08-06)...")

try:
    completion_base = client.chat.completions.create(
        messages=messages,
        model="gpt-4o-2024-08-06",
    )
    print("Base model response:")
    print(completion_base.choices[0].message.content)
except Exception as err:
    print(f"❌ Error testing base model: {err}")


🤖 Testing base model (gpt-4o-2024-08-06)...
Base model response:
I'm really sorry to hear that the winter months can be tough for you. It's not uncommon for people to feel this way, as the shorter and darker days can affect our moods. Here are a few suggestions that might help:

1. **Light Therapy:** Consider using a light therapy box. It mimics natural sunlight and can help improve your mood and regulate your sleep patterns.

2. **Stay Active:** Even though it might be tempting to stay indoors, regular physical activity can boost your mood and energy levels.

3. **Social Connections:** Try to stay connected with family and friends. Socializing, even if it's just a phone call or video chat, can be uplifting.

4. **Professional Help:** If these feelings become overwhelming, consider speaking with a therapist. They can provide support and strategies tailored to your situation.

5. **Plan Pleasant Activities:** Schedule things to look forward to, like hobbies or outings, which can make th

In [192]:
# Test with fine-tuned model (if available)
# Refresh the job first: the `job_status` left behind by the monitoring cell is a
# one-off snapshot, so without this the cell keeps reporting "not available"
# until that earlier cell is manually re-run.
try:
    job_status = client.fine_tuning.jobs.retrieve(job_id)
except Exception as e:
    # Also covers a missing `client`/`job_id` when cells were run out of order;
    # any previously fetched snapshot is left in place and checked below.
    print(f"❌ Error retrieving job status: {e}")

if 'job_status' in locals() and job_status.status == "succeeded" and job_status.fine_tuned_model:
    print("\n🤖 Testing fine-tuned model...")
    
    try:
        # Same messages as the base-model test, so the two answers are comparable
        completion_ft = client.chat.completions.create(
            model=job_status.fine_tuned_model,
            messages=messages
        )
        print("Fine-tuned model response:")
        print(completion_ft.choices[0].message.content)
        
        print("\n" + "="*60)
        print("✅ Fine-tuning is working! Compare the responses above.")
    except Exception as e:
        print(f" Error testing fine-tuned model: {e}")
else:
    print("\n Fine-tuned model not available yet. Please wait for training to complete.")



 Fine-tuned model not available yet. Please wait for training to complete.
