In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import joblib

In [2]:
# Step 1: Define the LogisticRegressionTorch class (same as during training)
class LogisticRegressionTorch(nn.Module):
    """Logistic regression as a single linear unit followed by a sigmoid.

    The layer attribute is named ``linear``, so the module's parameters are
    exposed as ``linear.weight`` and ``linear.bias`` in its state dict.
    """

    def __init__(self, input_dim):
        """Create the model for feature vectors with ``input_dim`` entries."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for input whose last dimension is ``input_dim``."""
        logits = self.linear(x)
        return logits.sigmoid()

# Step 2: Load the scaler and model
# NOTE(review): joblib.load and torch.load both unpickle their input, which can
# execute arbitrary code. Only load artifacts produced by a trusted training run.
scaler = joblib.load('scaler.pkl')
input_dim = 7  # Match the input_dim from training (create_features emits 7 values)
model = LogisticRegressionTorch(input_dim)
# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# CPU-only machine; the freshly constructed model lives on the CPU either way.
# NOTE(review): consider passing weights_only=True if the installed PyTorch
# version supports it, since only a state dict is expected here.
model.load_state_dict(torch.load('logreg_rl_model.pth', map_location='cpu'))
model.eval()  # inference mode: this notebook only scores, it never trains

# Step 3: Load therapist data
try:
    therapists_df = pd.read_csv('therapist.csv')
except FileNotFoundError as e:
    # Name the file that is actually read, then re-raise: every later step needs
    # therapists_df, so stopping here with the real cause is clearer than
    # calling exit(), which is intended for interactive shells.
    print(f"Error: {e}. Please ensure 'therapist.csv' is in the correct directory.")
    raise

# Step 4: Define the create_features function (same as during training)
def create_features(user, therapist):
    """Build the 7-value feature vector for one (user, therapist) pair.

    Both arguments are indexed by key (a dict or a pandas Series works).
    The vector holds, in order:

    1-4. 1/0 flags for an exact match on modality, gender, language and mode.
    5.   1 if the user's preferred days and the therapist's available days
         share at least one entry, else 0.
    6.   The number of specialties present in both comma-separated lists.
    7.   The therapist's ``experience_years`` (0 when missing).

    Comma-separated fields are split on ``','`` and compared exactly as split;
    no whitespace trimming or case folding is applied.

    Returns:
        A 1-D ``numpy`` array with the seven values above.
    """
    def split_csv(cell):
        # A missing cell (None / NaN) contributes no entries.
        if pd.notna(cell):
            return set(cell.split(','))
        return set()

    exact_match_columns = (
        ('preferred_modality', 'modality'),
        ('preferred_gender', 'gender'),
        ('preferred_language', 'language'),
        ('preferred_mode', 'mode'),
    )
    vector = [
        1 if user[user_key] == therapist[therapist_key] else 0
        for user_key, therapist_key in exact_match_columns
    ]

    shared_days = split_csv(user['preferred_days']) & split_csv(therapist['available_days'])
    vector.append(1 if shared_days else 0)

    shared_specialties = split_csv(user['preferred_specialties']) & split_csv(therapist['specialties'])
    vector.append(len(shared_specialties))

    years = therapist['experience_years']
    vector.append(years if pd.notna(years) else 0)

    return np.array(vector)

# Step 5: Define a function to suggest top 5 therapists for a user
def suggest_top_5_therapists(user_data, therapists_df, scaler, model, top_k=5):
    """Rank therapists for one user by the model's predicted score.

    Args:
        user_data: Key-indexable user record (dict or pandas Series) holding
            the preference fields that ``create_features`` reads.
        therapists_df: DataFrame with one therapist per row; must contain
            ``id`` and ``therapist_name`` plus the columns that
            ``create_features`` reads.
        scaler: Fitted object whose ``transform`` accepts a 2-D feature array.
        model: Callable torch module mapping a ``(n, n_features)`` float32
            tensor to one score per row.
        top_k: Maximum number of therapists to return (default 5).

    Returns:
        A list of at most ``top_k`` dicts with keys ``therapist_id``,
        ``therapist_name`` and ``score``, sorted by score in descending order.
        Ties keep the row order of ``therapists_df``; NaN scores rank last.
        An empty table (or ``top_k == 0``) yields an empty list.

    Raises:
        ValueError: If ``top_k`` is negative, or if the model does not return
            exactly one score per therapist.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    therapist_ids = []
    therapist_names = []
    feature_rows = []
    for _, therapist in therapists_df.iterrows():
        feature_rows.append(create_features(user_data, therapist))
        therapist_ids.append(therapist['id'])
        # Take the name from the row itself rather than re-filtering the frame
        # by id later, which is O(n) per lookup and ambiguous if ids repeat.
        therapist_names.append(therapist['therapist_name'])

    if not feature_rows or top_k == 0:
        return []

    # Scale and score every therapist in one batch instead of one call per row.
    features_scaled = scaler.transform(np.vstack(feature_rows))
    features_tensor = torch.tensor(features_scaled, dtype=torch.float32)
    with torch.no_grad():
        scores = model(features_tensor).reshape(-1).tolist()

    if len(scores) != len(therapist_ids):
        raise ValueError(
            f"model returned {len(scores)} scores for {len(therapist_ids)} therapists"
        )

    # Stable sort on the negated scores: highest first, ties in table order,
    # and NaN (which sorts to the end) at the bottom of the ranking.
    ranked = np.argsort(-np.asarray(scores, dtype=float), kind="stable")[:top_k]

    return [
        {
            "therapist_id": therapist_ids[i],
            "therapist_name": therapist_names[i],
            "score": scores[i],
        }
        for i in ranked
    ]

# Step 6: Example usage
def print_recommendations(recommendations):
    """Print one line per recommendation: therapist id, name and score (4 decimals)."""
    for therapist in recommendations:
        print(f"Therapist ID: {therapist['therapist_id']}, Name: {therapist['therapist_name']}, Score: {therapist['score']:.4f}")


# Option 1: Provide user data manually as a pandas Series holding the
# preference fields that create_features reads
user_data_manual = pd.Series({
    "preferred_modality": "CBT",
    "preferred_gender": "female",
    "preferred_language": "English",
    "preferred_days": "Monday,Wednesday",
    "preferred_mode": "in-person",
    "preferred_specialties": "anxiety,depression",
    "age": 30  # Not used in features but included for completeness
})

top_5 = suggest_top_5_therapists(user_data_manual, therapists_df, scaler, model)
print("\nTop 5 therapists for manually provided user:")
print_recommendations(top_5)

# Option 2: Pick a user from user.csv for demonstration
try:
    users_df = pd.read_csv('user.csv')
except FileNotFoundError as e:
    print(f"Error: {e}. Could not load user.csv for demonstration.")
except pd.errors.EmptyDataError:
    # read_csv raises this for a file with no columns at all, so the
    # `.empty` check below would never be reached for a zero-byte file.
    print("No users found in user.csv.")
else:
    if not users_df.empty:
        # Pick the first user for demonstration (you can change the index or user_id)
        sample_user = users_df.iloc[0]
        print(f"\nSuggesting top 5 therapists for user: {sample_user['name']} (User ID: {sample_user['user_id']})")
        top_5 = suggest_top_5_therapists(sample_user, therapists_df, scaler, model)
        print_recommendations(top_5)
    else:
        print("No users found in user.csv.")


Top 5 therapists for manually provided user:
Therapist ID: 61, Name: Dr. Ibrahim Kebede, Score: 0.2229
Therapist ID: 91, Name: Dr. Tarekegn Mohammed, Score: 0.2229
Therapist ID: 68, Name: Dr. Lemlem Assefa, Score: 0.2229
Therapist ID: 29, Name: Dr. Chaltu Getachew, Score: 0.2058
Therapist ID: 63, Name: Dr. Kalkidan Worku, Score: 0.2058

Suggesting top 5 therapists for user: Lemlem Aberra (User ID: 94)
Therapist ID: 70, Name: Dr. Mamo Berhanu, Score: 0.9684
Therapist ID: 92, Name: Dr. Tesfaye Tadesse, Score: 0.9246
Therapist ID: 40, Name: Dr. Endale Abebe, Score: 0.9183
Therapist ID: 18, Name: Dr. Ayele Gebre, Score: 0.9159
Therapist ID: 26, Name: Dr. Birhan Hailu, Score: 0.8923
