# Mental Health Recommender System - Backend Pipeline

This notebook consolidates the entire backend logic of the Mental Health Recommender System into a single flow.

### Modules Included:
1. **Graph Database**: Neo4j Connection Handling
2. **Graph Builder**: Populating the graph with synthetic & real data
3. **Graph Embedding (ML)**: Training Spectral Embeddings on the graph structure
4. **Inference Engine**: Finding similar nodes (Hybrid Recommendation)
5. **Recommender API Logic**: The heavy lifting behind the API endpoints

### Prerequisites
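Ensure Neo4j is running locally (e.g. `docker-compose up -d`). Connection settings are read from `.env` (`NEO4J_URI`, `NEO4J_USER`, `NEO4J_PASSWORD`) and default to a local instance. To load real users, place `Mental Health Dataset.csv` in the notebook's working directory; if it is missing, that step is skipped.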

In [1]:
# 1. Setup & Imports
import os
import sys
import pickle
import uuid
import numpy as np
import pandas as pd
import networkx as nx
from neo4j import GraphDatabase
from dotenv import load_dotenv
from sklearn.manifold import SpectralEmbedding
from sklearn.metrics.pairwise import cosine_similarity

# Load variables from a local .env file (if present) before reading the settings below.
load_dotenv()

# Neo4j connection settings: each value comes from the environment and falls
# back to a local-development default when the variable is not set.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
# NOTE(review): the fallback password is a placeholder for local use only;
# set NEO4J_PASSWORD explicitly for any shared or deployed database.
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")

print("Environment Configured.")

Environment Configured.


## 2. Database Connection (`graph/db.py`)

In [2]:
class Neo4jConnection:
    """Process-wide singleton wrapper around a Neo4j driver.

    ``Neo4jConnection()`` always returns the same instance while a connection
    is open. After ``close()`` the cached instance is discarded, so the next
    ``Neo4jConnection()`` call opens a fresh driver.
    """

    _instance = None

    def __new__(cls):
        if cls._instance is None:
            instance = super().__new__(cls)
            # Create the driver BEFORE publishing the singleton: if the driver
            # constructor raises, no half-initialised instance (one without a
            # ``driver`` attribute) is cached for later callers.
            instance.driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
            cls._instance = instance
        return cls._instance

    def close(self):
        """Close the driver and forget the cached singleton. Safe to call twice."""
        driver = getattr(self, "driver", None)
        if driver is not None:
            driver.close()
        self.driver = None
        # Drop the cached instance so a later Neo4jConnection() reconnects
        # instead of handing back an object that wraps a closed driver.
        cls = type(self)
        if cls._instance is self:
            cls._instance = None

    def query(self, query, parameters=None):
        """Run a Cypher statement and return its rows.

        Args:
            query: Cypher text.
            parameters: Optional dict of query parameters.

        Returns:
            A list with one ``record.data()`` dict per result record.

        Raises:
            RuntimeError: If the connection has already been closed.
        """
        driver = getattr(self, "driver", None)
        if driver is None:
            raise RuntimeError("Neo4j connection is closed; create a new Neo4jConnection().")
        with driver.session() as session:
            result = session.run(query, parameters)
            # Materialise inside the ``with`` block: the result cannot be
            # consumed once the session has been closed.
            return [record.data() for record in result]

# Shared connection handle used by every query in this notebook.
# Neo4jConnection caches its instance, so repeated Neo4jConnection() calls
# return the same object rather than opening another driver.
db = Neo4jConnection()
print("Database Connection Initialized.")

Database Connection Initialized.


## 3. Graph Builder (`graph/builder.py`)

In [3]:
def clear_graph():
    """Delete every node in the database together with its relationships."""
    wipe_statement = "MATCH (n) DETACH DELETE n"
    print("Clearing existing graph...")
    db.query(wipe_statement)

def create_constraints():
    """Create the uniqueness constraints and the user-name index.

    Every statement uses ``IF NOT EXISTS``, so calling this again is harmless.
    """
    schema_statements = (
        # Uniqueness constraints on the lookup key of each node label.
        "CREATE CONSTRAINT IF NOT EXISTS FOR (u:User) REQUIRE u.id IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (c:Country) REQUIRE c.name IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (s:State) REQUIRE s.name IS UNIQUE",
        "CREATE CONSTRAINT IF NOT EXISTS FOR (a:Activity) REQUIRE a.id IS UNIQUE",
        # Plain (non-unique) index on user names.
        "CREATE INDEX IF NOT EXISTS FOR (u:User) ON (u.name)",
    )
    print("Creating constraints...")
    for statement in schema_statements:
        db.query(statement)

def load_solutions():
    """Upsert the built-in catalogue of activities and link each to its State.

    Each activity is merged by name and connected with a ``TREATS``
    relationship to the State it targets. An activity that already exists
    keeps its current ``id``; a fresh UUID is assigned only when the node has
    none. Re-running this function therefore does not change activity ids,
    which saved embeddings are keyed on.
    """
    print("Loading Synthetic Solutions (Activities/Content)...")
    solutions = [
        {'name': 'Mindfulness Meditation', 'type': 'Meditation', 'target': 'Stress'},
        {'name': 'Deep Breathing Exercises', 'type': 'Exercise', 'target': 'Stress'},
        {'name': 'Stress Management Workshop', 'type': 'Workshop', 'target': 'Stress'},
        {'name': 'Emotional Regulation Guidance', 'type': 'Therapy', 'target': 'MoodSwings'},
        {'name': 'Journaling for Clarity', 'type': 'Writing', 'target': 'MoodSwings'},
        {'name': 'Mood Tracking App', 'type': 'Tool', 'target': 'MoodSwings'},
        {'name': 'Group Therapy Session', 'type': 'Social', 'target': 'SocialWeakness'},
        {'name': 'Public Speaking Club', 'type': 'Social', 'target': 'SocialWeakness'},
        {'name': 'Community Meetup', 'type': 'Social', 'target': 'SocialWeakness'},
        {'name': 'Nature Hiking Group', 'type': 'Exercise', 'target': 'Isolation'},
        {'name': 'Sunlight Exposure Routine', 'type': 'Routine', 'target': 'Isolation'},
        {'name': 'Resilience Training', 'type': 'Training', 'target': 'CopingIssues'},
        {'name': 'Cognitive Behavioral Therapy (CBT)', 'type': 'Therapy', 'target': 'CopingIssues'},
        {'name': 'Career Counseling', 'type': 'Consultation', 'target': 'WorkBurnout'},
        {'name': 'Work-Life Balance Workshop', 'type': 'Workshop', 'target': 'WorkBurnout'},
        {'name': 'Maintenance Yoga', 'type': 'Exercise', 'target': 'WellBeing'},
        {'name': 'Daily Gratitude Journal', 'type': 'Writing', 'target': 'WellBeing'},
    ]

    # coalesce() keeps an id that is already stored and only falls back to the
    # freshly generated one for nodes that do not have an id yet.
    query = """
    MERGE (a:Activity {name: $name})
    SET a.id = coalesce(a.id, $id), a.type = $type
    MERGE (s:State {name: $target})
    MERGE (a)-[:TREATS]->(s)
    """

    for sol in solutions:
        db.query(query, {
            'name': sol['name'],
            'id': str(uuid.uuid4()),
            'type': sol['type'],
            'target': sol['target'],
        })

def _clean_cell(value, default='Unknown'):
    """Return ``value``, or ``default`` when the CSV cell is missing (None/NaN)."""
    if value is None or pd.isna(value):
        return default
    return value


def _experienced_states(row):
    """Translate one dataset row into the State names that user experiences."""
    states = []
    if row.get('Growing_Stress') == 'Yes':
        states.append('Stress')
    if row.get('Mood_Swings') in ('High', 'Medium'):
        states.append('MoodSwings')
    if row.get('Social_Weakness') == 'Yes':
        states.append('SocialWeakness')
    # str() makes a missing cell harmless: neither "None" nor "nan" contains "More".
    if "More" in str(row.get('Days_Indoors')):
        states.append('Isolation')
    if row.get('Coping_Struggles') == 'Yes':
        states.append('CopingIssues')
    if row.get('Work_Interest') == 'No':
        states.append('WorkBurnout')
    return states


def load_real_data(limit=100):
    """Load the first ``limit`` dataset rows as User nodes.

    Each row becomes a ``User`` with id ``U<row position>``, a ``LIVES_IN``
    link to its Country, and one ``EXPERIENCES`` link per State derived from
    the row's answers. If the CSV is absent, a message is printed and nothing
    is loaded.

    Args:
        limit: Maximum number of rows to read from the top of the CSV.
    """
    # The CSV is looked up relative to the current working directory.
    csv_path = 'Mental Health Dataset.csv'
    if not os.path.exists(csv_path):
        print(f"Dataset not found at {csv_path}. Skipping real data load.")
        return

    print(f"Loading users from {csv_path}...")
    # nrows stops parsing after `limit` rows instead of reading the whole file.
    df = pd.read_csv(csv_path, nrows=limit)

    # A single statement per user: FOREACH over an empty list is a no-op, so
    # users without any matching state still get their User/Country nodes.
    query_user = """
    MERGE (u:User {id: $uid})
    SET u.gender = $gender
    MERGE (c:Country {name: $country})
    MERGE (u)-[:LIVES_IN]->(c)
    FOREACH (state_name IN $states |
        MERGE (s:State {name: state_name})
        MERGE (u)-[:EXPERIENCES]->(s)
    )
    """

    loaded = 0
    for position, (_, row) in enumerate(df.iterrows()):
        db.query(query_user, {
            'uid': f"U{position}",
            # A NaN must never reach MERGE as a property value.
            'gender': _clean_cell(row.get('Gender')),
            'country': _clean_cell(row.get('Country')),
            'states': _experienced_states(row),
        })
        loaded += 1
    print(f"Loaded {loaded} users from {csv_path}.")

## 4. Machine Learning Module (`ml/graph_embedding.py` + `inference.py`)
Handles Training (Learning Graph Structure) and Inference (Similarity Search).

In [4]:
class GraphLearner:
    """Builds an undirected graph from Neo4j and learns spectral node embeddings.

    Attributes:
        graph: ``networkx.Graph`` whose nodes are keyed by a node's ``id``
            property, or by its ``name`` when it has no ``id``.
        vectors: Dict mapping node key -> embedding vector; empty until
            ``train_embeddings`` has run successfully.
    """

    def __init__(self):
        self.graph = nx.Graph()
        self.vectors = {}

    def fetch_graph_data(self):
        """Pull every relationship from Neo4j and add it as an undirected edge.

        Relationship direction and type are not used when building the graph,
        and rows with a missing endpoint key are skipped.
        """
        print("Fetching data from Neo4j...")
        query = """
        MATCH (n)-[r]->(m)
        RETURN 
            CASE WHEN n.id IS NOT NULL THEN n.id ELSE n.name END as source,
            CASE WHEN m.id IS NOT NULL THEN m.id ELSE m.name END as target,
            type(r) as type
        """
        for record in db.query(query):
            source, target = record['source'], record['target']
            if source and target:
                self.graph.add_edge(source, target)
        print(f"Graph built: {self.graph.number_of_nodes()} nodes.")

    def train_embeddings(self, dimensions=32, random_state=42):
        """Fit a spectral embedding on the graph's adjacency matrix.

        Args:
            dimensions: Number of embedding components per node.
            random_state: Seed forwarded to ``SpectralEmbedding`` so repeated
                runs on the same graph produce the same vectors. Pass ``None``
                to restore unseeded behaviour.

        Stores the result in ``self.vectors``. If the graph has no edges, a
        message is printed and ``self.vectors`` is left unchanged.
        """
        if self.graph.number_of_edges() == 0:
            print("Graph is empty.")
            return
        print("Training Spectral Embedding...")
        # Fix the node order once so matrix row i always belongs to nodes[i].
        nodes = list(self.graph.nodes())
        adj_matrix = nx.to_numpy_array(self.graph, nodelist=nodes)
        embedding = SpectralEmbedding(
            n_components=dimensions,
            affinity='precomputed',  # the adjacency matrix itself is the affinity
            random_state=random_state,
        )
        node_vectors = embedding.fit_transform(adj_matrix)
        self.vectors = dict(zip(nodes, node_vectors))
        print("Training complete.")

    def save_embeddings(self, filepath="graph_embeddings.pkl"):
        """Pickle ``self.vectors`` to ``filepath``, overwriting any existing file."""
        with open(filepath, 'wb') as f:
            pickle.dump(self.vectors, f)
        print(f"Saved embeddings to {filepath}.")

class NeuralRecommender:
    """Recommends activities by cosine similarity over saved graph embeddings.

    Attributes:
        vectors: Dict node key -> embedding, or ``None`` when no file was loaded.
        keys: Node keys in the same order as the rows of ``matrix``.
        matrix: 2-D array stacking every embedding, one row per key.
        embedding_path: Location of the pickled embeddings file.
    """

    def __init__(self, embedding_path="graph_embeddings.pkl"):
        self.vectors = None
        self.keys = []
        self.matrix = None
        self.embedding_path = embedding_path
        self.load_model()

    def load_model(self):
        """Load embeddings from ``self.embedding_path`` if the file exists."""
        if not os.path.exists(self.embedding_path):
            print("Embedding file not found.")
            return
        # SECURITY: pickle.load can execute arbitrary code. Only point
        # embedding_path at files written by this project, never at
        # untrusted input.
        with open(self.embedding_path, 'rb') as f:
            self.vectors = pickle.load(f)
        self.keys = list(self.vectors.keys())
        self.matrix = np.array([self.vectors[k] for k in self.keys])
        print(f"Loaded embeddings for {len(self.vectors)} nodes.")

    def get_activity_details(self, node_id):
        """Return the Activity with id ``node_id`` as a dict, or ``None`` if absent."""
        query = """
        MATCH (a:Activity {id: $aid})
        RETURN a.id as id, a.name as title, a.type as type, 'Activity' as category
        """
        result = db.query(query, {'aid': node_id})
        return result[0] if result else None

    def _rank_activities(self, query_vector, excluded, limit):
        """Return up to ``limit`` activities most similar to ``query_vector``.

        Nodes are visited in descending cosine similarity. Keys in
        ``excluded`` and nodes that are not Activities are skipped.
        """
        # Without this guard a limit of 0 still produced one result, because
        # the limit was only checked after the first append.
        if limit <= 0:
            return []
        scores = cosine_similarity(query_vector, self.matrix)[0]
        recommendations = []
        for idx in scores.argsort()[::-1]:
            node_id = self.keys[idx]
            if node_id in excluded:
                continue
            # One lookup per candidate: embeddings cover users, states and
            # countries too, so most candidates are not activities.
            details = self.get_activity_details(node_id)
            if not details:
                continue
            details['score'] = round(float(scores[idx]), 3)
            details['reason_category'] = 'AI Match'
            recommendations.append(details)
            if len(recommendations) >= limit:
                break
        return recommendations

    def predict(self, user_id, limit=5):
        """Recommend activities for a user that has an embedding.

        Returns an empty list when no embeddings are loaded, the user is
        unknown, or ``limit`` is not positive.
        """
        if not self.vectors or user_id not in self.vectors:
            return []
        user_vector = self.vectors[user_id].reshape(1, -1)
        return self._rank_activities(user_vector, (user_id,), limit)

    def predict_cold_start(self, distinct_states, limit=5):
        """Recommend activities for a profile described only by State names.

        The query vector is the mean embedding of the given states that have
        one. Returns an empty list when no embeddings are loaded, none of the
        states is known, or ``limit`` is not positive.
        """
        if not self.vectors:
            return []
        states = list(distinct_states or [])
        state_vectors = [self.vectors[s] for s in states if s in self.vectors]
        if not state_vectors:
            return []
        proxy_vector = np.mean(state_vectors, axis=0).reshape(1, -1)
        return self._rank_activities(proxy_vector, states, limit)

## 5. Recommendation Engine (`recommender/engine.py`)

In [5]:
class Recommender:
    """Hybrid recommender combining graph-rule matches with embedding matches."""

    def __init__(self):
        self.neural = NeuralRecommender()

    @staticmethod
    def _attributes_to_states(attributes):
        """Map a cold-start attribute dict to the State names it implies."""
        states = []
        if attributes.get('growing_stress') == 'Yes':
            states.append('Stress')
        if attributes.get('mood_swings') in ('High', 'Medium'):
            states.append('MoodSwings')
        if attributes.get('social_weakness') == 'Yes':
            states.append('SocialWeakness')
        # Mirrors the graph builder's Days_Indoors -> Isolation rule.
        # NOTE(review): the 'days_indoors' key name is assumed from the naming
        # of the other attribute keys — confirm against the API payload.
        if "More" in str(attributes.get('days_indoors')):
            states.append('Isolation')
        if attributes.get('coping_struggles') == 'Yes':
            states.append('CopingIssues')
        if attributes.get('work_interest') == 'No':
            states.append('WorkBurnout')
        return states

    @staticmethod
    def _interleave(first, second, limit):
        """Alternate items of ``first`` and ``second``, de-duplicated by 'id'."""
        combined = []
        seen = set()
        for i in range(max(len(first), len(second))):
            for source in (first, second):
                if i < len(source) and source[i]['id'] not in seen:
                    seen.add(source[i]['id'])
                    combined.append(source[i])
        return combined[:limit]

    def _graph_recommendations(self, user_id, target_states, limit):
        """Rule-based lookup: activities that TREAT the relevant states."""
        if user_id:
            query = """
            MATCH (u:User {id: $uid})-[:EXPERIENCES]->(s:State)<-[:TREATS]-(a:Activity)
            RETURN a.id as id, a.name as title, a.type as type, s.name as reason_category, 'Activity' as category
            LIMIT $limit
            """
            return db.query(query, {'uid': user_id, 'limit': limit})
        query = """
        MATCH (s:State)<-[:TREATS]-(a:Activity)
        WHERE s.name IN $states
        RETURN a.id as id, a.name as title, a.type as type, s.name as reason_category, 'Activity' as category
        LIMIT $limit
        """
        return db.query(query, {'states': target_states, 'limit': limit})

    def get_recommendations(self, user_id=None, attributes=None, limit=5, strategy='hybrid'):
        """Return up to ``limit`` activity recommendations.

        Args:
            user_id: Id of an existing user; takes precedence over ``attributes``.
            attributes: Cold-start profile dict, used only when ``user_id`` is
                not given.
            limit: Maximum number of recommendations.
            strategy: ``'neural'`` returns embedding matches only;
                ``'hybrid'`` interleaves graph-rule and embedding matches;
                any other value yields graph-rule matches only.

        Returns:
            A list of recommendation dicts; empty when neither ``user_id`` nor
            ``attributes`` is provided.
        """
        use_attributes = bool(attributes) and not user_id
        target_states = self._attributes_to_states(attributes) if use_attributes else []

        neural_recs = []
        if strategy in ('hybrid', 'neural'):
            if user_id:
                neural_recs = self.neural.predict(user_id, limit=limit)
            elif use_attributes:
                neural_recs = self.neural.predict_cold_start(target_states, limit=limit)
            if strategy == 'neural':
                return neural_recs

        graph_recs = []
        if user_id:
            graph_recs = self._graph_recommendations(user_id, None, limit)
        elif use_attributes:
            # Only the rule-based path falls back to general well-being
            # activities when no state matched.
            graph_states = target_states or ['WellBeing']
            graph_recs = self._graph_recommendations(None, graph_states, limit)

        # Graph matches go first so each pair starts with an explainable reason.
        return self._interleave(graph_recs, neural_recs, limit)

## 6. Execution & Verification
Run the pipeline: Build Graph -> Train Model -> Get Recommendations.

In [13]:
# A. Build Graph (run once, or again to reset).
# WARNING: clear_graph() deletes every node in the connected database.
clear_graph()
create_constraints()
load_solutions()
load_real_data(limit=100)

# B. Train Model: read the graph back from Neo4j, embed it, and write the
# embeddings file that the recommender below loads on construction.
learner = GraphLearner()
learner.fetch_graph_data()
learner.train_embeddings()
learner.save_embeddings()

# C. Test Recommender (must run after B so the embeddings file is fresh).
print("\n--- Testing Hybrid Recommender ---")
rec = Recommender()

# 1. Profile recommendation for an existing user (U0 is the first CSV row).
print("Recommendation for User U0 (Hybrid):")
result = rec.get_recommendations(user_id='U0', strategy='hybrid')
for r in result:
    print(f" - {r['title']} [{r['reason_category']}]")

# 2. Cold-start recommendation: no user id, only self-reported attributes.
print("\nCold Start Recommendation (Stress + Mood Swings):")
attrs = {'growing_stress': 'Yes', 'mood_swings': 'High'}
result_cold = rec.get_recommendations(attributes=attrs, strategy='hybrid')
for r in result_cold:
    print(f" - {r['title']} [{r['reason_category']}]")

Clearing existing graph...
Creating constraints...
Loading Synthetic Solutions (Activities/Content)...
Loading users from Mental Health Dataset.csv...
Data loading functions defined.
Fetching data from Neo4j...
Graph built: 134 nodes.
Training Spectral Embedding...
Training complete.
Saved embeddings to graph_embeddings.pkl.

--- Testing Hybrid Recommender ---
Loaded embeddings for 134 nodes.
Recommendation for User U0 (Hybrid):




 - Career Counseling [WorkBurnout]
 - Maintenance Yoga [AI Match]
 - Work-Life Balance Workshop [WorkBurnout]
 - Cognitive Behavioral Therapy (CBT) [AI Match]
 - Group Therapy Session [SocialWeakness]

Cold Start Recommendation (Stress + Mood Swings):
 - Mindfulness Meditation [Stress]
 - Deep Breathing Exercises [AI Match]
 - Emotional Regulation Guidance [AI Match]
 - Stress Management Workshop [Stress]
 - Journaling for Clarity [MoodSwings]
