# Model Experiments

This section covers various experiments performed using different models and parameters.

In [1]:
# Train the final pipeline and save it with joblib
import joblib
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression

def create_final_pipeline():
    """Build the unfitted preprocessing + logistic-regression pipeline.

    Numeric columns are scaled with ``MinMaxScaler``; categorical columns are
    one-hot encoded into a dense array, with categories unseen during fitting
    ignored at transform time (``handle_unknown='ignore'``).

    Returns:
        A ``Pipeline`` with the steps ``'preprocessor'`` (a
        ``ColumnTransformer``) and ``'classifier'`` (a ``LogisticRegression``
        seeded with ``random_state=42``).
    """
    numeric_features = ['age', 'is_tech_company']
    categorical_features = ['gender', 'country', 'company_size', 'work_remotely',
                            'has_mental_health_benefits', 'current_disorder']

    # scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
    # `sparse_output`, and 1.4 removed the old name. Try the current keyword
    # first and fall back so the pipeline builds on both old and new releases.
    try:
        encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

    # Create preprocessing pipelines
    numeric_transformer = Pipeline(steps=[
        ('scaler', MinMaxScaler())
    ])

    categorical_transformer = Pipeline(steps=[
        ('onehot', encoder)
    ])

    # Combine preprocessing steps; columns are selected by name.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ])

    # Create full pipeline
    return Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', LogisticRegression(random_state=42))
    ])

def train_and_save_model(X_train, y_train, model_path='final_mental_health_model.joblib'):
    """Fit a fresh final pipeline and persist it with joblib.

    Args:
        X_train: Training features passed to the pipeline's ``fit``.
        y_train: Training target passed to the pipeline's ``fit``.
        model_path: Destination file for the serialized pipeline.

    Returns:
        The fitted pipeline (the same object that was written to disk).
    """
    # Pipeline.fit returns the pipeline itself, so build and fit in one step.
    fitted_model = create_final_pipeline().fit(X_train, y_train)

    joblib.dump(fitted_model, model_path)
    print(f"Model saved successfully to: {model_path}")

    return fitted_model

def main(df=None):
    """Train the final model, save it, reload it and report its test F1 score.

    Args:
        df: Optional preprocessed DataFrame containing the feature columns and
            the ``'sought_treatment'`` target. When omitted, the data is
            obtained from ``load_and_preprocess_data()``.
    """
    # Imported here because the surrounding import cell does not bring these
    # two names into scope, which made the original script fail at runtime.
    from sklearn.metrics import f1_score
    from sklearn.model_selection import train_test_split

    # Single source of truth for the path used by both save and reload.
    model_path = 'final_mental_health_model.joblib'

    if df is None:
        # NOTE(review): load_and_preprocess_data is not defined in this part of
        # the file; it must be defined or imported elsewhere, or a DataFrame
        # must be passed in via `df`.
        df = load_and_preprocess_data()

    # Prepare features and target
    X = df.drop('sought_treatment', axis=1)
    y = df['sought_treatment']

    # Split data (stratified so both splits keep the target's class balance)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Train and save model
    train_and_save_model(X_train, y_train, model_path=model_path)

    # Verify the saved model by scoring the reloaded copy, not the in-memory one
    loaded_model = joblib.load(model_path)
    test_pred = loaded_model.predict(X_test)
    test_score = f1_score(y_test, test_pred)

    print("\nModel Verification:")
    print("-" * 50)
    print(f"Test F1 Score with loaded model: {test_score:.4f}")


if __name__ == "__main__":
    main()

NameError: name 'load_and_preprocess_data' is not defined

# FastAPI Development

This section details the API development process using FastAPI.
Navigate: [Previous (Feature Engineering)](03_feature_engineering.ipynb) | [Next (Streamlit Application)](05_streamlit_app.ipynb)

In [None]:
# FastAPI application code (main.py)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd  # Add pandas import
from fastapi.middleware.cors import CORSMiddleware 
import uvicorn

# Create FastAPI app
app = FastAPI(title="Mental Health Treatment Prediction API")

# Add CORS middleware so browser clients on any origin can call the API.
# Credentials are disabled: the FastAPI CORS documentation states that
# allow_origins/allow_methods/allow_headers cannot be ["*"] when
# allow_credentials is True, and the endpoints in this file read no cookies
# or authorization headers. For production, restrict allow_origins to the
# specific front-end domain.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the saved model once at import time so every request reuses it.
# joblib files are pickle-based: only load model files from a trusted source.
model = joblib.load('final_mental_health_model.joblib')

# Define input data structure
class PredictionInput(BaseModel):
    # Request body for POST /predict. Each field is copied by predict() into
    # the same-named column of the single-row DataFrame passed to the model.
    age: int
    gender: str
    country: str
    company_size: str
    is_tech_company: int  # example below uses 1; presumably a 0/1 flag — TODO confirm
    work_remotely: str
    has_mental_health_benefits: str
    current_disorder: str

    class Config:
        # Sample payload; presumably surfaced as the example in the generated
        # JSON schema / API docs.
        # NOTE(review): `json_schema_extra` is the Pydantic v2 key name, while
        # a nested `class Config` is the v1-style way to configure a model —
        # confirm which Pydantic version this targets.
        json_schema_extra = {
            "example": {
                "age": 30,
                "gender": "male",
                "country": "United States",
                "company_size": "26-100",
                "is_tech_company": 1,
                "work_remotely": "sometimes",
                "has_mental_health_benefits": "yes",
                "current_disorder": "no"
            }
        }

class PredictionOutput(BaseModel):
    # Response body of POST /predict (declared as that route's response_model).
    likelihood: float  # predict() fills this with the unrounded second predict_proba column
    prediction: str  # "Likely to seek treatment" / "Unlikely to seek treatment"
    probability: float  # same value as likelihood, rounded to 3 decimals by predict()

@app.get("/")
def home():
    """Root endpoint: return the API name together with a static health flag."""
    status = dict(
        message="Mental Health Treatment Prediction API",
        health_check="OK",
    )
    return status

@app.post("/predict", response_model=PredictionOutput)
def predict(data: PredictionInput):
    """Score one respondent with the loaded model.

    Args:
        data: Validated request body holding the feature values.

    Returns:
        A dict matching ``PredictionOutput``: the unrounded probability from
        the second ``predict_proba`` column (``likelihood``), a human-readable
        label (``prediction``) and the same probability rounded to three
        decimals (``probability``).

    Raises:
        HTTPException: With status 500 and the error text as detail when the
            model fails to score the input.
    """
    # Convert input data to a single-row DataFrame; the pipeline selects
    # columns by name, so the keys must match the training column names.
    input_df = pd.DataFrame([{
        'age': data.age,
        'gender': data.gender,
        'country': data.country,
        'company_size': data.company_size,
        'is_tech_company': data.is_tech_company,
        'work_remotely': data.work_remotely,
        'has_mental_health_benefits': data.has_mental_health_benefits,
        'current_disorder': data.current_disorder
    }])

    # Only the model calls are guarded; this is the service boundary, so any
    # failure is reported as a 500 while keeping the original cause chained.
    try:
        prediction = model.predict(input_df)[0]
        # Cast once so both response fields are plain Python floats rather
        # than NumPy scalars.
        probability = float(model.predict_proba(input_df)[0][1])
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Create response
    return {
        "likelihood": probability,
        "prediction": "Likely to seek treatment" if prediction == 1 else "Unlikely to seek treatment",
        "probability": round(probability, 3)
    }

# Start the server with uvicorn when this file is executed directly.
# "main:app" is an import string naming the `app` object above, so this
# presumably expects the file to be saved as main.py — confirm.
# NOTE(review): host 0.0.0.0 with reload=True looks like a development
# setup; review both before deploying.
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)