In [2]:
import pandas as pd
import numpy as np
import joblib
import os
import warnings

warnings.filterwarnings('ignore')

def get_model_path(directory='evaluation_results/models'):
    """Finds the saved model file"""
    if not os.path.exists(directory): return None
    files = [f for f in os.listdir(directory) if f.endswith('.pkl')]
    if not files: return None
    return os.path.join(directory, files[0])

def create_binary_test_cases(csv_path):
    """
    Creates 2 specific rows of data:
    1. A High Risk Student (Target = 1)
    2. A Low Risk Student (Target = 0)
    """
    # 1. Get the column structure
    try:
        df_structure = pd.read_csv(csv_path, nrows=0)
        if 'Depression' in df_structure.columns:
            df_structure = df_structure.drop(columns=['Depression'])
        columns = df_structure.columns
    except:
        print("‚ùå Error: Could not read preprocessed data file.")
        return None

    # 2. Create an empty DataFrame with those columns
    test_df = pd.DataFrame(np.zeros((2, len(columns))), columns=columns)

    # ======================================================
    # CASE 1: THE "DEPRESSED" PROFILE (Row 0)
    # We use roughly 2.0 for High (2 Std Dev above mean)
    # and -2.0 for Low (2 Std Dev below mean)
    # ======================================================
    test_df.iloc[0]['Academic Pressure'] = 2.5   # Very High Pressure
    test_df.iloc[0]['Work Pressure'] = 2.0       # High Work Pressure
    test_df.iloc[0]['CGPA'] = -2.0               # Very Low Grades
    test_df.iloc[0]['Study Satisfaction'] = -2.0 # Hates Studying
    test_df.iloc[0]['Financial Stress_5'] = 1    # Max Financial Stress (One-Hot)
    test_df.iloc[0]['Dietary Habits_Unhealthy'] = 1
    # Check for sleep column existence before setting
    for col in columns:
        if "Less Than 5 Hours" in col: test_df.iloc[0][col] = 1

    # ======================================================
    # CASE 2: THE "HEALTHY" PROFILE (Row 1)
    # ======================================================
    test_df.iloc[1]['Academic Pressure'] = -2.0  # Very Low Pressure
    test_df.iloc[1]['Work Pressure'] = -1.5      # Low Work Pressure
    test_df.iloc[1]['CGPA'] = 2.0                # High Grades
    test_df.iloc[1]['Study Satisfaction'] = 2.0  # Loves Studying
    # No Financial Stress set (defaults to 0)
    # Healthy habits (Unhealthy = 0)
    for col in columns:
        if "7-8 Hours" in col: test_df.iloc[1][col] = 1

    return test_df

# ==========================================
# MAIN EXECUTION
# ==========================================
if __name__ == "__main__":

    DATA_PATH = "data/processed/preprocessed_data.csv"
    MODEL_PATH = get_model_path()

    print("\n" + "="*60)
    print("üß™ BINARY PREDICTION TEST (0 vs 1)")
    print("="*60)

    if not MODEL_PATH:
        print("‚ùå Model not found! Please run the training script first.")
        exit()

    # Load Model
    print(f"üìÇ Loading Model: {os.path.basename(MODEL_PATH)}")
    model = joblib.load(MODEL_PATH)

    # Create Data
    test_data = create_binary_test_cases(DATA_PATH)

    if test_data is not None:
        # Predict
        predictions = model.predict(test_data)
        probabilities = model.predict_proba(test_data)[:, 1] if hasattr(model, 'predict_proba') else [0, 0]

        # ------------------------------------------------------
        # DISPLAY RESULTS
        # ------------------------------------------------------

        print("\n" + "-"*60)
        print("üë§ TEST CASE 1: The 'High Stress' Student")
        print("   (High Pressure, Low Grades, Bad Sleep)")
        print("-"*60)

        result_1 = predictions[0]
        prob_1 = probabilities[0]

        if result_1 == 1:
            print(f"   üî¥ PREDICTION: DEPRESSED (Class 1)")
            print(f"   üìä Confidence: {prob_1:.2%} sure")
            print("   ‚úÖ PASSED (Model correctly identified risk)")
        else:
            print(f"   üü¢ PREDICTION: NOT DEPRESSED (Class 0)")
            print(f"   ‚ùå FAILED (Model missed the risk)")

        print("\n" + "-"*60)
        print("üë§ TEST CASE 2: The 'Happy' Student")
        print("   (Low Pressure, High Grades, Good Sleep)")
        print("-"*60)

        result_2 = predictions[1]
        prob_2 = probabilities[1]

        if result_2 == 0:
            print(f"   üü¢ PREDICTION: NOT DEPRESSED (Class 0)")
            print(f"   üìä Confidence: {(1-prob_2):.2%} sure")
            print("   ‚úÖ PASSED (Model correctly identified health)")
        else:
            print(f"   üî¥ PREDICTION: DEPRESSED (Class 1)")
            print(f"   ‚ùå FAILED (Model false alarm)")

        print("\n" + "="*60)


üß™ BINARY PREDICTION TEST (0 vs 1)
üìÇ Loading Model: best_model.pkl

------------------------------------------------------------
üë§ TEST CASE 1: The 'High Stress' Student
   (High Pressure, Low Grades, Bad Sleep)
------------------------------------------------------------
   üî¥ PREDICTION: DEPRESSED (Class 1)
   üìä Confidence: 99.52% sure
   ‚úÖ PASSED (Model correctly identified risk)

------------------------------------------------------------
üë§ TEST CASE 2: The 'Happy' Student
   (Low Pressure, High Grades, Good Sleep)
------------------------------------------------------------
   üü¢ PREDICTION: NOT DEPRESSED (Class 0)
   üìä Confidence: 98.05% sure
   ‚úÖ PASSED (Model correctly identified health)



In [3]:
import pandas as pd
import numpy as np
import joblib
import os
import warnings

warnings.filterwarnings('ignore')

# ==============================================================================
# üìù INPUT DATA HERE
# Modify these values to test different student profiles
# ==============================================================================
INPUT_DATA = {
    # Low Academic Stress
    'Academic Pressure': 2.0,   # Low pressure
    'Work Pressure':     1.0,   # Low pressure
    'Study Satisfaction': 4.0,  # Likes studying

    # THE MAJOR STRESSOR
    'Financial Stress':  5.0,   # Maximum Debt/Financial Worry

    # Average Stats
    'CGPA':              7.5,   # Average grades
    'Work/Study Hours':  6.0,   # Normal hours
    'Age':               23.0,

    # Good Habits
    'Sleep Quality':     'Good', # Sleeping well
    'Dietary Habits':    'Good'  # Eating well
}
# ==============================================================================

# Scaling constants (approximate from training data)
SCALING_STATS = {
    'Academic Pressure': {'mean': 3.0, 'std': 1.2},
    'Work Pressure':     {'mean': 2.5, 'std': 1.5},
    'CGPA':              {'mean': 7.5, 'std': 1.5},
    'Study Satisfaction':{'mean': 3.0, 'std': 1.2},
    'Work/Study Hours':  {'mean': 6.0, 'std': 3.0},
    'Age':               {'mean': 21.0, 'std': 2.5}
}

def load_model_and_columns():
    """Finds the model and gets the column structure"""
    base_dir = 'evaluation_results/models'

    # 1. Find Model
    if not os.path.exists(base_dir):
        print(f"‚ùå Error: Directory '{base_dir}' not found.")
        return None, None

    files = [f for f in os.listdir(base_dir) if f.endswith('.pkl')]
    if not files:
        print("‚ùå Error: No .pkl model found.")
        return None, None

    model_path = os.path.join(base_dir, files[0])
    model = joblib.load(model_path)
    print(f"‚úÖ Loaded Model: {files[0]}")

    # 2. Get Columns from preprocessed data
    try:
        df = pd.read_csv("data/processed/preprocessed_data.csv", nrows=0)
        if 'Depression' in df.columns:
            cols = df.drop(columns=['Depression']).columns
        else:
            cols = df.columns
        return model, cols
    except:
        print("‚ùå Error: Could not read column structure from CSV.")
        return None, None

def process_and_predict(input_dict, model, columns):
    """Converts simple input into the complex format the model needs"""

    # 1. Create a row of zeros (blank template)
    data = pd.DataFrame(np.zeros((1, len(columns))), columns=columns)

    # 2. Scale Numeric Values (Z-Score Scaling)
    # The model expects numbers like -1.2, not 5.0
    for key, stats in SCALING_STATS.items():
        if key in input_dict and key in data.columns:
            raw_val = input_dict[key]
            scaled_val = (raw_val - stats['mean']) / stats['std']
            data.iloc[0][key] = scaled_val

    # 3. Handle Categorical / One-Hot Encoding
    # Sleep
    if input_dict['Sleep Quality'] == 'Bad':
        # Find the column like "Sleep Duration_'Less Than 5 Hours'"
        col = [c for c in columns if "Less Than 5" in c]
        if col: data.iloc[0][col[0]] = 1
    else:
        col = [c for c in columns if "7-8" in c]
        if col: data.iloc[0][col[0]] = 1

    # Diet
    if input_dict['Dietary Habits'] == 'Bad':
        col = [c for c in columns if "Unhealthy" in c]
        if col: data.iloc[0][col[0]] = 1

    # Financial Stress (Map 1-5 to specific column)
    f_stress = int(input_dict['Financial Stress'])
    f_col = [c for c in columns if f"Financial Stress_{f_stress}" in c]
    if f_col: data.iloc[0][f_col[0]] = 1

    # 4. Predict
    prediction = model.predict(data)[0]
    prob = model.predict_proba(data)[0][1] if hasattr(model, 'predict_proba') else 0.0

    return prediction, prob

if __name__ == "__main__":
    print("\n" + "="*60)
    print("üß™ TESTING WITH CUSTOM INPUT DATA")
    print("="*60)

    # Load
    model, cols = load_model_and_columns()

    if model:
        print("-" * 60)
        print(f"INPUTS:")
        print(f"  ‚Ä¢ GPA: {INPUT_DATA['CGPA']}")
        print(f"  ‚Ä¢ Pressure: {INPUT_DATA['Academic Pressure']}")
        print(f"  ‚Ä¢ Sleep: {INPUT_DATA['Sleep Quality']}")
        print("-" * 60)

        # Run
        pred, prob = process_and_predict(INPUT_DATA, model, cols)

        # Result
        print("\nRESULT:")
        if pred == 1:
            print(f"  üî¥ DEPRESSION PREDICTED")
            print(f"  üìä Probability: {prob:.2%}")
        else:
            print(f"  üü¢ NOT DEPRESSED")
            print(f"  üìä Probability: {(1-prob):.2%} (Healthy)")

        print("\n" + "="*60)


üß™ TESTING WITH CUSTOM INPUT DATA
‚úÖ Loaded Model: best_tuned_model_Gradient_Boosting.pkl
------------------------------------------------------------
INPUTS:
  ‚Ä¢ GPA: 7.5
  ‚Ä¢ Pressure: 2.0
  ‚Ä¢ Sleep: Good
------------------------------------------------------------

RESULT:
  üî¥ DEPRESSION PREDICTED
  üìä Probability: 53.50%



In [6]:
# ==============================================================================
# üìù INPUT DATA: HEALTHY STUDENT PROFILE
# ==============================================================================
INPUT_DATA = {
    # Low Stress, High Performance
    'Academic Pressure': 1.0,   # Low pressure
    'Work Pressure':     0.0,   # No work pressure
    'Study Satisfaction': 5.0,  # Loves studying
    'Financial Stress':  1.0,   # No financial stress

    # Good Stats
    'CGPA':              9.8,   # High grades
    'Work/Study Hours':  6.0,   # Balanced hours
    'Age':               22.0,

    # Good Habits
    'Sleep Quality':     'Good', # 7-8 hours
    'Dietary Habits':    'Good'  # Healthy food
}
# ==============================================================================