# Expanded Relapse Risk Scoring
This notebook implements an evidence-based relapse risk scoring function.

In [None]:
import pandas as pd

# Assumes df_clean has already been created from previous steps
# Example:
# df_clean = df_selected.dropna()

In [None]:
# Define relapse risk scoring function
def assign_risk(row):
    """Label one record's relapse risk as 'High', 'Medium', or 'Low'.

    An additive score is built from up to seven factors and then bucketed:
    ``score >= 6`` is 'High', ``score >= 3`` is 'Medium', anything lower
    is 'Low'.

    Args:
        row: A mapping-like record that supports ``row[key]`` and
            ``key in row`` (for example the Series that
            ``DataFrame.apply(..., axis=1)`` passes in). 'HEALTH', 'BKDRUG',
            'CADRKDRUG' and 'RCVYSUBPRB' are required; 'MENTAL_HEALTH',
            'STRESS_LEVEL', 'SOCIAL_SUPPORT' and 'EMPLOYMENT_STATUS' are
            scored only when present.

    Returns:
        The risk label as a string: 'High', 'Medium', or 'Low'.

    Raises:
        KeyError: If one of the required keys is missing from ``row``.
    """
    score = 0

    # 1. Physical Health (scale: 1 = very poor, 5 = excellent)
    health = row['HEALTH']
    if health <= 1:
        score += 3
    elif health == 2:
        score += 2
    elif health == 3:
        score += 1
    elif health == 5:
        score -= 1
    # A value of 4 is neutral; any other value (including NaN) adds nothing.

    # 2. Treatment Engagement (BKDRUG and/or CADRKDRUG)
    bkdrug = row['BKDRUG']
    cadrkdrug = row['CADRKDRUG']
    if bkdrug == 0 and cadrkdrug == 0:
        score += 3
    elif bkdrug == 1 or cadrkdrug == 1:
        score -= 2

    # 3. Recovery Status
    # A missing value must not fall into the "else" branch: every other
    # factor ignores NaN (its comparisons are all False), so unknown recovery
    # status should not be rewarded as if it were protective.
    recovery = row['RCVYSUBPRB']
    if not pd.isna(recovery):
        if recovery == 0:
            score += 2
        else:
            score -= 2

    # 4. Mental Health (1 = very poor, 5 = excellent)
    if 'MENTAL_HEALTH' in row:
        mental = row['MENTAL_HEALTH']
        if mental <= 2:
            score += 2
        elif mental == 3:
            score += 1
        elif mental >= 4:
            score -= 1

    # 5. Stress Level (1 = low, 5 = high)
    if 'STRESS_LEVEL' in row:
        stress = row['STRESS_LEVEL']
        if stress >= 4:
            score += 2
        elif stress == 3:
            score += 1
        elif stress <= 2:
            score -= 1

    # 6. Social Support (1 = has support, 0 = isolated)
    if 'SOCIAL_SUPPORT' in row:
        support = row['SOCIAL_SUPPORT']
        if support == 0:
            score += 2
        elif support == 1:
            score -= 1

    # 7. Employment Status (1 = employed, 0 = unemployed)
    if 'EMPLOYMENT_STATUS' in row:
        employment = row['EMPLOYMENT_STATUS']
        if employment == 0:
            score += 1
        elif employment == 1:
            score -= 1

    # 8. Final label
    if score >= 6:
        return 'High'
    if score >= 3:
        return 'Medium'
    return 'Low'

In [None]:
# Score each row with assign_risk and store the label in 'relapse_risk'
df_clean['relapse_risk'] = df_clean.apply(func=assign_risk, axis='columns')

In [None]:
# Preview results
# assign_risk scores MENTAL_HEALTH, STRESS_LEVEL, SOCIAL_SUPPORT and
# EMPLOYMENT_STATUS only when they exist, so the preview must tolerate their
# absence too: show whichever of these columns the DataFrame actually has
# rather than raising KeyError.
preview_columns = [
    'HEALTH', 'BKDRUG', 'CADRKDRUG', 'RCVYSUBPRB',
    'MENTAL_HEALTH', 'STRESS_LEVEL', 'SOCIAL_SUPPORT',
    'EMPLOYMENT_STATUS', 'relapse_risk',
]
df_clean[[col for col in preview_columns if col in df_clean.columns]].head(10)