In [10]:
import pandas as pd
import numpy as np

In [None]:
# Load the student dataset. The path lives in one variable so the error
# message always names the file that was actually requested.
DATA_PATH = '../Data/students_v2.csv'
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print(f"Error: {DATA_PATH} not found. Please ensure Activity 1 is complete.")
    # Re-raise instead of continuing: without a fresh `df`, later cells would
    # either fail with a NameError or silently score a stale frame that is
    # still sitting in the kernel from an earlier run.
    raise
else:
    # Only report success when the read actually succeeded.
    print("Dataset loaded successfully.")

Dataset loaded successfully.


In [12]:
# --- HELPER FUNCTIONS FOR NORMALIZATION ---

def normalize_1_to_10(val):
    """Rescale a 1-10 rating by multiplying it by ten (so 10 becomes 100)."""
    scaled = 10 * val
    return scaled

def invert_1_to_10(val):
    """Flip a 1-10 rating where high is bad into a score where high is good.

    The distance from 10 is multiplied by ten, so a rating of 10 gives 0
    and a rating of 1 gives 90.
    """
    distance_from_worst = 10 - val
    return distance_from_worst * 10

def normalize_sleep(hours):
    """Express sleep as a percentage of an 8-hour night, capped at 100.0."""
    percent_of_target = hours / 8.0 * 100
    # Anything beyond the 8-hour target earns no extra credit.
    if percent_of_target > 100.0:
        return 100.0
    return percent_of_target

In [13]:
# --- SCORING FUNCTIONS ---

def calculate_aps(row):
    """Compute the APS for one record.

    APS = 60% GPA + 20% assignment completion + 20% attendance. The GPA is
    rescaled with ``normalize_1_to_10``; the other two columns are used
    unchanged (presumably already on a 0-100 scale -- confirm against the
    dataset). The result is rounded to one decimal place.
    """
    gpa_part = normalize_1_to_10(row['gpa']) * 0.60
    assignments_part = row['assignments_completion'] * 0.20
    attendance_part = row['attendance'] * 0.20
    return round(gpa_part + assignments_part + attendance_part, 1)

def calculate_wws(row):
    """Compute the WWS for one record.

    WWS = 40% wellbeing + 30% sleep + 30% low stress. Wellbeing is rescaled
    with ``normalize_1_to_10``, stress is flipped with ``invert_1_to_10`` so
    that less stress scores higher, and sleep goes through
    ``normalize_sleep``. The result is rounded to one decimal place.
    """
    wellbeing_pct = normalize_1_to_10(row['mental_wellbeing'])
    calm_pct = invert_1_to_10(row['stress_level'])
    sleep_pct = normalize_sleep(row['sleep_hours'])

    weighted_wellbeing = wellbeing_pct * 0.40
    weighted_sleep = sleep_pct * 0.30
    weighted_calm = calm_pct * 0.30
    return round(weighted_wellbeing + weighted_sleep + weighted_calm, 1)

def calculate_ptms(row):
    """Compute the PTMS for one record.

    PTMS = 50% productivity + 30% low distractions + 20% platform
    engagement. Productivity is rescaled with ``normalize_1_to_10`` and
    distractions are flipped with ``invert_1_to_10``; ``engagement_score``
    is used unchanged (presumably already on a 0-100 scale -- confirm
    against the dataset). The result is rounded to one decimal place.
    """
    productivity_pct = normalize_1_to_10(row['productivity_score'])
    focus_pct = invert_1_to_10(row['distractions'])

    weighted_productivity = productivity_pct * 0.50
    weighted_focus = focus_pct * 0.30
    weighted_engagement = row['engagement_score'] * 0.20
    return round(weighted_productivity + weighted_focus + weighted_engagement, 1)

def calculate_crs(row):
    """Compute the CRS for one record.

    CRS = 50% career clarity + 50% skill readiness, both rescaled with
    ``normalize_1_to_10``. The result is rounded to one decimal place.
    """
    weighted_clarity = normalize_1_to_10(row['career_clarity']) * 0.50
    weighted_skill = normalize_1_to_10(row['skill_readiness']) * 0.50
    return round(weighted_clarity + weighted_skill, 1)

In [14]:
# --- APPLY LOGIC ---

# 1. Component scores: each scorer is applied row by row, in this order
component_scorers = (
    ('APS', calculate_aps),
    ('WWS', calculate_wws),
    ('PTMS', calculate_ptms),
    ('CRS', calculate_crs),
)
for score_column, scorer in component_scorers:
    df[score_column] = df.apply(scorer, axis=1)

# 2. Master index (SRI), rounded to one decimal place
# SRI = 0.30 * APS + 0.25 * WWS + 0.20 * PTMS + 0.25 * CRS
df['SRI'] = (
    df['APS'].mul(0.30)
    .add(df['WWS'].mul(0.25))
    .add(df['PTMS'].mul(0.20))
    .add(df['CRS'].mul(0.25))
    .round(1)
)

# 3. Categorize (Green/Blue/Yellow/Red)
def classify_student(sri):
    """Map an SRI value to its colour band.

    Lower bounds are inclusive: 80 and above is 'Green' (excellent), 60 and
    above is 'Blue' (good), 40 and above is 'Yellow' (warning). Anything
    that clears none of these bounds -- including NaN, for which every
    comparison is false -- is 'Red' (critical).
    """
    bands = ((80, 'Green'), (60, 'Blue'), (40, 'Yellow'))
    for lower_bound, label in bands:
        if sri >= lower_bound:
            return label
    return 'Red'

# Label every student with the colour band that matches their SRI
risk_labels = df['SRI'].apply(classify_student)
df['Risk_Category'] = risk_labels

In [16]:
# --- VALIDATION & EXPORT ---

# Spot-check two student profiles to sanity-check the scoring logic
print("\n--- Validation: High Stress / Low Output (Burnout Case) ---")
burnout_columns = ['student_id', 'gpa', 'stress_level', 'APS', 'WWS', 'SRI', 'Risk_Category']
is_high_stress = df['stress_level'] >= 8
print(df.loc[is_high_stress, burnout_columns].head(3))

print("\n--- Validation: High GPA / Low Career Clarity (Drifter Case) ---")
drifter_columns = ['student_id', 'gpa', 'career_clarity', 'APS', 'CRS', 'SRI', 'Risk_Category']
is_drifter = (df['gpa'] > 8.5) & (df['career_clarity'] < 4)
print(df.loc[is_drifter, drifter_columns].head(3))

# Save the scored dataset. The confirmation message is built from the same
# path that is written, so it cannot report a different file name than the
# one actually produced.
OUTPUT_PATH = 'students_with_scores_v2.csv'
df.to_csv(OUTPUT_PATH, index=False)
print(f"\nSuccess: '{OUTPUT_PATH}' has been generated.")


--- Validation: High Stress / Low Output (Burnout Case) ---
  student_id   gpa  stress_level   APS   WWS   SRI Risk_Category
1       S002  2.29             9  27.1  22.4  20.8           Red
7       S008  5.25             8  57.9  50.6  52.6        Yellow
9       S010  5.20             9  54.3  42.2  45.4        Yellow

--- Validation: High GPA / Low Career Clarity (Drifter Case) ---
Empty DataFrame
Columns: [student_id, gpa, career_clarity, APS, CRS, SRI, Risk_Category]
Index: []

Success: 'students_with_scores.csv' has been generated.
