# Shapash Visualization for Early Warning System

This notebook demonstrates how to use the Shapash library to create explainable AI visualizations for the student attrition prediction model.

In [None]:
# Install Shapash if not already installed
!pip install shapash[report]


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from shapash.explainer.smart_explainer import SmartExplainer

# Load dataset
df = pd.read_csv('CS101_Student_Behavior.csv')

# These columns are stored in the CSV as stringified Python literals
# (lists/dicts); parse each one back into a real Python object.
for nested_column in (
    'Weekly_Logins',
    'Weekly_Attendance',
    'Assignments',
    'Quizzes',
    'Programming_Projects',
):
    df[nested_column] = df[nested_column].apply(ast.literal_eval)

# Feature engineering function (simplified for demonstration)
def calculate_features(df_row):
    """Aggregate one student's raw activity record into model features.

    Parameters
    ----------
    df_row : mapping (e.g. one ``pd.Series`` row of the dataset)
        Must provide ``'Weekly_Attendance'`` and ``'Weekly_Logins'``
        (sequences of numeric values), ``'Assignments'`` (sequence of dicts
        that each have a ``'Submission_Status'`` key) and ``'Quizzes'``
        (sequence of letter grades present in the grade map below).

    Returns
    -------
    pd.Series
        ``Avg_Attendance``, ``Avg_Logins``, ``Pct_OnTime_Assignments`` (the
        fraction of assignments whose status is ``'On Time'``) and
        ``Avg_Quiz_Grade`` (mean of the mapped grade points). A feature whose
        source sequence is empty is ``NaN``.

    Raises
    ------
    KeyError
        If a quiz grade is not a key of the grade map.
    """
    grade_map = {'A': 4, 'B': 3, 'C': 2, 'D': 1, 'E': 0.5, 'F': 0}

    def _mean_or_nan(values):
        # np.mean([]) returns NaN but emits a RuntimeWarning; return NaN quietly.
        values = list(values)
        return float(np.mean(values)) if values else np.nan

    assignments = df_row['Assignments']
    if len(assignments) > 0:
        on_time_count = sum(1 for a in assignments if a['Submission_Status'] == 'On Time')
        assignments_on_time = on_time_count / len(assignments)
    else:
        # No assignments recorded: the ratio is undefined, so avoid dividing by zero.
        assignments_on_time = np.nan

    return pd.Series({
        'Avg_Attendance': _mean_or_nan(df_row['Weekly_Attendance']),
        'Avg_Logins': _mean_or_nan(df_row['Weekly_Logins']),
        'Pct_OnTime_Assignments': assignments_on_time,
        'Avg_Quiz_Grade': _mean_or_nan(grade_map[g] for g in df_row['Quizzes'])
    })

# Apply feature engineering (one row of engineered features per student)
features_df = df.apply(calculate_features, axis=1)

# Prepare final dataset: raw categorical columns followed by the engineered ones
categorical_features = ['Gender', 'Ethnicity', 'Scholarship']
numeric_features = list(features_df.columns)
X = pd.concat([df[categorical_features], features_df], axis=1)
y = df['Label']

# Encode categorical features.
# sparse_threshold=0 makes the transformer always return a dense array, so the
# result can be wrapped in a DataFrame below.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough',
    sparse_threshold=0
)

X_processed = preprocessor.fit_transform(X)

# Shapash needs the encoded data as a DataFrame. ColumnTransformer outputs the
# 'cat' one-hot columns first and the passthrough columns after them, so the
# column names are assembled in that order. The passthrough columns keep their
# original names so they still match the features_dict keys below.
onehot_columns = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)
X_encoded = pd.DataFrame(
    X_processed,
    columns=list(onehot_columns) + numeric_features,
    index=X.index
)

# Encode target
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train a RandomForestClassifier on the named, encoded frame so the same
# column layout is used for fitting, predicting and explaining.
model = RandomForestClassifier(random_state=42)
model.fit(X_encoded, y_encoded)

# Prepare features dict for Shapash: technical column name -> display label
# (every value must be a string).
# NOTE(review): Shapash is expected to name the columns it restores from a
# ColumnTransformer '<transformer name>_<column>' (here 'cat_...'); confirm
# against the installed Shapash version. Keys that match no column are ignored.
features_dict = {
    'cat_Gender': 'Gender',
    'cat_Ethnicity': 'Ethnicity',
    'cat_Scholarship': 'Scholarship',
    'Avg_Attendance': 'Average Weekly Attendance',
    'Avg_Logins': 'Average Weekly Logins',
    'Pct_OnTime_Assignments': 'Percentage of On-Time Assignments',
    'Avg_Quiz_Grade': 'Average Quiz Grade'
}

# Create SmartExplainer (Shapash 2.x takes the model in the constructor)
xpl = SmartExplainer(
    model=model,
    features_dict=features_dict,
    preprocessing=preprocessor
)

# Shapash requires predictions and targets as pandas objects that share the
# index of x; bare numpy arrays are rejected.
y_pred = pd.DataFrame(model.predict(X_encoded), columns=['pred'], index=X_encoded.index)
y_target = pd.DataFrame({'Label': y_encoded}, index=X_encoded.index)

# Compile explainer on the encoded data; `preprocessing` lets Shapash map it
# back to the original categorical values for display.
xpl.compile(
    x=X_encoded,
    y_pred=y_pred,
    y_target=y_target
)

# Run Shapash web app
app = xpl.run_app(title_story='Early Warning System - Student Attrition Prediction')
