In [1]:
# COMPLETE STUDENT PERFORMANCE ANALYTICS UI
# Copy this entire code into ONE cell and run it

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error, classification_report
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import warnings
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8-whitegrid')
%matplotlib inline

# Generate sample data
# A fixed seed keeps the synthetic cohort, and everything derived from it,
# reproducible from run to run.
np.random.seed(42)
n_students = 500

data = {
    'StudentID': range(1001, 1001 + n_students),
    'Age': np.random.choice([15, 16, 17, 18], n_students),
    'Gender': np.random.choice([0, 1], n_students),
    'StudyTimeWeekly': np.random.uniform(0, 20, n_students),
    'Absences': np.random.poisson(5, n_students),
    'ParentalSupport': np.random.choice([0, 1, 2, 3, 4], n_students),
    'Tutoring': np.random.choice([0, 1], n_students, p=[0.7, 0.3]),
    'Extracurricular': np.random.choice([0, 1], n_students, p=[0.6, 0.4]),
    'Sports': np.random.choice([0, 1], n_students, p=[0.55, 0.45]),
    'Music': np.random.choice([0, 1], n_students, p=[0.65, 0.35]),
    'Volunteering': np.random.choice([0, 1], n_students, p=[0.7, 0.3])
}

df = pd.DataFrame(data)

# Calculate GPA
# Linear combination of the generated features plus Gaussian noise:
# study time, parental support, tutoring and activities raise GPA,
# absences lower it.
df['GPA'] = (2.0 +
             df['StudyTimeWeekly'] * 0.08 -
             df['Absences'] * 0.05 +
             df['ParentalSupport'] * 0.15 +
             df['Tutoring'] * 0.3 +
             (df['Extracurricular'] + df['Sports'] + df['Music'] + df['Volunteering']) * 0.1 +
             np.random.normal(0, 0.3, n_students))
# Keep GPA on the 0-4 scale.
df['GPA'] = df['GPA'].clip(0, 4)


def assign_grade_class(gpa):
    """Map GPA values on the 0-4 scale to a numeric grade class.

    Bins are [0, 1], (1, 2], (2, 3], (3, 4] and are labelled 3, 2, 1, 0
    respectively, so a higher GPA yields a lower class number.

    Args:
        gpa: pandas Series (or other 1-D array-like accepted by ``pd.cut``)
            of GPA values.

    Returns:
        The grade classes as floats; values outside [0, 4] become NaN.
    """
    # include_lowest=True closes the first bin on the left. Without it a GPA
    # of exactly 0 (reachable through the clip above) falls outside every
    # bin and gets a NaN class.
    return pd.cut(gpa, bins=[0, 1, 2, 3, 4], labels=[3, 2, 1, 0],
                  include_lowest=True).astype(float)


# Calculate Grade Class
df['GradeClass'] = assign_grade_class(df['GPA'])

# Add total activities
# Tutoring is counted as an activity here, so the total ranges from 0 to 5.
df['TotalActivities'] = df[['Tutoring', 'Extracurricular', 'Sports', 'Music', 'Volunteering']].sum(axis=1)

# Create UI Components

# 1. DATA EXPLORER TAB
def create_data_explorer():
    """Build the "Data Explorer" tab: dataset size, a preview and summary stats.

    Reads the module-level ``df``. The content is rendered once, when this
    function is called, into an Output widget.

    Returns:
        widgets.VBox: a heading followed by the populated output area.
    """
    output = widgets.Output()

    with output:
        display(HTML("<h3>📊 Dataset Overview</h3>"))
        # print() rather than display(): display() of a plain str shows its
        # repr, i.e. with surrounding quotes and a literal "\n".
        print(f"Total Students: {len(df)}")
        print(f"Features: {len(df.columns)}")
        print("\nFirst 5 rows:")
        display(df.head())
        print("\nStatistical Summary:")
        display(df.describe())

    return widgets.VBox([
        widgets.HTML("<h2 style='color: #2c3e50;'>🔍 Data Explorer</h2>"),
        output
    ])

# 2. VISUALIZATION DASHBOARD
def create_dashboard():
    """Build the "Dashboard" tab: a 2x3 grid of summary charts.

    Reads the module-level ``df`` and draws, once at construction time:
    GPA histogram, study time vs GPA, absences vs GPA, mean GPA per
    parental-support level, mean GPA difference with/without each activity,
    and the age distribution.

    Returns:
        widgets.VBox: a heading followed by the output area holding the figure.
    """
    output = widgets.Output()

    with output:
        _, axes = plt.subplots(2, 3, figsize=(15, 10))
        (ax_gpa, ax_study, ax_absences), (ax_support, ax_activity, ax_age) = axes

        # GPA Distribution
        ax_gpa.hist(df['GPA'], bins=20, edgecolor='black', color='skyblue')
        ax_gpa.set_title('GPA Distribution')
        ax_gpa.set_xlabel('GPA')
        ax_gpa.set_ylabel('Frequency')

        # Study Time vs GPA
        ax_study.scatter(df['StudyTimeWeekly'], df['GPA'], alpha=0.5, color='green')
        ax_study.set_xlabel('Study Time (hours/week)')
        ax_study.set_ylabel('GPA')
        ax_study.set_title('Study Time vs GPA')

        # Absences vs GPA
        ax_absences.scatter(df['Absences'], df['GPA'], alpha=0.5, color='red')
        ax_absences.set_xlabel('Number of Absences')
        ax_absences.set_ylabel('GPA')
        ax_absences.set_title('Absences vs GPA')

        # Parental Support
        support_means = df.groupby('ParentalSupport')['GPA'].mean()
        # Position bars at the levels actually present in the data. A
        # hard-coded range(5) raises a shape mismatch as soon as one level
        # has no students.
        ax_support.bar(support_means.index, support_means.values,
                       color='purple', edgecolor='black')
        ax_support.set_xticks(support_means.index)
        ax_support.set_xlabel('Parental Support Level')
        ax_support.set_ylabel('Average GPA')
        ax_support.set_title('Parental Support vs GPA')

        # Activities Impact: mean GPA of participants minus non-participants.
        # NOTE(review): 'Extracurricular' is not in this list although it is a
        # column of df -- confirm whether the omission is intentional.
        activities = ['Tutoring', 'Sports', 'Music', 'Volunteering']
        impact = [
            df.loc[df[act] == 1, 'GPA'].mean() - df.loc[df[act] == 0, 'GPA'].mean()
            for act in activities
        ]

        ax_activity.bar(activities, impact, color='orange', edgecolor='black')
        ax_activity.set_title('Activity Impact on GPA')
        ax_activity.set_ylabel('GPA Difference')
        ax_activity.tick_params(axis='x', rotation=45)

        # Age Distribution
        age_counts = df['Age'].value_counts().sort_index()
        ax_age.bar(age_counts.index, age_counts.values, color='teal', edgecolor='black')
        ax_age.set_xlabel('Age')
        ax_age.set_ylabel('Count')
        ax_age.set_title('Student Age Distribution')

        plt.tight_layout()
        plt.show()

    return widgets.VBox([
        widgets.HTML("<h2 style='color: #2c3e50;'>📈 Visualization Dashboard</h2>"),
        output
    ])

# 3. MODEL TRAINER
def create_model_trainer():
    """Build the "Model Trainer" tab.

    The user picks a problem type (classify ``GradeClass`` or regress ``GPA``)
    and a model family (random forest or decision tree). Clicking the button
    trains on an 80/20 split of the module-level ``df`` and prints test-set
    metrics into the tab's output area.

    Returns:
        widgets.VBox: heading, the two selectors, the train button and the
        output area.
    """
    problem_type = widgets.RadioButtons(
        options=['Classification (Grade Class)', 'Regression (GPA)'],
        value='Classification (Grade Class)',
        description='Problem:',
        style={'description_width': 'initial'}
    )

    model_type = widgets.Dropdown(
        options=['Random Forest', 'Decision Tree'],
        value='Random Forest',
        description='Model:',
        layout=widgets.Layout(width='300px')
    )

    train_button = widgets.Button(
        description='🚀 Train Model',
        button_style='success',
        layout=widgets.Layout(width='200px')
    )

    output = widgets.Output()

    features = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'TotalActivities']

    def on_train_click(b):
        """Train the selected model and print its test-set metrics."""
        with output:
            clear_output()

            is_classification = 'Classification' in problem_type.value
            use_forest = model_type.value == 'Random Forest'
            target = 'GradeClass' if is_classification else 'GPA'

            # Drop rows whose target is missing: GradeClass is NaN for any GPA
            # that falls outside the grade bins, and the estimators reject
            # NaN targets.
            rows = df.dropna(subset=[target])
            X_train, X_test, y_train, y_test = train_test_split(
                rows[features], rows[target], test_size=0.2, random_state=42)

            # Fit the scaler on the training split only, so no test-set
            # statistics leak into training.
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            if is_classification:
                if use_forest:
                    model = RandomForestClassifier(n_estimators=100, random_state=42)
                else:
                    model = DecisionTreeClassifier(max_depth=5, random_state=42)
            else:
                if use_forest:
                    model = RandomForestRegressor(n_estimators=100, random_state=42)
                else:
                    model = DecisionTreeRegressor(max_depth=5, random_state=42)

            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            if is_classification:
                print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")
                print("\n📋 Classification Report:")
                print(classification_report(y_test, y_pred))
            else:
                print(f"✅ R² Score: {r2_score(y_test, y_pred):.4f}")
                print(f"✅ RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")

    train_button.on_click(on_train_click)

    return widgets.VBox([
        widgets.HTML("<h2 style='color: #2c3e50;'>🤖 Model Trainer</h2>"),
        problem_type,
        model_type,
        train_button,
        output
    ])

# 4. RISK PREDICTOR
def create_risk_predictor():
    """Build the "Risk Predictor" tab.

    Four sliders describe a hypothetical student (study time, absences,
    parental support, number of activities). Clicking the button predicts
    that student's GPA with a random forest trained on the module-level
    ``df`` and shows a risk band: GPA >= 3.0 low, >= 2.0 medium, otherwise
    high.

    Returns:
        widgets.VBox: heading, intro text, the sliders, the predict button
        and the output area.
    """
    style = {'description_width': 'initial'}

    study_time = widgets.FloatSlider(value=10, min=0, max=20, step=0.5,
                                     description='Study Time (hrs):', style=style,
                                     layout=widgets.Layout(width='500px'))

    absences = widgets.IntSlider(value=5, min=0, max=30,
                                description='Absences:', style=style,
                                layout=widgets.Layout(width='500px'))

    support = widgets.IntSlider(value=2, min=0, max=4,
                               description='Parental Support:', style=style,
                               layout=widgets.Layout(width='500px'))

    activities = widgets.IntSlider(value=2, min=0, max=5,
                                  description='Activities:', style=style,
                                  layout=widgets.Layout(width='500px'))

    predict_btn = widgets.Button(description='🎯 Predict Risk', button_style='danger',
                                 layout=widgets.Layout(width='200px'))

    output = widgets.Output()

    features = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'TotalActivities']
    fitted = {}

    def get_model():
        """Return the GPA regressor, fitting it on first use only.

        The training data and random_state are fixed, so refitting on every
        click would reproduce the same model at the cost of a visible delay.
        The cached model reflects ``df`` as it was at the first click.
        """
        if 'model' not in fitted:
            model = RandomForestRegressor(n_estimators=100, random_state=42)
            model.fit(df[features], df['GPA'])
            fitted['model'] = model
        return fitted['model']

    def on_predict(b):
        """Predict GPA for the slider values and render the risk card."""
        with output:
            clear_output()

            # Predict from a one-row DataFrame carrying the training column
            # names, so the input matches what the model was fitted on.
            new_data = pd.DataFrame(
                [[study_time.value, absences.value, support.value, activities.value]],
                columns=features)
            pred_gpa = get_model().predict(new_data)[0]

            # Risk level
            if pred_gpa >= 3.0:
                risk = "🟢 LOW RISK"
                color = "green"
                advice = "Student is performing well. Keep up the good work!"
            elif pred_gpa >= 2.0:
                risk = "🟡 MEDIUM RISK"
                color = "orange"
                advice = "Student shows some risk factors. Consider additional support."
            else:
                risk = "🔴 HIGH RISK"
                color = "red"
                advice = "Student needs immediate intervention and academic support."

            display(HTML(f"""
            <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 
                        padding: 25px; border-radius: 15px; color: white; margin-top: 20px;">
                <h2 style="text-align: center;">🎓 Risk Assessment Result</h2>
                <hr>
                <div style="font-size: 20px; text-align: center;">
                    <p>Predicted GPA: <b>{pred_gpa:.2f}</b></p>
                    <p style="color: {color}; font-size: 24px;"><b>{risk}</b></p>
                    <p style="font-size: 16px;">{advice}</p>
                </div>
            </div>
            """))

    predict_btn.on_click(on_predict)

    return widgets.VBox([
        widgets.HTML("<h2 style='color: #2c3e50;'>🎯 Student Risk Predictor</h2>"),
        widgets.HTML("<p>Enter student information to predict academic risk:</p>"),
        study_time, absences, support, activities,
        predict_btn,
        output
    ])

# 5. CLUSTERING EXPLORER
def create_clustering():
    """Build the "Clustering" tab.

    A slider selects the number of clusters (2-8). Clicking the button runs
    K-Means on standardized study time, absences and GPA from the
    module-level ``df``, then shows a 2-D PCA scatter coloured by cluster,
    a bar chart and table of per-cluster feature means, and cluster sizes.

    Cluster labels stay local to the click handler; the shared ``df`` is
    not modified.

    Returns:
        widgets.VBox: heading, the cluster-count slider, the run button and
        the output area.
    """
    n_clusters = widgets.IntSlider(value=4, min=2, max=8, description='Number of Clusters:',
                                   layout=widgets.Layout(width='400px'))

    run_btn = widgets.Button(description='🔍 Run Clustering', button_style='info',
                            layout=widgets.Layout(width='200px'))

    output = widgets.Output()

    def on_run_cluster(b):
        """Cluster the students and render plots, profiles and sizes."""
        with output:
            clear_output()

            # Read the slider once so fitting and reporting use the same k.
            k = n_clusters.value

            features = ['StudyTimeWeekly', 'Absences', 'GPA']
            X = df[features]

            # Standardize
            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(X)

            # K-Means
            kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
            clusters = kmeans.fit_predict(X_scaled)

            # PCA for visualization
            pca = PCA(n_components=2)
            X_pca = pca.fit_transform(X_scaled)

            _, (ax_scatter, ax_profile) = plt.subplots(1, 2, figsize=(14, 5))

            # Cluster visualization
            scatter = ax_scatter.scatter(X_pca[:, 0], X_pca[:, 1], c=clusters,
                                         cmap='viridis', s=50, alpha=0.6)
            ax_scatter.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.2%})')
            ax_scatter.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.2%})')
            ax_scatter.set_title('Student Clusters (PCA)')
            plt.colorbar(scatter, ax=ax_scatter)

            # Cluster profiles: means of the raw (unscaled) features.
            # Labels live in a local Series rather than a new df column.
            # Writing df['Cluster'] would change the shared frame from inside
            # a click handler and alter what other tabs see if rebuilt.
            labels = pd.Series(clusters, index=X.index, name='Cluster')
            cluster_means = X.groupby(labels).mean()
            cluster_means.plot(kind='bar', ax=ax_profile)
            ax_profile.set_title('Cluster Characteristics')
            ax_profile.set_xlabel('Cluster')
            ax_profile.set_ylabel('Average Value')
            ax_profile.legend(loc='upper right')
            ax_profile.tick_params(axis='x', rotation=0)

            plt.tight_layout()
            plt.show()

            display(HTML("<h4>📊 Cluster Profiles:</h4>"))
            display(cluster_means.round(2))

            # Cluster sizes
            # reindex() reports a label with no members as 0 instead of
            # raising KeyError on lookup.
            sizes = labels.value_counts().reindex(range(k), fill_value=0)
            display(HTML("<h4>📈 Cluster Sizes:</h4>"))
            for cluster_id, size in sizes.items():
                print(f"Cluster {cluster_id}: {size} students ({size/len(labels)*100:.1f}%)")

    run_btn.on_click(on_run_cluster)

    return widgets.VBox([
        widgets.HTML("<h2 style='color: #2c3e50;'>🔬 Clustering Explorer</h2>"),
        n_clusters,
        run_btn,
        output
    ])

# 6. CORRELATION ANALYZER
def create_correlation_analyzer():
    """Build the "Correlations" tab.

    Computes the pairwise correlation matrix over every numeric column of
    the module-level ``df``, draws it as an annotated heatmap, and prints
    each column's correlation with GPA from highest to lowest. Rendered
    once, at construction time.

    Returns:
        widgets.VBox: a heading followed by the populated output area.
    """
    output = widgets.Output()

    with output:
        corr_matrix = df.select_dtypes(include=[np.number]).corr()

        plt.figure(figsize=(12, 8))
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                    square=True, linewidths=1, cbar_kws={"shrink": 0.8})
        plt.title('Feature Correlation Matrix', fontsize=16)
        plt.tight_layout()
        plt.show()

        display(HTML("<h4>📈 Top Correlations with GPA:</h4>"))
        # Drop GPA's trivial self-correlation, then list by signed value, so
        # negative relationships appear at the bottom.
        gpa_corr = corr_matrix['GPA'].drop('GPA').sort_values(ascending=False)
        for col, corr in gpa_corr.items():
            print(f"{col}: {corr:.3f}")

    return widgets.VBox([
        widgets.HTML("<h2 style='color: #2c3e50;'>🔗 Correlation Analyzer</h2>"),
        output
    ])

# Create the main tab interface
# Each tab title sits next to the factory that builds its content, so the
# two cannot drift out of order.
tab_specs = [
    ('🔍 Data Explorer', create_data_explorer),
    ('📊 Dashboard', create_dashboard),
    ('🔗 Correlations', create_correlation_analyzer),
    ('🤖 Model Trainer', create_model_trainer),
    ('🎯 Risk Predictor', create_risk_predictor),
    ('🔬 Clustering', create_clustering),
]

main_tab = widgets.Tab()

# Add all components to tabs
main_tab.children = [build_tab() for _, build_tab in tab_specs]

# Set tab titles
for position, (title, _) in enumerate(tab_specs):
    main_tab.set_title(position, title)

# Display header
display(HTML("""
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 
            padding: 30px; border-radius: 15px; color: white; margin-bottom: 20px; text-align: center;">
    <h1 style="font-size: 36px; margin: 0;">🎓 Student Performance Analytics</h1>
    <p style="font-size: 18px; margin: 10px 0 0 0;">Interactive Data Mining Platform</p>
    <p style="font-size: 14px; margin: 5px 0 0 0;">Dataset: {} students, {} features</p>
</div>
""".format(len(df), len(df.columns))))

# Display the main tab interface
display(main_tab)

# Display footer with instructions
display(HTML("""
<div style="background-color: #f8f9fa; padding: 20px; border-radius: 10px; margin-top: 20px;">
    <h3 style="color: #2c3e50;">📌 How to Use:</h3>
    <ul style="font-size: 14px;">
        <li><b>Click on any tab</b> above to access different analysis tools</li>
        <li><b>Interact with sliders and buttons</b> to customize your analysis</li>
        <li><b>Results appear instantly</b> below each control</li>
        <li><b>All visualizations are interactive</b> and update in real-time</li>
    </ul>
    <p style="color: #666; font-size: 12px; text-align: center; margin-top: 10px;">
        Created with ❤️ using Jupyter, ipywidgets, and scikit-learn
    </p>
</div>
"""))

Tab(children=(VBox(children=(HTML(value="<h2 style='color: #2c3e50;'>🔍 Data Explorer</h2>"), Output())), VBox(…