In [1]:
# app.py - Student Performance Analytics API

from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io
import base64
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error, classification_report
from sklearn.decomposition import PCA
import json
import warnings
warnings.filterwarnings('ignore')

app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Global variables to store data and models
student_data = None
trained_models = {}

# ============================================
# Helper Functions
# ============================================

def generate_sample_data(n_students=500):
    """Generate sample student data"""
    np.random.seed(42)
    
    data = {
        'StudentID': range(1001, 1001 + n_students),
        'Age': np.random.choice([15, 16, 17, 18], n_students),
        'Gender': np.random.choice([0, 1], n_students),
        'Ethnicity': np.random.choice([0, 1, 2, 3], n_students),
        'ParentalEducation': np.random.choice([0, 1, 2, 3, 4], n_students),
        'StudyTimeWeekly': np.random.uniform(0, 20, n_students),
        'Absences': np.random.poisson(5, n_students),
        'Tutoring': np.random.choice([0, 1], n_students, p=[0.7, 0.3]),
        'ParentalSupport': np.random.choice([0, 1, 2, 3, 4], n_students),
        'Extracurricular': np.random.choice([0, 1], n_students, p=[0.6, 0.4]),
        'Sports': np.random.choice([0, 1], n_students, p=[0.55, 0.45]),
        'Music': np.random.choice([0, 1], n_students, p=[0.65, 0.35]),
        'Volunteering': np.random.choice([0, 1], n_students, p=[0.7, 0.3])
    }
    
    df = pd.DataFrame(data)
    
    # Calculate GPA
    df['GPA'] = (2.0 + 
                 df['StudyTimeWeekly'] * 0.08 - 
                 df['Absences'] * 0.05 + 
                 df['ParentalSupport'] * 0.15 +
                 df['Tutoring'] * 0.3 +
                 (df['Extracurricular'] + df['Sports'] + df['Music'] + df['Volunteering']) * 0.1 +
                 np.random.normal(0, 0.3, n_students))
    df['GPA'] = df['GPA'].clip(0, 4)
    
    # Calculate Grade Class
    df['GradeClass'] = pd.cut(df['GPA'], bins=[0, 1, 2, 3, 4], labels=[3, 2, 1, 0]).astype(float)
    
    # Add total activities
    df['TotalActivities'] = df[['Tutoring', 'Extracurricular', 'Sports', 'Music', 'Volunteering']].sum(axis=1)
    
    return df

def fig_to_base64(fig):
    """Convert matplotlib figure to base64 string"""
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight', dpi=100)
    buf.seek(0)
    img_str = base64.b64encode(buf.read()).decode('utf-8')
    plt.close(fig)
    return img_str

def prepare_features(df, features_list):
    """Prepare features for modeling"""
    X = df[features_list].copy()
    
    # Handle missing values
    X = X.fillna(X.mean())
    
    return X

# ============================================
# API Routes - Data Management
# ============================================

@app.route('/', methods=['GET'])
def home():
    """API home endpoint"""
    return jsonify({
        'name': 'Student Performance Analytics API',
        'version': '1.0',
        'endpoints': {
            '/data/upload': 'POST - Upload student data CSV',
            '/data/generate': 'POST - Generate sample data',
            '/data/summary': 'GET - Get data summary',
            '/data/statistics': 'GET - Get statistical summary',
            '/visualizations/dashboard': 'GET - Get dashboard visualizations',
            '/visualizations/correlation': 'GET - Get correlation matrix',
            '/analysis/feature-importance': 'POST - Get feature importance',
            '/predict/risk': 'POST - Predict student risk',
            '/predict/gpa': 'POST - Predict GPA',
            '/predict/grade': 'POST - Predict grade class',
            '/cluster/students': 'POST - Cluster students',
            '/models/train': 'POST - Train ML model',
            '/models/list': 'GET - List trained models'
        }
    })

@app.route('/data/upload', methods=['POST'])
def upload_data():
    """Upload student data CSV file"""
    global student_data
    
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400
    
    file = request.files['file']
    
    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400
    
    try:
        student_data = pd.read_csv(file)
        
        # Add derived features
        if 'GPA' in student_data.columns:
            if 'TotalActivities' not in student_data.columns:
                activity_cols = [c for c in ['Tutoring', 'Extracurricular', 'Sports', 'Music', 'Volunteering'] 
                               if c in student_data.columns]
                if activity_cols:
                    student_data['TotalActivities'] = student_data[activity_cols].sum(axis=1)
        
        return jsonify({
            'message': 'Data uploaded successfully',
            'shape': student_data.shape,
            'columns': student_data.columns.tolist()
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 400

@app.route('/data/generate', methods=['POST'])
def generate_data():
    """Generate sample student data"""
    global student_data
    
    n_students = request.json.get('n_students', 500) if request.json else 500
    student_data = generate_sample_data(n_students)
    
    return jsonify({
        'message': f'Sample data generated with {n_students} students',
        'shape': student_data.shape,
        'columns': student_data.columns.tolist()
    })

@app.route('/data/summary', methods=['GET'])
def data_summary():
    """Get basic data summary"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded. Please upload or generate data first.'}), 400
    
    summary = {
        'total_students': len(student_data),
        'total_features': len(student_data.columns),
        'columns': student_data.columns.tolist(),
        'data_types': student_data.dtypes.astype(str).to_dict(),
        'missing_values': student_data.isnull().sum().to_dict(),
        'sample_records': student_data.head(5).to_dict('records')
    }
    
    return jsonify(summary)

@app.route('/data/statistics', methods=['GET'])
def data_statistics():
    """Get statistical summary"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    # Get numeric columns only
    numeric_cols = student_data.select_dtypes(include=[np.number]).columns
    stats = student_data[numeric_cols].describe().to_dict()
    
    return jsonify(stats)

# ============================================
# API Routes - Visualizations
# ============================================

@app.route('/visualizations/dashboard', methods=['GET'])
def get_dashboard():
    """Get dashboard visualizations as base64 images"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    visualizations = {}
    
    # 1. GPA Distribution
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.hist(student_data['GPA'], bins=20, edgecolor='black', color='skyblue', alpha=0.7)
    ax.set_title('GPA Distribution')
    ax.set_xlabel('GPA')
    ax.set_ylabel('Frequency')
    visualizations['gpa_distribution'] = fig_to_base64(fig)
    
    # 2. Study Time vs GPA
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(student_data['StudyTimeWeekly'], student_data['GPA'], alpha=0.5, color='green')
    ax.set_xlabel('Study Time (hours/week)')
    ax.set_ylabel('GPA')
    ax.set_title('Study Time vs GPA')
    visualizations['study_vs_gpa'] = fig_to_base64(fig)
    
    # 3. Absences vs GPA
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(student_data['Absences'], student_data['GPA'], alpha=0.5, color='red')
    ax.set_xlabel('Number of Absences')
    ax.set_ylabel('GPA')
    ax.set_title('Absences vs GPA')
    visualizations['absences_vs_gpa'] = fig_to_base64(fig)
    
    # 4. Parental Support Impact
    fig, ax = plt.subplots(figsize=(8, 6))
    support_means = student_data.groupby('ParentalSupport')['GPA'].mean()
    ax.bar(range(len(support_means)), support_means.values, color='purple', edgecolor='black')
    ax.set_xlabel('Parental Support Level')
    ax.set_ylabel('Average GPA')
    ax.set_title('Parental Support vs GPA')
    visualizations['parental_support'] = fig_to_base64(fig)
    
    return jsonify(visualizations)

@app.route('/visualizations/correlation', methods=['GET'])
def get_correlation():
    """Get correlation matrix heatmap"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    numeric_cols = student_data.select_dtypes(include=[np.number]).columns
    corr_matrix = student_data[numeric_cols].corr()
    
    # Create heatmap
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                square=True, linewidths=1, cbar_kws={"shrink": 0.8}, ax=ax)
    ax.set_title('Feature Correlation Matrix')
    
    # Get correlations with GPA
    gpa_corr = {}
    if 'GPA' in corr_matrix.columns:
        gpa_corr = corr_matrix['GPA'].drop('GPA').sort_values(ascending=False).to_dict()
    
    return jsonify({
        'correlation_matrix': corr_matrix.to_dict(),
        'correlation_heatmap': fig_to_base64(fig),
        'gpa_correlations': gpa_corr
    })

# ============================================
# API Routes - Predictions
# ============================================

@app.route('/predict/risk', methods=['POST'])
def predict_risk():
    """Predict student risk level"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    data = request.json
    
    # Extract features
    features = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'TotalActivities']
    
    # Prepare input
    input_data = pd.DataFrame([{
        'StudyTimeWeekly': data.get('study_time', 10),
        'Absences': data.get('absences', 5),
        'ParentalSupport': data.get('parental_support', 2),
        'TotalActivities': data.get('activities', 2)
    }])
    
    # Train model
    X = student_data[features]
    y = student_data['GPA']
    
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)
    
    # Predict
    pred_gpa = model.predict(input_data)[0]
    
    # Determine risk
    if pred_gpa >= 3.0:
        risk_level = 'Low Risk'
        risk_color = 'green'
        recommendation = 'Student is performing well. Keep up the good work!'
    elif pred_gpa >= 2.0:
        risk_level = 'Medium Risk'
        risk_color = 'orange'
        recommendation = 'Student shows some risk factors. Consider additional support.'
    else:
        risk_level = 'High Risk'
        risk_color = 'red'
        recommendation = 'Student needs immediate intervention and academic support.'
    
    return jsonify({
        'predicted_gpa': round(float(pred_gpa), 2),
        'risk_level': risk_level,
        'risk_color': risk_color,
        'recommendation': recommendation,
        'input_features': input_data.to_dict('records')[0]
    })

@app.route('/predict/gpa', methods=['POST'])
def predict_gpa():
    """Predict GPA based on student features"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    data = request.json
    
    # Define features
    feature_cols = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'Tutoring', 
                   'Extracurricular', 'Sports', 'Music', 'Volunteering']
    
    # Prepare input
    input_data = {}
    for col in feature_cols:
        input_data[col] = data.get(col.lower(), 0)
    
    input_df = pd.DataFrame([input_data])
    
    # Train model
    X = student_data[feature_cols]
    y = student_data['GPA']
    
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)
    
    # Predict
    pred_gpa = model.predict(input_df)[0]
    
    # Get confidence interval (simplified)
    predictions = []
    for estimator in model.estimators_:
        predictions.append(estimator.predict(input_df)[0])
    
    confidence_lower = np.percentile(predictions, 2.5)
    confidence_upper = np.percentile(predictions, 97.5)
    
    return jsonify({
        'predicted_gpa': round(float(pred_gpa), 3),
        'confidence_interval': {
            'lower': round(float(confidence_lower), 3),
            'upper': round(float(confidence_upper), 3)
        },
        'input_features': input_data
    })

@app.route('/predict/grade', methods=['POST'])
def predict_grade():
    """Predict grade class"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    data = request.json
    
    # Define features
    feature_cols = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'TotalActivities']
    
    # Prepare input
    input_data = {}
    for col in feature_cols:
        input_data[col] = data.get(col.lower(), 0)
    
    input_df = pd.DataFrame([input_data])
    
    # Train model
    X = student_data[feature_cols]
    y = student_data['GradeClass']
    
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X, y)
    
    # Predict
    pred_class = model.predict(input_df)[0]
    pred_proba = model.predict_proba(input_df)[0].tolist()
    
    # Grade class mapping
    grade_map = {0: 'A (Highest)', 1: 'B', 2: 'C', 3: 'D (Lowest)'}
    
    return jsonify({
        'predicted_grade_class': int(pred_class),
        'grade_label': grade_map.get(int(pred_class), 'Unknown'),
        'prediction_probabilities': pred_proba,
        'input_features': input_data
    })

# ============================================
# API Routes - Analysis
# ============================================

@app.route('/analysis/feature-importance', methods=['POST'])
def feature_importance():
    """Get feature importance for prediction"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    data = request.json
    target = data.get('target', 'GPA')
    
    # Define features
    feature_cols = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'Tutoring', 
                   'Extracurricular', 'Sports', 'Music', 'Volunteering', 'TotalActivities']
    
    X = student_data[feature_cols]
    
    if target == 'GPA':
        y = student_data['GPA']
        model = RandomForestRegressor(n_estimators=100, random_state=42)
    else:
        y = student_data['GradeClass']
        model = RandomForestClassifier(n_estimators=100, random_state=42)
    
    model.fit(X, y)
    
    # Get feature importance
    importance = pd.DataFrame({
        'feature': feature_cols,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    
    return jsonify({
        'target': target,
        'feature_importance': importance.to_dict('records')
    })

@app.route('/cluster/students', methods=['POST'])
def cluster_students():
    """Perform K-means clustering on students"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    data = request.json
    n_clusters = data.get('n_clusters', 4)
    features = data.get('features', ['StudyTimeWeekly', 'Absences', 'GPA'])
    
    # Prepare data
    X = student_data[features].copy()
    X = X.fillna(X.mean())
    
    # Standardize
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Perform clustering
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    clusters = kmeans.fit_predict(X_scaled)
    
    # Add clusters to data
    student_data['Cluster'] = clusters
    
    # Get cluster profiles
    cluster_profiles = student_data.groupby('Cluster')[features].mean().to_dict()
    
    # Get cluster sizes
    cluster_sizes = student_data['Cluster'].value_counts().sort_index().to_dict()
    
    # PCA for visualization
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    
    # Create visualization
    fig, ax = plt.subplots(figsize=(10, 8))
    scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=clusters, cmap='viridis', alpha=0.6, s=50)
    ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.2%})')
    ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.2%})')
    ax.set_title(f'Student Clusters (k={n_clusters})')
    plt.colorbar(scatter, ax=ax, label='Cluster')
    
    return jsonify({
        'n_clusters': n_clusters,
        'features_used': features,
        'cluster_profiles': cluster_profiles,
        'cluster_sizes': cluster_sizes,
        'cluster_visualization': fig_to_base64(fig),
        'explained_variance': pca.explained_variance_ratio_.tolist()
    })

# ============================================
# API Routes - Model Management
# ============================================

@app.route('/models/train', methods=['POST'])
def train_model():
    """Train and save a machine learning model"""
    global student_data, trained_models
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    data = request.json
    model_name = data.get('model_name', 'default_model')
    model_type = data.get('model_type', 'random_forest')
    problem_type = data.get('problem_type', 'regression')
    features = data.get('features', ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'TotalActivities'])
    target = data.get('target', 'GPA')
    
    # Prepare data
    X = student_data[features]
    y = student_data[target]
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Select model
    if problem_type == 'regression':
        if model_type == 'random_forest':
            model = RandomForestRegressor(n_estimators=100, random_state=42)
        elif model_type == 'linear':
            model = LinearRegression()
        else:
            model = DecisionTreeRegressor(max_depth=5, random_state=42)
    else:
        if model_type == 'random_forest':
            model = RandomForestClassifier(n_estimators=100, random_state=42)
        elif model_type == 'logistic':
            model = LogisticRegression(max_iter=1000, random_state=42)
        else:
            model = DecisionTreeClassifier(max_depth=5, random_state=42)
    
    # Train model
    model.fit(X_train_scaled, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test_scaled)
    
    # Calculate metrics
    if problem_type == 'regression':
        metrics = {
            'r2_score': r2_score(y_test, y_pred),
            'rmse': np.sqrt(mean_squared_error(y_test, y_pred))
        }
    else:
        metrics = {
            'accuracy': accuracy_score(y_test, y_pred)
        }
    
    # Save model
    trained_models[model_name] = {
        'model': model,
        'scaler': scaler,
        'features': features,
        'target': target,
        'problem_type': problem_type,
        'model_type': model_type,
        'metrics': metrics,
        'test_size': 0.2
    }
    
    return jsonify({
        'message': f'Model "{model_name}" trained successfully',
        'model_name': model_name,
        'model_type': model_type,
        'problem_type': problem_type,
        'features': features,
        'target': target,
        'metrics': metrics
    })

@app.route('/models/predict/<model_name>', methods=['POST'])
def predict_with_model(model_name):
    """Make predictions using a trained model"""
    global trained_models
    
    if model_name not in trained_models:
        return jsonify({'error': f'Model "{model_name}" not found'}), 404
    
    model_info = trained_models[model_name]
    data = request.json
    
    # Prepare input data
    input_data = {}
    for feature in model_info['features']:
        input_data[feature] = data.get(feature.lower(), 0)
    
    input_df = pd.DataFrame([input_data])
    
    # Scale features
    input_scaled = model_info['scaler'].transform(input_df)
    
    # Make prediction
    prediction = model_info['model'].predict(input_scaled)[0]
    
    # Get prediction probabilities for classification
    probabilities = None
    if model_info['problem_type'] == 'classification':
        if hasattr(model_info['model'], 'predict_proba'):
            probabilities = model_info['model'].predict_proba(input_scaled)[0].tolist()
    
    return jsonify({
        'model_name': model_name,
        'prediction': float(prediction) if isinstance(prediction, (int, float)) else str(prediction),
        'probabilities': probabilities,
        'input_features': input_data
    })

@app.route('/models/list', methods=['GET'])
def list_models():
    """List all trained models"""
    global trained_models
    
    models = []
    for name, info in trained_models.items():
        models.append({
            'name': name,
            'model_type': info['model_type'],
            'problem_type': info['problem_type'],
            'features': info['features'],
            'target': info['target'],
            'metrics': info['metrics']
        })
    
    return jsonify({
        'total_models': len(models),
        'models': models
    })

# ============================================
# API Routes - Export
# ============================================

@app.route('/export/data', methods=['GET'])
def export_data():
    """Export processed data as CSV"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    # Create CSV in memory
    csv_buffer = io.StringIO()
    student_data.to_csv(csv_buffer, index=False)
    csv_buffer.seek(0)
    
    return send_file(
        io.BytesIO(csv_buffer.getvalue().encode()),
        mimetype='text/csv',
        as_attachment=True,
        download_name='student_performance_data.csv'
    )

@app.route('/export/report', methods=['GET'])
def export_report():
    """Generate and export analysis report"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    # Create report
    report = []
    report.append("="*60)
    report.append("STUDENT PERFORMANCE ANALYSIS REPORT")
    report.append("="*60)
    report.append("")
    
    report.append(f"Total Students: {len(student_data)}")
    report.append(f"Average GPA: {student_data['GPA'].mean():.3f}")
    report.append(f"Median Study Time: {student_data['StudyTimeWeekly'].median():.2f} hours")
    report.append(f"Average Absences: {student_data['Absences'].mean():.2f}")
    report.append("")
    
    report.append("GPA Distribution:")
    gpa_cats = [
        ('Good (3.0-4.0)', student_data[student_data['GPA'] >= 3.0]),
        ('Average (2.0-2.99)', student_data[(student_data['GPA'] >= 2.0) & (student_data['GPA'] < 3.0)]),
        ('Below Average (1.0-1.99)', student_data[(student_data['GPA'] >= 1.0) & (student_data['GPA'] < 2.0)]),
        ('Failing (0-0.99)', student_data[student_data['GPA'] < 1.0])
    ]
    
    for cat_name, cat_data in gpa_cats:
        count = len(cat_data)
        percentage = (count / len(student_data)) * 100
        report.append(f"  {cat_name}: {count} students ({percentage:.1f}%)")
    
    report.append("")
    report.append("Key Findings:")
    report.append("  1. Study time is positively correlated with GPA")
    report.append("  2. Absences negatively impact student performance")
    report.append("  3. Parental support significantly improves outcomes")
    report.append("  4. Extracurricular activities have positive effects")
    
    report_text = "\n".join(report)
    
    return send_file(
        io.BytesIO(report_text.encode()),
        mimetype='text/plain',
        as_attachment=True,
        download_name='student_performance_report.txt'
    )

# ============================================
# Run the API
# ============================================

if __name__ == '__main__':
    # Generate sample data on startup
    student_data = generate_sample_data(500)
    print("âœ… Sample data generated with 500 students")
    print("ðŸš€ API running at http://localhost:5000")
    app.run(debug=True, host='0.0.0.0', port=5000)

âœ… Sample data generated with 500 students
ðŸš€ API running at http://localhost:5000
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.1.91:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1