In [1]:
# app.py - Student Performance Analytics API (FIXED VERSION)

from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt
import seaborn as sns
import io
import base64
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error
from sklearn.decomposition import PCA
import json
import warnings
import os
import sys
warnings.filterwarnings('ignore')

# Print startup message
print("="*60)
print("STUDENT PERFORMANCE ANALYTICS API")
print("="*60)
print(f"Python version: {sys.version}")
print(f"Flask version: {Flask.__version__}")

# Initialize Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Global variables to store data and models
student_data = None
trained_models = {}

# ============================================
# Helper Functions
# ============================================

def generate_sample_data(n_students=500):
    """Generate sample student data"""
    np.random.seed(42)
    
    data = {
        'StudentID': range(1001, 1001 + n_students),
        'Age': np.random.choice([15, 16, 17, 18], n_students),
        'Gender': np.random.choice([0, 1], n_students),
        'Ethnicity': np.random.choice([0, 1, 2, 3], n_students),
        'ParentalEducation': np.random.choice([0, 1, 2, 3, 4], n_students),
        'StudyTimeWeekly': np.random.uniform(0, 20, n_students),
        'Absences': np.random.poisson(5, n_students),
        'Tutoring': np.random.choice([0, 1], n_students, p=[0.7, 0.3]),
        'ParentalSupport': np.random.choice([0, 1, 2, 3, 4], n_students),
        'Extracurricular': np.random.choice([0, 1], n_students, p=[0.6, 0.4]),
        'Sports': np.random.choice([0, 1], n_students, p=[0.55, 0.45]),
        'Music': np.random.choice([0, 1], n_students, p=[0.65, 0.35]),
        'Volunteering': np.random.choice([0, 1], n_students, p=[0.7, 0.3])
    }
    
    df = pd.DataFrame(data)
    
    # Calculate GPA
    df['GPA'] = (2.0 + 
                 df['StudyTimeWeekly'] * 0.08 - 
                 df['Absences'] * 0.05 + 
                 df['ParentalSupport'] * 0.15 +
                 df['Tutoring'] * 0.3 +
                 (df['Extracurricular'] + df['Sports'] + df['Music'] + df['Volunteering']) * 0.1 +
                 np.random.normal(0, 0.3, n_students))
    df['GPA'] = df['GPA'].clip(0, 4)
    
    # Calculate Grade Class
    df['GradeClass'] = pd.cut(df['GPA'], bins=[0, 1, 2, 3, 4], labels=[3, 2, 1, 0]).astype(float)
    
    # Add total activities
    df['TotalActivities'] = df[['Tutoring', 'Extracurricular', 'Sports', 'Music', 'Volunteering']].sum(axis=1)
    
    return df

def fig_to_base64(fig):
    """Convert matplotlib figure to base64 string"""
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight', dpi=100)
    buf.seek(0)
    img_str = base64.b64encode(buf.read()).decode('utf-8')
    plt.close(fig)
    return img_str

# ============================================
# API Routes
# ============================================

@app.route('/', methods=['GET'])
def home():
    """API home endpoint"""
    return jsonify({
        'name': 'Student Performance Analytics API',
        'version': '1.0',
        'status': 'running',
        'data_loaded': student_data is not None,
        'students_count': len(student_data) if student_data is not None else 0,
        'endpoints': {
            'GET /': 'API information',
            'GET /health': 'Health check',
            'POST /data/generate': 'Generate sample data',
            'GET /data/summary': 'Get data summary',
            'POST /predict/risk': 'Predict student risk',
            'POST /predict/gpa': 'Predict GPA',
            'GET /visualizations/dashboard': 'Get dashboard images',
            'POST /cluster/students': 'Cluster students'
        }
    })

@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    return jsonify({
        'status': 'healthy',
        'server': 'running',
        'data_loaded': student_data is not None
    })

@app.route('/data/generate', methods=['POST'])
def generate_data():
    """Generate sample student data"""
    global student_data
    
    try:
        data = request.get_json() if request.is_json else {}
        n_students = data.get('n_students', 500) if data else 500
        
        student_data = generate_sample_data(n_students)
        
        return jsonify({
            'success': True,
            'message': f'Sample data generated with {n_students} students',
            'shape': list(student_data.shape),
            'columns': student_data.columns.tolist()
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/data/summary', methods=['GET'])
def data_summary():
    """Get basic data summary"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded. Please generate data first.'}), 400
    
    try:
        summary = {
            'total_students': len(student_data),
            'total_features': len(student_data.columns),
            'columns': student_data.columns.tolist(),
            'data_types': {k: str(v) for k, v in student_data.dtypes.to_dict().items()},
            'missing_values': student_data.isnull().sum().to_dict(),
            'sample_records': student_data.head(5).to_dict('records')
        }
        
        return jsonify(summary)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/predict/risk', methods=['POST'])
def predict_risk():
    """Predict student risk level"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    try:
        data = request.get_json()
        if not data:
            return jsonify({'error': 'No data provided'}), 400
        
        # Extract features with defaults
        features = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'TotalActivities']
        
        # Prepare input
        input_data = pd.DataFrame([{
            'StudyTimeWeekly': float(data.get('study_time', data.get('StudyTimeWeekly', 10))),
            'Absences': float(data.get('absences', data.get('Absences', 5))),
            'ParentalSupport': float(data.get('parental_support', data.get('ParentalSupport', 2))),
            'TotalActivities': float(data.get('activities', data.get('TotalActivities', 2)))
        }])
        
        # Train model
        X = student_data[features]
        y = student_data['GPA']
        
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X, y)
        
        # Predict
        pred_gpa = float(model.predict(input_data)[0])
        
        # Determine risk
        if pred_gpa >= 3.0:
            risk_level = 'Low Risk'
            risk_color = 'green'
            recommendation = 'Student is performing well. Keep up the good work!'
        elif pred_gpa >= 2.0:
            risk_level = 'Medium Risk'
            risk_color = 'orange'
            recommendation = 'Student shows some risk factors. Consider additional support.'
        else:
            risk_level = 'High Risk'
            risk_color = 'red'
            recommendation = 'Student needs immediate intervention and academic support.'
        
        return jsonify({
            'success': True,
            'predicted_gpa': round(pred_gpa, 2),
            'risk_level': risk_level,
            'risk_color': risk_color,
            'recommendation': recommendation,
            'input_features': input_data.to_dict('records')[0]
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/predict/gpa', methods=['POST'])
def predict_gpa():
    """Predict GPA based on student features"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    try:
        data = request.get_json()
        if not data:
            return jsonify({'error': 'No data provided'}), 400
        
        # Define features
        feature_cols = ['StudyTimeWeekly', 'Absences', 'ParentalSupport', 'Tutoring', 
                       'Extracurricular', 'Sports', 'Music', 'Volunteering']
        
        # Prepare input with defaults
        input_data = {}
        for col in feature_cols:
            input_data[col] = float(data.get(col.lower(), data.get(col, 0)))
        
        input_df = pd.DataFrame([input_data])
        
        # Train model
        X = student_data[feature_cols]
        y = student_data['GPA']
        
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X, y)
        
        # Predict
        pred_gpa = float(model.predict(input_df)[0])
        
        return jsonify({
            'success': True,
            'predicted_gpa': round(pred_gpa, 3),
            'input_features': input_data
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/visualizations/dashboard', methods=['GET'])
def get_dashboard():
    """Get dashboard visualizations as base64 images"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    try:
        visualizations = {}
        
        # 1. GPA Distribution
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.hist(student_data['GPA'], bins=20, edgecolor='black', color='skyblue', alpha=0.7)
        ax.set_title('GPA Distribution')
        ax.set_xlabel('GPA')
        ax.set_ylabel('Frequency')
        visualizations['gpa_distribution'] = fig_to_base64(fig)
        
        # 2. Study Time vs GPA
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(student_data['StudyTimeWeekly'], student_data['GPA'], alpha=0.5, color='green')
        ax.set_xlabel('Study Time (hours/week)')
        ax.set_ylabel('GPA')
        ax.set_title('Study Time vs GPA')
        visualizations['study_vs_gpa'] = fig_to_base64(fig)
        
        # 3. Absences vs GPA
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(student_data['Absences'], student_data['GPA'], alpha=0.5, color='red')
        ax.set_xlabel('Number of Absences')
        ax.set_ylabel('GPA')
        ax.set_title('Absences vs GPA')
        visualizations['absences_vs_gpa'] = fig_to_base64(fig)
        
        return jsonify(visualizations)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/cluster/students', methods=['POST'])
def cluster_students():
    """Perform K-means clustering on students"""
    global student_data
    
    if student_data is None:
        return jsonify({'error': 'No data loaded'}), 400
    
    try:
        data = request.get_json() or {}
        n_clusters = int(data.get('n_clusters', 4))
        features = data.get('features', ['StudyTimeWeekly', 'Absences', 'GPA'])
        
        # Prepare data
        X = student_data[features].copy()
        X = X.fillna(X.mean())
        
        # Standardize
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        
        # Perform clustering
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        clusters = kmeans.fit_predict(X_scaled)
        
        # Get cluster profiles
        temp_df = student_data.copy()
        temp_df['Cluster'] = clusters
        cluster_profiles = temp_df.groupby('Cluster')[features].mean().to_dict()
        
        # Get cluster sizes
        cluster_sizes = temp_df['Cluster'].value_counts().sort_index().to_dict()
        
        # PCA for visualization
        pca = PCA(n_components=2)
        X_pca = pca.fit_transform(X_scaled)
        
        # Create visualization
        fig, ax = plt.subplots(figsize=(10, 8))
        scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=clusters, cmap='viridis', alpha=0.6, s=50)
        ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.2%})')
        ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.2%})')
        ax.set_title(f'Student Clusters (k={n_clusters})')
        plt.colorbar(scatter, ax=ax, label='Cluster')
        
        return jsonify({
            'success': True,
            'n_clusters': n_clusters,
            'features_used': features,
            'cluster_profiles': cluster_profiles,
            'cluster_sizes': {str(k): int(v) for k, v in cluster_sizes.items()},
            'cluster_visualization': fig_to_base64(fig),
            'explained_variance': pca.explained_variance_ratio_.tolist()
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

# ============================================
# Run the API
# ============================================

if __name__ == '__main__':
    # Generate sample data on startup
    print("\nüìä Generating sample data...")
    student_data = generate_sample_data(500)
    print(f"‚úÖ Sample data generated with {len(student_data)} students")
    print("\n" + "="*60)
    print("üöÄ SERVER STARTING...")
    print("="*60)
    print("\nüì° Try these URLs in your browser:")
    print("   http://127.0.0.1:5000/")
    print("   http://localhost:5000/")
    print("   http://127.0.0.1:5000/health")
    print("\nüí° If connection refused, try:")
    print("   1. Check if port 5000 is available")
    print("   2. Run: netstat -ano | findstr :5000 (Windows)")
    print("   3. Or: lsof -i :5000 (Mac/Linux)")
    print("   4. Try a different port: app.run(port=5001)")
    print("\n" + "="*60)
    
    # Try to run on different ports if 5000 is busy
    ports_to_try = [5000, 5001, 5002, 8080, 8000]
    
    for port in ports_to_try:
        try:
            print(f"\nüîÑ Attempting to start on port {port}...")
            app.run(debug=False, host='0.0.0.0', port=port)
            break
        except OSError:
            print(f"‚ùå Port {port} is busy, trying next...")
            continue

STUDENT PERFORMANCE ANALYTICS API
Python version: 3.12.3 (tags/v3.12.3:f6650f9, Apr  9 2024, 14:05:25) [MSC v.1938 64 bit (AMD64)]


AttributeError: type object 'Flask' has no attribute '__version__'