### ===============================================
# Project: Student Exam Score Analysis & Predictor
# Author: Islam Abdul Rahim
# Description: This Colab notebook performs EDA (Exploratory Data Analysis)
#              on a student performance dataset and builds predictive models
#              to forecast exam scores based on habits and lifestyle.
# ===============================================

In [None]:
# --- Step 1: Import Libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# --- Step 2: Load Dataset ---
# Read the CSV from the Colab filesystem into the working DataFrame `df`,
# which every later cell reads and mutates.
df = pd.read_csv('/content/student_habits_performance2.csv')
df.head()  # Last expression of the cell: previews the first 5 rows

In [None]:
df.info()  # Summarise column dtypes and non-null counts to spot missing values

In [None]:
# --- Step 3: Explore Categorical Columns ---
# Every text-valued column that Step 5 encodes is listed here, so the raw
# labels can be checked against the encoding dictionaries before mapping.
List_categorical = [
    'gender', 'diet_quality', 'parental_education_level',
    'internet_quality', 'part_time_job', 'extracurricular_participation',
]
for col in List_categorical:
    print(f"{col} unique values: {df[col].unique()}")

In [None]:
# --- Step 4: Drop unnecessary column ---
# The ID column carries no predictive information, so it is removed in place.
# errors='ignore' keeps the cell re-runnable: a second execution no longer
# raises KeyError once the column is already gone.
df.drop('student_id', axis=1, inplace=True, errors='ignore')

In [None]:
# --- Step 5: Encode categorical variables ---
# One label -> integer table per text column. Labels missing from a table
# become NaN after .map().
category_codes = {
    'gender': {'Other': 0, 'Male': 1, 'Female': 2},
    'diet_quality': {'Poor': 1, 'Fair': 2, 'Good': 3},
    'parental_education_level': {'High School': 1, 'Bachelor': 2, 'Master': 3},
    'internet_quality': {'Poor': 1, 'Average': 2, 'Good': 3},
    'part_time_job': {'No': 0, 'Yes': 1},
    'extracurricular_participation': {'No': 0, 'Yes': 1},
}

for column, codes in category_codes.items():
    # Skip columns that are already numeric: mapping integer codes through a
    # label dictionary would silently turn the whole column into NaN if this
    # cell is executed a second time.
    if not pd.api.types.is_numeric_dtype(df[column]):
        df[column] = df[column].map(codes)

# Missing / unmapped parental education is encoded as its own level 0.
df['parental_education_level'] = df['parental_education_level'].fillna(0)


In [None]:
df.head()  # Preview the first rows to verify the Step 5 encoding took effect

In [None]:
# --- Step 6: Set Seaborn Style ---
# Global plot theme for all following figures: white grid background,
# "viridis" as the default palette, and fonts scaled up by 10%.
sns.set(style="whitegrid", palette="viridis", font_scale=1.1)

In [None]:
# ===============================================
# EDA: Age vs Exam Score
# Raw points plus a fitted regression line, to see
# how exam_score varies with age.
# ===============================================
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='age', y='exam_score', alpha=0.7, ax=ax)
# scatter=False: regplot only adds the fitted line on top of the points above.
sns.regplot(data=df, x='age', y='exam_score', scatter=False, color='red', ax=ax)
ax.set_title("Age vs Exam Score")
ax.set_xlabel("Age")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Gender vs Exam Score
# One box per gender showing the spread of exam scores.
# The x-axis shows the integer codes assigned in Step 5.
# ===============================================
fig, ax = plt.subplots(figsize=(7, 5))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.boxplot(data=df, x='gender', y='exam_score', palette='Set2', ax=ax)
ax.set_title("Gender vs Exam Score")
ax.set_xlabel("Gender")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Study Hours per Day vs Exam Score
# Raw points plus a fitted regression line, to see
# how daily study time relates to exam_score.
# ===============================================
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='study_hours_per_day', y='exam_score', alpha=0.7, ax=ax)
# scatter=False: regplot only adds the fitted line on top of the points above.
sns.regplot(data=df, x='study_hours_per_day', y='exam_score', scatter=False, color='red', ax=ax)
ax.set_title("Study Hours per Day vs Exam Score")
ax.set_xlabel("Study Hours per Day")
ax.set_ylabel("Exam Score")
plt.show()


In [None]:
# ===============================================
# EDA: Social Media Hours vs Exam Score
# Raw points plus a fitted regression line, to see
# how social media usage relates to exam_score.
# ===============================================
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='social_media_hours', y='exam_score', alpha=0.7, ax=ax)
# scatter=False: regplot only adds the fitted line on top of the points above.
sns.regplot(data=df, x='social_media_hours', y='exam_score', scatter=False, color='red', ax=ax)
ax.set_title("Social Media Hours vs Exam Score")
ax.set_xlabel("Social Media Hours")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Netflix Hours vs Exam Score
# Raw points plus a fitted regression line, to see
# how Netflix watching time relates to exam_score.
# ===============================================
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='netflix_hours', y='exam_score', alpha=0.7, ax=ax)
# scatter=False: regplot only adds the fitted line on top of the points above.
sns.regplot(data=df, x='netflix_hours', y='exam_score', scatter=False, color='red', ax=ax)
ax.set_title("Netflix Hours vs Exam Score")
ax.set_xlabel("Netflix Hours")
ax.set_ylabel("Exam Score")
plt.show()


In [None]:
# ===============================================
# EDA: Part-Time Job vs Exam Score
# One box per part_time_job value showing the spread of exam scores.
# The x-axis shows the 0/1 codes assigned in Step 5 (No=0, Yes=1).
# ===============================================
fig, ax = plt.subplots(figsize=(7, 5))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.boxplot(data=df, x='part_time_job', y='exam_score', palette='cool', ax=ax)
ax.set_title("Part-Time Job vs Exam Score")
ax.set_xlabel("Has Part-Time Job")
ax.set_ylabel("Exam Score")
plt.show()


In [None]:
# ===============================================
# EDA: Attendance Percentage vs Exam Score
# Raw points plus a fitted regression line, to see
# whether higher attendance goes with higher exam_score.
# ===============================================
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='attendance_percentage', y='exam_score', alpha=0.7, ax=ax)
# scatter=False: regplot only adds the fitted line on top of the points above.
sns.regplot(data=df, x='attendance_percentage', y='exam_score', scatter=False, color='red', ax=ax)
ax.set_title("Attendance Percentage vs Exam Score")
ax.set_xlabel("Attendance Percentage")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Sleep Hours vs Exam Score
# Raw points plus a fitted regression line, to see
# how sleep duration relates to exam_score.
# ===============================================
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='sleep_hours', y='exam_score', alpha=0.7, ax=ax)
# scatter=False: regplot only adds the fitted line on top of the points above.
sns.regplot(data=df, x='sleep_hours', y='exam_score', scatter=False, color='red', ax=ax)
ax.set_title("Sleep Hours vs Exam Score")
ax.set_xlabel("Sleep Hours")
ax.set_ylabel("Exam Score")
plt.show()


In [None]:
# ===============================================
# EDA: Diet Quality vs Exam Score
# One box per diet quality level showing the spread of exam scores.
# The x-axis shows the codes assigned in Step 5 (Poor=1, Fair=2, Good=3).
# ===============================================
fig, ax = plt.subplots(figsize=(7, 5))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.boxplot(data=df, x='diet_quality', y='exam_score', palette='Set3', ax=ax)
ax.set_title("Diet Quality vs Exam Score")
ax.set_xlabel("Diet Quality")
ax.set_ylabel("Exam Score")
plt.show()


In [None]:
# ===============================================
# EDA: Exercise Frequency vs Exam Score
# One box per exercise_frequency value showing the
# spread of exam scores at that frequency.
# ===============================================
fig, ax = plt.subplots(figsize=(7, 5))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.boxplot(data=df, x='exercise_frequency', y='exam_score', palette='husl', ax=ax)
ax.set_title("Exercise Frequency vs Exam Score")
ax.set_xlabel("Exercise Frequency")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Parental Education Level vs Exam Score
# One box per parental education level showing the spread of exam scores.
# The x-axis shows the codes assigned in Step 5 (0 = missing,
# High School=1, Bachelor=2, Master=3).
# ===============================================
fig, ax = plt.subplots(figsize=(7, 5))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.boxplot(data=df, x='parental_education_level', y='exam_score', palette='pastel', ax=ax)
ax.set_title("Parental Education Level vs Exam Score")
ax.set_xlabel("Parental Education Level")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Internet Quality vs Exam Score
# One box per internet quality level showing the spread of exam scores.
# The x-axis shows the codes assigned in Step 5 (Poor=1, Average=2, Good=3).
# ===============================================
fig, ax = plt.subplots(figsize=(7, 5))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.boxplot(data=df, x='internet_quality', y='exam_score', palette='coolwarm', ax=ax)
ax.set_title("Internet Quality vs Exam Score")
ax.set_xlabel("Internet Quality")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Mental Health Rating vs Exam Score
# Raw points plus a fitted regression line, to see
# how the mental health rating relates to exam_score.
# ===============================================
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='mental_health_rating', y='exam_score', alpha=0.7, ax=ax)
# scatter=False: regplot only adds the fitted line on top of the points above.
sns.regplot(data=df, x='mental_health_rating', y='exam_score', scatter=False, color='red', ax=ax)
ax.set_title("Mental Health Rating vs Exam Score")
ax.set_xlabel("Mental Health Rating")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# EDA: Extracurricular Participation vs Exam Score
# One box per participation value showing the spread of exam scores.
# The x-axis shows the 0/1 codes assigned in Step 5 (No=0, Yes=1).
# ===============================================
fig, ax = plt.subplots(figsize=(7, 5))
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — confirm against the installed version.
sns.boxplot(data=df, x='extracurricular_participation', y='exam_score', palette='muted', ax=ax)
ax.set_title("Extracurricular Participation vs Exam Score")
ax.set_xlabel("Extracurricular Participation")
ax.set_ylabel("Exam Score")
plt.show()



In [None]:
# ===============================================
# Correlation Heatmap of All Variables
# Annotated heatmap of the pairwise correlation matrix of
# the numeric columns (exam_score included).
# ===============================================
fig, ax = plt.subplots(figsize=(8, 6))
# `corr` is reused by the following cells, so it stays a module-level name.
corr = df.corr(numeric_only=True)
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5, ax=ax)
ax.set_title("Correlation Heatmap of All Variables", fontsize=14, pad=12)
plt.show()


In [None]:
# ===============================================
# Top Correlations with Exam Score
# Take the exam_score column of the correlation matrix and
# list it from the most positive to the most negative value.
# ===============================================
# The series includes exam_score's correlation with itself.
corr_target = corr["exam_score"].sort_values(ascending=False)
print("🔍 Top Correlations with Exam Score:\n")
print(corr_target)


In [None]:
# ===============================================
# Automated Correlation Interpretation
# Classify correlation strength (None, Weak, Moderate, Strong, Very Strong)
# and direction (Positive or Negative) for each feature.
# ===============================================
def interpret_corr(value):
    """Describe a correlation coefficient as ``"<direction>, <strength>"``.

    Strength is chosen from the absolute value: >= 0.7 "Very Strong",
    >= 0.5 "Strong", >= 0.3 "Moderate", >= 0.1 "Weak", otherwise "None".
    Direction is "None" when the absolute value is below 0.05, otherwise
    "Positive" for values above zero and "Negative" for everything else.
    """
    magnitude = abs(value)

    # Ordered from the highest cut-off down; the first band reached wins.
    bands = (
        (0.7, "Very Strong"),
        (0.5, "Strong"),
        (0.3, "Moderate"),
        (0.1, "Weak"),
    )
    strength = next(
        (label for cutoff, label in bands if magnitude >= cutoff),
        "None",
    )

    if magnitude < 0.05:
        direction = "None"
    elif value > 0:
        direction = "Positive"
    else:
        direction = "Negative"

    return f"{direction}, {strength}"

# Build the correlation report: one row per variable with its coefficient and
# the textual reading of it, ordered by absolute correlation (strongest first).
report = (
    corr_target.to_frame(name="Correlation with Exam Score")
    .assign(Relationship=corr_target.map(interpret_corr))
    .sort_values(by="Correlation with Exam Score", key=abs, ascending=False)
)

display(report)  # Rich table rendering in the notebook



In [None]:
# ===============================================
# Step 7: Preprocessing & Train-Test Split
# ===============================================
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Features are every remaining column except the target.
X = df.drop('exam_score', axis=1)
y = df['exam_score']

# Split BEFORE scaling so the scaler's mean/variance come from the training
# rows only; fitting it on the full dataset would let test-set statistics
# leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any other cell still referring to the fully scaled feature
# matrix keeps working.
X_scaled = scaler.transform(X)

In [None]:
# ===============================================
# Step 8: Model Training & Evaluation
# ===============================================
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

def evaluate_model(model, X_test, y_test, model_name):
    """Print test-set metrics for a fitted regressor and plot actual vs predicted.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature matrix passed to ``model.predict``.
        y_test: True target values; must support ``.min()`` and ``.max()``.
        model_name: Label used in the printed report and the plot title.

    Prints R2 (multiplied by 100, i.e. as a percentage), RMSE and MAE, then
    shows a scatter plot of predictions against the true values together with
    the ``y = x`` reference line.
    """
    y_pred = model.predict(X_test)

    # mean_squared_error returns the MSE; take the square root so the value
    # printed under the "RMSE" label really is the root mean squared error.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    print(f"📊 {model_name} Results:")
    print(f"R2 Score : {r2_score(y_test, y_pred)*100:.3f}")
    print(f"RMSE     : {rmse:.3f}")
    print(f"MAE      : {mean_absolute_error(y_test, y_pred):.3f}")
    print()

    plt.figure(figsize=(7,5))
    sns.scatterplot(x=y_test, y=y_pred)
    # Dashed diagonal = perfect prediction; points near it are well predicted.
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--')
    plt.xlabel("Actual Exam Score")
    plt.ylabel("Predicted Exam Score")
    plt.title(f"{model_name}: Actual vs Predicted")
    plt.show()

In [None]:
# Linear Regression
# Fit ordinary least squares on the training split, then report metrics and
# the actual-vs-predicted plot on the held-out split.
lr = LinearRegression()
lr.fit(X_train, y_train)
evaluate_model(lr, X_test, y_test, "Linear Regression")

In [None]:
# Gradient Boosting
# 300 depth-3 trees with learning rate 0.05; random_state fixed for
# reproducible results. Evaluated on the same held-out split.
gbr = GradientBoostingRegressor(n_estimators=300, learning_rate=0.05, max_depth=3, random_state=42)
gbr.fit(X_train, y_train)
evaluate_model(gbr, X_test, y_test, "Gradient Boosting Regressor")

In [None]:
# Random Forest
# 200 trees; random_state fixed for reproducible results.
# Evaluated on the same held-out split.
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)
evaluate_model(rf, X_test, y_test, "Random Forest Regressor")

In [None]:
# --- Step 9: Save Model & Scaler ---
import joblib

# The linear regression model is persisted under the "best_model" name.
# NOTE(review): nothing in this notebook selects it programmatically — confirm
# it is the model you want to ship after comparing the three reports.
joblib.dump(lr, 'best_model_linear_regression.pkl')
# The fitted scaler must be shipped with the model so inference inputs are
# transformed the same way as the training data.
joblib.dump(scaler, 'scaler_X.pkl')
# NOTE(review): X is the feature DataFrame from Step 7, so this stores the
# feature rows themselves, not just the column names — confirm that is intended.
joblib.dump(X, 'features.pkl')
print("✅ Model and scaler saved successfully!")

In [None]:
# Optional: Download files in Colab
# google.colab is only importable inside a Colab runtime; skip this cell elsewhere.
from google.colab import files
# Each call triggers a browser download of one artifact written in Step 9.
files.download('best_model_linear_regression.pkl')
files.download('scaler_X.pkl')
files.download('features.pkl')

In [None]:
import gradio as gr
import joblib
import numpy as np

# Load saved model & scaler
# NOTE: joblib.load unpickles arbitrary Python objects — only load files that
# were produced by this notebook or come from a trusted source.
try:
    model = joblib.load('best_model_linear_regression.pkl')
    scaler = joblib.load('scaler_X.pkl')
except FileNotFoundError:
    print("Error: Model or scaler files not found. Please make sure 'best_model_linear_regression.pkl' and 'scaler_X.pkl' are in the same directory.")
    # Make the consequence explicit: the app still starts, but on a stand-in
    # model, so nobody mistakes its output for a real prediction.
    print("Warning: falling back to a placeholder model fitted on random data; predicted scores will NOT be meaningful.")

    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import StandardScaler
    model = LinearRegression()
    scaler = StandardScaler()

    # 14 columns = the number of inputs predict_exam_score feeds to the model.
    dummy_X = np.random.rand(100, 14)
    dummy_y = np.random.rand(100) * 100
    model.fit(dummy_X, dummy_y)
    scaler.fit(dummy_X)


def predict_exam_score(
    age, gender, study_hours_per_day, social_media_hours, netflix_hours,
    part_time_job, attendance_percentage, sleep_hours, diet_quality,
    exercise_frequency, parental_education_level, internet_quality,
    mental_health_rating, extracurricular_participation
):
    """Return an HTML snippet showing the predicted exam score for one student.

    The categorical arguments arrive as their text labels (e.g. ``"Male"``,
    ``"Good"``, ``"Yes"``) and are converted to the same integer codes used
    when the training data was encoded. The 14 values are then scaled with
    the module-level ``scaler`` and passed to the module-level ``model``.

    Returns:
        str: An ``<h1>`` with an emoji and the score rounded to 2 decimals
        (green for >= 85, blue for >= 70, yellow otherwise), followed by a
        caption paragraph.

    Raises:
        KeyError: If gender, diet quality, internet quality, part-time job or
            extracurricular participation is not one of the known labels.
    """
    gender_map = {"Other":0, "Male":1, "Female":2}
    diet_map = {"Poor":1, "Fair":2, "Good":3}
    parental_map = {"High School":1, "Bachelor":2, "Master":3}
    internet_map = {"Poor":1, "Average":2, "Good":3}
    job_map = {"No":0, "Yes":1}
    extra_map = {"No":0, "Yes":1}

    gender = gender_map[gender]
    diet_quality = diet_map[diet_quality]
    # Training encoded every parental education value outside this mapping
    # (including missing ones) as 0, so mirror that here instead of raising
    # KeyError for such input.
    parental_education_level = parental_map.get(parental_education_level, 0)
    internet_quality = internet_map[internet_quality]
    part_time_job = job_map[part_time_job]
    extracurricular_participation = extra_map[extracurricular_participation]

    # Single-row feature matrix.
    # NOTE(review): the column order here is assumed to match the column order
    # the scaler and model were fitted on — confirm against the training data.
    input_data = np.array([[
        age, gender, study_hours_per_day, social_media_hours, netflix_hours,
        part_time_job, attendance_percentage, sleep_hours, diet_quality,
        exercise_frequency, parental_education_level, internet_quality,
        mental_health_rating, extracurricular_participation
    ]])

    input_scaled = scaler.transform(input_data)
    pred = model.predict(input_scaled)

    # Pick the emoji/colour band from the rounded score.
    score = round(pred[0], 2)
    if score >= 85:
        emoji = "🏆"
        color = "#4ade80" # Green
    elif score >= 70:
        emoji = "🎯"
        color = "#60a5fa" # Blue
    else:
        emoji = "📈"
        color = "#fbbf24" # Yellow

    return f"<h1 style='font-size: 3.5em; margin: 0; color: {color};'>{emoji} {score}</h1><p style='color: #8892b0; margin-top: 10px;'>Predicted Exam Score</p>"


# Stylesheet handed to gr.Blocks(css=...) below. The #header, #predict-btn,
# #output-box and #footer selectors and the .input-group class match the
# ids / elem_id / elem_classes used when the layout is built.
# NOTE(review): `.svelte-1g805jv` looks like a build-generated class name —
# confirm it still matches the installed Gradio version.
custom_css = """
/* General Body & Font */
body {
    background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
    color: #e2e8f0;
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
    margin: 0;
    padding: 20px;
}

/* Main App Container */
.gradio-container {
    max-width: 1000px;
    margin: 0 auto;
    background: #1e293b;
    border: 1px solid #334155;
    border-radius: 20px;
    box-shadow: 0 20px 25px -5px rgba(0, 0, 0, 0.4), 0 10px 10px -5px rgba(0, 0, 0, 0.4);
    padding: 10px; /* Inner padding for the container */
    overflow: hidden; /* Ensures the border-radius clips content */
}

/* Header Section */
#header {
    text-align: center;
    padding: 30px 20px;
    background: linear-gradient(135deg, #1e3a8a, #312e81);
    border-radius: 15px 15px 0 0;
    margin-bottom: 20px;
}
#header h1 {
    font-size: 3em;
    font-weight: 800;
    background: linear-gradient(90deg, #22d3ee, #a78bfa, #22d3ee);
    background-size: 200% auto;
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    animation: shine 4s linear infinite;
    margin: 0;
}
@keyframes shine {
    to { background-position: 200% center; }
}
#header p {
    color: #cbd5e1;
    font-size: 1.1em;
    margin-top: 10px;
}

/* Input Groups (Cards) */
.input-group {
    background: #334155;
    border: 1px solid #475569;
    border-radius: 12px;
    padding: 20px;
    margin-bottom: 20px;
    transition: transform 0.2s ease, box-shadow 0.2s ease;
}
.input-group:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
}
.input-group h3 {
    color: #e2e8f0;
    font-size: 1.2em;
    margin-top: 0;
    margin-bottom: 20px;
    border-bottom: 2px solid #22d3ee;
    padding-bottom: 10px;
}

/* Form Controls */
label {
    color: #94a3b8;
    font-weight: 600;
    font-size: 0.9em;
}
input[type="number"], textarea, .wrap.svelte-1g805jv {
    background-color: #1e293b;
    border: 1px solid #475569;
    color: #e2e8f0;
    border-radius: 8px;
    padding: 10px;
    transition: all 0.3s ease;
}
input[type="number"]:focus, textarea:focus, .wrap.svelte-1g805jv:focus-within {
    border-color: #22d3ee;
    box-shadow: 0 0 0 3px rgba(34, 211, 238, 0.1);
    outline: none;
}

/* The Big Predict Button */
#predict-btn {
    background: linear-gradient(135deg, #3b82f6, #8b5cf6);
    color: white;
    font-size: 1.3em;
    font-weight: bold;
    border: none;
    border-radius: 50px; /* Pill shape */
    padding: 18px 45px;
    box-shadow: 0 10px 20px rgba(59, 130, 246, 0.4);
    transition: all 0.3s ease;
    width: 100%;
    margin: 20px 0;
}
#predict-btn:hover {
    transform: translateY(-3px);
    box-shadow: 0 15px 30px rgba(59, 130, 246, 0.6);
    background: linear-gradient(135deg, #2563eb, #7c3aed);
}

/* Output Box */
#output-box {
    background: linear-gradient(135deg, #0f172a, #1e293b);
    border: 2px solid #22d3ee;
    border-radius: 12px;
    padding: 40px;
    text-align: center;
    box-shadow: 0 0 30px rgba(34, 211, 238, 0.3), inset 0 0 20px rgba(34, 211, 238, 0.1);
    margin-top: 20px;
}

/* Footer */
#footer p {
    color: #64748b;
    text-align: center;
    margin-top: 20px;
    font-size: 0.9em;
}
"""

# Build the Gradio UI: a header, four input cards arranged in two rows, the
# predict button, the result panel and a footer. The components are created
# inside nested context managers, so their order here is their on-page order.
with gr.Blocks(css=custom_css, title="Exam Score Predictor - Pro") as demo:

    with gr.Column(elem_id="main-container"):
        # Header
        gr.HTML("""
        <div id="header">
            <h1>🎓 Exam Score Predictor</h1>
            <p>Unlock your academic potential with AI-powered insights.</p>
        </div>
        """)

        # Row 1: personal profile and academic habits cards.
        with gr.Row():
            with gr.Column(elem_classes="input-group"):
                gr.HTML("<h3>👤 Personal Profile</h3>")
                age = gr.Number(label="🎂 Age", value=16)
                gender = gr.Dropdown(label="⚧️ Gender", choices=["Other","Male","Female"], value="Female")
                parental_education_level = gr.Dropdown(label="👨‍🎓 Parental Education", choices=["High School","Bachelor","Master"], value="Bachelor")
                part_time_job = gr.Dropdown(label="💼 Part-Time Job", choices=["No","Yes"], value="No")

            with gr.Column(elem_classes="input-group"):
                gr.HTML("<h3>📚 Academic Habits</h3>")
                study_hours_per_day = gr.Number(label="📖 Study Hours/Day", value=5)
                attendance_percentage = gr.Number(label="🏫 Attendance %", value=90)
                internet_quality = gr.Dropdown(label="🌐 Internet Quality", choices=["Poor","Average","Good"], value="Good")
                extracurricular_participation = gr.Dropdown(label="🎨 Extracurriculars", choices=["No","Yes"], value="Yes")

        # Row 2: lifestyle and digital-usage cards.
        with gr.Row():
            with gr.Column(elem_classes="input-group"):
                gr.HTML("<h3>🌍 Lifestyle & Well-being</h3>")
                sleep_hours = gr.Number(label="😴 Sleep Hours", value=7)
                diet_quality = gr.Dropdown(label="🥗 Diet Quality", choices=["Poor","Fair","Good"], value="Good")
                exercise_frequency = gr.Number(label="💪 Exercise Freq./Week", value=3)
                mental_health_rating = gr.Number(label="🧠 Mental Health (1-10)", value=4)

            with gr.Column(elem_classes="input-group"):
                gr.HTML("<h3>📱 Digital Life</h3>")
                social_media_hours = gr.Number(label="📲 Social Media Hours", value=2)
                netflix_hours = gr.Number(label="📺 Netflix Hours", value=1)

        predict_btn = gr.Button("Predict My Score 🚀", elem_id="predict-btn")

        # Placeholder content; replaced by the HTML returned from predict_exam_score.
        output = gr.HTML("<h1 id='output-result' style='color: #475569;'>Your score will appear here</h1>", elem_id="output-box")

        gr.HTML("<div id='footer'><p>Designed by : Islam &$</p></div>")

    # The inputs list is positional: it must stay in the same order as the
    # parameters of predict_exam_score.
    predict_btn.click(
        fn=predict_exam_score,
        inputs=[
            age, gender, study_hours_per_day, social_media_hours, netflix_hours,
            part_time_job, attendance_percentage, sleep_hours, diet_quality,
            exercise_frequency, parental_education_level, internet_quality,
            mental_health_rating, extracurricular_participation
        ],
        outputs=output
    )

# Start the web server for the app.
demo.launch()