
# 🎓 Student Performance Prediction Assignment
Applied AI & Machine Learning (CS-333)  
Instructor: Dr. Abbas Hussain  

---

## Objective
Predict **Performance Index** using multiple regression models.


## 1️⃣ Import Libraries

In [None]:

# Import required libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error


## 2️⃣ Load Dataset

In [None]:

# Load the dataset from a CSV file (path is relative to the working directory)
df = pd.read_csv("Student_Performance.csv")

df.head()  # preview the first rows (head() defaults to 5) as a sanity check


## 3️⃣ Exploratory Data Analysis

In [None]:

df.info()  # prints column names, non-null counts and dtypes
df.describe()  # summary statistics (by default, of the numeric columns)


In [None]:

# Count the missing (null/NaN) values in each column
df.isnull().sum()


## 4️⃣ Visualization

In [None]:

# Correlation heatmap of the numeric columns.
# numeric_only=True restricts the correlation to numeric columns: since
# pandas 2.0, DataFrame.corr() no longer drops text/categorical columns
# silently and raises an error when it meets one.
plt.figure(figsize=(8, 6))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap="coolwarm")
plt.show()


## 5️⃣ Encoding (if needed)

In [None]:

# Encode categorical columns, if any, so that the scaling and model-fitting
# steps receive purely numeric features.
# With `columns` left unspecified, get_dummies converts only the
# object/string/category columns and leaves numeric ones untouched, so an
# all-numeric dataset passes through unchanged.
# drop_first=True keeps k-1 indicator columns per feature (a two-valued
# column becomes a single 0/1 column), avoiding perfectly collinear inputs.
df = pd.get_dummies(df, drop_first=True, dtype=int)

df.head()


## 6️⃣ Feature Selection

In [None]:

# Target: the column the models are trained to predict.
y = df["Performance Index"]
# Features: every remaining column.
X = df.drop(columns="Performance Index")


## 7️⃣ Train-Test Split

In [None]:

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


## 8️⃣ Feature Scaling

In [None]:

# Learn the scaling statistics from the training features only, so that no
# information from the test set leaks into preprocessing.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both partitions.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


## 9️⃣ Apply Regression Models

In [None]:

# Candidate models, keyed by the display name used in the results table.
models = {
    "Linear Regression": LinearRegression(),
    "SVR": SVR(kernel='rbf'),
    # Seeded (same seed as the train/test split) so the reported scores are
    # reproducible from run to run; an unseeded forest changes every time.
    "Random Forest": RandomForestRegressor(random_state=42),
    "KNN": KNeighborsRegressor(),
    # NOTE(review): GaussianNB is a classifier, not a regressor. It treats
    # each distinct target value as a class label, so its scores are not
    # comparable to the real regressors. Confirm it is required here, or
    # swap in a regression model.
    "Naive Bayes": GaussianNB(),
}

# One row per model: [name, R2, RMSE, MAE], all measured on the test set.
results = []

for name, model in models.items():

    # Fit on the scaled training data, then predict the held-out test data.
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    r2 = r2_score(y_test, y_pred)
    # RMSE is the square root of the MSE, in the same units as the target.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)

    results.append([name, r2, rmse, mae])

results_df = pd.DataFrame(results, columns=["Model", "R2", "RMSE", "MAE"])

results_df


## 🔟 Visualize Best Model

In [None]:

# Select best model based on R2 (highest score first)
best_model_name = results_df.sort_values("R2", ascending=False).iloc[0]["Model"]
print("Best Model:", best_model_name)

# Reuse the estimator that was already fitted and scored in the comparison
# loop. Refitting here is redundant and, for a stochastic model, would plot
# predictions from a different fit than the one behind the reported metrics.
best_model = models[best_model_name]
y_pred_best = best_model.predict(X_test_scaled)

# Actual vs. predicted values on the test set; points near the diagonal
# indicate accurate predictions.
plt.scatter(y_test, y_pred_best)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.title(f"Actual vs Predicted ({best_model_name})")
plt.show()
