In [None]:
import os
import pickle

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import AdaBoostRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor

In [2]:
# Location of the dataset. The default is the original machine-specific path;
# set the STUDENT_PERFORMANCE_CSV environment variable to point the notebook at
# a copy of the file elsewhere without editing this cell.
DATA_PATH = os.environ.get(
    'STUDENT_PERFORMANCE_CSV',
    r'C:\Users\SreeKeerthiReddyThat\Desktop\statistis\Student_Performance.csv',
)

# Load the raw student-performance table into a DataFrame.
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the freshly loaded frame (pandas abbreviates the display to the first and last rows).
df

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0
...,...,...,...,...,...,...
9995,1,49,Yes,4,2,23.0
9996,7,64,Yes,8,5,58.0
9997,6,83,Yes,8,5,74.0
9998,9,97,Yes,7,0,95.0


In [4]:
# 2.1 Check for Null Values
# Count the missing entries in every column and print the per-column totals.
null_values = df.isna().sum()
print(null_values)

Hours Studied                       0
Previous Scores                     0
Extracurricular Activities          0
Sleep Hours                         0
Sample Question Papers Practiced    0
Performance Index                   0
dtype: int64


In [5]:
# 2.2 Handle Categorical Variables
# Encode the Yes/No flag as 1/0.
# Series.map turns every value that is missing from the mapping into NaN, so
# running it again on the already-encoded column would blank the whole column.
# The dtype guard below makes this cell safe to re-run.
activity_codes = {'Yes': 1, 'No': 0}
activity = df['Extracurricular Activities']

if not pd.api.types.is_numeric_dtype(activity):
    # Fail loudly on labels the mapping does not know about instead of letting
    # them become NaN after the null check has already been done.
    unexpected = set(activity.dropna().unique()) - set(activity_codes)
    if unexpected:
        raise ValueError(
            "Unexpected 'Extracurricular Activities' labels: "
            f"{sorted(map(str, unexpected))}"
        )
    df['Extracurricular Activities'] = activity.map(activity_codes)

In [6]:
# Display the frame again to check the 'Extracurricular Activities' column after the Yes/No -> 1/0 mapping.
df

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,1,9,1,91.0
1,4,82,0,4,2,65.0
2,8,51,1,7,2,45.0
3,5,52,1,5,2,36.0
4,7,75,0,8,5,66.0
...,...,...,...,...,...,...
9995,1,49,1,4,2,23.0
9996,7,64,1,8,5,58.0
9997,6,83,1,8,5,74.0
9998,9,97,1,7,0,95.0


In [8]:
# Split the frame into the model inputs (X) and the regression target (y).
feature_columns = [
    'Hours Studied',
    'Previous Scores',
    'Extracurricular Activities',
    'Sleep Hours',
    'Sample Question Papers Practiced',
]
target_column = 'Performance Index'

X = df[feature_columns]
y = df[target_column]

In [None]:

# Train the AdaBoost Regressor on depth-3 regression trees.
# The weak learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the same in both.
ada_model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=3),
                              n_estimators=100, random_state=42)
ada_model.fit(X, y)

# Get feature importances (one value per column of X, in column order)
importances = ada_model.feature_importances_

# Create a DataFrame pairing each feature with its importance, largest first
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

# Reshape for heatmap: a single 'Importance' row with one column per feature
importance_df_heatmap = importance_df.set_index('Feature').T

# Plot heatmap
plt.figure(figsize=(12, 2))  # Short and wide, since the heatmap has only one row
sns.heatmap(importance_df_heatmap, annot=True, cmap='YlGnBu', fmt=".3f", cbar=False)

plt.title('AdaBoost Feature Importances Heatmap')
plt.yticks(rotation=0)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
