In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline
import seaborn as sns 
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from catboost import CatBoostRegressor
import xgboost as xgb

In [2]:
df=pd.read_csv('stud.csv')

In [3]:
df.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer

In [47]:
ohe=OneHotEncoder()
ss=StandardScaler()

In [61]:
X,y=df.drop(columns=['math_score'],axis=1),df['math_score']

In [62]:
neum_feature=X.select_dtypes(exclude='object').columns
cat_feature=X.select_dtypes(include='object').columns

In [63]:
preprocessor=ColumnTransformer([
    ('OneHotEncoder',ohe,cat_feature),
    ('StanderdScaler',ss,neum_feature)
])

In [64]:
X=preprocessor.fit_transform(X)

In [65]:
X.shape

(1000, 19)

In [66]:
X

array([[ 1.        ,  0.        ,  0.        , ...,  1.        ,
         0.19399858,  0.39149181],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         1.42747598,  1.31326868],
       [ 1.        ,  0.        ,  0.        , ...,  1.        ,
         1.77010859,  1.64247471],
       ...,
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.12547206, -0.20107904],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.60515772,  0.58901542],
       [ 1.        ,  0.        ,  0.        , ...,  1.        ,
         1.15336989,  1.18158627]])

In [67]:
X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=0.2)

In [68]:
def evaluate_model(true,predict):
    mse=mean_squared_error(true,predict)
    rmse=np.sqrt(mean_squared_error(true,predict))
    r2=r2_score(true,predict)
    return mse,rmse,r2

In [69]:
models={
    "lr":LinearRegression(),
    "lasso":Lasso(),
    "ridge":Ridge(),
    "KNeighborsRegressor":KNeighborsRegressor(),
    "RandomForestRegressor":RandomForestRegressor(),
    "xgb":xgb.XGBRegressor(),
    "CatBoostRegressor":CatBoostRegressor()
}
model_list=[]
r2_list=[]
mse_list=[]
for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)
    
    y_train_pred=model.predict(X_train)
    y_pred=model.predict(X_test)
    
    model_train_mse,model_train_rmse,model_train_r2_score=evaluate_model(y_train,y_train_pred)
    model_mse,model_rmse,model_r2_score=evaluate_model(y_test,y_pred)
    
    mse_list.append(model_mse)
    r2_list.append(model_r2_score)
    model_list.append(list(models.keys())[i])

Learning rate set to 0.03175
0:	learn: 15.4398384	total: 2.06ms	remaining: 2.06s
1:	learn: 15.1764655	total: 2.98ms	remaining: 1.49s
2:	learn: 14.9001346	total: 3.77ms	remaining: 1.25s
3:	learn: 14.6334930	total: 4.32ms	remaining: 1.07s
4:	learn: 14.3938542	total: 4.95ms	remaining: 985ms
5:	learn: 14.1700184	total: 5.35ms	remaining: 886ms
6:	learn: 13.9350660	total: 6ms	remaining: 852ms
7:	learn: 13.7204121	total: 6.75ms	remaining: 837ms
8:	learn: 13.5311993	total: 7.71ms	remaining: 849ms
9:	learn: 13.3430258	total: 8.65ms	remaining: 857ms
10:	learn: 13.1258281	total: 9.27ms	remaining: 833ms
11:	learn: 12.9220764	total: 10ms	remaining: 823ms
12:	learn: 12.7453825	total: 10.8ms	remaining: 823ms
13:	learn: 12.5512206	total: 11.4ms	remaining: 806ms
14:	learn: 12.3476676	total: 12.2ms	remaining: 802ms
15:	learn: 12.1566123	total: 12.8ms	remaining: 788ms
16:	learn: 11.9924963	total: 14ms	remaining: 811ms
17:	learn: 11.8017632	total: 14.8ms	remaining: 810ms
18:	learn: 11.6158261	total: 15.7m

In [70]:
for i in r2_list:
    print(i)

0.8608981642779314
0.8039505677800521
0.8621310905352981
0.7405007307519298
0.8011498525538316
0.7601574841356891
0.8021125999965191


In [71]:
for i in mse_list:
    print(i)

31.366238989785632
44.207420505285754
31.088224976206302
58.514799999999994
44.838957125
54.08237521424679
44.621866054868654


In [72]:
for i in model_list:
    print(i)

lr
lasso
ridge
KNeighborsRegressor
RandomForestRegressor
xgb
CatBoostRegressor
