In [None]:
import pandas as pd
import seaborn as sea
import matplotlib.pyplot as plt
import numpy as np


from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
import warnings

warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
df=pd.read_csv("data/stud.csv")

In [3]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [7]:
X=df.drop(columns=["math score"],axis=1)
y=df["math score"]

In [19]:
num_features=[features for features in X.columns if X[features].dtype!="O"]
cat_features=[features for features in X.columns if X[features].dtype=="O"]

In [20]:
num_features

['reading score', 'writing score']

In [21]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

scaler=StandardScaler()
oh=OneHotEncoder()

preprocessor=ColumnTransformer(
    [
    ("OneHotEncoder",oh,cat_features),
    ("StandardScaler",scaler,num_features)
    ]
)

In [22]:
X=preprocessor.fit_transform(X)

In [23]:
X.shape

(1000, 19)

In [24]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=42)

In [35]:
models={
    "RandomForestRegressor":RandomForestRegressor(),
    "AdaBoostRegressor":AdaBoostRegressor(),
    "LinearRegression": LinearRegression(),
    "Lasso":Lasso(),
    "Ridge":Ridge(),
    "XGBRegressor":XGBRegressor(),
    "CatBoostRegressor":CatBoostRegressor(verbose=False),
    "SVR":SVR(),
    "DecisionTreeRegressor":DecisionTreeRegressor(),
    "KNeighborsRegressor":KNeighborsRegressor()
    
}

models_list=[]
r2_list=[]


for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    y_pred=model.predict(X_test)
    y_pred_train=model.predict(X_train)

    mae_train=mean_absolute_error(y_pred_train,y_train)
    mse_train=mean_squared_error(y_pred_train,y_train)
    r2score_train=r2_score(y_pred_train,y_train)

    mae=mean_absolute_error(y_pred,y_test)
    mse=mean_squared_error(y_pred,y_test)
    r2score=r2_score(y_pred,y_test)

    print(list(models.keys())[i])


    print("MAE_Train:",mae_train)
    print("MSE_Train:",mse_train)
    print("R2_Score_Train:",r2score_train)
    print("MAE:",mae)
    print("MSE:",mse)
    print("R2_Score:",r2score)

    print("---------------------------")


RandomForestRegressor
MAE_Train: 1.8348854166666666
MSE_Train: 5.257106034722223
R2_Score_Train: 0.974458722449499
MAE: 4.6271458333333335
MSE: 35.50850837847223
R2_Score: 0.8262727959795229
---------------------------
AdaBoostRegressor
MAE_Train: 4.795114825265343
MSE_Train: 34.11377637541121
R2_Score_Train: 0.8040426097241651
MAE: 4.686615378306871
MSE: 36.27200738201693
R2_Score: 0.8031554197121648
---------------------------
LinearRegression
MAE_Train: 4.272265625
MSE_Train: 28.5175634765625
R2_Score_Train: 0.8560179779781416
MAE: 4.2253125
MSE: 29.39126953125
R2_Score: 0.8659424994992475
---------------------------
Lasso
MAE_Train: 5.206302661246526
MSE_Train: 43.47840400585579
R2_Score_Train: 0.6903948906164419
MAE: 5.157881810347763
MSE: 42.5064168384116
R2_Score: 0.727536565502517
---------------------------
Ridge
MAE_Train: 4.26498782372598
MSE_Train: 28.337788233082442
R2_Score_Train: 0.8557734008261899
MAE: 4.211100688014259
MSE: 29.056272192348274
R2_Score: 0.86679545310447