In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,mean_absolute_error,root_mean_squared_error,r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVR

In [51]:
df=pd.read_pickle('stud.pkl')

In [52]:
df.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score,total_score,average
0,female,group B,bachelor's degree,standard,none,72,72,74,218,72.666667
1,female,group C,some college,standard,completed,69,90,88,247,82.333333
2,female,group B,master's degree,standard,none,90,95,93,278,92.666667
3,male,group A,associate's degree,free/reduced,none,47,57,44,148,49.333333
4,male,group C,some college,standard,none,76,78,75,229,76.333333


In [60]:
X=df.drop(columns=['math_score'],axis=1)
X.shape

(1000, 9)

In [67]:
X

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score,total_score,average
0,female,group B,bachelor's degree,standard,none,72,74,218,72.666667
1,female,group C,some college,standard,completed,90,88,247,82.333333
2,female,group B,master's degree,standard,none,95,93,278,92.666667
3,male,group A,associate's degree,free/reduced,none,57,44,148,49.333333
4,male,group C,some college,standard,none,78,75,229,76.333333
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,99,95,282,94.000000
996,male,group C,high school,free/reduced,none,55,55,172,57.333333
997,female,group C,high school,free/reduced,completed,71,65,195,65.000000
998,female,group D,some college,standard,completed,78,77,223,74.333333


In [54]:
y=df['math_score']

In [68]:
cat_features=X.select_dtypes(include=['O']).columns
num_features=X.select_dtypes(exclude=['O']).columns

In [69]:
cat_features

Index(['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch',
       'test_preparation_course'],
      dtype='object')

In [70]:
from sklearn.preprocessing import OneHotEncoder
scalar=StandardScaler()
ohe=OneHotEncoder(drop='first')
processor=ColumnTransformer(
    [
        ("OneHotEncoder",ohe,cat_features),
        ("StandardScaler",scalar,num_features),
    ]
)


In [71]:
X=processor.fit_transform(X)

In [77]:
X

array([[ 0.        ,  1.        ,  0.        , ...,  0.39149181,
         0.34357423,  0.34357423],
       [ 0.        ,  0.        ,  1.        , ...,  1.31326868,
         1.0219275 ,  1.0219275 ],
       [ 0.        ,  1.        ,  0.        , ...,  1.64247471,
         1.74706375,  1.74706375],
       ...,
       [ 0.        ,  0.        ,  1.        , ..., -0.20107904,
        -0.19443008, -0.19443008],
       [ 0.        ,  0.        ,  0.        , ...,  0.58901542,
         0.46053169,  0.46053169],
       [ 0.        ,  0.        ,  0.        , ...,  1.18158627,
         1.06871048,  1.06871048]], shape=(1000, 16))

In [73]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [80]:
def evaluate_model(y_ptest,y_ptrain):
    mae=mean_absolute_error(y_ptest,y_ptrain)
    rmse=root_mean_squared_error(y_ptest,y_ptrain)
    r2=r2_score(y_ptest,y_ptrain)
    return mae,rmse,r2

In [87]:
models_={
    "liner":LinearRegression(),
    "ridge":Ridge(),
    "lasso":Lasso(),
    "svm":SVR(),
    "knn":KNeighborsRegressor(),
    "Dtree":DecisionTreeRegressor(),
    "ada":AdaBoostRegressor(),
}

In [90]:
for i in range(len(list(models_))):
    model=list(models_.values())[i]
    model.fit(X_train,y_train)
    y_pred_test=model.predict(X_test)
    y_pred_train=model.predict(X_train)

    a,b,c=evaluate_model(y_train,y_pred_train)
    x,y,z=evaluate_model(y_test,y_pred_test)

    print("This is for train data")
    print(a,b,c)
    print("______________________")
    print("This is for test data")
    print(x,y,z)

This is for train data
1.7848409317675353e-14 2.3047114409264145e-14 1.0
______________________
This is for test data
1.9722943816856115e-14 2.5469250339396124e-14 1.0
This is for train data
0.3200887607301325 0.40073531953649233 0.9992780303289466
______________________
This is for test data
0.33348332485626486 0.4185113007492575 0.9992784440293204
This is for train data
3.694479412921368 4.664027094469576 0.9022029307261018
______________________
This is for test data
3.8976033468286384 4.917774215911471 0.9003693836395572
This is for train data
3.5787041799467154 5.479849973295196 0.8649977371231934
______________________
This is for test data
4.155398402001839 6.943468028562559 0.8013866091059132
This is for train data
3.503880597014926 4.39234068765367 0.9132647274147592
______________________
This is for test data
4.254545454545455 5.428030503922685 0.8786219362986398
This is for train data
0.0 0.0 1.0
______________________
This is for test data
3.396969696969697 4.5974301121782