In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.svm import SVR


from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

from sklearn.preprocessing import StandardScaler,OneHotEncoder

from sklearn.model_selection import train_test_split

from sklearn.compose import ColumnTransformer

In [48]:
Dataset=pd.read_csv("/Users/kanwal/Desktop/Mlproject1/notebook/data/StudentsPerformance.csv")

In [49]:
Dataset.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [50]:
#splitting of Data 
X=Dataset.drop('math score',axis=1)
y=Dataset['math score']

In [51]:
X

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,74
1,female,group C,some college,standard,completed,90,88
2,female,group B,master's degree,standard,none,95,93
3,male,group A,associate's degree,free/reduced,none,57,44
4,male,group C,some college,standard,none,78,75
...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,99,95
996,male,group C,high school,free/reduced,none,55,55
997,female,group C,high school,free/reduced,completed,71,65
998,female,group D,some college,standard,completed,78,77


In [52]:
#Train Test split 


In [53]:
Numerical_F=X.select_dtypes(exclude='object').columns
Categorical_F=X.select_dtypes(include='object').columns

In [54]:
#Encoding and normal
one_hot_encoder=OneHotEncoder()
scaler=StandardScaler()

Transform=ColumnTransformer(
    [('OneHotEncoder',one_hot_encoder,Categorical_F),
     ('StandardScaler',scaler,Numerical_F)
    ],remainder='passthrough'
)

In [55]:
X=Transform.fit_transform(X)

In [56]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=42)

In [57]:
models={
    'LinearRegression':LinearRegression(),
    'Ridge':Ridge(),
    'Lasso':Lasso(),
    'SVR':SVR(),
    'DecisionTreeRegressor':DecisionTreeRegressor(),
    'RandomForestRegressor':RandomForestRegressor(),
    'KNeighborsRegressor':KNeighborsRegressor(),
    'GradientBoostingRegressor':GradientBoostingRegressor()
}

In [58]:
def evaluate_accuracy(true,predicted):
    mse=mean_squared_error(true,predicted)
    mae=mean_absolute_error(true,predicted)
    score=r2_score(true,predicted)
    return mse,mae,score

In [64]:
Models=[]
R2_Scores=[]

for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    y_train_predict=model.predict(X_train)
    y_test_predict=model.predict(X_test)

    train_mse,train_mae,train_score=evaluate_accuracy(y_train,y_train_predict)
    test_mse,test_mae,test_score=evaluate_accuracy(y_test,y_test_predict)
    Models.append(list(models.keys())[i])
    R2_Scores.append(test_score)

    print("Model Name:",list(models.keys())[i])
    print("Training_Mean_Sqaured Error:",train_mse)
    print("Traing_Mean_Absolute_Error:",train_mae)
    print("Traing_R2_Score:",train_score)
    print("----------------------------------------------------------------")
    print("Test_Mean_Sqaured Error:",test_mse)
    print("Test_Mean_Absolute_Error:",test_mae)
    print("Test_R2_Score:",test_score)
    print("\n\n")
    
    

Model Name: LinearRegression
Training_Mean_Sqaured Error: 27.544689325007592
Traing_Mean_Absolute_Error: 4.198519565142324
Traing_R2_Score: 0.8751313605993937
----------------------------------------------------------------
Test_Mean_Sqaured Error: 30.886593188073075
Test_Mean_Absolute_Error: 4.418261320297315
Test_R2_Score: 0.8758630443016734



Model Name: Ridge
Training_Mean_Sqaured Error: 27.54838217384225
Traing_Mean_Absolute_Error: 4.197676960902202
Traing_R2_Score: 0.8751146197676473
----------------------------------------------------------------
Test_Mean_Sqaured Error: 30.875711669044243
Test_Mean_Absolute_Error: 4.415505801902548
Test_R2_Score: 0.8759067784434542



Model Name: Lasso
Training_Mean_Sqaured Error: 42.38746971802188
Traing_Mean_Absolute_Error: 5.158226125142767
Traing_R2_Score: 0.807844422971275
----------------------------------------------------------------
Test_Mean_Sqaured Error: 47.20410059865967
Test_Mean_Absolute_Error: 5.392913577222256
Test_R2_Score: 0

In [65]:
Models

['LinearRegression',
 'Ridge',
 'Lasso',
 'SVR',
 'DecisionTreeRegressor',
 'RandomForestRegressor',
 'KNeighborsRegressor',
 'GradientBoostingRegressor']

In [67]:
pd.DataFrame(list(zip(Models,R2_Scores)),columns=['Model_Name','R2_Scores']).sort_values(by=["R2_Scores"],ascending=False)

Unnamed: 0,Model_Name,R2_Scores
1,Ridge,0.875907
0,LinearRegression,0.875863
7,GradientBoostingRegressor,0.865422
5,RandomForestRegressor,0.847664
2,Lasso,0.810281
6,KNeighborsRegressor,0.77202
3,SVR,0.726848
4,DecisionTreeRegressor,0.716799
