In [35]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
import matplotlib.pyplot as plt
%matplotlib inline

In [36]:
df=pd.read_csv('StudentsPerformance.csv')
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [37]:
x=df.drop(columns=['math score'],axis=1)

In [38]:
x.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,74
1,female,group C,some college,standard,completed,90,88
2,female,group B,master's degree,standard,none,95,93
3,male,group A,associate's degree,free/reduced,none,57,44
4,male,group C,some college,standard,none,78,75


In [39]:
y=df['math score']

In [40]:
y.head()

0    72
1    69
2    90
3    47
4    76
Name: math score, dtype: int64

In [41]:
num_features=x.select_dtypes(exclude='object').columns
cat_features=x.select_dtypes(include='object').columns

from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
newtansform=OneHotEncoder()
ohtransform=StandardScaler()

processor=ColumnTransformer(
[
    ("OneHotEncoder",newtansform,cat_features),
    ("StandardScaler",ohtransform,num_features)
    ]
)

In [42]:
x=processor.fit_transform(x)

In [43]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [44]:
x_test.shape

(200, 19)

In [45]:
x_train.shape

(800, 19)

In [46]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
def evauate(true,predicted):
    mse = mean_squared_error(true,predicted)
    mae= mean_absolute_error(true,predicted)
    r2_scor= r2_score(true,predicted)
    return mse,mae,r2_scor

In [47]:
models = {
    'LinearRegression': LinearRegression(),
    'Lasso': Lasso(),
    'Ridge': Ridge(),
    'RandomForestRegressor': RandomForestRegressor(),
    'AdaBoostRegressor': AdaBoostRegressor(),
    'DecisionTreeRegressor': DecisionTreeRegressor(),
    'KNeighborsRegressor': KNeighborsRegressor(),
    'XGBRegressor': XGBRegressor(),
}

model_list = []
evaluation_results = []

for model_name, model in models.items():
    model.fit(x_train, y_train)
    
    y_train_predict = model.predict(x_train)
    y_test_predict = model.predict(x_test)
    
    train_mse, train_mae, train_r2 = evauate(y_train, y_train_predict)
    test_mse, test_mae, test_r2 = evauate(y_test, y_test_predict)
    
    model_list.append(model)
    evaluation_results.append({
        'model_name': model_name,
        'train_mse': train_mse,
        'train_mae': train_mae,
        'train_r2': train_r2,
        'test_mse': test_mse,
        'test_mae': test_mae,
        'test_r2': test_r2
    })

for result in evaluation_results:
    print(f"Model: {result['model_name']}")
    print(f"Train MSE: {result['train_mse']}, Train MAE: {result['train_mae']}, Train R2: {result['train_r2']}")
    print(f"Test MSE: {result['test_mse']}, Test MAE: {result['test_mae']}, Test R2: {result['test_r2']}")
    print("----------------------------------------------------")


Model: LinearRegression
Train MSE: 28.4013037109375, Train MAE: 4.271484375, Train R2: 0.874022530822051
Test MSE: 29.43263671875, Test MAE: 4.2221875, Test R2: 0.8790464771743793
----------------------------------------------------
Model: Lasso
Train MSE: 43.47840400585577, Train MAE: 5.206302661246526, Train R2: 0.8071462015863458
Test MSE: 42.5064168384116, Test MAE: 5.157881810347763, Test R2: 0.8253197323627853
----------------------------------------------------
Model: Ridge
Train MSE: 28.337788233082442, Train MAE: 4.26498782372598, Train R2: 0.8743042615212909
Test MSE: 29.05627219234824, Test MAE: 4.211100688014257, Test R2: 0.8805931485028741
----------------------------------------------------
Model: RandomForestRegressor
Train MSE: 5.295041526041667, Train MAE: 1.832727083333333, Train R2: 0.9765131932874621
Test MSE: 36.60953226388889, Test MAE: 4.712041666666667, Test R2: 0.8495530000037442
----------------------------------------------------
Model: AdaBoostRegressor
Trai

In [48]:
# Convert evaluation results into a DataFrame
df_results = pd.DataFrame(evaluation_results)
# Sort the DataFrame by 'test_r2' in descending order
df_results.sort_values(by='test_r2', ascending=False)

Unnamed: 0,model_name,train_mse,train_mae,train_r2,test_mse,test_mae,test_r2
2,Ridge,28.337788,4.264988,0.874304,29.056272,4.211101,0.880593
0,LinearRegression,28.401304,4.271484,0.874023,29.432637,4.222188,0.879046
3,RandomForestRegressor,5.295042,1.832727,0.976513,36.609532,4.712042,0.849553
4,AdaBoostRegressor,33.416727,4.738046,0.851776,36.745323,4.79053,0.848995
7,XGBRegressor,1.014616,0.687467,0.9955,41.903708,5.057731,0.827797
1,Lasso,43.478404,5.206303,0.807146,42.506417,5.157882,0.82532
6,KNeighborsRegressor,32.5957,4.5175,0.855418,52.6834,5.637,0.783497
5,DecisionTreeRegressor,0.078125,0.01875,0.999653,62.97,6.34,0.741225
