Model Training


1.1 Import data and Required Packages


In [11]:
#basic import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#modelling
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
import warnings

Import the CSV as PAndas DataFrame

In [3]:
df = pd.read_csv("notebook\data\StudentsPerformance.csv")

Preparing X and Y variables

In [5]:
x = df.drop(columns=['math score'],axis=1)
y = df['math score']

In [7]:
#create coloumn Transformer with 3 types of transformers
num_features = x.select_dtypes(exclude="object").columns
cat_features = x.select_dtypes(include="object").columns

from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer

num_transformers = StandardScaler()
oh_transformers = OneHotEncoder()

preprocessor = ColumnTransformer(
    [
        ("OneHotEncode",oh_transformers,cat_features),
        ("StandardScaler",num_transformers,num_features)
    ]
)

In [18]:
x = preprocessor.fit_transform(x)

In [19]:
#separate dataset into train test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)
x_train.shape,x_test.shape

((800, 19), (200, 19))

Create and Evalute Function to give all metrics after model training


In [22]:
def evaluate_model(true,predicted):
    mae = mean_absolute_error(true,predicted)
    mse = mean_squared_error(true,predicted)
    rmse = np.sqrt(mean_squared_error(true,predicted))
    r2score = r2_score(true,predicted)
    return mae,rmse,r2score

In [23]:
models = {
    "Linear Regression":LinearRegression(),
    "Lasso":Lasso(),
    "Ridge":Ridge(),
    "K-Neighbour Regressor":KNeighborsRegressor(),
    "Decision Tree":DecisionTreeRegressor(),
    "Random Forest Regressor":RandomForestRegressor(),
    "XGBRegressor":XGBRegressor(),
    "CatBoostRegressor":CatBoostRegressor(verbose=False),
    "AdaBoostRegressor":AdaBoostRegressor()
}
model_list = []
r2_list = []

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(x_train,y_train)
    
    #make predictions
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)

    #Evalutae  train and test
    model_train_mae,model_train_rmse,model_train_r2 = evaluate_model(y_train,y_train_pred)
    model_test_mae,model_test_rmse,model_test_r2 = evaluate_model(y_test,y_test_pred)

    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print('Model performance for training set')
    print(" - Root mean square Error: {:.4f}".format(model_test_rmse))
    print(" - Absolute mean square Error: {:.4f}".format(model_test_mae))
    print(" - R2 score: {:.4f}".format(model_test_r2))
    r2_list.append(model_test_r2)

    print('='*35)
    print('\n')

Linear Regression
Model performance for training set
 - Root mean square Error: 5.4102
 - Absolute mean square Error: 4.2272
 - R2 score: 0.8797


Lasso
Model performance for training set
 - Root mean square Error: 6.5197
 - Absolute mean square Error: 5.1579
 - R2 score: 0.8253


Ridge
Model performance for training set
 - Root mean square Error: 5.3904
 - Absolute mean square Error: 4.2111
 - R2 score: 0.8806




[WinError 2] The system cannot find the file specified
  File "c:\Projects\MLProject\venv\lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
  File "c:\Projects\MLProject\venv\lib\subprocess.py", line 493, in run
    with Popen(*popenargs, **kwargs) as process:
  File "c:\Projects\MLProject\venv\lib\subprocess.py", line 858, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Projects\MLProject\venv\lib\subprocess.py", line 1327, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,


K-Neighbour Regressor
Model performance for training set
 - Root mean square Error: 7.2516
 - Absolute mean square Error: 5.6160
 - R2 score: 0.7839


Decision Tree
Model performance for training set
 - Root mean square Error: 7.7321
 - Absolute mean square Error: 6.1450
 - R2 score: 0.7543


Random Forest Regressor
Model performance for training set
 - Root mean square Error: 6.0275
 - Absolute mean square Error: 4.6886
 - R2 score: 0.8507


XGBRegressor
Model performance for training set
 - Root mean square Error: 6.4733
 - Absolute mean square Error: 5.0577
 - R2 score: 0.8278


CatBoostRegressor
Model performance for training set
 - Root mean square Error: 6.0086
 - Absolute mean square Error: 4.6125
 - R2 score: 0.8516


AdaBoostRegressor
Model performance for training set
 - Root mean square Error: 6.1433
 - Absolute mean square Error: 4.7887
 - R2 score: 0.8449


