Step 1: Importing the required libraries

In [11]:
# Basic Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Model Training

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error,mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
import warnings

Step 2: Reading the dataset

In [6]:
# Importing the dataset

df = pd.read_csv('Data/stud.csv')
df.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


Step 3: Segregating Independent & Target Variables

In [9]:
# Seggregating Independent & Dependent Variables

target_column = 'math_score'

X = df.drop(columns= [target_column], axis =1)
y = df[target_column]



Step 4: Train Test Split

In [14]:
# Dividing the dataset into train and test data

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state= 42)

Step 5: Feature Engineering Automation using Pipeline and Column Transformer

In [12]:
# Feature Engineering Pipeline

num_features = X.select_dtypes(exclude= 'object').columns
cate_features = X.select_dtypes(include= 'object').columns


num_pipeline = Pipeline(steps=
                        [('imputer', SimpleImputer(strategy= 'median')),
                         ('Scaler', StandardScaler())])

cat_pipeline = Pipeline(steps= 
                        [
                            ('imputer', SimpleImputer(strategy= 'most_frequent')),
                            ('Encoder', OneHotEncoder())
                        ])

preprocessor = ColumnTransformer([
    ('numerical_pipeline', num_pipeline, num_features),
    ('categorical_pipeline', cat_pipeline, cate_features)])

Step 6: Applying preprocessing on Train and Test Data

In [16]:
# Applying feature enginnering on train and test dataset

X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

Stap 7: Model Training Automation 

In [22]:
# Step 1: Making a dictionary of models

models = {'Linear Regression': LinearRegression(),
          'Lasso': Lasso(),
          'Ridge': Ridge(),
          'ElasticNet': ElasticNet(),
          'Decision_Tree': DecisionTreeRegressor(),
          'SVM': SVR(),
          'KNeighborsRegressor': KNeighborsRegressor(),
          'RandonForest': RandomForestRegressor(),
          'AdaBoost': AdaBoostRegressor(),
          'XGB': XGBRegressor(),
          'CatBoost': CatBoostRegressor()}


# Step 2: Defining a Funnction for Automated Model Training

def evaluate_model(X_train_preprocessed, y_train, X_test_preprocessed, y_test):
    report = {}
    for i in range(len(models)):
        model = list(models.values())[i]                  # Selecting model

        model.fit(X_train_preprocessed, y_train)         # Fitting the model

        y_pred = model.predict(X_test_preprocessed)      # Predict using model

        model_accuracy_score = r2_score(y_test, y_pred)  # Finding model accuracy score

        report[list(models.keys())[i]] = model_accuracy_score

    return report


# Step 3: Call the Function

model_report = evaluate_model(X_train_preprocessed, y_train, X_test_preprocessed, y_test)

for key, value in model_report.items():
    print(key, value)

Learning rate set to 0.039525
0:	learn: 14.5987177	total: 1.14ms	remaining: 1.14s
1:	learn: 14.2251886	total: 2.13ms	remaining: 1.06s
2:	learn: 13.8866124	total: 3.02ms	remaining: 1s
3:	learn: 13.5235688	total: 3.86ms	remaining: 960ms
4:	learn: 13.1887021	total: 4.74ms	remaining: 942ms
5:	learn: 12.9124226	total: 5.58ms	remaining: 924ms
6:	learn: 12.6000335	total: 6.51ms	remaining: 924ms
7:	learn: 12.3299057	total: 7.48ms	remaining: 928ms
8:	learn: 12.0660619	total: 8.52ms	remaining: 938ms
9:	learn: 11.7730981	total: 9.44ms	remaining: 934ms
10:	learn: 11.4922764	total: 10.3ms	remaining: 926ms
11:	learn: 11.2626483	total: 11.3ms	remaining: 933ms
12:	learn: 11.0426039	total: 12.6ms	remaining: 954ms
13:	learn: 10.7991693	total: 13.7ms	remaining: 962ms
14:	learn: 10.5541002	total: 15ms	remaining: 983ms
15:	learn: 10.3191811	total: 16ms	remaining: 982ms
16:	learn: 10.1000444	total: 16.9ms	remaining: 978ms
17:	learn: 9.8945567	total: 17.8ms	remaining: 970ms
18:	learn: 9.6901741	total: 18.7ms

Step 7: Model with best accuracy score

In [23]:
# Best Score

best_model_score = max(sorted(model_report.values()))

# Best Model Name

best_model_name = list(model_report.keys())[list(model_report.values()).index(best_model_score)]

# Best Modal

best_modal = models[best_model_name]

print(f'The best model is {best_model_name} with a r2 acore of {best_model_score}')

The best model is Ridge with a r2 acore of 0.8805917946912826


Step 8: Training Model with the best model

In [25]:
selected_estimator = Ridge()

selected_estimator.fit(X_train_preprocessed, y_train)

y_pred = selected_estimator.predict(X_test_preprocessed)

accuracy_score = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'Accuracy: {accuracy_score:.4f}, Mean Abosulte Error: {mae:.4f}, Mean Squared Error: {mse:.4f}')




Accuracy: 0.8806, Mean Abosulte Error: 4.2111, Mean Squared Error: 29.0566
