### Importing Data And Required Packages 

In [48]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error,r2_score, mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [49]:
df = pd.read_csv('data/stud.csv')

In [50]:
df.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [51]:
X= df.drop(columns=['math_score'],axis=1)

In [52]:
X.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,74
1,female,group C,some college,standard,completed,90,88
2,female,group B,master's degree,standard,none,95,93
3,male,group A,associate's degree,free/reduced,none,57,44
4,male,group C,some college,standard,none,78,75


In [53]:
y=df['math_score']

In [54]:
y

0      72
1      69
2      90
3      47
4      76
       ..
995    88
996    62
997    59
998    68
999    77
Name: math_score, Length: 1000, dtype: int64

#### Transforming Columns into numerical columns and standardisatio

In [55]:
num_features=X.select_dtypes(exclude="object").columns
cat_features=X.select_dtypes(include='object').columns

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
numeric_transformer=StandardScaler()
oh_transformer=OneHotEncoder()

preprocessor = ColumnTransformer(
  [
    ("OneHotEncoder", oh_transformer, cat_features),
    ("StandardScaler", numeric_transformer, num_features)
  ]
)

In [56]:
X=preprocessor.fit_transform(X)

In [57]:
X.shape

(1000, 19)

In [58]:
from sklearn.model_selection import train_test_split
X_train,y_train,X_test,y_test= train_test_split(X,y,test_size=0.3,random_state=42)
X_train.shape,y_train.shape

((700, 19), (300, 19))

In [59]:
def evaluate_model(true,predicted):
  mae=mean_absolute_error(true,predicted)
  mse=mean_squared_error(true,predicted)
  rmse=np.sqrt(mean_squared_error(true,predicted))
  r2_score= r2_score(true, predicted)
  return mae,rmse,r2_score,mse

In [61]:
models={
  "Linear Regression": LinearRegression(),
  "Lasso": Lasso(),
  "Ridge": Ridge(),
  "K-Neighbors Regressor": KNeighborsRegressor(),
  "Decesion Tree": DecisionTreeRegressor(),
  "Randome Forest Regressor": RandomForestRegressor(),
  "XGBRegressor": XGBRegressor(),
  "Cat Boost regressor": CatBoostRegressor(),
  "Adaboost Regressor": AdaBoostRegressor()

}
model_list=[]
r2_list=[]

for i in range(len(list(models))):
  model = list(models.values())[i]
  model.fit(X_train,y_train)

  y_train_pred= model.predict(X_train)
  y_test_pred= model.predict(X_test)

  model_train_mae, model_train_rmse, model_train_r2= evaluate_model(y_train,y_train_pred)
  model_test_mae, model_test_rmse, model_test_r2= evaluate_model(y_test,y_test_pred)

  print(list(models.keys())[i])
  model_list.append(list(models.keys())[i])
  
  print('Model performance for Training set')
  print("- Root Mean Squared Error: {:.4f}".format(model_train_rmse))
  print("- Mean Absolute Error: {:.4f}".format(model_train_mae))
  print("- R2 Score: {:.4f}".format(model_train_r2))

  print('----------------------------------')
  
  print('Model performance for Test set')
  print("- Root Mean Squared Error: {:.4f}".format(model_test_rmse))
  print("- Mean Absolute Error: {:.4f}".format(model_test_mae))
  print("- R2 Score: {:.4f}".format(model_test_r2))
  r2_list.append(model_test_r2)
  
  print('='*35)
  print('\n')


ValueError: Found input variables with inconsistent numbers of samples: [700, 300]