In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl.metadata (1.5 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Collecting plotly (from catboost)
  Downloading plotly-6.2.0-py3-none-any.whl.metadata (8.5 kB)
Collecting narwhals>=1.15.1 (from plotly->catboost)
  Downloading narwhals-2.0.1-py3-none-any.whl.metadata (11 kB)
Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl (102.4 MB)
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   ---------------------------------------- 0.8/102.4 MB 6.7 MB/s eta 0:00:16
    --------------------------------------- 1.6/102.4 MB 4.0 MB/s eta 0:00:26
   - -------------------------------------- 2.9/102.4 MB 4.2 MB/s eta 0:00:24
   - -------------------------------------- 3.7/102.4 MB 4.1 MB/s eta 0:00:24
   - -------------------------------------- 4.5/102.4 MB 4.1 MB/s eta 0:00:24
   -- ------------------------------------- 5.2/102.4 MB 4.0 MB/s eta 0:0

In [3]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.3-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.3-py3-none-win_amd64.whl (149.9 MB)
   ---------------------------------------- 0.0/149.9 MB ? eta -:--:--
   ---------------------------------------- 0.8/149.9 MB 6.7 MB/s eta 0:00:23
   ---------------------------------------- 1.6/149.9 MB 4.7 MB/s eta 0:00:32
    --------------------------------------- 2.1/149.9 MB 4.5 MB/s eta 0:00:33
    --------------------------------------- 3.1/149.9 MB 4.2 MB/s eta 0:00:36
   - -------------------------------------- 3.9/149.9 MB 4.1 MB/s eta 0:00:36
   - -------------------------------------- 4.7/149.9 MB 4.1 MB/s eta 0:00:36
   - -------------------------------------- 5.5/149.9 MB 4.1 MB/s eta 0:00:36
   - -------------------------------------- 6.3/149.9 MB 4.0 MB/s eta 0:00:36
   - -------------------------------------- 7.3/149.9 MB 4.0 MB/s eta 0:00:36
   -- ------------------------------------- 8.1/149.9 MB 4.0 MB/s eta 0:00:36
 

In [5]:
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

In [6]:
df = pd.read_csv("stud.csv")

In [7]:
df.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [13]:
X = df.drop(columns=['math_score'],axis=1)

In [16]:
X.columns

Index(['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch',
       'test_preparation_course', 'reading_score', 'writing_score'],
      dtype='object')

In [9]:
y = df['math_score']

In [None]:
X.columns

TypeError: 'Index' object is not callable

In [19]:
cat_features = [feature for feature in X.columns if df[feature].dtype=="O"]
num_features = [feature for feature in X.columns if df[feature].dtype!="O"]

In [18]:
num_features,cat_features

(['gender',
  'race_ethnicity',
  'parental_level_of_education',
  'lunch',
  'test_preparation_course'],
 ['reading_score', 'writing_score'])

In [None]:
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer

numeric_transformer = StandardScaler()
oh_transformer = OneHotEncoder()

preprocessor = ColumnTransformer([("OneHotEncoder",oh_transformer,cat_features),
                                  ("StandardScaler",numeric_transformer,num_features)]
                                )

In [22]:
X = preprocessor.fit_transform(X)

In [23]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20)

In [24]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((800, 19), (200, 19), (800,), (200,))

In [36]:
y_train

631    79
334    83
281    45
863    71
667    77
       ..
853    82
728    73
413    63
825    62
658    43
Name: math_score, Length: 800, dtype: int64

In [40]:
models = [LinearRegression(),
          Ridge(),
          KNeighborsRegressor(),
          DecisionTreeRegressor()
          ]
for i in range(len(models)):
    print(models[i])

LinearRegression()
Ridge()
KNeighborsRegressor()
DecisionTreeRegressor()


In [42]:
models = [LinearRegression(),
          Ridge(),
          KNeighborsRegressor(),
          DecisionTreeRegressor()
          ]

for i in range(len(models)):
    model_train = models[i].fit(X_train,y_train)
    pred_y_train = model_train.predict(X_train)
    mse_training_model = mean_squared_error(pred_y_train,y_train)
    r2_training_model = r2_score(pred_y_train,y_train)
    mae_training_model = mean_absolute_error(pred_y_train,y_train)
    pred_y_test = model_train.predict(X_test)
    mse_testing_model = mean_squared_error(pred_y_test,y_test)
    r2_testing_model = r2_score(pred_y_test,y_test)
    mae_testing_model = mean_absolute_error(pred_y_test,y_test)
    print(list(models)[i])
    print("Training_Accuracy")
    print(f"r2_score:{r2_training_model}")
    print(f"mean_squared_error:{mse_training_model}")
    print(f"mean_absolute_error:{mae_training_model}")
    print("--"*32)
    print("Testing_Accuracy")
    print(f"r2_score:{r2_testing_model }")
    print(f"mean_squared_error:{mse_testing_model}")
    print(f"mean_absolute_error:{mae_testing_model}")
    print("**"*32)

LinearRegression()
Training_Accuracy
r2_score:0.8554910194681247
mean_squared_error:28.53761662244453
mean_absolute_error:4.272746564668031
----------------------------------------------------------------
Testing_Accuracy
r2_score:0.863348853837397
mean_squared_error:27.869228061053835
mean_absolute_error:4.171352082349253
****************************************************************
Ridge()
Training_Accuracy
r2_score:0.8550149746745288
mean_squared_error:28.540574178114237
mean_absolute_error:4.27121511997466
----------------------------------------------------------------
Testing_Accuracy
r2_score:0.8629044187934044
mean_squared_error:27.843798756382416
mean_absolute_error:4.170968919954931
****************************************************************
KNeighborsRegressor()
Training_Accuracy
r2_score:0.7832613054753079
mean_squared_error:34.3203
mean_absolute_error:4.704
----------------------------------------------------------------
Testing_Accuracy
r2_score:0.6726711391575511