In [49]:
from xgboost import XGBClassifier, XGBRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
from catboost import CatBoostClassifier, CatBoostRegressor

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report,log_loss,r2_score,mean_squared_error
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression,LinearRegression,ElasticNet
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import make_column_selector, make_column_transformer

In [19]:

#classification models
dtc = DecisionTreeClassifier(random_state=25,max_depth=1)
knn = KNeighborsClassifier()
nb = GaussianNB()
lR = LogisticRegression()

#Regression models
lr = LinearRegression()
dtr = DecisionTreeRegressor(random_state=25,max_depth=1)
en = ElasticNet()

<h1 style = 'color : Orange'>XG Boost</h1>

In [13]:
sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')

X = sonar.drop('Class',axis = 1)
y = sonar['Class']
le = LabelEncoder()

y_le = le.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y_le,random_state=25,test_size=0.3,stratify=y)

xgb = XGBClassifier(learning_rate=0.8,n_estimators=50,max_depth=3,random_state= 25)
xgb.fit(X_train,y_train)
y_pred = xgb.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.82      0.91      0.86        34
           1       0.88      0.76      0.81        29

    accuracy                           0.84        63
   macro avg       0.85      0.84      0.84        63
weighted avg       0.85      0.84      0.84        63



In [15]:
# with different learning rate,deapth,estimators

sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')
sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')

X = sonar.drop('Class',axis = 1)
y = sonar['Class']


le = LabelEncoder()
y_le = le.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y_le,random_state=25,test_size=0.3,stratify=y)

learning_rates = np.linspace(0.01,0.8,20)
n_est = [50,100,200]
depths = [3,5,None]
scores = []

for n in n_est:
    for l in learning_rates:
        for d in depths:
            xgb = XGBClassifier(random_state=25,learning_rate=l,n_estimators=n,max_depth=d)
            xgb.fit(X_train,y_train)
            y_pred = xgb.predict_proba(X_test)
            scores.append([l,n,d,log_loss(y_test,y_pred)])
scores = pd.DataFrame(scores,columns=['Learning rate','N estimators','Deapth','log loss'])
scores.sort_values('log loss')

Unnamed: 0,Learning rate,N estimators,Deapth,log loss
23,0.301053,50,,0.422442
22,0.301053,50,5.0,0.422442
49,0.675263,50,5.0,0.429255
50,0.675263,50,,0.429255
40,0.550526,50,5.0,0.431690
...,...,...,...,...
1,0.010000,50,5.0,0.577834
2,0.010000,50,,0.577834
0,0.010000,50,3.0,0.588441
153,0.467368,200,3.0,0.599991


<h1 style = 'color : Orange'>Light GBM</h1>

In [20]:
sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')

X = sonar.drop('Class',axis = 1)
y = sonar['Class']
le = LabelEncoder()

y_le = le.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y_le,random_state=25,test_size=0.3,stratify=y)

lgbm = LGBMClassifier(learning_rate=0.8,n_estimators=50,max_depth=3,random_state= 25,verbose = -1)
lgbm.fit(X_train,y_train)
y_pred = lgbm.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.74      0.94      0.83        34
           1       0.90      0.62      0.73        29

    accuracy                           0.79        63
   macro avg       0.82      0.78      0.78        63
weighted avg       0.82      0.79      0.79        63



In [28]:
# with different learning rate,deapth,estimators

sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')
sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')

X = sonar.drop('Class',axis = 1)
y = sonar['Class']


le = LabelEncoder()
y_le = le.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y_le,random_state=25,test_size=0.3,stratify=y)

learning_rates = np.linspace(0.01,0.8,20)
n_est = [50,100,200]
depths = [3,5,None]
scores = []

for n in n_est:
    for l in learning_rates:
        for d in depths:
            lbgm = LGBMClassifier(random_state=25,learning_rate=l,n_estimators=n,max_depth=d,verbose = -1)
            lbgm.fit(X_train,y_train)
            y_pred = lbgm.predict_proba(X_test)
            scores.append([l,n,d,log_loss(y_test,y_pred)])
scores = pd.DataFrame(scores,columns=['Learning rate','N estimators','Deapth','log loss'])
scores.sort_values('log loss')

Unnamed: 0,Learning rate,N estimators,Deapth,log loss
7,0.093158,50,5.0,0.390546
8,0.093158,50,,0.390546
10,0.134737,50,5.0,0.393635
11,0.134737,50,,0.393635
13,0.176316,50,5.0,0.398391
...,...,...,...,...
54,0.758421,50,3.0,1.136319
109,0.675263,100,5.0,1.155843
110,0.675263,100,,1.155843
169,0.675263,200,5.0,1.155843


<h1 style = 'color : Orange'>Cat Boost</h1>

In [33]:
sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')

X = sonar.drop('Class',axis = 1)
y = sonar['Class']
le = LabelEncoder()

y_le = le.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y_le,random_state=25,test_size=0.3,stratify=y)

cb = CatBoostClassifier(learning_rate=0.8,n_estimators=50,max_depth=3,random_state= 25,verbose = 0)
cb.fit(X_train,y_train)
y_pred = cb.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.69      0.85      0.76        34
           1       0.76      0.55      0.64        29

    accuracy                           0.71        63
   macro avg       0.73      0.70      0.70        63
weighted avg       0.72      0.71      0.71        63



In [35]:
# with different learning rate,deapth,estimators

sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')
sonar = pd.read_csv('../Cases/Sonar/Sonar.csv')

X = sonar.drop('Class',axis = 1)
y = sonar['Class']


le = LabelEncoder()
y_le = le.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y_le,random_state=25,test_size=0.3,stratify=y)

learning_rates = np.linspace(0.01,0.8,20)
n_est = [50,100,200]
depths = [3,5,None]
scores = []

for n in n_est:
    for l in learning_rates:
        for d in depths:
            cb = CatBoostClassifier(random_state=25,learning_rate=l,n_estimators=n,max_depth=d,verbose = 0)
            cb.fit(X_train,y_train)
            y_pred = cb.predict_proba(X_test)
            scores.append([l,n,d,log_loss(y_test,y_pred)])
scores = pd.DataFrame(scores,columns=['Learning rate','N estimators','Deapth','log loss'])
scores.sort_values('log loss')

Unnamed: 0,Learning rate,N estimators,Deapth,log loss
66,0.093158,100,3.0,0.399040
64,0.051579,100,5.0,0.401615
67,0.093158,100,5.0,0.405820
125,0.051579,200,,0.408290
124,0.051579,200,5.0,0.409212
...,...,...,...,...
157,0.508947,200,5.0,0.786656
114,0.758421,100,3.0,0.791089
177,0.800000,200,3.0,0.815231
117,0.800000,100,3.0,0.837950


<h1 style = 'color : Orange'>Regressors</h1>

In [76]:
house = pd.read_csv('../Datasets/Housing.csv')
X = house.drop('price', axis=1)
y = house['price']

encoder = OneHotEncoder(sparse_output=False).set_output(transform='pandas')
# transformer = make_column_transformer((encoder, make_column_selector(dtype_include=object))).set_output(transform='pandas')

X = encoder.fit_transform(X)

X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=25,test_size=0.3)



In [77]:
learning_rates = np.linspace(0.01,0.8,20)
n_est = [50,100,200]
depths = [3,5,None]

scores = []
for n in n_est:
    for l in learning_rates:
        for d in depths:
            xgb = XGBRegressor(random_state=25,learning_rate=l,n_estimators=n,max_depth=d)
            xgb.fit(X_train,y_train)
            y_pred = xgb.predict(X_test)
            scores.append([l,n,d,r2_score(y_test,y_pred)])
scores = pd.DataFrame(scores,columns=['Learning rate','N estimators','Deapth','score'])
scores.sort_values('score', ascending=False)

Unnamed: 0,Learning rate,N estimators,Deapth,score
30,0.425789,50,3.0,0.630222
90,0.425789,100,3.0,0.621723
129,0.134737,200,3.0,0.618699
150,0.425789,200,3.0,0.613751
126,0.093158,200,3.0,0.613574
...,...,...,...,...
62,0.010000,100,,0.398226
60,0.010000,100,3.0,0.392327
1,0.010000,50,5.0,0.273251
2,0.010000,50,,0.272740


In [79]:
scores = []
for n in n_est:
    for l in learning_rates:
        for d in depths:
            lgbm = LGBMRegressor(random_state=25,learning_rate=l,n_estimators=n,max_depth=d, verbose=-1)
            lgbm.fit(X_train,y_train)
            y_pred = lgbm.predict(X_test)
            scores.append([l,n,d,r2_score(y_test,y_pred)])
scores = pd.DataFrame(scores,columns=['Learning rate','N estimators','Deapth','score'])
scores.sort_values('score', ascending=False)

Unnamed: 0,Learning rate,N estimators,Deapth,score
12,0.176316,50,3.0,0.574931
7,0.093158,50,5.0,0.571944
63,0.051579,100,3.0,0.571488
9,0.134737,50,3.0,0.570287
64,0.051579,100,5.0,0.568341
...,...,...,...,...
178,0.800000,200,5.0,0.218786
173,0.716842,200,,0.209035
176,0.758421,200,,0.208229
175,0.758421,200,5.0,0.207492


In [80]:
scores = []
for n in n_est:
    for l in learning_rates:
        for d in depths:
            catboost = CatBoostRegressor(random_state=25,learning_rate=l,n_estimators=n,max_depth=d, verbose=0)
            catboost.fit(X_train,y_train)
            y_pred = catboost.predict(X_test)
            scores.append([l,n,d,r2_score(y_test,y_pred)])
scores = pd.DataFrame(scores,columns=['Learning rate','N estimators','Deapth','score'])
scores.sort_values('score', ascending=False)

Unnamed: 0,Learning rate,N estimators,Deapth,score
93,0.467368,100,3.0,0.655143
153,0.467368,200,3.0,0.654404
148,0.384211,200,5.0,0.648866
162,0.592105,200,3.0,0.648692
147,0.384211,200,3.0,0.648588
...,...,...,...,...
62,0.010000,100,,0.373143
60,0.010000,100,3.0,0.369748
1,0.010000,50,5.0,0.245966
2,0.010000,50,,0.236001


In [91]:
house = pd.read_csv('../Datasets/Housing.csv')
X = house.drop('price', axis=1)
y = house['price']

categories = X.columns[X.dtypes == object].tolist()
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=25,test_size=0.3)



scores = []
for n in n_est:
    for l in learning_rates:
        for d in depths:
            catboost = CatBoostRegressor(random_state=25,learning_rate=l,n_estimators=n,max_depth=d, verbose=0)
            catboost.fit(X_train,y_train, cat_features=categories)
            y_pred = catboost.predict(X_test)
            scores.append([l,n,d,r2_score(y_test,y_pred)])
scores = pd.DataFrame(scores,columns=['Learning rate','N estimators','Deapth','score'])
scores.sort_values('score', ascending=False)

Unnamed: 0,Learning rate,N estimators,Deapth,score
71,0.134737,100,,0.638831
66,0.093158,100,3.0,0.635498
11,0.134737,50,,0.635407
65,0.051579,100,,0.634436
123,0.051579,200,3.0,0.632475
...,...,...,...,...
62,0.010000,100,,0.434501
60,0.010000,100,3.0,0.416712
1,0.010000,50,5.0,0.283400
2,0.010000,50,,0.275816
