In [3]:
import optuna
import pandas as pd
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [4]:
data = pd.read_csv("dataset\Admission_Predict.csv")

In [5]:
data.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Serial No.         400 non-null    int64  
 1   GRE Score          400 non-null    int64  
 2   TOEFL Score        400 non-null    int64  
 3   University Rating  400 non-null    int64  
 4   SOP                400 non-null    float64
 5   LOR                400 non-null    float64
 6   CGPA               400 non-null    float64
 7   Research           400 non-null    int64  
 8   Chance of Admit    400 non-null    float64
dtypes: float64(4), int64(5)
memory usage: 28.2 KB


In [7]:
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Serial No.,400.0,200.5,115.614301,1.0,100.75,200.5,300.25,400.0
GRE Score,400.0,316.8075,11.473646,290.0,308.0,317.0,325.0,340.0
TOEFL Score,400.0,107.41,6.069514,92.0,103.0,107.0,112.0,120.0
University Rating,400.0,3.0875,1.143728,1.0,2.0,3.0,4.0,5.0
SOP,400.0,3.4,1.006869,1.0,2.5,3.5,4.0,5.0
LOR,400.0,3.4525,0.898478,1.0,3.0,3.5,4.0,5.0
CGPA,400.0,8.598925,0.596317,6.8,8.17,8.61,9.0625,9.92
Research,400.0,0.5475,0.498362,0.0,0.0,1.0,1.0,1.0
Chance of Admit,400.0,0.72435,0.142609,0.34,0.64,0.73,0.83,0.97


In [8]:
data.isnull().sum()

Serial No.           0
GRE Score            0
TOEFL Score          0
University Rating    0
SOP                  0
LOR                  0
CGPA                 0
Research             0
Chance of Admit      0
dtype: int64

In [10]:
X = data.drop(["Serial No.", "Chance of Admit "], axis=1)

In [12]:
y = data["Chance of Admit "]

In [13]:
y.head()

0    0.92
1    0.76
2    0.72
3    0.80
4    0.65
Name: Chance of Admit , dtype: float64

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=25)

In [15]:
sc = StandardScaler()

In [18]:
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

In [30]:
def objective(trail, data=X, target=y):
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.25, random_state=25)
    param = {
        "tree_method": "auto", # {'approx', 'auto', 'exact', 'gpu_hist', 'hist'}
        "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
        "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),
        "colsample_bytree": trail.suggest_categorical('colsample_bytree', [.1, .2, .3, .4, .5, .6, .7, .8, .9, 1]),
        "subsample": trail.suggest_categorical('subsample', [.1, .2, .3, .4, .5, .6, .7, .8, .9, 1]),
        "learning_rate": trail.suggest_categorical('learning_rate', [.00001, .0003, .008, .02, .01, 1, 8]),
        "n_estimators": 3000,
        "max_depth": trail.suggest_categorical('max_depth', [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
        "random_state": trail.suggest_categorical('random_state', [10, 20, 30, 2000, 3454, 243123]),
        "min_child_weight": trail.suggest_int("min_child_weight", 1, 200)
    }

    model = xgb.XGBRegressor(**param)
    model.fit(train_x, train_y, eval_set=[(test_x, test_y)], verbose=True)
    pred = model.predict(test_x)
    mse = mean_squared_error(test_y, pred)
    return mse

In [31]:
find_params=optuna.create_study()
find_params.optimize(objective, n_trials=10)
find_params.best_trial.params

[I 2024-08-03 12:42:16,888] A new study created in memory with name: no-name-ab4e3d1d-cc51-4b15-8cf5-87d40e24e128


[0]	validation_0-rmse:0.15112
[1]	validation_0-rmse:0.15028
[2]	validation_0-rmse:0.14948
[3]	validation_0-rmse:0.14898
[4]	validation_0-rmse:0.14830
[5]	validation_0-rmse:0.14772


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[6]	validation_0-rmse:0.14716
[7]	validation_0-rmse:0.14655
[8]	validation_0-rmse:0.14600
[9]	validation_0-rmse:0.14534
[10]	validation_0-rmse:0.14473
[11]	validation_0-rmse:0.14395
[12]	validation_0-rmse:0.14329
[13]	validation_0-rmse:0.14260
[14]	validation_0-rmse:0.14205
[15]	validation_0-rmse:0.14136
[16]	validation_0-rmse:0.14082
[17]	validation_0-rmse:0.14019
[18]	validation_0-rmse:0.13949
[19]	validation_0-rmse:0.13899
[20]	validation_0-rmse:0.13836
[21]	validation_0-rmse:0.13771
[22]	validation_0-rmse:0.13707
[23]	validation_0-rmse:0.13659
[24]	validation_0-rmse:0.13600
[25]	validation_0-rmse:0.13543
[26]	validation_0-rmse:0.13484
[27]	validation_0-rmse:0.13420
[28]	validation_0-rmse:0.13366
[29]	validation_0-rmse:0.13307
[30]	validation_0-rmse:0.13258
[31]	validation_0-rmse:0.13206
[32]	validation_0-rmse:0.13162
[33]	validation_0-rmse:0.13123
[34]	validation_0-rmse:0.13064
[35]	validation_0-rmse:0.13004
[36]	validation_0-rmse:0.12955
[37]	validation_0-rmse:0.12905
[38]	validat

[I 2024-08-03 12:43:10,323] Trial 0 finished with value: 0.0049207508572086025 and parameters: {'lambda': 0.04484249268881329, 'alpha': 0.004967066999970753, 'colsample_bytree': 0.7, 'subsample': 0.5, 'learning_rate': 0.008, 'max_depth': 4, 'random_state': 10, 'min_child_weight': 35}. Best is trial 0 with value: 0.0049207508572086025.


[0]	validation_0-rmse:0.15173
[1]	validation_0-rmse:0.15173
[2]	validation_0-rmse:0.15173
[3]	validation_0-rmse:0.15173
[4]	validation_0-rmse:0.15173
[5]	validation_0-rmse:0.15173


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[6]	validation_0-rmse:0.15173
[7]	validation_0-rmse:0.15173
[8]	validation_0-rmse:0.15173
[9]	validation_0-rmse:0.15173
[10]	validation_0-rmse:0.15173
[11]	validation_0-rmse:0.15173
[12]	validation_0-rmse:0.15173
[13]	validation_0-rmse:0.15173
[14]	validation_0-rmse:0.15173
[15]	validation_0-rmse:0.15173
[16]	validation_0-rmse:0.15173
[17]	validation_0-rmse:0.15173
[18]	validation_0-rmse:0.15173
[19]	validation_0-rmse:0.15173
[20]	validation_0-rmse:0.15173
[21]	validation_0-rmse:0.15173
[22]	validation_0-rmse:0.15173
[23]	validation_0-rmse:0.15173
[24]	validation_0-rmse:0.15173
[25]	validation_0-rmse:0.15173
[26]	validation_0-rmse:0.15173
[27]	validation_0-rmse:0.15173
[28]	validation_0-rmse:0.15173
[29]	validation_0-rmse:0.15173
[30]	validation_0-rmse:0.15173
[31]	validation_0-rmse:0.15173
[32]	validation_0-rmse:0.15173
[33]	validation_0-rmse:0.15173
[34]	validation_0-rmse:0.15173
[35]	validation_0-rmse:0.15173
[36]	validation_0-rmse:0.15173
[37]	validation_0-rmse:0.15173
[38]	validat

[I 2024-08-03 12:44:21,235] Trial 1 finished with value: 0.02302184499679639 and parameters: {'lambda': 0.00012536371512610265, 'alpha': 0.8997962680334993, 'colsample_bytree': 0.8, 'subsample': 0.8, 'learning_rate': 0.01, 'max_depth': 5, 'random_state': 10, 'min_child_weight': 158}. Best is trial 0 with value: 0.0049207508572086025.
  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[0]	validation_0-rmse:0.15091
[1]	validation_0-rmse:0.15020
[2]	validation_0-rmse:0.14939
[3]	validation_0-rmse:0.14896
[4]	validation_0-rmse:0.14833
[5]	validation_0-rmse:0.14793
[6]	validation_0-rmse:0.14750
[7]	validation_0-rmse:0.14683
[8]	validation_0-rmse:0.14624
[9]	validation_0-rmse:0.14553
[10]	validation_0-rmse:0.14496
[11]	validation_0-rmse:0.14460
[12]	validation_0-rmse:0.14377
[13]	validation_0-rmse:0.14311
[14]	validation_0-rmse:0.14255
[15]	validation_0-rmse:0.14208
[16]	validation_0-rmse:0.14160
[17]	validation_0-rmse:0.14101
[18]	validation_0-rmse:0.14042
[19]	validation_0-rmse:0.13996
[20]	validation_0-rmse:0.13920
[21]	validation_0-rmse:0.13867
[22]	validation_0-rmse:0.13812
[23]	validation_0-rmse:0.13769
[24]	validation_0-rmse:0.13721
[25]	validation_0-rmse:0.13662
[26]	validation_0-rmse:0.13630
[27]	validation_0-rmse:0.13566
[28]	validation_0-rmse:0.13505
[29]	validation_0-rmse:0.13460
[30]	validation_0-rmse:0.13417
[31]	validation_0-rmse:0.13357
[32]	validation_0-

[I 2024-08-03 12:45:26,973] Trial 2 finished with value: 0.0047339128137354695 and parameters: {'lambda': 2.171092612858092, 'alpha': 0.0006434751422041868, 'colsample_bytree': 0.4, 'subsample': 0.3, 'learning_rate': 0.008, 'max_depth': 7, 'random_state': 3454, 'min_child_weight': 15}. Best is trial 2 with value: 0.0047339128137354695.


[0]	validation_0-rmse:0.15173
[1]	validation_0-rmse:0.15173
[2]	validation_0-rmse:0.15173
[3]	validation_0-rmse:0.15173
[4]	validation_0-rmse:0.15173
[5]	validation_0-rmse:0.15173
[6]	validation_0-rmse:0.15173


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[7]	validation_0-rmse:0.15173
[8]	validation_0-rmse:0.15173
[9]	validation_0-rmse:0.15173
[10]	validation_0-rmse:0.15173
[11]	validation_0-rmse:0.15173
[12]	validation_0-rmse:0.15173
[13]	validation_0-rmse:0.15173
[14]	validation_0-rmse:0.15173
[15]	validation_0-rmse:0.15173
[16]	validation_0-rmse:0.15173
[17]	validation_0-rmse:0.15173
[18]	validation_0-rmse:0.15173
[19]	validation_0-rmse:0.15173
[20]	validation_0-rmse:0.15173
[21]	validation_0-rmse:0.15173
[22]	validation_0-rmse:0.15173
[23]	validation_0-rmse:0.15173
[24]	validation_0-rmse:0.15173
[25]	validation_0-rmse:0.15173
[26]	validation_0-rmse:0.15173
[27]	validation_0-rmse:0.15173
[28]	validation_0-rmse:0.15173
[29]	validation_0-rmse:0.15173
[30]	validation_0-rmse:0.15173
[31]	validation_0-rmse:0.15173
[32]	validation_0-rmse:0.15173
[33]	validation_0-rmse:0.15173
[34]	validation_0-rmse:0.15173
[35]	validation_0-rmse:0.15173
[36]	validation_0-rmse:0.15173
[37]	validation_0-rmse:0.15173
[38]	validation_0-rmse:0.15173
[39]	valida

[I 2024-08-03 12:46:29,801] Trial 3 finished with value: 0.02303801401559398 and parameters: {'lambda': 0.00139453553640973, 'alpha': 0.726967552281977, 'colsample_bytree': 0.1, 'subsample': 0.5, 'learning_rate': 1, 'max_depth': 6, 'random_state': 20, 'min_child_weight': 168}. Best is trial 2 with value: 0.0047339128137354695.


[0]	validation_0-rmse:0.15173
[1]	validation_0-rmse:0.15173
[2]	validation_0-rmse:0.15173
[3]	validation_0-rmse:0.15173
[4]	validation_0-rmse:0.15173
[5]	validation_0-rmse:0.15173
[6]	validation_0-rmse:0.15173
[7]	validation_0-rmse:0.15173
[8]	validation_0-rmse:0.15173
[9]	validation_0-rmse:0.15173
[10]	validation_0-rmse:0.15173
[11]	validation_0-rmse:0.15173
[12]	validation_0-rmse:0.15173
[13]	validation_0-rmse:0.15173


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[14]	validation_0-rmse:0.15173
[15]	validation_0-rmse:0.15173
[16]	validation_0-rmse:0.15173
[17]	validation_0-rmse:0.15173
[18]	validation_0-rmse:0.15173
[19]	validation_0-rmse:0.15173
[20]	validation_0-rmse:0.15173
[21]	validation_0-rmse:0.15173
[22]	validation_0-rmse:0.15173
[23]	validation_0-rmse:0.15173
[24]	validation_0-rmse:0.15173
[25]	validation_0-rmse:0.15173
[26]	validation_0-rmse:0.15173
[27]	validation_0-rmse:0.15173
[28]	validation_0-rmse:0.15173
[29]	validation_0-rmse:0.15173
[30]	validation_0-rmse:0.15173
[31]	validation_0-rmse:0.15173
[32]	validation_0-rmse:0.15173
[33]	validation_0-rmse:0.15173
[34]	validation_0-rmse:0.15173
[35]	validation_0-rmse:0.15173
[36]	validation_0-rmse:0.15173
[37]	validation_0-rmse:0.15173
[38]	validation_0-rmse:0.15173
[39]	validation_0-rmse:0.15173
[40]	validation_0-rmse:0.15173
[41]	validation_0-rmse:0.15173
[42]	validation_0-rmse:0.15173
[43]	validation_0-rmse:0.15173
[44]	validation_0-rmse:0.15173
[45]	validation_0-rmse:0.15173
[46]	val

[I 2024-08-03 12:47:47,979] Trial 4 finished with value: 0.023022244426214495 and parameters: {'lambda': 0.02406494140485643, 'alpha': 0.0007066069094560425, 'colsample_bytree': 0.2, 'subsample': 0.2, 'learning_rate': 0.01, 'max_depth': 3, 'random_state': 2000, 'min_child_weight': 164}. Best is trial 2 with value: 0.0047339128137354695.


[0]	validation_0-rmse:0.15173
[1]	validation_0-rmse:0.15173
[2]	validation_0-rmse:0.15173


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[3]	validation_0-rmse:0.15173
[4]	validation_0-rmse:0.15173
[5]	validation_0-rmse:0.15173
[6]	validation_0-rmse:0.15173
[7]	validation_0-rmse:0.15173
[8]	validation_0-rmse:0.15173
[9]	validation_0-rmse:0.15173
[10]	validation_0-rmse:0.15173
[11]	validation_0-rmse:0.15173
[12]	validation_0-rmse:0.15173
[13]	validation_0-rmse:0.15173
[14]	validation_0-rmse:0.15173
[15]	validation_0-rmse:0.15173
[16]	validation_0-rmse:0.15173
[17]	validation_0-rmse:0.15173
[18]	validation_0-rmse:0.15173
[19]	validation_0-rmse:0.15173
[20]	validation_0-rmse:0.15173
[21]	validation_0-rmse:0.15173
[22]	validation_0-rmse:0.15173
[23]	validation_0-rmse:0.15173
[24]	validation_0-rmse:0.15173
[25]	validation_0-rmse:0.15173
[26]	validation_0-rmse:0.15173
[27]	validation_0-rmse:0.15173
[28]	validation_0-rmse:0.15173
[29]	validation_0-rmse:0.15173
[30]	validation_0-rmse:0.15173
[31]	validation_0-rmse:0.15173
[32]	validation_0-rmse:0.15173
[33]	validation_0-rmse:0.15173
[34]	validation_0-rmse:0.15173
[35]	validation

[I 2024-08-03 12:49:08,817] Trial 5 finished with value: 0.023022244426214495 and parameters: {'lambda': 0.0012404281771776421, 'alpha': 0.038768502534155445, 'colsample_bytree': 1, 'subsample': 0.2, 'learning_rate': 0.01, 'max_depth': 9, 'random_state': 20, 'min_child_weight': 194}. Best is trial 2 with value: 0.0047339128137354695.


[0]	validation_0-rmse:0.15178
[1]	validation_0-rmse:0.15173
[2]	validation_0-rmse:0.15190
[3]	validation_0-rmse:0.15203
[4]	validation_0-rmse:0.15180
[5]	validation_0-rmse:0.15188
[6]	validation_0-rmse:0.15173
[7]	validation_0-rmse:0.15185


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[8]	validation_0-rmse:0.15174
[9]	validation_0-rmse:0.15174
[10]	validation_0-rmse:0.15177
[11]	validation_0-rmse:0.15173
[12]	validation_0-rmse:0.15183
[13]	validation_0-rmse:0.15175
[14]	validation_0-rmse:0.15174
[15]	validation_0-rmse:0.15185
[16]	validation_0-rmse:0.15178
[17]	validation_0-rmse:0.15175
[18]	validation_0-rmse:0.15187
[19]	validation_0-rmse:0.15185
[20]	validation_0-rmse:0.15173
[21]	validation_0-rmse:0.15173
[22]	validation_0-rmse:0.15174
[23]	validation_0-rmse:0.15173
[24]	validation_0-rmse:0.15178
[25]	validation_0-rmse:0.15174
[26]	validation_0-rmse:0.15180
[27]	validation_0-rmse:0.15173
[28]	validation_0-rmse:0.15173
[29]	validation_0-rmse:0.15173
[30]	validation_0-rmse:0.15173
[31]	validation_0-rmse:0.15186
[32]	validation_0-rmse:0.15179
[33]	validation_0-rmse:0.15175
[34]	validation_0-rmse:0.15174
[35]	validation_0-rmse:0.15176
[36]	validation_0-rmse:0.15175
[37]	validation_0-rmse:0.15192
[38]	validation_0-rmse:0.15176
[39]	validation_0-rmse:0.15178
[40]	valid

[I 2024-08-03 12:49:58,632] Trial 6 finished with value: 0.02302242879533329 and parameters: {'lambda': 0.0024978521289077908, 'alpha': 0.00036561445042064425, 'colsample_bytree': 0.1, 'subsample': 0.8, 'learning_rate': 1, 'max_depth': 4, 'random_state': 2000, 'min_child_weight': 169}. Best is trial 2 with value: 0.0047339128137354695.


[0]	validation_0-rmse:0.15088
[1]	validation_0-rmse:0.15004
[2]	validation_0-rmse:0.14921
[3]	validation_0-rmse:0.14873
[4]	validation_0-rmse:0.14795
[5]	validation_0-rmse:0.14748


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[6]	validation_0-rmse:0.14700
[7]	validation_0-rmse:0.14619
[8]	validation_0-rmse:0.14551
[9]	validation_0-rmse:0.14477
[10]	validation_0-rmse:0.14408
[11]	validation_0-rmse:0.14369
[12]	validation_0-rmse:0.14296
[13]	validation_0-rmse:0.14224
[14]	validation_0-rmse:0.14180
[15]	validation_0-rmse:0.14142
[16]	validation_0-rmse:0.14099
[17]	validation_0-rmse:0.14033
[18]	validation_0-rmse:0.13959
[19]	validation_0-rmse:0.13895
[20]	validation_0-rmse:0.13827
[21]	validation_0-rmse:0.13768
[22]	validation_0-rmse:0.13701
[23]	validation_0-rmse:0.13666
[24]	validation_0-rmse:0.13604
[25]	validation_0-rmse:0.13538
[26]	validation_0-rmse:0.13504
[27]	validation_0-rmse:0.13434
[28]	validation_0-rmse:0.13370
[29]	validation_0-rmse:0.13319
[30]	validation_0-rmse:0.13260
[31]	validation_0-rmse:0.13202
[32]	validation_0-rmse:0.13148
[33]	validation_0-rmse:0.13116
[34]	validation_0-rmse:0.13063
[35]	validation_0-rmse:0.13002
[36]	validation_0-rmse:0.12941
[37]	validation_0-rmse:0.12894
[38]	validat

[I 2024-08-03 12:50:56,390] Trial 7 finished with value: 0.004683172723299316 and parameters: {'lambda': 0.7348079202263718, 'alpha': 0.025518117492534194, 'colsample_bytree': 0.4, 'subsample': 1, 'learning_rate': 0.008, 'max_depth': 12, 'random_state': 243123, 'min_child_weight': 51}. Best is trial 7 with value: 0.004683172723299316.


[0]	validation_0-rmse:0.15173
[1]	validation_0-rmse:0.15173
[2]	validation_0-rmse:0.15173
[3]	validation_0-rmse:0.15173
[4]	validation_0-rmse:0.15173
[5]	validation_0-rmse:0.15173


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[6]	validation_0-rmse:0.15173
[7]	validation_0-rmse:0.15173
[8]	validation_0-rmse:0.15173
[9]	validation_0-rmse:0.15173
[10]	validation_0-rmse:0.15173
[11]	validation_0-rmse:0.15173
[12]	validation_0-rmse:0.15173
[13]	validation_0-rmse:0.15173
[14]	validation_0-rmse:0.15173
[15]	validation_0-rmse:0.15173
[16]	validation_0-rmse:0.15173
[17]	validation_0-rmse:0.15173
[18]	validation_0-rmse:0.15173
[19]	validation_0-rmse:0.15173
[20]	validation_0-rmse:0.15173
[21]	validation_0-rmse:0.15173
[22]	validation_0-rmse:0.15173
[23]	validation_0-rmse:0.15173
[24]	validation_0-rmse:0.15173
[25]	validation_0-rmse:0.15173
[26]	validation_0-rmse:0.15173
[27]	validation_0-rmse:0.15173
[28]	validation_0-rmse:0.15173
[29]	validation_0-rmse:0.15173
[30]	validation_0-rmse:0.15173
[31]	validation_0-rmse:0.15173
[32]	validation_0-rmse:0.15173
[33]	validation_0-rmse:0.15173
[34]	validation_0-rmse:0.15173
[35]	validation_0-rmse:0.15173
[36]	validation_0-rmse:0.15173
[37]	validation_0-rmse:0.15173
[38]	validat

[I 2024-08-03 12:51:57,338] Trial 8 finished with value: 0.023022239432270757 and parameters: {'lambda': 0.019667395504607457, 'alpha': 0.014991589632984244, 'colsample_bytree': 0.4, 'subsample': 0.7, 'learning_rate': 1e-05, 'max_depth': 4, 'random_state': 3454, 'min_child_weight': 157}. Best is trial 7 with value: 0.004683172723299316.


[0]	validation_0-rmse:0.15173
[1]	validation_0-rmse:0.15173
[2]	validation_0-rmse:0.15173
[3]	validation_0-rmse:0.15173
[4]	validation_0-rmse:0.15173
[5]	validation_0-rmse:0.15173


  "lambda": trail.suggest_loguniform('lambda', 1e-4, 10.0),
  "alpha": trail.suggest_loguniform('alpha', 1e-4, 10.0),


[6]	validation_0-rmse:0.15173
[7]	validation_0-rmse:0.15173
[8]	validation_0-rmse:0.15173
[9]	validation_0-rmse:0.15173
[10]	validation_0-rmse:0.15173
[11]	validation_0-rmse:0.15173
[12]	validation_0-rmse:0.15173
[13]	validation_0-rmse:0.15173
[14]	validation_0-rmse:0.15173
[15]	validation_0-rmse:0.15173
[16]	validation_0-rmse:0.15173
[17]	validation_0-rmse:0.15173
[18]	validation_0-rmse:0.15173
[19]	validation_0-rmse:0.15173
[20]	validation_0-rmse:0.15173
[21]	validation_0-rmse:0.15173
[22]	validation_0-rmse:0.15173
[23]	validation_0-rmse:0.15173
[24]	validation_0-rmse:0.15173
[25]	validation_0-rmse:0.15173
[26]	validation_0-rmse:0.15173
[27]	validation_0-rmse:0.15173
[28]	validation_0-rmse:0.15173
[29]	validation_0-rmse:0.15173
[30]	validation_0-rmse:0.15173
[31]	validation_0-rmse:0.15173
[32]	validation_0-rmse:0.15173
[33]	validation_0-rmse:0.15173
[34]	validation_0-rmse:0.15173
[35]	validation_0-rmse:0.15173
[36]	validation_0-rmse:0.15173
[37]	validation_0-rmse:0.15173
[38]	validat

[I 2024-08-03 12:52:40,184] Trial 9 finished with value: 0.023024072334074976 and parameters: {'lambda': 0.004662083450761026, 'alpha': 0.05949985666780798, 'colsample_bytree': 0.5, 'subsample': 0.5, 'learning_rate': 0.01, 'max_depth': 8, 'random_state': 30, 'min_child_weight': 143}. Best is trial 7 with value: 0.004683172723299316.


{'lambda': 0.7348079202263718,
 'alpha': 0.025518117492534194,
 'colsample_bytree': 0.4,
 'subsample': 1,
 'learning_rate': 0.008,
 'max_depth': 12,
 'random_state': 243123,
 'min_child_weight': 51}

In [45]:
best_params1 = {
    'lambda': 1.10382994152563448,
    'alpha' : 0.2002246174362676,
    'colsample_bytree': 0.3,
    'subsample': 0.9,
    'learning_rate': 0.008,
    'max_depth': 4,
    'random_state':3454,
    'min_child_weight': 30
}

In [46]:
best_params = {'lambda': 0.7348079202263718,
 'alpha': 0.025518117492534194,
 'colsample_bytree': 0.4,
 'subsample': 1,
 'learning_rate': 0.008,
 'max_depth': 12,
 'random_state': 243123,
 'min_child_weight': 51}

In [42]:
from sklearn.metrics import r2_score

In [49]:
model = xgb.XGBRegressor(**best_params)
model.fit(X_train_sc, y_train)
pred_train = model.predict(X_train_sc)
pred_test = model.predict(X_test_sc)
mse_train = mean_squared_error(y_train, pred_train)
mse_test = mean_squared_error(y_test, pred_test)
print("MSE train:", mse_train)
print("MSE test:", mse_test)
print('-'*25)
print("RMSE train:", np.sqrt(mse_train))
print("RMSE test:", np.sqrt(mse_test))
print('-'*25)
print("R2 train:", r2_score(y_train, pred_train))
print("R2 test:", r2_score(y_test, pred_test))

MSE train: 0.008482528591370062
MSE test: 0.010799997029211543
-------------------------
RMSE train: 0.09210064381626255
RMSE test: 0.10392303416091903
-------------------------
R2 train: 0.5621874529180377
R2 test: 0.5308624388725951


In [50]:
from sklearn.ensemble import RandomForestRegressor

In [51]:
model = RandomForestRegressor()
model.fit(X_train_sc, y_train)
pred_train = model.predict(X_train_sc)
pred_test = model.predict(X_test_sc)
mse_train = mean_squared_error(y_train, pred_train)
mse_test = mean_squared_error(y_test, pred_test)
print("MSE train:", mse_train)
print("MSE test:", mse_test)
print('-'*25)
print("RMSE train:", np.sqrt(mse_train))
print("RMSE test:", np.sqrt(mse_test))
print('-'*25)
print("R2 train:", r2_score(y_train, pred_train))
print("R2 test:", r2_score(y_test, pred_test))

MSE train: 0.0006682105666666666
MSE test: 0.004915453900000006
-------------------------
RMSE train: 0.025849769180142916
RMSE test: 0.07011029810234731
-------------------------
R2 train: 0.9655113487649132
R2 test: 0.7864791954809875
