## Importing the modules

In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the dataset
df = pd.read_csv("abroad  - Sheet1.csv")

## Data preprocessing

In [3]:
# Step 2: Convert non-numeric values in "FEES" to NaN and handle missing values
df["FEES"] = pd.to_numeric(df["FEES"], errors='coerce')
df["FEES"].fillna(df["FEES"].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["FEES"].fillna(df["FEES"].median(), inplace=True)


In [4]:
# Step 3: Apply Label Encoding to categorical columns
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['COUNTRY'] = label_encoder.fit_transform(df['COUNTRY'])
df['COURSE TYPE'] = label_encoder.fit_transform(df['COURSE TYPE'])
df['COURSE (SPECIALIZATION)'] = label_encoder.fit_transform(df['COURSE (SPECIALIZATION)'])

In [5]:
# Step 4: Prepare features and target variable
X = df[['COUNTRY', 'COURSE TYPE', 'COURSE (SPECIALIZATION)']]  # Features
y = df['FEES']  # Target

## Training the model

In [6]:
# Step 5: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Define the objective function for Hyperopt
def objective(params):
    gbm = lgb.LGBMRegressor(
        objective='regression',
        metric='rmse',
        boosting_type='gbdt',
        num_leaves=max(int(params['num_leaves']), 2),  # Ensure num_leaves is at least 2
        learning_rate=params['learning_rate'],
        feature_fraction=params['feature_fraction'],
        bagging_fraction=params['bagging_fraction'],
        bagging_freq=int(params['bagging_freq']),
        max_depth=int(params['max_depth']),
        n_estimators=100
    )
    
    gbm.fit(X_train, y_train)
    y_pred = gbm.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    
    return {'loss': rmse, 'status': STATUS_OK}


In [8]:
# Define the search space for hyperparameters
space = {
    'num_leaves': hp.quniform('num_leaves', 2, 256, 1),  # Ensure minimum value is 2
    'learning_rate': hp.loguniform('learning_rate', -5, -1),
    'feature_fraction': hp.uniform('feature_fraction', 0.5, 1.0),
    'bagging_fraction': hp.uniform('bagging_fraction', 0.5, 1.0),
    'bagging_freq': hp.quniform('bagging_freq', 1, 10, 1),
    'max_depth': hp.quniform('max_depth', 3, 20, 1)
}

In [9]:
# Optimize the hyperparameters using Hyperopt
trials = Trials()
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials
)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000041 seconds.                
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
  0%|                                                                           | 0/50 [00:00<?, ?trial/s, best loss=?]

[WinError 2] The system cannot find the file specified

  File "C:\Users\TSA\anaconda3\envs\tf\lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(

  File "C:\Users\TSA\anaconda3\envs\tf\lib\subprocess.py", line 505, in run
    with Popen(*popenargs, **kwargs) as process:

  File "C:\Users\TSA\anaconda3\envs\tf\lib\subprocess.py", line 951, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,

  File "C:\Users\TSA\anaconda3\envs\tf\lib\subprocess.py", line 1436, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000075 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000064 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
  4%|█▉                                               | 2/50 [00:00<00:17,  2.68trial/s, best loss: 2274636.0901026814]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000064 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
  6%|███                                                | 3/50 [00:01<00:15,  2.97trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000063 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
  8%|████                                               | 4/50 [00:01<00:15,  3.02trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000052 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 10%|█████                                              | 5/50 [00:01<00:14,  3.10trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000342 seconds.                
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 12%|██████                                             | 6/50 [00:02<00:14,  3.03trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000089 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 14%|███████▏                                           | 7/50 [00:02<00:14,  2.93trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000065 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




 16%|████████▏                                          | 8/50 [00:02<00:14,  2.88trial/s, best loss: 2125104.29349959]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000055 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 22%|███████████                                       | 11/50 [00:03<00:13,  2.95trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000060 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 26%|█████████████                                     | 13/50 [00:04<00:11,  3.10trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000062 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 28%|██████████████                                    | 14/50 [00:04<00:11,  3.11trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000189 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000064 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 32%|████████████████                                  | 16/50 [00:05<00:10,  3.11trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000044 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 36%|██████████████████                                | 18/50 [00:06<00:10,  3.11trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000048 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000071 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000063 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000097 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000063 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000063 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 60%|██████████████████████████████                    | 30/50 [00:09<00:05,  3.38trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000073 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000081 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 64%|████████████████████████████████                  | 32/50 [00:09<00:05,  3.19trial/s, best loss: 2125104.29349959]




                                                                                                                       

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000060 seconds.                
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000065 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 72%|████████████████████████████████████              | 36/50 [00:11<00:04,  2.94trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000057 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 74%|█████████████████████████████████████             | 37/50 [00:11<00:04,  2.97trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000054 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000130 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000062 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 80%|████████████████████████████████████████          | 40/50 [00:12<00:03,  3.01trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000067 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 82%|█████████████████████████████████████████         | 41/50 [00:13<00:03,  2.92trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000072 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 84%|██████████████████████████████████████████        | 42/50 [00:13<00:02,  2.92trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000071 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 86%|███████████████████████████████████████████       | 43/50 [00:13<00:02,  2.93trial/s, best loss: 2125104.29349959]




                                                                                                                       

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000066 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 92%|██████████████████████████████████████████████    | 46/50 [00:14<00:01,  2.81trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000056 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 94%|███████████████████████████████████████████████   | 47/50 [00:15<00:01,  2.71trial/s, best loss: 2125104.29349959]




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000066 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
                                                                                                                       




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000078 seconds.                
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105                                                                                       
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3                              
[LightGBM] [Info] Start training from score 3223760.872323                                                             
 98%|█████████████████████████████████████████████████ | 49/50 [00:15<00:00,  2.80trial/s, best loss: 2125104.29349959]




100%|██████████████████████████████████████████████████| 50/50 [00:15<00:00,  3.13trial/s, best loss: 2125104.29349959]





In [10]:
# Print the best hyperparameters
print("Best Hyperparameters:", best)

Best Hyperparameters: {'bagging_fraction': 0.7553040682932186, 'bagging_freq': 2.0, 'feature_fraction': 0.8660324784827688, 'learning_rate': 0.12322783161178008, 'max_depth': 18.0, 'num_leaves': 127.0}


In [11]:
# Train the final model with the best hyperparameters
final_gbm = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    boosting_type='gbdt',
    num_leaves=max(int(best['num_leaves']), 2),  # Ensure num_leaves is at least 2
    learning_rate=best['learning_rate'],
    feature_fraction=best['feature_fraction'],
    bagging_fraction=best['bagging_fraction'],
    bagging_freq=int(best['bagging_freq']),
    max_depth=int(best['max_depth']),
    n_estimators=100
)

In [12]:
final_gbm.fit(X_train, y_train)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000059 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 105
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 3
[LightGBM] [Info] Start training from score 3223760.872323


In [13]:
# Save the model and label encoders
joblib.dump(final_gbm, "lgbm_model.joblib")
joblib.dump(label_encoder, "label_encoder.joblib")

['label_encoder.joblib']

## Evaluation metrics

In [14]:
# Step 10: Make predictions and evaluate the final model
y_final_pred = final_gbm.predict(X_test)
final_rmse = mean_squared_error(y_test, y_final_pred, squared=False)
final_r2 = r2_score(y_test, y_final_pred)





In [15]:
# Print the evaluation results
print(f"Final RMSE: {final_rmse}")
print(f"Final R²: {final_r2}")

Final RMSE: 2125104.29349959
Final R²: -1.1137325707510257
