In [11]:
import pickle
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from lightgbm import LGBMClassifier, early_stopping

In [12]:
df = pd.read_csv('reddit_preprocessing.csv').dropna(subset=['clean_comment'])
df.head()

Unnamed: 0,clean_comment,category
0,"film absolutely awful, but nevertheless, hilar...",0
1,well since seeing part 1 3 honestly say never ...,0
2,got see film preview dazzled it. not typical r...,1
3,adaptation positively butcher classic beloved ...,0
4,rzone awful movie! simple. seems tried make mo...,0


In [13]:
df = df.dropna(subset=['clean_comment', 'category'])

# Train/Test split (stratified)
X_train_text, X_test_text, y_train, y_test = train_test_split(
    df['clean_comment'],
    df['category'],
    test_size=0.2,
    random_state=42,
    stratify=df['category']
)

In [14]:
# TF-IDF 
vectorizer = TfidfVectorizer(
    ngram_range=(1, 3),
    max_features=10000,
    lowercase=True,
    strip_accents='unicode'
)
X_train_vec = vectorizer.fit_transform(X_train_text)
X_test_vec  = vectorizer.transform(X_test_text)

In [15]:
# Imbalance handling via weighting 
# scale_pos_weight = (#negative / #positive) on TRAIN set only
pos = (y_train == 1).sum()
neg = (y_train == 0).sum()
scale_pos_weight = (neg / max(pos, 1))

In [16]:
# 4) Optuna objective (optimize for Accuracy on test set)
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1200),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.2, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 16, 256),
        "max_depth": trial.suggest_int("max_depth", -1, 32),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 1.0),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "subsample_freq": trial.suggest_int("subsample_freq", 0, 10),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 5.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 5.0),
        "random_state": 42,
        "n_jobs": -1,
        "class_weight": "balanced",
        "objective": "binary",
        "metric": "binary_logloss",
        "verbose": -1
    }

    model = LGBMClassifier(**params)

    # early_stopping() callback instead of early_stopping_rounds arg
    model.fit(
        X_train_vec, y_train,
        eval_set=[(X_test_vec, y_test)],
        eval_metric="binary_logloss",
        callbacks=[early_stopping(stopping_rounds=100)]
    )

    y_pred = model.predict(X_test_vec)
    acc = accuracy_score(y_test, y_pred)
    return acc

# Run Optuna study
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=40, show_progress_bar=False)

print("Best trial:", study.best_trial.number)
print("Best value (Accuracy):", study.best_value)
print("Best params:", study.best_params)

[I 2025-08-14 15:20:39,430] A new study created in memory with name: no-name-a435da8a-03a4-4fd5-b73d-c54b2630588c


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[842]	valid_0's binary_logloss: 0.340198


[I 2025-08-14 15:22:15,506] Trial 0 finished with value: 0.8562537643043565 and parameters: {'n_estimators': 842, 'learning_rate': 0.012976483348855468, 'num_leaves': 178, 'max_depth': 19, 'min_child_samples': 92, 'min_split_gain': 0.3135781464219839, 'subsample': 0.6421987485366988, 'subsample_freq': 3, 'colsample_bytree': 0.8131390231861435, 'reg_alpha': 4.866504919191747, 'reg_lambda': 0.3400571935390123}. Best is trial 0 with value: 0.8562537643043565.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[370]	valid_0's binary_logloss: 0.295631


[I 2025-08-14 15:23:58,625] Trial 1 finished with value: 0.8703071672354948 and parameters: {'n_estimators': 371, 'learning_rate': 0.061079869969120536, 'num_leaves': 69, 'max_depth': 29, 'min_child_samples': 31, 'min_split_gain': 0.15262424078030123, 'subsample': 0.9316017363486365, 'subsample_freq': 0, 'colsample_bytree': 0.9485204768457853, 'reg_alpha': 1.6802995511936496, 'reg_lambda': 4.756802518496831}. Best is trial 1 with value: 0.8703071672354948.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[816]	valid_0's binary_logloss: 0.298675


[I 2025-08-14 15:25:23,668] Trial 2 finished with value: 0.8741216623168039 and parameters: {'n_estimators': 826, 'learning_rate': 0.08470580309094776, 'num_leaves': 42, 'max_depth': 8, 'min_child_samples': 13, 'min_split_gain': 0.8232896951551518, 'subsample': 0.6983878583458687, 'subsample_freq': 4, 'colsample_bytree': 0.7498044169398077, 'reg_alpha': 2.7759030191402285, 'reg_lambda': 2.5527340441017383}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[103]	valid_0's binary_logloss: 0.33747


[I 2025-08-14 15:25:58,386] Trial 3 finished with value: 0.8554507127082915 and parameters: {'n_estimators': 103, 'learning_rate': 0.11033446949539422, 'num_leaves': 234, 'max_depth': 21, 'min_child_samples': 30, 'min_split_gain': 0.5944543433622608, 'subsample': 0.9931552139529022, 'subsample_freq': 4, 'colsample_bytree': 0.8606106939149634, 'reg_alpha': 4.909408184761089, 'reg_lambda': 4.481443751940468}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[621]	valid_0's binary_logloss: 0.472315


[I 2025-08-14 15:27:41,809] Trial 4 finished with value: 0.8020477815699659 and parameters: {'n_estimators': 621, 'learning_rate': 0.0033025944485569375, 'num_leaves': 96, 'max_depth': 19, 'min_child_samples': 83, 'min_split_gain': 0.7023048911022888, 'subsample': 0.8718682099720323, 'subsample_freq': 9, 'colsample_bytree': 0.7229769014147627, 'reg_alpha': 3.3889626188904307, 'reg_lambda': 1.525963743777869}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[577]	valid_0's binary_logloss: 0.340351


[I 2025-08-14 15:29:50,832] Trial 5 finished with value: 0.8546476611122265 and parameters: {'n_estimators': 577, 'learning_rate': 0.024453392145919645, 'num_leaves': 222, 'max_depth': 11, 'min_child_samples': 33, 'min_split_gain': 0.7905232228757288, 'subsample': 0.9908685731608273, 'subsample_freq': 2, 'colsample_bytree': 0.8716011911734616, 'reg_alpha': 1.0228445903177452, 'reg_lambda': 3.9915244857087013}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[648]	valid_0's binary_logloss: 0.509037


[I 2025-08-14 15:30:34,827] Trial 6 finished with value: 0.7789600481830957 and parameters: {'n_estimators': 648, 'learning_rate': 0.005618067619527488, 'num_leaves': 150, 'max_depth': 4, 'min_child_samples': 93, 'min_split_gain': 0.9185095845285519, 'subsample': 0.8258428271498115, 'subsample_freq': 5, 'colsample_bytree': 0.8609065921238233, 'reg_alpha': 4.74848759396546, 'reg_lambda': 0.18888326660079224}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[693]	valid_0's binary_logloss: 0.55877


[I 2025-08-14 15:32:08,533] Trial 7 finished with value: 0.7586829953824533 and parameters: {'n_estimators': 693, 'learning_rate': 0.001678142571491476, 'num_leaves': 52, 'max_depth': 7, 'min_child_samples': 55, 'min_split_gain': 0.8731855431627822, 'subsample': 0.6738228675617245, 'subsample_freq': 2, 'colsample_bytree': 0.9654168561116784, 'reg_alpha': 0.94724575169018, 'reg_lambda': 4.55172678829045}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[962]	valid_0's binary_logloss: 0.29978


[I 2025-08-14 15:33:42,631] Trial 8 finished with value: 0.8729170849227063 and parameters: {'n_estimators': 1011, 'learning_rate': 0.0829084423195356, 'num_leaves': 178, 'max_depth': 10, 'min_child_samples': 70, 'min_split_gain': 0.27117363018790297, 'subsample': 0.8952787415912717, 'subsample_freq': 6, 'colsample_bytree': 0.7935092940843261, 'reg_alpha': 4.987152301545532, 'reg_lambda': 2.0071366570306566}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[288]	valid_0's binary_logloss: 0.616436


[I 2025-08-14 15:34:50,200] Trial 9 finished with value: 0.7612929130696647 and parameters: {'n_estimators': 288, 'learning_rate': 0.0013209077747352266, 'num_leaves': 252, 'max_depth': 11, 'min_child_samples': 17, 'min_split_gain': 0.6336297027050911, 'subsample': 0.6373014597265817, 'subsample_freq': 5, 'colsample_bytree': 0.9036495410188723, 'reg_alpha': 4.277573709375543, 'reg_lambda': 3.3208709153385225}. Best is trial 2 with value: 0.8741216623168039.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1021]	valid_0's binary_logloss: 0.296875


[I 2025-08-14 15:36:46,767] Trial 10 finished with value: 0.8745231881148364 and parameters: {'n_estimators': 1021, 'learning_rate': 0.029591129361568304, 'num_leaves': 19, 'max_depth': -1, 'min_child_samples': 12, 'min_split_gain': 0.9920978667919311, 'subsample': 0.7327543188222236, 'subsample_freq': 8, 'colsample_bytree': 0.6044719163539097, 'reg_alpha': 2.7747470736323976, 'reg_lambda': 2.827544893316557}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1186]	valid_0's binary_logloss: 0.291271


[I 2025-08-14 15:38:57,000] Trial 11 finished with value: 0.8743224252158202 and parameters: {'n_estimators': 1188, 'learning_rate': 0.03141549774340385, 'num_leaves': 18, 'max_depth': -1, 'min_child_samples': 8, 'min_split_gain': 0.9793082585134245, 'subsample': 0.7348653033141622, 'subsample_freq': 8, 'colsample_bytree': 0.6063183706599284, 'reg_alpha': 2.6720308734355256, 'reg_lambda': 2.8430098092584397}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1179]	valid_0's binary_logloss: 0.293639


[I 2025-08-14 15:41:04,757] Trial 12 finished with value: 0.8745231881148364 and parameters: {'n_estimators': 1188, 'learning_rate': 0.029137995371679097, 'num_leaves': 16, 'max_depth': -1, 'min_child_samples': 5, 'min_split_gain': 0.9576133153445361, 'subsample': 0.7477850439328552, 'subsample_freq': 9, 'colsample_bytree': 0.608067105897892, 'reg_alpha': 2.3870210574782473, 'reg_lambda': 2.9928192808929035}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1172]	valid_0's binary_logloss: 0.30258


[I 2025-08-14 15:47:16,154] Trial 13 finished with value: 0.8699056414374624 and parameters: {'n_estimators': 1172, 'learning_rate': 0.009901701735378944, 'num_leaves': 101, 'max_depth': 0, 'min_child_samples': 5, 'min_split_gain': 0.46018532179738336, 'subsample': 0.7691444001061163, 'subsample_freq': 10, 'colsample_bytree': 0.6149584149455177, 'reg_alpha': 1.744382454878454, 'reg_lambda': 3.4677830727106045}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[994]	valid_0's binary_logloss: 0.365936


[I 2025-08-14 15:48:21,397] Trial 14 finished with value: 0.8452118048584621 and parameters: {'n_estimators': 994, 'learning_rate': 0.031040926970482895, 'num_leaves': 16, 'max_depth': 3, 'min_child_samples': 47, 'min_split_gain': 0.027731987515689727, 'subsample': 0.7899086651852644, 'subsample_freq': 7, 'colsample_bytree': 0.6614760338156054, 'reg_alpha': 3.343778974499765, 'reg_lambda': 1.320349918439701}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[284]	valid_0's binary_logloss: 0.30351


[I 2025-08-14 15:49:02,257] Trial 15 finished with value: 0.8687010640433648 and parameters: {'n_estimators': 1007, 'learning_rate': 0.17625282431665823, 'num_leaves': 89, 'max_depth': 32, 'min_child_samples': 22, 'min_split_gain': 0.983114929911821, 'subsample': 0.7263534611211464, 'subsample_freq': 10, 'colsample_bytree': 0.6726797038683096, 'reg_alpha': 1.8757199818472712, 'reg_lambda': 3.2010606415219014}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1092]	valid_0's binary_logloss: 0.332032


[I 2025-08-14 15:50:12,770] Trial 16 finished with value: 0.8618751254768119 and parameters: {'n_estimators': 1092, 'learning_rate': 0.04518121338373786, 'num_leaves': 119, 'max_depth': 3, 'min_child_samples': 47, 'min_split_gain': 0.7501570825815118, 'subsample': 0.8342049399955463, 'subsample_freq': 8, 'colsample_bytree': 0.6705291373627018, 'reg_alpha': 0.013744598192632296, 'reg_lambda': 2.1437112775538676}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[862]	valid_0's binary_logloss: 0.327888


[I 2025-08-14 15:52:34,452] Trial 17 finished with value: 0.8624774141738607 and parameters: {'n_estimators': 862, 'learning_rate': 0.016936051076957843, 'num_leaves': 49, 'max_depth': 15, 'min_child_samples': 23, 'min_split_gain': 0.48938617312323074, 'subsample': 0.6032521722577343, 'subsample_freq': 7, 'colsample_bytree': 0.7144715539733518, 'reg_alpha': 3.481830795562865, 'reg_lambda': 3.955185564902144}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[916]	valid_0's binary_logloss: 0.360795


[I 2025-08-14 15:56:04,476] Trial 18 finished with value: 0.8476209596466573 and parameters: {'n_estimators': 916, 'learning_rate': 0.006078217594469989, 'num_leaves': 72, 'max_depth': 25, 'min_child_samples': 40, 'min_split_gain': 0.8460298327316074, 'subsample': 0.7371165968244263, 'subsample_freq': 9, 'colsample_bytree': 0.6322522649943497, 'reg_alpha': 2.255298298359483, 'reg_lambda': 1.0390547574393039}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1108]	valid_0's binary_logloss: 0.301947


[I 2025-08-14 15:58:38,320] Trial 19 finished with value: 0.8701064043364786 and parameters: {'n_estimators': 1108, 'learning_rate': 0.016671180764918036, 'num_leaves': 33, 'max_depth': -1, 'min_child_samples': 63, 'min_split_gain': 0.605394337933693, 'subsample': 0.770743114189258, 'subsample_freq': 7, 'colsample_bytree': 0.7169323989379423, 'reg_alpha': 3.0210540932319754, 'reg_lambda': 2.000960272622864}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[750]	valid_0's binary_logloss: 0.30774


[I 2025-08-14 16:00:21,803] Trial 20 finished with value: 0.8693033527404136 and parameters: {'n_estimators': 751, 'learning_rate': 0.04557490043838436, 'num_leaves': 141, 'max_depth': 15, 'min_child_samples': 6, 'min_split_gain': 0.9898354315523747, 'subsample': 0.6906974263386526, 'subsample_freq': 9, 'colsample_bytree': 0.6500605292337707, 'reg_alpha': 3.9720265312828125, 'reg_lambda': 2.7777235283837673}. Best is trial 10 with value: 0.8745231881148364.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1188]	valid_0's binary_logloss: 0.289358


[I 2025-08-14 16:02:38,896] Trial 21 finished with value: 0.8767315800040153 and parameters: {'n_estimators': 1198, 'learning_rate': 0.032807298141969456, 'num_leaves': 20, 'max_depth': -1, 'min_child_samples': 13, 'min_split_gain': 0.9883348068615785, 'subsample': 0.7393625970567357, 'subsample_freq': 8, 'colsample_bytree': 0.6047870972168443, 'reg_alpha': 2.3475097631390973, 'reg_lambda': 2.9388671102951722}. Best is trial 21 with value: 0.8767315800040153.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1192]	valid_0's binary_logloss: 0.40488


[I 2025-08-14 16:03:55,730] Trial 22 finished with value: 0.8359767115037141 and parameters: {'n_estimators': 1192, 'learning_rate': 0.02294166828942059, 'num_leaves': 66, 'max_depth': 2, 'min_child_samples': 17, 'min_split_gain': 0.9043142734182754, 'subsample': 0.8078054314252933, 'subsample_freq': 8, 'colsample_bytree': 0.6067661497987287, 'reg_alpha': 2.234348079521556, 'reg_lambda': 3.8001862488308724}. Best is trial 21 with value: 0.8767315800040153.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1073]	valid_0's binary_logloss: 0.3986


[I 2025-08-14 16:05:48,989] Trial 23 finished with value: 0.8331660309174864 and parameters: {'n_estimators': 1073, 'learning_rate': 0.008964924213924906, 'num_leaves': 27, 'max_depth': 6, 'min_child_samples': 24, 'min_split_gain': 0.9986427719370626, 'subsample': 0.7547041478145765, 'subsample_freq': 6, 'colsample_bytree': 0.654185272499018, 'reg_alpha': 2.290095027766449, 'reg_lambda': 3.061568950096628}. Best is trial 21 with value: 0.8767315800040153.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[937]	valid_0's binary_logloss: 0.430042


[I 2025-08-14 16:06:35,447] Trial 24 finished with value: 0.8239309375627384 and parameters: {'n_estimators': 937, 'learning_rate': 0.04121362334438595, 'num_leaves': 40, 'max_depth': 1, 'min_child_samples': 14, 'min_split_gain': 0.7178511798413125, 'subsample': 0.7146372964871645, 'subsample_freq': 10, 'colsample_bytree': 0.6963030859157999, 'reg_alpha': 1.2434730700960626, 'reg_lambda': 2.282929447834447}. Best is trial 21 with value: 0.8767315800040153.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[587]	valid_0's binary_logloss: 0.302001


[I 2025-08-14 16:07:21,110] Trial 25 finished with value: 0.8713109817305762 and parameters: {'n_estimators': 1128, 'learning_rate': 0.19929436594363978, 'num_leaves': 16, 'max_depth': 5, 'min_child_samples': 39, 'min_split_gain': 0.9050196211447598, 'subsample': 0.6503193368235708, 'subsample_freq': 8, 'colsample_bytree': 0.600063364348036, 'reg_alpha': 3.770155326472576, 'reg_lambda': 2.5717577972260286}. Best is trial 21 with value: 0.8767315800040153.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[516]	valid_0's binary_logloss: 0.314446


[I 2025-08-14 16:09:25,167] Trial 26 finished with value: 0.8638827544669745 and parameters: {'n_estimators': 516, 'learning_rate': 0.02192391604715544, 'num_leaves': 59, 'max_depth': -1, 'min_child_samples': 11, 'min_split_gain': 0.8229994605501478, 'subsample': 0.7998774194131185, 'subsample_freq': 6, 'colsample_bytree': 0.7645925669592886, 'reg_alpha': 2.886134349567441, 'reg_lambda': 3.514328709068838}. Best is trial 21 with value: 0.8767315800040153.


Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1012]	valid_0's binary_logloss: 0.345592


[I 2025-08-14 16:10:33,197] Trial 27 finished with value: 0.8526400321220639 and parameters: {'n_estimators': 1012, 'learning_rate': 0.06353690613641673, 'num_leaves': 82, 'max_depth': 2, 'min_child_samples': 19, 'min_split_gain': 0.9246555108623714, 'subsample': 0.85408818391377, 'subsample_freq': 9, 'colsample_bytree': 0.6396360716485734, 'reg_alpha': 1.4343137423825258, 'reg_lambda': 3.014533175361994}. Best is trial 21 with value: 0.8767315800040153.


Training until validation scores don't improve for 100 rounds


[W 2025-08-14 16:12:35,142] Trial 28 failed with parameters: {'n_estimators': 1068, 'learning_rate': 0.010403168924217247, 'num_leaves': 37, 'max_depth': 9, 'min_child_samples': 30, 'min_split_gain': 0.40560450384897834, 'subsample': 0.6729542736802457, 'subsample_freq': 7, 'colsample_bytree': 0.6873427307866279, 'reg_alpha': 2.419226401226351, 'reg_lambda': 4.207301559204048} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\jupyter\venv\Lib\site-packages\optuna\study\_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\latif\AppData\Local\Temp\ipykernel_9112\2468078835.py", line 26, in objective
    model.fit(
    ~~~~~~~~~^
        X_train_vec, y_train,
        ^^^^^^^^^^^^^^^^^^^^^
    ...<2 lines>...
        callbacks=[early_stopping(stopping_rounds=100)]
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\jupyter\venv\Lib\site-packages\lightgbm\sklearn.py", line 1560, 

KeyboardInterrupt: 

In [17]:
best_params = study.best_params.copy()
final_model = LGBMClassifier(
    **best_params,
    random_state=42,
    n_jobs=-1,
    class_weight="balanced",
    objective="binary",
    metric="binary_logloss",
    verbose=-1,
    # LightGBM-এ scale_pos_weight অতিরিক্ত bias যোগ করে; চাইলে টিউনও করা যায়।
    # এখানে আমরা ইমব্যাল্যান্স থাকলে ব্যবহার করছি:
    scale_pos_weight=scale_pos_weight if scale_pos_weight > 1 else 1.0
)

final_model.fit(
    X_train_vec, y_train,
    eval_set=[(X_test_vec, y_test)],
    eval_metric="binary_logloss",
    callbacks=[],
    early_stopping_rounds=100
)

TypeError: LGBMClassifier.fit() got an unexpected keyword argument 'early_stopping_rounds'