In [1]:
import os, sys
import numpy as np
import pandas as pd

from config_local import local_config

In [None]:
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV

In [3]:
# Read the preprocessed ("process6") train and test feature tables in one pass.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        local_config.TRAIN_PROCESS6_CSV,
        local_config.TEST_PROCESS6_CSV,
    )
)

# Raw test file, indexed by its "Id" column.
testRaw = pd.read_csv(
    local_config.TEST_CSV,
    index_col="Id",
)

In [4]:
# Separate the training table into the feature matrix (every column except
# the target) and the target itself, "logSP" (log SalePrice).
X, y = train.drop(columns=["logSP"]), train["logSP"]

In [None]:
# ==== Cross-validation + Random Search ====
# Shuffled, seeded 5-fold splitter. It is passed to the search below so that
# fold assignment is randomized yet reproducible (an integer `cv` would use
# consecutive, unshuffled folds for a regressor).
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Base estimator; the hyper-parameters sampled by the search override the
# defaults on each candidate.
# NOTE(review): n_jobs=-1 here combined with n_jobs=-1 on the search can
# oversubscribe CPU threads, and parallel workers will share the one GPU
# selected by device_type="gpu" -- confirm this is intended on your hardware.
lgbm = LGBMRegressor(
    objective="regression",
    random_state=42,
    n_jobs=-1,
    device_type="gpu",
    # LightGBM only applies `subsample` (bagging_fraction) when the bagging
    # frequency is non-zero; without this the "subsample" entries in
    # param_dist would have no effect on the fitted models.
    subsample_freq=1,
)

# Discrete search space: each candidate draws one value per key.
param_dist = {
    "num_leaves": [31, 63],
    "max_depth": [5, 7, -1],
    "learning_rate": [0.05, 0.04, 0.03],
    "n_estimators": [800, 1000, 1200],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.7, 0.9],
    "min_child_samples": [10, 20],
}

random_search = RandomizedSearchCV(
    estimator=lgbm,
    param_distributions=param_dist,
    n_iter=30,                  # number of sampled parameter combinations
    scoring="neg_mean_squared_error",
    cv=kf,                      # use the shuffled, seeded splitter defined above
    n_jobs=-1,
    verbose=2,
    random_state=42,
)


print("Running RandomizedSearchCV for LightGBM...")
random_search.fit(X, y)

print("\nBest params found:")
print(random_search.best_params_)

# The scorer is negated MSE, so flip the sign back; the reported RMSE is the
# square root of the mean per-fold MSE of the best candidate.
best_mse = -random_search.best_score_
best_rmse = best_mse ** 0.5
print(f"\nBest CV RMSE from RandomizedSearch: {best_rmse:.4f}")

SyntaxError: positional argument follows keyword argument (2913136424.py, line 8)

In [None]:

# ==== Final model (already refit on all data) ====
# With the default refit behaviour, best_estimator_ is the best candidate
# retrained on the full X, y passed to random_search.fit.
best_model = random_search.best_estimator_

# ==== Predictions on test ====
# Feed the model exactly the training feature columns, in training order, so a
# reordered or wider test table cannot silently shift features.
missing_cols = [col for col in X.columns if col not in test.columns]
if missing_cols:
    raise ValueError(f"Test set is missing training features: {missing_cols}")
test_pred_log = best_model.predict(test[X.columns])

# Inverse log transform
# NOTE(review): expm1 is the inverse of log1p -- assumes "logSP" was built with
# np.log1p(SalePrice); confirm against the preprocessing step.
test_pred_real = np.expm1(test_pred_log)

# ==== Submission (same style as ElasticNet) ====
# NOTE(review): assumes the rows of `test` are in the same order as `testRaw`,
# since Ids are taken positionally from the raw file's index -- verify.
submission = pd.DataFrame({
    "Id": testRaw.index,
    "SalePrice": test_pred_real
})

# Create the output directory on first use so the write cannot fail on a
# fresh checkout.
os.makedirs(local_config.SUBMISSIONS_DIR, exist_ok=True)
out_path = os.path.join(local_config.SUBMISSIONS_DIR, "lightGBM_Model.csv")
submission.to_csv(out_path, index=False)

print(f"Submission saved: {out_path}")

Submission saved: D:\Project\Kaggle\house-prices-starter\data\submissions\lightGBM_Model.csv
