In [19]:
import numpy as np
import pandas as pd
import lightgbm as lgb
import xgboost as xgb
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")

np.random.seed(42)

train = pd.read_csv("/content/sample_data/train (1).csv")
test  = pd.read_csv("/content/sample_data/test (1).csv")

LAGS = 30
BASE_COLS = ["Open", "High", "Low", "Close", "Volume"]

df = train.copy()

for col in BASE_COLS:
    for lag in range(LAGS):
        df[f"{col}_Lag_{lag}"] = df[col].shift(lag)

df = df.dropna().reset_index(drop=True)

y = df["Target"].values
lag_features = [c for c in df.columns if "Lag_" in c]

X_train = df[lag_features]
X_test  = test[lag_features]

def add_features(X):
    X = X.copy()
    close_cols  = [f"Close_Lag_{i}" for i in range(30)]
    volume_cols = [f"Volume_Lag_{i}" for i in range(30)]

    X["close_mean_30"] = X[close_cols].mean(axis=1)
    X["close_std_30"]  = X[close_cols].std(axis=1)
    X["close_min_30"]  = X[close_cols].min(axis=1)
    X["close_max_30"]  = X[close_cols].max(axis=1)

    X["close_trend_30"] = X["Close_Lag_0"] - X["Close_Lag_29"]
    X["return_1"] = (X["Close_Lag_0"] - X["Close_Lag_1"]) / (X["Close_Lag_1"] + 1e-6)

    X["candle_body"]  = X["Close_Lag_0"] - X["Open_Lag_0"]
    X["candle_range"] = X["High_Lag_0"] - X["Low_Lag_0"]

    X["volume_mean_30"] = X[volume_cols].mean(axis=1)
    X["volume_std_30"]  = X[volume_cols].std(axis=1)
    X["volume_ratio"]   = X["Volume_Lag_0"] / (X["volume_mean_30"] + 1e-6)

    return X

X_train = add_features(X_train).astype(np.float32)
X_test  = add_features(X_test).astype(np.float32)

last_close = df["Close_Lag_0"].values
y_delta = y - last_close
y_delta = np.clip(y_delta, -6, 6)

lgb_model = lgb.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.025,
    max_depth=4,
    num_leaves=16,
    min_child_samples=100,
    subsample=0.65,
    colsample_bytree=0.65,
    reg_alpha=3.0,
    reg_lambda=3.0,
    random_state=42
)

lgb_model.fit(X_train, y_delta)

xgb_model = xgb.XGBRegressor(
    n_estimators=800,
    learning_rate=0.03,
    max_depth=4,
    subsample=0.65,
    colsample_bytree=0.65,
    reg_alpha=3.0,
    reg_lambda=3.0,
    objective="reg:squarederror",
    random_state=42
)

xgb_model.fit(X_train, y_delta)

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

ridge = Ridge(alpha=15.0)
ridge.fit(X_train_s, y_delta)

delta_lgb   = lgb_model.predict(X_test)
delta_xgb   = xgb_model.predict(X_test)
delta_ridge = ridge.predict(X_test_s)

delta_pred = (
    0.45 * delta_lgb +
    0.35 * delta_xgb +
    0.20 * delta_ridge
)

delta_pred *= 0.65

base = test["Close_Lag_0"].values
final_pred = base + delta_pred

final_pred = np.clip(
    final_pred,
    base - 5,
    base + 5
)

submission = pd.DataFrame({
    "ID": test["ID"],
    "TARGET": final_pred
})

submission.to_csv("submission.csv", index=False)
submission


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40784
[LightGBM] [Info] Number of data points in the train set: 1008, number of used features: 161
[LightGBM] [Info] Start training from score 0.167631


Unnamed: 0,ID,TARGET
0,Test_0,206.751346
1,Test_1,214.840693
2,Test_2,213.237345
3,Test_3,205.95116
4,Test_4,229.008121
5,Test_5,213.350736
6,Test_6,207.440604
7,Test_7,213.839005
8,Test_8,210.303533
9,Test_9,219.419213
