<a href="https://www.kaggle.com/code/tydusg/gold-price-prediction-yahoo?scriptVersionId=291800133" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
!pip -q install yfinance lightgbm ipywidgets

import yfinance as yf
import pandas as pd
import numpy as np

# Download daily gold futures (Yahoo ticker "GC=F") and reduce the result to a
# two-column frame: Date + USD (the unadjusted "Close" column).
raw = yf.download(
    "GC=F",
    start="1978-01-01",
    interval="1d",
    auto_adjust=False,
    progress=False
)

# Fail fast with a readable message: without this guard an empty download only
# surfaces further down as an obscure error when the date range is printed.
if raw is None or raw.empty:
    raise ValueError("Yahoo download returned no data for GC=F (internet disabled or ticker failed).")

# If the columns come back as a MultiIndex, keep only the first level.
if isinstance(raw.columns, pd.MultiIndex):
    raw.columns = raw.columns.get_level_values(0)

raw = raw.reset_index()

df = raw.rename(columns={"Close": "USD"})[["Date", "USD"]].dropna()
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date").reset_index(drop=True)

# All rows may have been dropped above (e.g. Close entirely NaN).
if df.empty:
    raise ValueError("No usable Close prices for GC=F after dropping missing values.")

print(df.head())
print(df.tail())
print("Range:", df["Date"].min().date(), "‚Üí", df["Date"].max().date(), "Rows:", len(df))


Price       Date         USD
0     2000-08-30  273.899994
1     2000-08-31  278.299988
2     2000-09-01  277.000000
3     2000-09-05  275.799988
4     2000-09-06  274.200012
Price       Date          USD
6362  2026-01-08  4449.700195
6363  2026-01-09  4490.299805
6364  2026-01-12  4604.299805
6365  2026-01-13  4589.200195
6366  2026-01-14  4643.000000
Range: 2000-08-30 ‚Üí 2026-01-14 Rows: 6367


In [2]:
import numpy as np
import pandas as pd
import lightgbm as lgb

# Guard: this cell needs the `df` produced by the download cell.
if "df" not in globals():
    raise NameError("df not found. Run your Yahoo Finance download cell first.")
if df.empty:
    raise ValueError("df is empty. Yahoo download returned no data (internet disabled or ticker failed).")

# Normalise dtypes (datetime / numeric), drop rows that fail to parse,
# and put the frame in chronological order with a fresh 0..n-1 index.
df = (
    df.assign(
        Date=pd.to_datetime(df["Date"]),
        USD=pd.to_numeric(df["USD"], errors="coerce"),
    )
    .dropna(subset=["Date", "USD"])
    .sort_values("Date")
    .reset_index(drop=True)
)

print(
    "Yahoo df range:",
    df["Date"].min().date(),
    "‚Üí",
    df["Date"].max().date(),
    "rows:",
    len(df),
)


def price_from_log_return(price_t, log_ret):
    """Turn a log return into the implied next price: ``price_t * exp(log_ret)``.

    Both arguments are coerced to float NumPy arrays, so scalars and
    array-likes are accepted and combine by normal NumPy broadcasting.
    """
    base_price = np.asarray(price_t, dtype=float)
    growth_factor = np.exp(np.asarray(log_ret, dtype=float))
    return base_price * growth_factor

# Feature frame: one row per trading day, starting from the cleaned prices.
feat = df.copy()
feat["log_price"] = np.log(feat["USD"])
feat["log_return"] = feat["log_price"].diff()

# Lagged daily log returns, 1 to 10 rows back.
for lag in range(1, 11):
    feat[f"lr_lag_{lag}"] = feat["log_return"].shift(lag)

# Rolling mean/std of log returns; shift(1) keeps the current row's own
# return out of its window.
prev_log_return = feat["log_return"].shift(1)
for window in (5, 10, 20, 60, 120):
    return_window = prev_log_return.rolling(window)
    feat[f"lr_roll_mean_{window}"] = return_window.mean()
    feat[f"lr_roll_std_{window}"] = return_window.std()

# Rolling mean/std/min/max of the price level, again over rows before the current one.
prev_price = feat["USD"].shift(1)
for window in (5, 20, 60, 120):
    price_window = prev_price.rolling(window)
    for stat in ("mean", "std", "min", "max"):
        feat[f"price_roll_{stat}_{window}"] = getattr(price_window, stat)()

# Calendar features
feat["dow"] = feat["Date"].dt.dayofweek
feat["month"] = feat["Date"].dt.month

# Target: the log return realised on the following row.
feat["target_log_return_t_plus_1"] = feat["log_return"].shift(-1)

# Model inputs, grouped by prefix in frame order, then the calendar columns.
feature_cols = [
    col
    for prefix in ("lr_lag_", "lr_roll_", "price_roll_")
    for col in feat.columns
    if col.startswith(prefix)
] + ["dow", "month"]

# Rows with every feature and the target available; used by the interface cell as well.
model_df = (
    feat
    .dropna(subset=[*feature_cols, "target_log_return_t_plus_1"])
    .reset_index(drop=True)
)

print(
    "model_df range:",
    model_df["Date"].min().date(),
    "‚Üí",
    model_df["Date"].max().date(),
    "rows:",
    len(model_df),
)


# Train / validation / test split (chronological, by Date):
#   test  = rows from the last 4 years of model_df (Date >= split_date)
#   val   = the 6 months immediately before split_date (used for early stopping)
#   train = everything before the validation window
split_date = model_df["Date"].max() - pd.DateOffset(years=4)

# NOTE(review): train_full (train + val) is built here but is not referenced
# again in the code shown; the model below is fitted on train_df only.
train_full = model_df[model_df["Date"] < split_date].copy()
test_df    = model_df[model_df["Date"] >= split_date].copy()

val_cut = split_date - pd.DateOffset(months=6)
train_df = model_df[model_df["Date"] < val_cut].copy()
val_df   = model_df[(model_df["Date"] >= val_cut) & (model_df["Date"] < split_date)].copy()

X_train = train_df[feature_cols]
y_train = train_df["target_log_return_t_plus_1"].values

X_val = val_df[feature_cols]
y_val = val_df["target_log_return_t_plus_1"].values

# Gradient-boosted regressor on next-row log return. n_estimators is only an
# upper bound: the early-stopping callback passed to fit() halts training
# after 200 rounds without improvement on the validation set.
final = lgb.LGBMRegressor(
    objective="regression",
    n_estimators=5000,
    learning_rate=0.02,
    num_leaves=64,
    max_depth=-1,
    min_child_samples=50,
    subsample=0.85,
    colsample_bytree=0.85,
    reg_alpha=0.1,
    reg_lambda=1.0,
    random_state=42,
    verbosity=-1
)

final.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[lgb.early_stopping(200, verbose=False)]
)

# Quick check on the held-out test set, reported in USD space:
# both true and predicted log returns are applied to the same starting price.
price_t = test_df["USD"].values.astype(float)
y_true_lr = test_df["target_log_return_t_plus_1"].values.astype(float)
y_pred_lr = final.predict(test_df[feature_cols])

true_t1 = price_from_log_return(price_t, y_true_lr)
pred_t1 = price_from_log_return(price_t, y_pred_lr)

# NOTE(review): no naive "price unchanged" baseline is computed here, so these
# price-space errors cannot be compared against one from this cell alone.
rmse_usd = float(np.sqrt(np.mean((true_t1 - pred_t1) ** 2)))
# The 1e-9 term guards against division by zero.
mape_pct = float(np.mean(np.abs((true_t1 - pred_t1) / (true_t1 + 1e-9))) * 100)

print("\n Trained from Yahoo only")
print("Test period:", test_df["Date"].min().date(), "‚Üí", test_df["Date"].max().date())
print(f"RMSE (USD): {rmse_usd:,.2f}")
print(f"MAPE (%):  {mape_pct:.2f}%")


  if entities is not ():


Yahoo df range: 2000-08-30 ‚Üí 2026-01-14 rows: 6367
model_df range: 2001-02-26 ‚Üí 2026-01-13 rows: 6245

 Trained from Yahoo only
Test period: 2022-01-13 ‚Üí 2026-01-13
RMSE (USD): 29.37
MAPE (%):  0.76%


In [3]:
import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.dates as mdates
from matplotlib.collections import LineCollection
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", category=FutureWarning) # removes annoying popup for the chart

# Date-indexed copy of the modelling frame for single-row lookups, plus the
# first/last dates shown in the status line and used as the picker default.
feat_by_date = model_df.set_index("Date")
_all_dates = model_df["Date"]
min_date = _all_dates.min().date()
max_date = _all_dates.max().date()

# Evaluation window: rows from the last 4 years of the modelling frame.
_target_col = "target_log_return_t_plus_1"
_eval = model_df.dropna(subset=feature_cols + [_target_col]).reset_index(drop=True)
_eval_split_date = _eval["Date"].max() - pd.DateOffset(years=4)
_eval = _eval.loc[_eval["Date"] >= _eval_split_date].copy()

y_true_lr = _eval[_target_col].to_numpy(dtype=float)
price_t_eval = _eval["USD"].to_numpy(dtype=float)
y_pred_lr = final.predict(_eval[feature_cols]).astype(float)

# Map both true and predicted log returns onto next-day prices.
price_true_eval = price_from_log_return(price_t_eval, y_true_lr)
price_pred_eval = price_from_log_return(price_t_eval, y_pred_lr)
true_t1, pred_t1 = price_true_eval, price_pred_eval

# Price-space errors.
_usd_err = price_true_eval - price_pred_eval
rmse_usd = float(np.sqrt(np.mean(_usd_err ** 2)))
mape_pct = float(np.mean(np.abs(_usd_err / (price_true_eval + 1e-9))) * 100)

# R^2 on log returns; NaN when the true returns have zero variance.
ss_res = float(np.sum((y_true_lr - y_pred_lr) ** 2))
ss_tot = float(np.sum((y_true_lr - np.mean(y_true_lr)) ** 2))
if ss_tot != 0:
    r2_lr = float(1 - ss_res / ss_tot)
else:
    r2_lr = float("nan")

# Share of days where the predicted move has the same sign as the true move.
true_move = np.sign(true_t1 - price_t_eval)
pred_move = np.sign(pred_t1 - price_t_eval)
direction_acc_pct = float(np.mean(true_move == pred_move) * 100)

# Share of predictions within 0.5 / 1 / 2 percent of the true price.
abs_pct_err = np.abs((pred_t1 - true_t1) / (true_t1 + 1e-9)) * 100
within_05_pct, within_1_pct, within_2_pct = (
    float(np.mean(abs_pct_err <= tolerance) * 100) for tolerance in (0.5, 1.0, 2.0)
)

#CSS
def dark_css() -> str:
    """Return the dark-theme stylesheet for the app as a ``<style>`` block.

    Every selector is prefixed with ``.app-root``. The stylesheet is a plain
    string (not an f-string), so CSS braces are written as-is.
    """
    return """
    <style>

    .app-root .app-title { font-size:22px; font-weight:700; margin:0 0 8px 0; color:#ffffff; }
    .app-root .app-subtitle { color:#aab2c0; margin:0 0 14px 0; }

    .app-root .panel{
      border:1px solid #1b2942; border-radius:14px; padding:14px; background:#101a2b;
      box-shadow: 0 1px 2px rgba(0,0,0,0.35);
    }
    .app-root .card-row{ display:flex; gap:10px; flex-wrap:wrap; }
    .app-root .card{
      flex:1; min-width:200px;
      border:1px solid #1b2942; border-radius:14px; padding:12px 14px;
      background: linear-gradient(180deg, #111c2f 0%, #0f1829 100%);
    }
    .app-root .card-label{ font-size:12px; color:#aab2c0; margin-bottom:6px; }
    .app-root .card-value{ font-size:20px; font-weight:700; color:#ffffff; }
    .app-root .card-delta{ font-size:13px; margin-top:6px; }

    .app-root .good{ color:#3bd671; }
    .app-root .bad{ color:#ff5c5c; }
    .app-root .neutral{ color:#e6e8ee; }

    .app-root .small-muted{ font-size:12px; color:#aab2c0; }
    .app-root .hr{ height:1px; background:#1b2942; margin:12px 0; border:0; }

    .app-root table{ width:100%; border-collapse:collapse; }
    .app-root th{ color:#aab2c0; }

    /* --- ipywidgets styling (dark) --- */
    .app-root .widget-label { color:#aab2c0; font-weight:600; }
    .app-root .widget-button > button,
    .app-root .widget-togglebutton > button,
    .app-root .widget-togglebuttons .widget-togglebutton > button {
      background: #111c2f;
      border: 1px solid #1b2942;
      color: #e6e8ee;
      border-radius: 12px;
      padding: 10px 12px;
      box-shadow: none;
    }
    .app-root .widget-togglebuttons .widget-togglebutton.mod-active > button {
      background: #162744;
      border-color: #2b3f63;
    }
    .app-root .widget-checkbox label { color:#e6e8ee; }
    .app-root .widget-checkbox input { accent-color: #3bd671; }

    /* Date + dropdown (dark mode) */
    .app-root .widget-date-picker input {
      background: #0f1829;
      border: 1px solid #1b2942;
      color: #e6e8ee;
      -webkit-text-fill-color: #e6e8ee;
      border-radius: 10px;
      padding: 8px 10px;
      height: 30px;
    }
    .app-root .widget-dropdown > select {
      background: #0f1829;
      border: 1px solid #1b2942;
      color: #e6e8ee;
      -webkit-text-fill-color: #e6e8ee;
      border-radius: 10px;
      height: 30px;
      padding: 2px 8px;
    }

    .app-root .widget-box, .app-root .widget-vbox, .app-root .widget-hbox { margin: 0; }
    .app-root .widget-inline-hbox { margin: 0; }

    .app-root .predict-text-black button,
    .app-root .predict-text-black button span,
    .app-root .predict-text-black button * {
      color: #000000;
      -webkit-text-fill-color: #000000;
      text-shadow: none;
      font-weight: 800;
    }
    </style>
    """

def nearest_date(ts: pd.Timestamp) -> pd.Timestamp:
    """Return the ``Date`` value in ``model_df`` with the smallest absolute gap to ``ts``."""
    gaps = (model_df["Date"] - ts).abs()
    closest_idx = gaps.idxmin()
    return model_df.loc[closest_idx, "Date"]

def metric_cards(ref_date, price_t, pred_price_t1):
    """Build the HTML header panel with the three summary cards.

    Args:
        ref_date: Value shown as the reference date (inserted as-is).
        price_t: Price at the reference date.
        pred_price_t1: Predicted price for the next trading day.

    Returns:
        An HTML string. The delta line gets the ``good``/``bad``/``neutral``
        CSS class for a rise/fall/no change, and a leading "+" only on a rise
        (negative numbers already carry their own sign).
    """
    delta = pred_price_t1 - price_t

    # Percent change is reported as 0 when the base price is 0.
    if price_t != 0:
        delta_pct = (delta / price_t) * 100
    else:
        delta_pct = 0.0

    if delta > 0:
        cls, sign = "good", "+"
    elif delta < 0:
        cls, sign = "bad", ""
    else:
        cls, sign = "neutral", ""

    return f"""
    <div class="panel">
      <div class="app-title">ü™ôü™ô Gold Price Predictor (Next Trading Day USD)ü™ôü™ô</div>
      <div class="app-subtitle">LightGBM model ‚Ä¢ lag + rolling features ‚Ä¢ predicts next trading day</div>

      <div class="card-row">
        <div class="card">
          <div class="card-label">Reference date</div>
          <div class="card-value">{ref_date}</div>
          <div class="small-muted">Nearest available date used if needed</div>
        </div>

        <div class="card">
          <div class="card-label">Last known price (USD)</div>
          <div class="card-value">{price_t:,.2f}</div>
          <div class="small-muted">Price at reference date</div>
        </div>

        <div class="card">
          <div class="card-label">Predicted next trading day price (USD)</div>
          <div class="card-value">{pred_price_t1:,.2f}</div>
          <div class="card-delta {cls}">{sign}{delta:,.2f} USD ({sign}{delta_pct:.2f}%)</div>
        </div>
      </div>
    </div>
    """

def render_table(df_slice: pd.DataFrame) -> str:
    """Render the last (up to) 10 rows of ``df_slice`` as an HTML table panel.

    Only the ``Date`` (datetime) and ``USD`` columns are read; dates are shown
    as plain calendar dates and prices with thousands separators and 2 decimals.
    The input frame is not modified.
    """
    recent = df_slice[["Date", "USD"]].tail(10)
    rows = "".join(
        f"<tr><td>{day}</td><td style='text-align:right'>{usd:,.2f}</td></tr>"
        for day, usd in zip(recent["Date"].dt.date, recent["USD"])
    )

    return f"""
    <div class="panel">
      <div style="font-weight:700; margin-bottom:8px;">Last 10 available days</div>
      <table style="width:100%; border-collapse:collapse;">
        <thead>
          <tr>
            <th style="text-align:left;">Date</th>
            <th style="text-align:right;">USD</th>
          </tr>
        </thead>
        <tbody>{rows}</tbody>
      </table>
    </div>
    """

def render_monthly_table(df_prices: pd.DataFrame, n_months: int) -> str:
    """Render the last ``n_months`` monthly closing prices as an HTML table panel.

    The ``USD`` column is resampled by month-end ("ME") on ``Date`` and the last
    value in each month is kept; months without data are dropped before the
    final ``n_months`` entries are taken.
    """
    monthly_close = (
        df_prices.set_index("Date")["USD"]
        .resample("ME")
        .last()
        .dropna()
        .tail(n_months)
    )

    rows = "".join(
        f"<tr><td>{month_end:%Y-%m}</td><td style='text-align:right'>{close:,.2f}</td></tr>"
        for month_end, close in monthly_close.items()
    )

    return f"""
    <div class="panel">
      <div style="font-weight:700; margin-bottom:8px;">Monthly history (month-end close)</div>
      <table style="width:100%; border-collapse:collapse;">
        <thead>
          <tr>
            <th style="text-align:left;">Month</th>
            <th style="text-align:right;">USD</th>
          </tr>
        </thead>
        <tbody>{rows}</tbody>
      </table>
    </div>
    """


def plot_price_history(df_prices: pd.DataFrame, ref_date: pd.Timestamp, days: int):
    """Plot the USD price up to ``ref_date``, coloured by day-over-day direction.

    Args:
        df_prices: Frame with a datetime ``Date`` column and a ``USD`` column.
        ref_date: Last date to include in the chart.
        days: Look-back window in *calendar days* ending at ``ref_date``
            (30 for "1M", 365 for "1Y", ...); ``-1`` plots the full history.

    Raises:
        ValueError: If fewer than three rows remain after filtering.
    """
    d = df_prices[["Date", "USD"]].dropna().sort_values("Date")
    d = d[d["Date"] <= ref_date]

    if days != -1:
        # The range options are calendar spans, so cut by date rather than by
        # row count: N rows of trading days cover noticeably more than N days.
        window_start = pd.Timestamp(ref_date) - pd.Timedelta(days=int(days))
        d = d[d["Date"] >= window_start]

    if len(d) < 3:
        raise ValueError("Not enough history to plot.")

    # date2num accepts datetime64 arrays directly; this avoids the deprecated
    # Series.dt.to_pydatetime() call.
    x = mdates.date2num(d["Date"].to_numpy())
    y = d["USD"].to_numpy(dtype=float)

    # One two-point segment per consecutive pair of observations.
    points = np.column_stack([x, y]).reshape(-1, 1, 2)
    segs = np.concatenate([points[:-1], points[1:]], axis=1)

    # Green when the price did not fall versus the previous point, red otherwise.
    colors = ["#22c55e" if y[i+1] >= y[i] else "#ef4444" for i in range(len(y)-1)]

    fig, ax = plt.subplots(figsize=(10, 3.2), dpi=120)
    ax.set_facecolor("#0f1829")
    fig.patch.set_facecolor("#101a2b")
    ax.tick_params(colors="#aab2c0")
    for spine in ax.spines.values():
        spine.set_color("#1b2942")
    ax.grid(True, alpha=0.18)

    lc = LineCollection(segs, colors=colors, linewidths=2.2)
    ax.add_collection(lc)

    # Axis limits are set by hand (with a 0.5% margin on the price axis).
    ax.set_xlim(x.min(), x.max())
    ax.set_ylim(y.min() * 0.995, y.max() * 1.005)

    ax.set_title("Price history", fontsize=12, fontweight="bold", color="#ffffff")
    ax.xaxis.set_major_locator(mdates.AutoDateLocator(minticks=4, maxticks=8))
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(ax.xaxis.get_major_locator()))
    ax.set_ylabel("USD", color="#aab2c0")

    fig.tight_layout()
    plt.show()

# --- Input controls ---------------------------------------------------------
# The mode toggle, date picker and predict button span the full sidebar width.
mode = widgets.ToggleButtons(
    options=["Latest", "Pick date"],
    value="Latest",
    description="Mode:",
    layout=widgets.Layout(width="100%"),
)
date_picker = widgets.DatePicker(
    description="Date:",
    value=max_date,
    disabled=True,
    layout=widgets.Layout(width="100%"),
)

# Section toggles and their companion dropdowns.
show_table = widgets.Checkbox(value=True, description="Show last 10 days table")
show_model_info = widgets.Checkbox(value=False, description="Show model info")
show_monthly = widgets.Checkbox(value=True, description="Show monthly history")
months_to_show = widgets.Dropdown(
    options=[6, 12, 24, 36],
    value=12,
    description="Months:",
    layout=widgets.Layout(width="100%", height="30px"),
    style={"description_width": "70px"},
)

show_chart = widgets.Checkbox(value=False, description="Show price history chart")
chart_range = widgets.Dropdown(
    options=[("1M", 30), ("3M", 90), ("6M", 180), ("1Y", 365), ("5Y", 365*5), ("MAX", -1)],
    value=180,
    description="Range:",
    layout=widgets.Layout(width="100%", height="30px"),
    style={"description_width": "70px"},
)

predict_btn = widgets.Button(
    description="Predict",
    icon="check",
    layout=widgets.Layout(width="100%"),
)
predict_btn.add_class("predict-text-black")

status = widgets.HTML(value=f"<span class='small-muted'>Ready. Data range: {min_date} ‚Üí {max_date}</span>")

# --- Output areas -------------------------------------------------------------
cards_out, chart_out, table_out, monthly_out, info_out = (widgets.Output() for _ in range(5))

# Initial visibility of each optional section follows its checkbox.
for _panel, _toggle in (
    (chart_out, show_chart),
    (table_out, show_table),
    (monthly_out, show_monthly),
    (info_out, show_model_info),
):
    _panel.layout.display = "" if _toggle.value else "none"

def set_picker_state(_=None):
    """Disable the date picker while the mode toggle is on "Latest"; enable it otherwise."""
    latest_selected = mode.value == "Latest"
    date_picker.disabled = latest_selected

# Keep the picker in sync with the toggle, and apply the rule once up front.
mode.observe(set_picker_state, names="value")
set_picker_state()

def run_prediction():
    """Predict the next-trading-day price for the currently selected reference date.

    In "Latest" mode the last row of ``model_df`` is used; otherwise the date
    picker's value is snapped to the nearest date present in ``model_df``.

    Returns:
        Tuple ``(ref_date, price_t, pred_price_t1)``: the reference timestamp,
        the price on that date, and the predicted next price.

    Raises:
        ValueError: In "Pick date" mode when the date picker has no value.
    """
    if mode.value == "Latest":
        ref_date = model_df["Date"].iloc[-1]
    else:
        # A picker without a value would otherwise turn into NaT and fail
        # later with an uninformative lookup error.
        if date_picker.value is None:
            raise ValueError("No date selected. Pick a date or switch to Latest mode.")
        picked = pd.Timestamp(date_picker.value)
        ref_date = nearest_date(picked)

    # Double brackets keep the lookup a one-row DataFrame, as predict() expects 2-D input.
    row = feat_by_date.loc[[ref_date]]
    price_t = float(row["USD"].iloc[0])
    pred_log_ret = float(final.predict(row[feature_cols])[0])
    pred_price_t1 = float(price_from_log_return(price_t, pred_log_ret))
    return ref_date, price_t, pred_price_t1

# Wrap the stylesheet returned by dark_css() in an HTML widget so it can be displayed with the app.
style_out = widgets.HTML(value=dark_css())

def update_ui(_=None):
    """Re-run the prediction and redraw every output panel.

    Registered as the button-click and widget-observe callback (the argument is
    ignored) and also called directly. Exceptions are not propagated: the
    message is shown in ``status`` and the panels are emptied.
    """
    panels = (cards_out, chart_out, table_out, monthly_out, info_out)

    # Section visibility follows the checkboxes.
    chart_out.layout.display = "" if show_chart.value else "none"
    table_out.layout.display = "" if show_table.value else "none"
    monthly_out.layout.display = "" if show_monthly.value else "none"
    info_out.layout.display = "" if show_model_info.value else "none"

    # wait=True defers the clear until new content arrives (avoids flicker).
    for o in panels:
        with o:
            clear_output(wait=True)

    try:
        ref_date, price_t, pred_price_t1 = run_prediction()
        status.value = f"<span class='small-muted'> Predicted from reference date <b>{ref_date.date()}</b></span>"

        with cards_out:
            display(widgets.HTML(metric_cards(str(ref_date.date()), price_t, pred_price_t1)))

        if show_chart.value:
            with chart_out:
                display(widgets.HTML("<div class='panel'><div style='font-weight:700; margin-bottom:8px;'>Price history</div></div>"))
                plot_price_history(model_df[["Date", "USD"]].copy(), ref_date, int(chart_range.value))

        if show_table.value:
            ref_idx = model_df.index[model_df["Date"] == ref_date][0]
            with table_out:
                display(widgets.HTML(render_table(model_df.iloc[max(0, ref_idx-20):ref_idx+1])))

        if show_monthly.value:
            # Like the chart and the daily table, only show history up to the
            # reference date rather than the whole data set.
            monthly_history = model_df.loc[model_df["Date"] <= ref_date, ["Date", "USD"]].copy()
            with monthly_out:
                display(widgets.HTML(render_monthly_table(monthly_history, int(months_to_show.value))))

        if show_model_info.value:
            with info_out:
                display(widgets.HTML(f"""
                <div class="panel">
                  <div style="font-weight:700; margin-bottom:8px;">Model info</div>

                  <div class="small-muted">Test period: <b>{_eval["Date"].min().date()} ‚Üí {_eval["Date"].max().date()}</b></div>

                  <div class="small-muted" style="font-weight:700; margin-top:10px;">Accuracy summary</div>
                  <div class="small-muted">Directional accuracy (up/down): <b>{direction_acc_pct:.2f}%</b></div>
                  <div class="small-muted">Within ¬±0.5% error: <b>{within_05_pct:.2f}%</b></div>
                  <div class="small-muted">Within ¬±1.0% error: <b>{within_1_pct:.2f}%</b></div>
                  <div class="small-muted">Within ¬±2.0% error: <b>{within_2_pct:.2f}%</b></div>

                  <hr class="hr"/>

                  <div class="small-muted">RMSE (USD): <b>{rmse_usd:,.2f}</b></div>
                  <div class="small-muted">MAPE (%): <b>{mape_pct:.2f}%</b></div>
                  <div class="small-muted">R¬≤ (log return): <b>{r2_lr:.3f}</b></div>

                  <hr class="hr"/>

                  <div class="small-muted">Features: <b>{len(feature_cols)}</b></div>
                  <div class="small-muted">Algorithm: <b>LightGBM Regressor</b></div>
                  <div class="small-muted">Target: <b>next trading day log return</b></div>
                </div>
                """))

    except Exception as e:
        # The deferred clears above never fire for panels that received no new
        # content, so empty them now: otherwise results from the previous run
        # would stay on screen next to the error message.
        for o in panels:
            with o:
                clear_output()
        status.value = f"<span style='color:#ff5c5c;'>‚ùå {str(e)}</span>"

# Events: the button and every control's value change trigger a full re-render.
predict_btn.on_click(update_ui)
for _control in (
    show_chart,
    chart_range,
    show_table,
    show_monthly,
    months_to_show,
    show_model_info,
    mode,
    date_picker,
):
    _control.observe(update_ui, names="value")

# Layout: fixed-width sidebar of controls on the left, result panels on the right.
controls_title = widgets.HTML("<div style='font-weight:700; margin-bottom:10px;'>Controls</div>")

_section_toggles = widgets.VBox([
    show_table,
    show_monthly,
    months_to_show,
    show_chart,
    chart_range,
    show_model_info,
])

controls_box = widgets.VBox(
    [controls_title, mode, date_picker, _section_toggles, predict_btn, status],
    layout=widgets.Layout(width="360px", overflow="hidden"),
)

main_box = widgets.VBox(
    [
        cards_out,
        chart_out,
        widgets.HTML("<hr class='hr'/>"),
        table_out,
        monthly_out,
        info_out,
    ],
    layout=widgets.Layout(flex="1"),
)

app = widgets.HBox([controls_box, main_box])

# Scope styling: the stylesheet's selectors all hang off the "app-root" class.
for _box, _css_class in (
    (controls_box, "app-root"),
    (controls_box, "panel"),
    (main_box, "app-root"),
    (app, "app-root"),
):
    _box.add_class(_css_class)

display(style_out)
display(app)

# Initial render.
update_ui()


HTML(value='\n    <style>\n \n    .app-root .app-title { font-size:22px; font-weight:700; margin:0 0 8px 0; co‚Ä¶

HBox(children=(VBox(children=(HTML(value="<div style='font-weight:700; margin-bottom:10px;'>Controls</div>"), ‚Ä¶