In [70]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [104]:
# Daily NVDA bars: raw OHLC, adjusted close, volume, dividends and split coefficients.
nvda_daily_path = "data/alphavantage/stock_prices/1d_adj/NVDA.parquet"
df_d = pd.read_parquet(nvda_daily_path)

In [105]:
# Show the daily frame; the rendered table is truncated to its first and last rows.
df_d

Unnamed: 0_level_0,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-02,238.750,239.910,236.720,239.91,5.971508,5941969,0.0,1.0
2020-01-03,235.100,237.830,234.100,236.07,5.875928,5144308,0.0,1.0
2020-01-06,232.320,237.270,231.270,237.06,5.900569,6572812,0.0,1.0
2020-01-07,238.200,241.770,236.390,239.93,5.972005,7980144,0.0,1.0
2020-01-08,239.760,242.040,238.150,240.38,5.983206,6931017,0.0,1.0
...,...,...,...,...,...,...,...,...
2025-06-24,145.560,147.960,145.500,147.90,147.891653,187566121,0.0,1.0
2025-06-25,149.270,154.450,149.260,154.31,154.301291,269146471,0.0,1.0
2025-06-26,155.975,156.715,154.000,155.02,155.011251,198145746,0.0,1.0
2025-06-27,156.040,158.710,155.255,157.75,157.741097,263234539,0.0,1.0


In [154]:
# Line chart of the raw daily close, before any split adjustment is applied.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_d.index, y=df_d["close"], mode="lines", name="Close"))
fig.update_layout(
    title="NVDA Daily Close Price (Not Adjusted for Splits)",
    xaxis_title="Date",
    yaxis_title="Close Price (USD)",
    margin=dict(l=40, r=40, t=60, b=40),
    font=dict(size=16),
    autosize=True,
)
fig


In [155]:
# Rows whose split coefficient differs from 1.0, i.e. the days on which a split is recorded.
is_split_day = df_d["split_coefficient"].ne(1.0)
df_d.loc[is_split_day]

Unnamed: 0_level_0,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-07-20,187.3,188.38,181.64,186.12,18.571882,43366188,0.0,4.0
2024-06-10,120.37,123.1,117.01,121.79,121.735399,308134791,0.0,10.0


In [157]:
# Back-adjust the raw daily OHLC prices for every split recorded in the data,
# instead of hard-coding the 4-for-1 and 10-for-1 events.
#
# A split row already carries post-split prices, so only the rows strictly
# before the split date are divided by its ratio.
PRICE_COLUMNS = ["open", "high", "low", "close"]

df_d_adj = df_d.copy()  # work on a copy so df_d stays raw and the cell is re-runnable

split_coefficient = df_d["split_coefficient"]
split_ratios = split_coefficient[split_coefficient.notna() & (split_coefficient != 1.0)]

# Apply the most recent split first. A boolean mask on the index avoids the
# "split date minus one day" label slice and works even when no split exists
# (the loop simply does not run).
for split_date, ratio in split_ratios.sort_index(ascending=False).items():
    df_d_adj.loc[df_d_adj.index < split_date, PRICE_COLUMNS] /= ratio


In [158]:
# Line chart of the daily close after the split adjustment held in df_d_adj.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_d_adj.index, y=df_d_adj["close"], mode="lines", name="Close"))
fig.update_layout(
    title="NVDA Daily Close Price (Adjusted for Splits)",
    xaxis_title="Date",
    yaxis_title="Close Price (USD)",
    margin=dict(l=40, r=40, t=60, b=40),
    font=dict(size=16),
    autosize=True,
)
fig


In [181]:
# Hourly NVDA bars.
# NOTE(review): the path contains a doubled slash ("1h//") — presumably harmless, confirm.
nvda_hourly_path = "data/alphavantage/stock_prices/1h//NVDA.parquet"
df_h = pd.read_parquet(nvda_hourly_path)

In [182]:
# Line chart of the hourly close exactly as loaded.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_h.index, y=df_h["close"], mode="lines", name="Close"))
fig.update_layout(
    title="NVDA Hourly Close Price (Adjusted for Splits)",
    xaxis_title="Time",
    yaxis_title="Close Price (USD)",
    margin=dict(l=40, r=40, t=60, b=40),
    font=dict(size=16),
    autosize=True,
)
fig


In [148]:
# Divide every "high" above 400 by 10; open, low and close are left as they are.
# NOTE(review): 400 looks like a hand-picked cut-off for highs that missed the
# 10-for-1 split adjustment — confirm.
suspect_high = df_h["high"] > 400
df_h.loc[suspect_high, "high"] = df_h.loc[suspect_high, "high"] / 10.0

In [149]:
# Hourly candlestick chart, drawn before the outlier clipping of the wicks.
# NOTE(review): the title says "Not Adjusted for Splits" while the hourly line
# chart of the same frame is titled "Adjusted for Splits" — confirm which is right.
fig = go.Figure()
fig.add_trace(
    go.Candlestick(
        x=df_h.index,
        open=df_h["open"],
        high=df_h["high"],
        low=df_h["low"],
        close=df_h["close"],
    )
)
fig.update_layout(
    xaxis_rangeslider_visible=False,
    title="NVDA Hourly Candlestick Chart (Not Adjusted for Splits)",
    xaxis_title="Date",
    yaxis_title="Price (USD)",
    margin=dict(l=40, r=40, t=60, b=40),
    font=dict(size=16),
    autosize=True,
)
fig

In [150]:
# Tame spurious wicks in the hourly candles.

# Rolling statistics of the close over a centred 24-bar window. A single Rolling
# object is shared by all three statistics instead of being rebuilt each time.
close_window = df_h["close"].rolling(window=24, center=True)
rolling_median = close_window.median()
iqr = close_window.quantile(0.75) - close_window.quantile(0.25)

# Outlier thresholds (3x IQR rule). They are NaN wherever the window is
# incomplete, which includes the first and last bars of the series.
upper_bound = rolling_median + 3 * iqr
lower_bound = rolling_median - 3 * iqr

# Top and bottom of each candle body.
body_top = df_h[["open", "close"]].max(axis=1)
body_bottom = df_h[["open", "close"]].min(axis=1)

high = df_h["high"]
low = df_h["low"]

# Pull wicks back to the rolling bounds. A comparison against NaN is False, so a
# bar with a missing bound keeps its value; np.minimum / np.maximum would instead
# propagate the NaN and wipe out the wicks at both ends of the series.
high = high.mask(high > upper_bound, upper_bound)
low = low.mask(low < lower_bound, lower_bound)

# Sanity clip: a wick may extend at most 5% beyond the candle body.
high = high.mask(high > body_top * 1.05, body_top * 1.05)
low = low.mask(low < body_bottom * 0.95, body_bottom * 0.95)

# The rolling bounds can fall inside the body on a fast move. Never leave the
# high below, or the low above, the open/close of the same bar.
high = high.mask(high < body_top, body_top)
low = low.mask(low > body_bottom, body_bottom)

df_h["high"] = high
df_h["low"] = low


In [152]:
# Additional clipping for 2024 and later
mask_2024 = df_h.index >= pd.Timestamp("2024-01-01")
df_h.loc[mask_2024, "high"] = np.minimum(
    df_h.loc[mask_2024, "high"], df_h.loc[mask_2024, ["open", "close"]].max(axis=1) * 1.03
)
df_h.loc[mask_2024, "low"] = np.maximum(
    df_h.loc[mask_2024, "low"], df_h.loc[mask_2024, ["open", "close"]].min(axis=1) * 0.97
)

In [153]:
# Hourly candlestick chart of df_h in its current state (after the wick clipping
# cells when the notebook is run top to bottom).
# plotly.graph_objects is already imported as `go` in the notebook's first cell,
# so it is not re-imported here.
fig = go.Figure()
fig.add_trace(
    go.Candlestick(
        x=df_h.index,
        open=df_h["open"],
        high=df_h["high"],
        low=df_h["low"],
        close=df_h["close"],
    )
)
fig.update_layout(
    xaxis_rangeslider_visible=False,
    title="NVDA Hourly Candlestick Chart",
    xaxis_title="Date",
    yaxis_title="Price (USD)",
    margin=dict(l=40, r=40, t=60, b=40),
    font=dict(size=16),
    autosize=True,
)
fig

In [170]:
# Number of hourly bars timestamped on or after 2022-03-03.
on_or_after_cutoff = df_h.index >= pd.Timestamp("2022-03-03")
on_or_after_cutoff.sum()

np.int64(13326)