In [41]:
from datetime import datetime
import pandas as pd 
import yfinance as yf

# import numpy as np
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
# from statsmodels.tsa.stattools import adfuller
from statsmodels.regression import linear_model

# Notebook-wide plotly defaults: every figure created below picks these up.
pio.renderers.default = "jupyterlab"   # renderer used when a figure is displayed
pio.templates.default = "plotly_dark"  # default figure template

In [None]:
# The two tickers analysed as a pair throughout the notebook.
symbol_1, symbol_2 = 'NVDA', 'AMD'

In [43]:
def load_stock_data(symbol, period='1y', interval='1d'):
    """
    Fetch historical price data for a single ticker through yfinance.

    Parameters:
    -----------
    symbol : str
        Ticker to query (e.g., 'AAPL', 'NVDA').
    period : str, optional
        Look-back span forwarded to ``Ticker.history`` (default: '1y').
        Accepted values: 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max
    interval : str, optional
        Bar size forwarded to ``Ticker.history`` (default: '1d').
        Accepted values: 1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo

    Returns:
    --------
    pd.DataFrame
        The frame returned by ``Ticker.history`` for the request, with
        columns such as Open, High, Low, Close and Volume.
    """
    # Thin pass-through: no caching, validation or post-processing happens here.
    return yf.Ticker(symbol).history(period=period, interval=interval)

In [44]:
symbols = [symbol_1, symbol_2]

# One frame of hourly bars per symbol; `period` is left at load_stock_data's default.
data = {symbol: load_stock_data(symbol, interval='1h') for symbol in symbols}

# Put the Close series side by side, aligned on their timestamp index.
# A timestamp present for only one symbol turns into NaN during alignment;
# those rows are dropped so the regression and rolling statistics further
# down only ever see complete price pairs.
prices_df = pd.DataFrame(
    {symbol: data[symbol]['Close'] for symbol in symbols}
).dropna()

prices_df


Unnamed: 0_level_0,NVDA,AMD
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-01 09:30:00-04:00,137.173996,143.278793
2024-11-01 10:30:00-04:00,137.006302,143.259995
2024-11-01 11:30:00-04:00,136.389999,142.190002
2024-11-01 12:30:00-04:00,136.330002,143.070007
2024-11-01 13:30:00-04:00,135.473999,142.409393
...,...,...
2025-10-31 11:30:00-04:00,203.985001,255.899994
2025-10-31 12:30:00-04:00,202.836502,255.258698
2025-10-31 13:30:00-04:00,204.270004,256.339996
2025-10-31 14:30:00-04:00,204.410004,258.009491


In [45]:
# Scatter symbol_1 closes (x) against symbol_2 closes (y) with an OLS trendline.
fig = px.scatter(
    x=prices_df[symbol_1],
    y=prices_df[symbol_2],
    trendline="ols",
)

# Recolor the second trace of the figure (presumably the trendline, since the
# scatter itself comes first - verify if the figure construction changes).
trendline_trace = fig.data[1]
trendline_trace.line.color = 'yellow'

fig

In [46]:
# Regress symbol_1 closes (endog) on symbol_2 closes (exog). No constant column
# is added to the regressor, so the fit has a single coefficient and no
# intercept: symbol_1 ~= ratio * symbol_2.
model = linear_model.OLS(prices_df[symbol_1], prices_df[symbol_2])

result = model.fit()

# `params` is a label-indexed Series, so the coefficient is taken by position
# with .iloc. Plain `params[0]` relies on the deprecated "integer key means
# position" fallback and emits a FutureWarning.
ratio = result.params.iloc[0]

ratio


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



np.float64(1.029594538790435)

In [54]:
symbol_1_scaled_column_name = f'{symbol_1}_scaled'
symbol_2_scaled_column_name = f'{symbol_2}_scaled'

bolinger_multiplier = 2.5
bolinger_window = 14

# symbol_1 is copied as-is; symbol_2 is multiplied by the fitted ratio.
prices_df[symbol_1_scaled_column_name] = prices_df[symbol_1]
prices_df[symbol_2_scaled_column_name] = ratio * prices_df[symbol_2]

# Spread between the two scaled series and its 40-bar rolling mean.
spread = prices_df[symbol_1_scaled_column_name] - prices_df[symbol_2_scaled_column_name]
prices_df['spread'] = spread
prices_df['spread_ma'] = spread.rolling(window=40).mean()

# Band half-width: multiplier times the rolling std of the spread, computed once
# and reused for both bands.
# NOTE(review): the std uses `bolinger_window` (14) while the centre line uses a
# 40-bar mean - confirm the differing windows are intentional.
band_half_width = bolinger_multiplier * spread.rolling(window=bolinger_window).std()
prices_df['bolinger_up'] = prices_df['spread_ma'] + band_half_width
prices_df['bolinger_down'] = prices_df['spread_ma'] - band_half_width

# Same values as the `<symbol_2>_scaled` column, stored under a second name.
prices_df[f'{symbol_2}_adj'] = ratio * prices_df[symbol_2]

prices_df

Unnamed: 0_level_0,NVDA,AMD,NVDA_scaled,AMD_scaled,spread,spread_ma,bolinger_up,bolinger_down,AMD_adj
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-11-01 09:30:00-04:00,137.173996,143.278793,137.173996,147.519063,-10.345067,,,,147.519063
2024-11-01 10:30:00-04:00,137.006302,143.259995,137.006302,147.499708,-10.493406,,,,147.499708
2024-11-01 11:30:00-04:00,136.389999,142.190002,136.389999,146.398050,-10.008051,,,,146.398050
2024-11-01 12:30:00-04:00,136.330002,143.070007,136.330002,147.304098,-10.974096,,,,147.304098
2024-11-01 13:30:00-04:00,135.473999,142.409393,135.473999,146.623934,-11.149935,,,,146.623934
...,...,...,...,...,...,...,...,...,...
2025-10-31 11:30:00-04:00,203.985001,255.899994,203.985001,263.473236,-59.488236,-67.567606,-62.721015,-72.414197,263.473236
2025-10-31 12:30:00-04:00,202.836502,255.258698,202.836502,262.812961,-59.976459,-67.622181,-62.460481,-72.783880,262.812961
2025-10-31 13:30:00-04:00,204.270004,256.339996,204.270004,263.926260,-59.656256,-67.619462,-62.002406,-73.236518,263.926260
2025-10-31 14:30:00-04:00,204.410004,258.009491,204.410004,265.645163,-61.235159,-67.344650,-61.838806,-72.850493,265.645163


In [55]:
# Two stacked panels sharing the x axis: the price series on top, the spread
# with its moving average and bands underneath.
# (The original cell started with a stand-alone px.line(...) call whose result
# was neither assigned nor displayed; it has been removed.)
fig_combined = make_subplots(
    rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1,
    subplot_titles=[
        f'Price Series: {symbol_1} and Scaled {symbol_2}',
        f'Spread: {symbol_1} vs {symbol_2}',
    ],
)

# Build the price figure once and move each of its traces (one per y column)
# into the top panel.
price_fig = px.line(prices_df, y=[symbol_1, symbol_2_scaled_column_name])
for price_trace in price_fig.data:
    fig_combined.add_trace(price_trace, row=1, col=1)

# The raw spread keeps plotly express' default line styling.
fig_combined.add_trace(px.line(prices_df, y=['spread']).data[0], row=2, col=1)

# Overlays on the spread panel: column name -> line style. Each trace is named
# after its column.
overlay_line_styles = {
    'spread_ma': dict(color='white', width=1),
    'bolinger_up': dict(color='green', width=1, dash='dot'),
    'bolinger_down': dict(color='red', width=1, dash='dot'),
}
for overlay_column, line_style in overlay_line_styles.items():
    overlay_fig = px.line(prices_df, y=[overlay_column]).update_traces(
        line=line_style, name=overlay_column
    )
    fig_combined.add_trace(overlay_fig.data[0], row=2, col=1)

fig_combined.update_layout(height=1000, showlegend=True, title_text="Price Series and Spread")
fig_combined