Install and Import Libraries

In [1]:
!pip install yfinance scikit-learn plotly

import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm
import ipywidgets as widgets
from IPython.display import display, clear_output




Tickers

In [2]:
# Symbols and daily-bar window handed to fetch_tickers_data for the
# historical part of the analysis.
TICKERS = [
    'AAPL',
    'MSFT',
    'GOOGL',
    'AMZN',
    'TSLA',
]
START_DATE = '2023-01-01'
END_DATE = '2025-10-15'
INTERVAL = '1d'

Fetch Ticker Data

In [5]:
def fetch_tickers_data(tickers, start_date, end_date, interval='1d'):
  """Download price history per ticker and stack it into one long DataFrame.

  Parameters
  ----------
  tickers : iterable of str
      Symbols to download; one ``yf.download`` request is made per symbol.
  start_date, end_date : str
      Forwarded to ``yf.download`` as ``start`` and ``end``.
  interval : str, default '1d'
      Forwarded to ``yf.download`` as ``interval``.

  Returns
  -------
  pandas.DataFrame
      The per-ticker frames concatenated row-wise with a fresh integer index.
      Each frame has its former index turned into a regular column and a
      ``Ticker`` column added. An empty DataFrame is returned when no ticker
      produced any rows.
  """
  import warnings

  all_data=[]
  for tk in tqdm(tickers, desc='Downloading'):
    # auto_adjust is spelled out because yfinance changed its default to True;
    # being explicit pins the price adjustment and avoids relying on the
    # library default.
    df=yf.download(tk, start=start_date, end=end_date, interval=interval,
                   auto_adjust=True, progress=False)

    # A symbol that returns nothing would otherwise vanish silently from the
    # result; surface it and keep the empty frame out of the concat.
    if df.empty:
      warnings.warn(f"No data returned for ticker {tk!r}; skipping.")
      continue

    # Collapse two-level column labels to their first level only.
    if isinstance(df.columns, pd.MultiIndex):
      df.columns=[col[0] for col in df.columns]

    df=df.reset_index()
    df['Ticker']=tk
    all_data.append(df)

  return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()

# Download every configured symbol, then normalise the long frame: parse the
# Date column, order rows by ticker and date, and renumber the index from 0.
data = fetch_tickers_data(TICKERS, START_DATE, END_DATE, INTERVAL)
data = (
    data
    .assign(Date=pd.to_datetime(data['Date']))
    .sort_values(by=['Ticker', 'Date'], ignore_index=True)
)

# Percentage change of Close computed within each ticker, so one ticker's
# first row is never compared against another ticker's last row.
data['DailyReturn'] = data.groupby('Ticker')['Close'].pct_change()

Downloading:   0%|          | 0/5 [00:00<?, ?it/s]

  df=yf.download(tk, start=start_date, end=end_date, interval=interval, progress=False)
  df=yf.download(tk, start=start_date, end=end_date, interval=interval, progress=False)
  df=yf.download(tk, start=start_date, end=end_date, interval=interval, progress=False)
  df=yf.download(tk, start=start_date, end=end_date, interval=interval, progress=False)
  df=yf.download(tk, start=start_date, end=end_date, interval=interval, progress=False)


In [7]:
# Sanity checks on the downloaded frame: which symbols and which dates it has.
print("Unique Tickers:", data['Ticker'].unique())
print("Date Range:", data['Date'].min(), "to", data['Date'].max())

# Descriptive statistics of the closing price, one row per ticker.
summary = data.groupby('Ticker')['Close'].describe()
print("Summary Statistics:")
display(summary)

# Per-ticker percentage change of Close (the load cell assigns this same
# column; the assignment here overwrites it with the same computation).
data['DailyReturn'] = data.groupby('Ticker')['Close'].pct_change()
print("Added Daily Return Column")

# Wide table (dates x tickers) of closing prices and its pairwise correlation.
# NOTE(review): this correlates price levels, not DailyReturn -- confirm that
# level correlation is what the analysis intends.
pivoted = data.pivot(index='Date', columns='Ticker', values='Close')
corr_matrix = pivoted.corr()
print("Correlation Matrix:")
display(corr_matrix)

Unique Tickers: ['AAPL' 'AMZN' 'GOOGL' 'MSFT' 'TSLA']
Date Range: 2023-01-03 00:00:00 to 2025-10-14 00:00:00
Summary Statistics:


Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAPL,698.0,197.618694,29.696041,123.281334,174.877102,192.996307,223.476616,258.103729
AMZN,698.0,170.281419,42.356491,83.120003,133.334995,179.110001,205.017502,242.059998
GOOGL,698.0,153.815418,34.976646,85.609535,130.09771,156.401817,174.773872,254.720001
MSFT,698.0,388.458485,72.712733,217.525269,329.356255,403.152298,428.164368,534.760925
TSLA,698.0,254.614183,76.235761,108.099998,193.922504,241.284996,300.097496,479.859985


Added Daily Return Column
Correlation Matrix:


Ticker,AAPL,AMZN,GOOGL,MSFT,TSLA
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAPL,1.0,0.847489,0.858841,0.779786,0.76137
AMZN,0.847489,1.0,0.92336,0.928263,0.697486
GOOGL,0.858841,0.92336,1.0,0.912716,0.721304
MSFT,0.779786,0.928263,0.912716,1.0,0.604288
TSLA,0.76137,0.697486,0.721304,0.604288,1.0


In [8]:
# Closing price over time, one line per ticker on a shared axis.
fig = go.Figure()

for ticker in data['Ticker'].unique():
  ticker_data = data.loc[data['Ticker'] == ticker]
  price_line = go.Scatter(
      x=ticker_data['Date'],
      y=ticker_data['Close'],
      mode='lines',
      name=ticker,
  )
  fig.add_trace(price_line)

# Horizontal legend anchored by its bottom-right corner at (x=1, y=1.02).
fig.update_layout(
    title='Stock Prices Over Time',
    xaxis_title='Date',
    yaxis_title='Close Price(USD)',
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1,
    ),
)
fig.show()

In [9]:
# NOTE(review): `template` is not referenced anywhere in this cell (the
# `template=` below is a keyword argument); this import looks accidental --
# confirm and remove.
from tempfile import template

# Heatmap of the ticker-by-ticker correlation matrix with the values printed
# in each cell.
fig_corr = px.imshow(
    corr_matrix,
    title="Correlation Heatmap between Tickers",
    color_continuous_scale='RdBu_r',
    aspect="auto",
    text_auto=True,
)
fig_corr.update_layout(template='plotly_dark')
fig_corr.show()

In [12]:
import time

# Settings for the intraday refresh cell.
STREAM_INTERVAL = '1m'  # `interval` argument used by fetch_live_prices
STREAM_PERIOD = '1d'    # `period` argument used by fetch_live_prices
NUM_UPDATES = 5         # how many times the update loop iterates
SLEEP_TIME = 60         # value handed to time.sleep()

def fetch_live_prices(tickers):
  """Download recent intraday bars for all `tickers` in a single request.

  The module-level STREAM_PERIOD and STREAM_INTERVAL are forwarded to
  ``yf.download`` as ``period`` and ``interval``.

  Parameters
  ----------
  tickers : list of str
      Symbols to request together.

  Returns
  -------
  The object returned by ``yf.download`` for these arguments.
  """
  # auto_adjust is passed explicitly: yfinance reports that its default changed
  # to True, so stating it pins the behaviour instead of relying on the default.
  return yf.download(
      tickers=tickers,
      period=STREAM_PERIOD,
      interval=STREAM_INTERVAL,
      auto_adjust=True,
      progress=False,
  )

# Refresh loop: each iteration clears the previous output, downloads the
# latest intraday bars, redraws the chart, and then waits before the next
# refresh. Fetching, plotting and sleeping all live inside the loop body so
# that every one of the NUM_UPDATES iterations actually shows fresh data.
for i in range(NUM_UPDATES):
  # wait=True defers the clear until new output arrives, avoiding flicker.
  clear_output(wait=True)
  print(f"Streaming Update {i+1}/{NUM_UPDATES}...")

  live_prices=fetch_live_prices(TICKERS)

  # Flatten multi-level column names by joining the tuple parts with "_",
  # which is what produces the 'Close_<ticker>' labels looked up below.
  live_prices.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in live_prices.columns]

  fig_live = go.Figure()
  # Iterate through the original ticker list
  for ticker in TICKERS:
    # Construct the flattened column name for the Close price
    close_column_name = f'Close_{ticker}'
    # Tickers whose Close column is absent are left off the chart.
    if close_column_name in live_prices.columns:
      fig_live.add_trace(go.Scatter(x=live_prices.index, y=live_prices[close_column_name], mode='lines', name=ticker))

  fig_live.update_layout(
      title='Live Stock Prices',
      xaxis_title='Time',
      yaxis_title='Price',
      legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
  )

  fig_live.show()

  # Pause between refreshes only; waiting after the final one serves no purpose.
  if i < NUM_UPDATES - 1:
    time.sleep(SLEEP_TIME)

Streaming Update 5/5...



YF.download() has changed argument auto_adjust default to True



GitHub repository: https://github.com/Asfand5214/Multivariate-Market-Data-for-Financial-Trends