In [1]:
import yfinance as yf

ticker = yf.Ticker('TSLA')
df = ticker.history(period='5d')

In [2]:
display(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-10-14 00:00:00-04:00,220.130005,221.910004,213.740005,219.160004,86291900,0.0,0.0
2024-10-15 00:00:00-04:00,220.009995,224.259995,217.119995,219.570007,62988800,0.0,0.0
2024-10-16 00:00:00-04:00,221.399994,222.820007,218.929993,221.330002,49632800,0.0,0.0
2024-10-17 00:00:00-04:00,221.589996,222.080002,217.899994,220.889999,50791800,0.0,0.0
2024-10-18 00:00:00-04:00,220.710007,222.279999,219.229996,220.699997,49526800,0.0,0.0


In [3]:
display(df['Close'])

Date
2024-10-14 00:00:00-04:00    219.160004
2024-10-15 00:00:00-04:00    219.570007
2024-10-16 00:00:00-04:00    221.330002
2024-10-17 00:00:00-04:00    220.889999
2024-10-18 00:00:00-04:00    220.699997
Name: Close, dtype: float64

Shifts the rows by 2 so we can have a comparison on "Close" values for each row in a window of 2 days.

This is achieved by creating a new DataFrame where each row has an extra column `2DaysShift` which respresents the `Close` value 2 days before.

This is possible with this dataset given that each row represents a day and theres no missing row values.

In [5]:
import pandas as pd

display(pd.concat([df['Close'], df['Close'].shift(2)], axis=1, keys=['Close', '2DaysShift']))

Unnamed: 0_level_0,Close,2DaysShift
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-10-14 00:00:00-04:00,219.160004,
2024-10-15 00:00:00-04:00,219.570007,
2024-10-16 00:00:00-04:00,221.330002,219.160004
2024-10-17 00:00:00-04:00,220.889999,219.570007
2024-10-18 00:00:00-04:00,220.699997,221.330002


Get the % difference between the `2DaysShift` and `Close`

$$\text{PC} = \frac{A - B}{B}$$

Where:

**A**: Current Price
**B**: Earlier Price (`2DaysShift`)
**PC**: Percentage Change

In [8]:
(df['Close'] - df['Close'].shift(2)) / df['Close'].shift(2)

Date
2024-10-14 00:00:00-04:00         NaN
2024-10-15 00:00:00-04:00         NaN
2024-10-16 00:00:00-04:00    0.009901
2024-10-17 00:00:00-04:00    0.006012
2024-10-18 00:00:00-04:00   -0.002846
Name: Close, dtype: float64

For more precision in financial analysis context a different formula is recommended:

$$y = \ln\left(\frac{A}{B}\right)$$

In [9]:
import numpy as np

df['2daysRise'] = np.log(df['Close'] / df['Close'].shift(2))
display(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,2daysRise
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-10-14 00:00:00-04:00,220.130005,221.910004,213.740005,219.160004,86291900,0.0,0.0,
2024-10-15 00:00:00-04:00,220.009995,224.259995,217.119995,219.570007,62988800,0.0,0.0,
2024-10-16 00:00:00-04:00,221.399994,222.820007,218.929993,221.330002,49632800,0.0,0.0,0.009853
2024-10-17 00:00:00-04:00,221.589996,222.080002,217.899994,220.889999,50791800,0.0,0.0,0.005994
2024-10-18 00:00:00-04:00,220.710007,222.279999,219.229996,220.699997,49526800,0.0,0.0,-0.002851


Rolling Window Calculation is also common, which consists on comparing each value with the average value over _n_ periods.

> Every pandas object has a rolling() method for looking at a rolling win- dow of values.

In [11]:
df['2daysAvg'] = df['Close'].shift(1).rolling(2).mean()
display(df[['Close', '2daysAvg']])

Unnamed: 0_level_0,Close,2daysAvg
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-10-14 00:00:00-04:00,219.160004,
2024-10-15 00:00:00-04:00,219.570007,
2024-10-16 00:00:00-04:00,221.330002,219.365005
2024-10-17 00:00:00-04:00,220.889999,220.450005
2024-10-18 00:00:00-04:00,220.699997,221.110001


- Use `shift(1)` to exclude the current day data and use prev day instead when calculating the average.
- Use `rolling(2)` to indicate that we want to draw on two consecutive rows.
- Use `mean()` to calculate average for each pair of consecutive rows.

Now the Percentage Change between each day's price can be calculated with its associated rolling average.

In [12]:
df['2daysAvgRise'] = np.log(df['Close'] / df['2daysAvg'])
display(df[['Close','2daysRise','2daysAvgRise']])

Unnamed: 0_level_0,Close,2daysRise,2daysAvgRise
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-10-14 00:00:00-04:00,219.160004,,
2024-10-15 00:00:00-04:00,219.570007,,
2024-10-16 00:00:00-04:00,221.330002,0.009853,0.008918
2024-10-17 00:00:00-04:00,220.889999,0.005994,0.001994
2024-10-18 00:00:00-04:00,220.699997,-0.002851,-0.001856


Multivariate time series refers to a type of data that consists of multiple time-dependent variables observed over the same time period. Unlike univariate time series, which focuses on a single variable, multivariate time series analyzes the relationships and interactions between two or more variables over time.

In [33]:
# Builds a Stocks DataFrame with tickers from different companies
stocks = pd.DataFrame()
tickers = ['MSFT', 'TSLA', 'GM', 'AAPL', 'ORCL', 'AMZN']
tickers = map(lambda t: yf.Ticker(t), tickers)
tickers = list(tickers)

display(tickers)

[yfinance.Ticker object <MSFT>,
 yfinance.Ticker object <TSLA>,
 yfinance.Ticker object <GM>,
 yfinance.Ticker object <AAPL>,
 yfinance.Ticker object <ORCL>,
 yfinance.Ticker object <AMZN>]

In [34]:
for t in tickers:
    symb = t.info.get('symbol')
    hist = t.history(period='5d')
    hist = pd.DataFrame(hist[['Close']].rename(columns={'Close': symb}))

    if stocks.empty:
        stocks = hist
    else:
        stocks = stocks.join(hist)

In [35]:
display(stocks)

Unnamed: 0_level_0,MSFT,TSLA,GM,AAPL,ORCL,AMZN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-10-14 00:00:00-04:00,419.140015,219.160004,48.630001,231.300003,176.119995,187.539993
2024-10-15 00:00:00-04:00,418.73999,219.570007,47.849998,233.850006,174.089996,187.690002
2024-10-16 00:00:00-04:00,416.119995,221.330002,49.009998,231.779999,174.770004,186.889999
2024-10-17 00:00:00-04:00,416.720001,220.889999,49.380001,232.149994,175.679993,187.529999
2024-10-18 00:00:00-04:00,418.160004,220.699997,49.18,235.0,174.690002,188.990005


Following snippet filters out tickers which prices went down by 3 points.

In [43]:
SAFE_STOCK_DROP_POINTS = .99

stable_stocks = []

for i in stocks.columns:
    # compare current w/ prev row in each stock
    if stocks[stocks[i]/stocks[i].shift(1) < SAFE_STOCK_DROP_POINTS].empty:
        stable_stocks.append(i)

display(stable_stocks)

['MSFT', 'TSLA', 'AAPL', 'AMZN']

GM and ORCL went out because they both dropped for a single point.

In [44]:
display(stocks[stable_stocks])

Unnamed: 0_level_0,MSFT,TSLA,AAPL,AMZN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-10-14 00:00:00-04:00,419.140015,219.160004,231.300003,187.539993
2024-10-15 00:00:00-04:00,418.73999,219.570007,233.850006,187.690002
2024-10-16 00:00:00-04:00,416.119995,221.330002,231.779999,186.889999
2024-10-17 00:00:00-04:00,416.720001,220.889999,232.149994,187.529999
2024-10-18 00:00:00-04:00,418.160004,220.699997,235.0,188.990005


A key task in analyzing multivariate time series is to identify relationships between different variables. For instance, there is often a dependency between a stock's opening and closing prices, as they typically do not vary significantly from each other on a given day.

Conversely, there may be no dependency between the closing prices of stocks from different sectors of the economy.

In [50]:
tsla = yf.Ticker('TSLA')
df = tkr.history(period='1mo')

In [51]:
display(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-09-19 00:00:00-04:00,190.039993,190.990005,188.470001,189.869995,39543200,0.0,0.0
2024-09-20 00:00:00-04:00,190.229996,191.839996,187.410004,191.600006,100378600,0.0,0.0
2024-09-23 00:00:00-04:00,191.639999,194.449997,190.570007,193.880005,36993100,0.0,0.0
2024-09-24 00:00:00-04:00,194.270004,195.369995,190.130005,193.960007,43478900,0.0,0.0
2024-09-25 00:00:00-04:00,193.75,193.949997,192.160004,192.529999,26391100,0.0,0.0
2024-09-26 00:00:00-04:00,194.309998,194.529999,189.539993,191.160004,36334900,0.0,0.0
2024-09-27 00:00:00-04:00,190.679993,190.899994,187.339996,187.970001,36002300,0.0,0.0
2024-09-30 00:00:00-04:00,187.139999,188.490005,184.649994,186.330002,41583900,0.0,0.0
2024-10-01 00:00:00-04:00,184.899994,186.190002,183.449997,185.130005,36044900,0.0,0.0
2024-10-02 00:00:00-04:00,184.440002,186.600006,184.039993,184.759995,23704100,0.0,0.0


In [52]:
# Filter out columns but Close and Volume
df = df[['Close', 'Volume']]

In [53]:
df = df.rename(columns={'Close': 'Price'})

In [54]:
display(df)

Unnamed: 0_level_0,Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-09-19 00:00:00-04:00,189.869995,39543200
2024-09-20 00:00:00-04:00,191.600006,100378600
2024-09-23 00:00:00-04:00,193.880005,36993100
2024-09-24 00:00:00-04:00,193.960007,43478900
2024-09-25 00:00:00-04:00,192.529999,26391100
2024-09-26 00:00:00-04:00,191.160004,36334900
2024-09-27 00:00:00-04:00,187.970001,36002300
2024-09-30 00:00:00-04:00,186.330002,41583900
2024-10-01 00:00:00-04:00,185.130005,36044900
2024-10-02 00:00:00-04:00,184.759995,23704100


To check if Price and Volume are related, calculate the daily percentage change for each column.

In [56]:
df['priceRise'] = np.log(df['Price'] / df['Price'].shift(1))
df['volumeRise'] = np.log(df['Volume'] / df['Volume'].shift(1))
display(df)

Unnamed: 0_level_0,Price,Volume,priceRise,volumeRise
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-09-19 00:00:00-04:00,189.869995,39543200,,
2024-09-20 00:00:00-04:00,191.600006,100378600,0.00907,0.931555
2024-09-23 00:00:00-04:00,193.880005,36993100,0.01183,-0.998218
2024-09-24 00:00:00-04:00,193.960007,43478900,0.000413,0.161544
2024-09-25 00:00:00-04:00,192.529999,26391100,-0.0074,-0.499249
2024-09-26 00:00:00-04:00,191.160004,36334900,-0.007141,0.319752
2024-09-27 00:00:00-04:00,187.970001,36002300,-0.016828,-0.009196
2024-09-30 00:00:00-04:00,186.330002,41583900,-0.008763,0.14413
2024-10-01 00:00:00-04:00,185.130005,36044900,-0.006461,-0.142948
2024-10-02 00:00:00-04:00,184.759995,23704100,-0.002001,-0.419117


Remember that natural logarithm provides a close approximation of the percentage change within a range of +/– 20 percent. If values exceed this range results will not be accurate.

In [65]:
print(df[abs(df['priceRise']) > .015])

                                Price    Volume  priceRise  volumeRise
Date                                                                  
2024-09-27 00:00:00-04:00  187.970001  36002300  -0.016828   -0.009196
2024-10-03 00:00:00-04:00  181.960007  30204300  -0.015271    0.242336
2024-10-04 00:00:00-04:00  186.509995  40890300   0.024698    0.302909
2024-10-07 00:00:00-04:00  180.800003  42364200  -0.031093    0.035411


In [66]:
print(df['volumeRise'].mean().round(4))

-0.0027


In [68]:
print(df[abs(df['priceRise']) > .015]['volumeRise'].mean().round(4))

0.1429
