 ## Common Time Series Analysis Techniques

In [2]:
import yfinance as yf
ticker = 'TSLA'
tkr = yf.Ticker(ticker)
df=tkr.history(period='5d')

In [3]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-02-03 00:00:00-05:00,386.679993,389.170013,374.359985,383.679993,93732100,0.0,0.0
2025-02-04 00:00:00-05:00,382.630005,394.0,381.399994,392.209991,57072200,0.0,0.0
2025-02-05 00:00:00-05:00,387.51001,388.390015,375.529999,378.170013,57223300,0.0,0.0
2025-02-06 00:00:00-05:00,373.029999,375.399994,363.179993,374.320007,77650100,0.0,0.0
2025-02-07 00:00:00-05:00,370.179993,380.545898,365.630005,365.970001,46862722,0.0,0.0


In [4]:
print(df["Close"])

Date
2025-02-03 00:00:00-05:00    383.679993
2025-02-04 00:00:00-05:00    392.209991
2025-02-05 00:00:00-05:00    378.170013
2025-02-06 00:00:00-05:00    374.320007
2025-02-07 00:00:00-05:00    365.970001
Name: Close, dtype: float64


## Calculating Percentage Changes

In [7]:
import pandas as pd
print(pd.concat([df["Close"], df["Close"].shift(2)], axis=1, keys=["Close","2DaysShift"]))

                                Close  2DaysShift
Date                                             
2025-02-03 00:00:00-05:00  383.679993         NaN
2025-02-04 00:00:00-05:00  392.209991         NaN
2025-02-05 00:00:00-05:00  378.170013  383.679993
2025-02-06 00:00:00-05:00  374.320007  392.209991
2025-02-07 00:00:00-05:00  365.970001  378.170013


In [8]:
(df["Close"]-df["Close"].shift(2))/df["Close"].shift(2)

Date
2025-02-03 00:00:00-05:00         NaN
2025-02-04 00:00:00-05:00         NaN
2025-02-05 00:00:00-05:00   -0.014361
2025-02-06 00:00:00-05:00   -0.045613
2025-02-07 00:00:00-05:00   -0.032261
Name: Close, dtype: float64

In [10]:
import numpy as np
df['2DaysRise'] = np.log(df["Close"]/df["Close"].shift(2))

In [11]:
print(df[["Close",'2DaysRise']])

                                Close  2DaysRise
Date                                            
2025-02-03 00:00:00-05:00  383.679993        NaN
2025-02-04 00:00:00-05:00  392.209991        NaN
2025-02-05 00:00:00-05:00  378.170013  -0.014465
2025-02-06 00:00:00-05:00  374.320007  -0.046686
2025-02-07 00:00:00-05:00  365.970001  -0.032792


## Rolling Window Calculations

In [15]:
#pg 166
df["2DaysAvg"] = df["Close"].shift(1).rolling(2).mean()
print(df[["Close","2DaysAvg"]])

                                Close    2DaysAvg
Date                                             
2025-02-03 00:00:00-05:00  383.679993         NaN
2025-02-04 00:00:00-05:00  392.209991         NaN
2025-02-05 00:00:00-05:00  378.170013  387.944992
2025-02-06 00:00:00-05:00  374.320007  385.190002
2025-02-07 00:00:00-05:00  365.970001  376.245010


## Calculating the Percentage Change of a Rolling Average

In [17]:
df['2DaysAvgRise'] = np.log(df["Close"]/df["Close"].shift(1).rolling(2).mean())
print(df[["Close","2DaysAvg","2DaysAvgRise"]])

                                Close    2DaysAvg  2DaysAvgRise
Date                                                           
2025-02-03 00:00:00-05:00  383.679993         NaN           NaN
2025-02-04 00:00:00-05:00  392.209991         NaN           NaN
2025-02-05 00:00:00-05:00  378.170013  387.944992     -0.025520
2025-02-06 00:00:00-05:00  374.320007  385.190002     -0.028626
2025-02-07 00:00:00-05:00  365.970001  376.245010     -0.027689


## Multivariate Time Series

In [18]:
stocks = pd.DataFrame()
tickers = ["MSFT","TSLA","GM","AAPL","ORCL","AMZN"]
for ticker in tickers:
    tkr = yf.Ticker(ticker)
    hist = tkr.history(period='5d')
    hist = pd.DataFrame(hist[["Close"]].rename(columns={"Close":ticker}))
    if stocks.empty:
        stocks=hist
    else:
        stocks=stocks.join(hist)

In [19]:
stocks

Unnamed: 0_level_0,MSFT,TSLA,GM,AAPL,ORCL,AMZN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-02-03 00:00:00-05:00,410.920013,383.679993,47.900002,228.009995,168.600006,237.419998
2025-02-04 00:00:00-05:00,412.369995,392.209991,48.57,232.800003,167.889999,242.059998
2025-02-05 00:00:00-05:00,413.290009,378.170013,47.810001,232.470001,171.660004,236.169998
2025-02-06 00:00:00-05:00,415.820007,374.320007,47.93,233.220001,172.350006,238.830002
2025-02-07 00:00:00-05:00,408.4599,364.202789,47.389999,227.970001,173.800003,228.699997


## Processing Multivariate Time Series

In [21]:
stocks_to_keep = []
for i in stocks.columns:
    if stocks[stocks[i]/stocks[i].shift(1)<0.97].empty:
        stocks_to_keep.append(i)
print(stocks_to_keep)

['MSFT', 'GM', 'AAPL', 'ORCL']


## Analyzing Dependencies Between Variable

In [24]:
import yfinance as yf
import numpy as np
ticker = 'TSLA'
tkr = yf.Ticker(ticker)
df = tkr.history(period='1mo')


In [26]:
df = df[["Close","Volume"]].rename(columns={"Close":"Price"})

In [27]:
df["priceRise"] = np.log(df["Price"]/df["Price"].shift(1))

In [28]:
df["volumeRise"] = np.log(df["Volume"]/df["Volume"].shift(1))

In [30]:
df.head(5)

Unnamed: 0_level_0,Price,Volume,priceRise,volumeRise
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-01-07 00:00:00-05:00,394.359985,75699500,,
2025-01-08 00:00:00-05:00,394.940002,73038800,0.00147,-0.035781
2025-01-10 00:00:00-05:00,394.73999,62287300,-0.000507,-0.159233
2025-01-13 00:00:00-05:00,403.309998,67580500,0.021478,0.081562
2025-01-14 00:00:00-05:00,396.359985,84565000,-0.017383,0.224201


In [34]:
print(df[abs(df["priceRise"]>0.01)])

                                Price    Volume  priceRise  volumeRise
Date                                                                  
2025-01-13 00:00:00-05:00  403.309998  67580500   0.021478    0.081562
2025-01-15 00:00:00-05:00  428.220001  81375500   0.077314   -0.038446
2025-01-17 00:00:00-05:00  426.500000  94991400   0.030181    0.329361
2025-01-30 00:00:00-05:00  400.279999  98092900   0.028328    0.365913
2025-01-31 00:00:00-05:00  404.600006  83568200   0.010735   -0.160252
2025-02-04 00:00:00-05:00  392.209991  57072200   0.021989   -0.496124


In [35]:
# Find average volume rise over the past month
print(df["volumeRise"].mean().round(4))

-0.0172


In [37]:
# Find average volume rise for days with an absolute price rise greater than 0.01
print(df[abs(df["priceRise"]>0.01)]["volumeRise"].mean().round(4))

# The number below is larger than the average above, indicating a possible correlation betwen price and volume

0.0137
