# Volatility Forecast Dataset

In [43]:
#!pip install yfinance
# !pip install pandas_ta
# !pip install arch

In [44]:
# If yfinance throws an error then uncomment the line below to install it. (You need to install it only once so comment it again after)
import yfinance as yf
import pandas_ta as ta
import pandas as pd
import numpy as np
from arch import arch_model

In [45]:
aapl = yf.Ticker("AAPL")
df = aapl.history("20Y")
df.drop(columns = ["Dividends","Stock Splits"],inplace = True)

In [46]:
MACD = df.ta.macd(close='close', fast=12, slow=26, signal=9, append=True).drop(columns=["MACDh_12_26_9","MACDs_12_26_9"])
RSI = df.ta.rsi()
EMA = df.ta.ema()
MFI = df.ta.mfi()

In [47]:
df['Return'] = df['Close'].pct_change()
df['Vol-5d'] = df['Return'].rolling(window='5d').std()
df['MACD'] = MACD
df['RSI'] = RSI
df['EMA'] = EMA
df['MFI'] = MFI

In [48]:
df.dropna(inplace = True)
df.drop(columns = ["MACD_12_26_9","MACDh_12_26_9", "MACDs_12_26_9"], inplace = True)

In [49]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Return,Vol-5d,MACD,RSI,EMA,MFI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2002-01-23,0.333760,0.352745,0.330545,0.352438,443279200,0.054994,0.050050,0.001683,56.879586,0.337801,46.200427
2002-01-24,0.350754,0.359940,0.350601,0.355347,344002400,0.008254,0.035992,0.002980,57.971201,0.340991,43.558844
2002-01-25,0.350448,0.358562,0.346927,0.355960,185914400,0.001725,0.030221,0.004011,58.211280,0.343713,37.623011
2002-01-28,0.358256,0.360553,0.347845,0.356266,186446400,0.000860,0.004042,0.004798,58.339253,0.345995,43.231508
2002-01-29,0.355500,0.360399,0.349836,0.353204,240324000,-0.008596,0.005726,0.005115,56.475804,0.347306,45.017739
...,...,...,...,...,...,...,...,...,...,...,...
2021-11-29,159.369995,161.190002,158.789993,160.240005,88748200,0.021874,0.037867,3.338364,64.995963,157.618498,75.589781
2021-11-30,159.990005,165.520004,159.919998,165.300003,174048100,0.031578,0.034067,3.757074,71.386106,159.015135,78.049987
2021-12-01,167.479996,170.300003,164.529999,164.770004,152052500,-0.003206,0.017949,4.000029,69.945772,160.061475,84.204852
2021-12-02,158.740005,164.199997,157.800003,163.759995,136739200,-0.006130,0.018591,4.064224,67.164616,160.733933,78.305630


In [50]:
df['signal'] = np.zeros(len(df))

In [51]:
for i,time in enumerate(df.index):
    
    before = df["Return"][i-5:i].std()
    after = df["Return"][i:i+5].std()
    
    if after >= before:
        df['signal'][i] = 1
    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['signal'][i] = 1


In [52]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Return,Vol-5d,MACD,RSI,EMA,MFI,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2002-01-23,0.333760,0.352745,0.330545,0.352438,443279200,0.054994,0.050050,0.001683,56.879586,0.337801,46.200427,0.0
2002-01-24,0.350754,0.359940,0.350601,0.355347,344002400,0.008254,0.035992,0.002980,57.971201,0.340991,43.558844,0.0
2002-01-25,0.350448,0.358562,0.346927,0.355960,185914400,0.001725,0.030221,0.004011,58.211280,0.343713,37.623011,0.0
2002-01-28,0.358256,0.360553,0.347845,0.356266,186446400,0.000860,0.004042,0.004798,58.339253,0.345995,43.231508,0.0
2002-01-29,0.355500,0.360399,0.349836,0.353204,240324000,-0.008596,0.005726,0.005115,56.475804,0.347306,45.017739,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-29,159.369995,161.190002,158.789993,160.240005,88748200,0.021874,0.037867,3.338364,64.995963,157.618498,75.589781,1.0
2021-11-30,159.990005,165.520004,159.919998,165.300003,174048100,0.031578,0.034067,3.757074,71.386106,159.015135,78.049987,1.0
2021-12-01,167.479996,170.300003,164.529999,164.770004,152052500,-0.003206,0.017949,4.000029,69.945772,160.061475,84.204852,0.0
2021-12-02,158.740005,164.199997,157.800003,163.759995,136739200,-0.006130,0.018591,4.064224,67.164616,160.733933,78.305630,0.0


In [54]:
df.to_csv("dataset.csv")