In [1]:
# Package import
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import os
import plotly.express as px
import plotly.graph_objects as go
import yfinance as yf
import plotly.io as pio


### Train, Val, Test splits 

Train set will be from 2003-01-01 to 2017-12-31

Validation set will be from 2018-01-01 to 2019-12-31

Test set will be from 2020-01-01 to 2022-12-31

In [2]:
pio.templates.default = "plotly_dark"
default_template = pio.templates[pio.templates.default]
default_template.layout.update(height=800, width=900)


Layout({
    'annotationdefaults': {'arrowcolor': '#f2f5fa', 'arrowhead': 0, 'arrowwidth': 1},
    'autotypenumbers': 'strict',
    'coloraxis': {'colorbar': {'outlinewidth': 0, 'ticks': ''}},
    'colorscale': {'diverging': [[0, '#8e0152'], [0.1, '#c51b7d'], [0.2,
                                 '#de77ae'], [0.3, '#f1b6da'], [0.4, '#fde0ef'],
                                 [0.5, '#f7f7f7'], [0.6, '#e6f5d0'], [0.7,
                                 '#b8e186'], [0.8, '#7fbc41'], [0.9, '#4d9221'],
                                 [1, '#276419']],
                   'sequential': [[0.0, '#0d0887'], [0.1111111111111111,
                                  '#46039f'], [0.2222222222222222, '#7201a8'],
                                  [0.3333333333333333, '#9c179e'],
                                  [0.4444444444444444, '#bd3786'],
                                  [0.5555555555555556, '#d8576b'],
                                  [0.6666666666666666, '#ed7953'],
                           

In [3]:
def get_VIX_data(start : datetime, end : datetime, interval : str):
    """

    """
    df = yf.Ticker("^VIX").history(interval = interval ,start = start, end = end)
    if len(df) == 0:
        raise ValueError("No data found, please check the start and end date")
    
    df.reset_index(drop= False, inplace = True)
    df.drop(columns = ['Dividends', 'Stock Splits', 'Volume'], inplace = True)
    if "Datetime" in df.columns:
        df['Datetime'] = df['Datetime'].dt.strftime('%Y-%m-%d %H:%M:%S')
        df.rename(columns = {'Datetime' : 'Date'}, inplace = True)
    elif "index" in df.columns:
        df['index'] = df['index'].dt.strftime('%Y-%m-%d %H:%M:%S')
        df.rename(columns = {'index' : 'Date'}, inplace = True)
    else: 
        df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')

    # df.to_excel(f"Data/VIX_OHLC.xlsx", index = False)
    return df


end = datetime(2023, 9, 1)
start = datetime(2003, 1, 1)

df = get_VIX_data(start, end, interval = "1D")
df

Unnamed: 0,Date,Open,High,Low,Close
0,2002-12-31,29.080000,29.620001,28.600000,28.620001
1,2003-01-02,28.740000,28.740000,25.320000,25.389999
2,2003-01-03,25.549999,25.549999,24.680000,24.680000
3,2003-01-06,25.320000,25.420000,24.290001,24.910000
4,2003-01-07,25.129999,25.690001,24.910000,25.129999
...,...,...,...,...,...
5198,2023-08-25,17.209999,17.360001,15.450000,15.680000
5199,2023-08-28,16.240000,16.280001,15.000000,15.080000
5200,2023-08-29,15.080000,15.300000,14.340000,14.450000
5201,2023-08-30,14.530000,14.700000,13.830000,13.880000


In [4]:
end = datetime(2023, 8, 31)
start = datetime(2023, 1, 1)

df = get_VIX_data(start, end, interval = "1h")
df

Unnamed: 0,Date,Open,High,Low,Close
0,2023-01-03 02:00:00,23.090000,23.110001,22.99,23.000000
1,2023-01-03 03:00:00,22.990000,23.010000,22.74,22.790001
2,2023-01-03 04:00:00,22.790001,22.950001,22.77,22.920000
3,2023-01-03 05:00:00,22.930000,22.930000,22.83,22.910000
4,2023-01-03 06:00:00,22.910000,23.280001,22.90,23.190001
...,...,...,...,...,...
2268,2023-08-30 11:00:00,14.190000,14.220000,14.03,14.080000
2269,2023-08-30 12:00:00,14.050000,14.280000,14.03,14.210000
2270,2023-08-30 13:00:00,14.130000,14.340000,14.05,14.060000
2271,2023-08-30 14:00:00,14.320000,14.320000,13.83,13.830000


In [5]:
end = datetime(2020, 8, 31)
start = datetime(2020, 1, 1)

try:
    df = get_VIX_data(start, end, interval = "1h")
    df
except ValueError as e:
    print(e)

- ^VIX: 1h data not available for startTime=1577833200 and endTime=1598824800. The requested range must be within the last 730 days.
No data found, please check the start and end date


### Largest possible dataset for VIX using Yahoo Finance with 1h candle data

In [6]:
start = datetime(2022, 2, 1)
end = datetime(2023, 9, 1)

df = get_VIX_data(start, end, interval = "1h")
df

Unnamed: 0,Date,Open,High,Low,Close
0,2022-02-01 02:00:00,24.570000,24.680000,24.530001,24.570000
1,2022-02-01 03:00:00,24.570000,25.070000,24.510000,25.070000
2,2022-02-01 04:00:00,25.100000,25.330000,24.840000,24.959999
3,2022-02-01 05:00:00,24.969999,25.070000,24.650000,24.879999
4,2022-02-01 06:00:00,24.900000,25.110001,24.850000,24.900000
...,...,...,...,...,...
5342,2023-08-31 11:00:00,13.650000,13.970000,13.590000,13.860000
5343,2023-08-31 12:00:00,13.680000,13.970000,13.570000,13.570000
5344,2023-08-31 13:00:00,13.650000,13.690000,13.460000,13.680000
5345,2023-08-31 14:00:00,13.530000,13.690000,13.440000,13.570000


### Data for now consists of 1D data from Yahoo finance

In [7]:

end = datetime(2023, 9, 1)
start = datetime(2002, 6, 1)

df = get_VIX_data(start, end, interval = "1D")
df.to_csv(f"Data/Original_Data/VIX_OHLC_2003_2022.csv", index = False)
df

Unnamed: 0,Date,Open,High,Low,Close
0,2002-05-31,20.260000,20.260000,19.430000,19.980000
1,2002-06-03,20.980000,23.370001,20.620001,23.370001
2,2002-06-04,23.600000,24.820000,23.309999,23.889999
3,2002-06-05,23.520000,23.770000,22.459999,22.610001
4,2002-06-06,22.969999,24.639999,22.940001,24.160000
...,...,...,...,...,...
5346,2023-08-25,17.209999,17.360001,15.450000,15.680000
5347,2023-08-28,16.240000,16.280001,15.000000,15.080000
5348,2023-08-29,15.080000,15.300000,14.340000,14.450000
5349,2023-08-30,14.530000,14.700000,13.830000,13.880000


In [8]:
import pandas as pd

df_viz = df[df["Date"] > "2012-01-01"]
df_viz = df_viz[df_viz["Date"] < "2015-01-01"]


fig = go.Figure(data=[go.Candlestick(
                x=df_viz['Date'],
                open=df_viz['Open'],
                high=df_viz['High'],
                low=df_viz['Low'],
                close=df_viz['Close']
                ,name = "VIX OHLC")])


fig.update_layout(
    title='VIX Price',
    yaxis_title='VIX Price',
    xaxis_title='Date',
    font=dict(
        size=18,
    )
)

fig.write_html(f"Visualizations/Screenshots/VIX_sample_2.html")


fig.show()

In [9]:
import pandas as pd

df_viz = df[df["Date"] > "2022-01-01"]
df_viz = df_viz[df_viz["Date"] < "2022-04-01"]

fig = go.Figure(data=[go.Candlestick(
                x=df_viz['Date'],
                open=df_viz['Open'],
                high=df_viz['High'],
                low=df_viz['Low'],
                close=df_viz['Close']
                ,name = "VIX OHLC")])

fig.update_layout(
    title='VIX Price',
    yaxis_title='VIX Price',
    xaxis_title='Date',
    font=dict(
        size=18,
    )
)

fig.write_html(f"Visualizations/Screenshots/VIX_sample_1.html")


fig.show()