In [59]:
import yfinance as yf
import numpy as np

# Yahoo Finance tickers for the three series downloaded in this cell
gold_symbol, treasury_symbol, currency_symbol = "GC=F", "^TNX", "EURUSD=X"

# One shared date window for every download.
# NOTE(review): the displayed outputs further down stop at 2023-11-07, so `end`
# appears to be exclusive — confirm against the yfinance documentation.
start_date, end_date = "2022-11-21", "2023-11-08"

# Download daily ('1d') bars for gold, treasury and currency, in that order,
# all over the same window.
gold_data, treasury_data, currency_data = (
    yf.download(symbol, start=start_date, end=end_date, interval='1d')
    for symbol in (gold_symbol, treasury_symbol, currency_symbol)
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [60]:
import pandas as pd

# Where the VIX history CSV lives, relative to the notebook's working
# directory; point this at your own copy if it is stored elsewhere.
file_path = 'Data/VIX.csv'

# Load the CSV into a DataFrame with pandas' default parsing options
VIX_data = pd.read_csv(filepath_or_buffer=file_path)

In [61]:
def create_intervals(date):
    """Return the 5-minute timestamps between 09:30 and 16:00 on one day.

    Parameters
    ----------
    date : str, datetime-like or numpy.datetime64
        The calendar day to expand. Anything ``pd.Timestamp`` accepts is
        allowed; a time-of-day component, if present, is discarded.

    Returns
    -------
    pandas.DatetimeIndex
        Timestamps from 09:30 to 16:00 on ``date``, both endpoints included,
        spaced five minutes apart (79 entries).
    """
    # Normalise to midnight first instead of formatting `date` into a string:
    # a Timestamp or datetime64 input would otherwise render as
    # '2022-11-21 00:00:00 09:30', which only parses by accident.
    session_day = pd.Timestamp(date).normalize()
    window_start = session_day + pd.Timedelta(hours=9, minutes=30)
    window_end = session_day + pd.Timedelta(hours=16)
    # 'min' is the supported spelling of the minute alias; 'T' is deprecated
    # in newer pandas releases.
    return pd.date_range(start=window_start, end=window_end, freq='5min')

In [62]:
def expand_df(df):
    """Repeat each daily row of ``df`` on a 5-minute intraday grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Daily data whose date is available as a ``Date`` column after
        ``reset_index()`` (i.e. either a ``Date`` column or an index named
        ``Date``). The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per timestamp returned by ``create_intervals`` for every
        distinct day in ``df``. Columns are ``DateTime`` and ``Date`` followed
        by the columns of the reset frame; each day's values are copied onto
        all of that day's rows. Rows whose date is missing (NaT) cannot be
        placed on the grid and are not included.
    """
    # reset_index() returns a new frame, so the assignments below never touch
    # the caller's data.
    # NOTE(review): when 'Date' is already a column (e.g. a frame read from
    # CSV) this also inserts the old index as an 'index' column. Kept as-is so
    # the output schema does not change — confirm whether it is wanted.
    df = df.reset_index()
    df['Date'] = pd.to_datetime(df['Date'])

    # Pass plain 'YYYY-MM-DD' strings to create_intervals rather than raw
    # Timestamp/datetime64 values, whose string form carries a midnight time.
    day_labels = df['Date'].dropna().dt.strftime('%Y-%m-%d').unique()
    all_intervals = [create_intervals(day) for day in day_labels]
    flattened_intervals = [stamp for day_index in all_intervals for stamp in day_index]

    # Wrapping in DatetimeIndex keeps a datetime dtype even when there are no
    # days at all, so the `.dt` accessor below also works for an empty input.
    interval_df = pd.DataFrame({'DateTime': pd.DatetimeIndex(flattened_intervals)})

    # Join on the plain calendar date so every intraday stamp picks up the
    # values of its day.
    df['Date'] = df['Date'].dt.date
    interval_df['Date'] = interval_df['DateTime'].dt.date
    merged_df = pd.merge(interval_df, df, on='Date', how='left')
    return merged_df

In [63]:
# Expand every daily frame onto the 5-minute grid, in the order
# VIX, gold, treasury, currency.
VIX, gold, treasury, currency = map(
    expand_df,
    (VIX_data, gold_data, treasury_data, currency_data),
)

In [64]:
# Show the daily VIX frame as loaded from the CSV (the expanded 5-minute
# version is held separately in `VIX`).
VIX_data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-11-21,24.049999,24.120001,22.299999,22.360001,22.360001,0.0
1,2022-11-22,22.590000,22.709999,21.280001,21.290001,21.290001,0.0
2,2022-11-23,21.490000,21.780001,20.320000,20.350000,20.350000,0.0
3,2022-11-24,,,,,,
4,2022-11-25,20.610001,21.110001,20.459999,20.500000,20.500000,0.0
...,...,...,...,...,...,...,...
247,2023-11-01,18.020000,18.420000,16.629999,16.870001,16.870001,0.0
248,2023-11-02,16.590000,16.620001,15.580000,15.660000,15.660000,0.0
249,2023-11-03,15.700000,15.830000,14.910000,14.910000,14.910000,0.0
250,2023-11-06,15.390000,15.580000,14.840000,14.890000,14.890000,0.0


In [65]:
# Feature 1 - log change of Close relative to the previous row.
# NOTE(review): VIX repeats each day's Close on every 5-minute row, so this is
# 0.0 inside a day and non-zero only on the first row of each day (see the
# output below) — confirm that is the intended feature.
VIX['VIX_Log_Change'] = np.log(VIX['Close'].div(VIX['Close'].shift(1)))

# Feature 2 - spread between the High and Low columns
VIX['VIX_High_Low_Spread'] = VIX['High'].sub(VIX['Low'])

# Write the frame, including both new columns, to CSV and display it.
# NOTE(review): the directory is spelled 'Filtered_Data' here but
# 'Filtered_data' in the gold/treasury/currency cells; on a case-sensitive
# filesystem those are different directories — confirm which one is meant.
VIX.to_csv('Filtered_Data/VIX_filtered.csv')
VIX

Unnamed: 0,DateTime,Date,index,Open,High,Low,Close,Adj Close,Volume,VIX_Log_Change,VIX_High_Low_Spread
0,2022-11-21 09:30:00,2022-11-21,0,24.049999,24.120001,22.299999,22.360001,22.360001,0.0,,1.820002
1,2022-11-21 09:35:00,2022-11-21,0,24.049999,24.120001,22.299999,22.360001,22.360001,0.0,0.0,1.820002
2,2022-11-21 09:40:00,2022-11-21,0,24.049999,24.120001,22.299999,22.360001,22.360001,0.0,0.0,1.820002
3,2022-11-21 09:45:00,2022-11-21,0,24.049999,24.120001,22.299999,22.360001,22.360001,0.0,0.0,1.820002
4,2022-11-21 09:50:00,2022-11-21,0,24.049999,24.120001,22.299999,22.360001,22.360001,0.0,0.0,1.820002
...,...,...,...,...,...,...,...,...,...,...,...
19903,2023-11-07 15:40:00,2023-11-07,251,15.100000,15.170000,14.710000,14.810000,14.810000,0.0,0.0,0.460000
19904,2023-11-07 15:45:00,2023-11-07,251,15.100000,15.170000,14.710000,14.810000,14.810000,0.0,0.0,0.460000
19905,2023-11-07 15:50:00,2023-11-07,251,15.100000,15.170000,14.710000,14.810000,14.810000,0.0,0.0,0.460000
19906,2023-11-07 15:55:00,2023-11-07,251,15.100000,15.170000,14.710000,14.810000,14.810000,0.0,0.0,0.460000


In [66]:
# Feature 1 - log change of Close relative to the previous row.
# NOTE(review): gold repeats each day's Close on every 5-minute row, so this
# is 0.0 inside a day and non-zero only on the first row of each day (see the
# output below) — confirm that is the intended feature.
gold['GOLD_Log_Change'] = np.log(gold['Close'].div(gold['Close'].shift(1)))

# Feature 2 - spread between the High and Low columns
gold['GOLD_High_Low_Spread'] = gold['High'].sub(gold['Low'])

# Write the frame, including both new columns, to CSV and display it
gold.to_csv('Filtered_data/GOLD_filtered.csv')
gold

Unnamed: 0,DateTime,Date,Open,High,Low,Close,Adj Close,Volume,GOLD_Log_Change,GOLD_High_Low_Spread
0,2022-11-21 09:30:00,2022-11-21,1739.0,1744.900024,1736.099976,1737.400024,1737.400024,544,,8.800049
1,2022-11-21 09:35:00,2022-11-21,1739.0,1744.900024,1736.099976,1737.400024,1737.400024,544,0.0,8.800049
2,2022-11-21 09:40:00,2022-11-21,1739.0,1744.900024,1736.099976,1737.400024,1737.400024,544,0.0,8.800049
3,2022-11-21 09:45:00,2022-11-21,1739.0,1744.900024,1736.099976,1737.400024,1737.400024,544,0.0,8.800049
4,2022-11-21 09:50:00,2022-11-21,1739.0,1744.900024,1736.099976,1737.400024,1737.400024,544,0.0,8.800049
...,...,...,...,...,...,...,...,...,...,...
19113,2023-11-07 15:40:00,2023-11-07,1966.5,1966.800049,1960.699951,1966.800049,1966.800049,20,0.0,6.100098
19114,2023-11-07 15:45:00,2023-11-07,1966.5,1966.800049,1960.699951,1966.800049,1966.800049,20,0.0,6.100098
19115,2023-11-07 15:50:00,2023-11-07,1966.5,1966.800049,1960.699951,1966.800049,1966.800049,20,0.0,6.100098
19116,2023-11-07 15:55:00,2023-11-07,1966.5,1966.800049,1960.699951,1966.800049,1966.800049,20,0.0,6.100098


In [67]:
# Feature 1 - log change of Close relative to the previous row.
# NOTE(review): expand_df repeats each day's Close on every 5-minute row, so
# consecutive rows within a day have a ratio of 1 and a log change of 0.0;
# only the first row of each day can be non-zero — confirm this is intended.
treasury['TREASURY_Log_Change'] = np.log(
    treasury['Close'].div(treasury['Close'].shift(1))
)

# Feature 2 - spread between the High and Low columns
treasury['TREASURY_High_Low_Spread'] = treasury['High'].sub(treasury['Low'])

# Write the frame, including both new columns, to CSV
treasury.to_csv('Filtered_data/TREASURY_filtered.csv')

In [68]:
# Feature 1 - log change of Close relative to the previous row.
# NOTE(review): expand_df repeats each day's Close on every 5-minute row, so
# consecutive rows within a day have a ratio of 1 and a log change of 0.0;
# only the first row of each day can be non-zero — confirm this is intended.
currency['CURRENCY_Log_Change'] = np.log(
    currency['Close'].div(currency['Close'].shift(1))
)

# Feature 2 - spread between the High and Low columns
currency['CURRENCY_High_Low_Spread'] = currency['High'].sub(currency['Low'])

# Write the frame, including both new columns, to CSV
currency.to_csv('Filtered_data/CURRENCY_filtered.csv')