In [1]:
''' Importing the necessary libraries'''
import pandas as pd
import yfinance as yf
from dotenv import load_dotenv
import os
import matplotlib.pyplot as plt

In [2]:
# Read the .env file into the process environment
load_dotenv()

# Pull the date-range settings; os.getenv returns None for any key that is unset
start_date, end_date, btc_etf_start_date, btc_etf_end_date = (
    os.getenv(key)
    for key in ('start_date', 'end_date', 'btc_etf_start_date', 'btc_etf_end_date')
)


In [3]:
# Fetch different commodities data

# COMEX gold prices in USD
gold_data = yf.download('GC=F',
                        start=start_date,
                        end=end_date)

# COMEX silver prices in USD
silver_data = yf.download('SI=F',
                        start=start_date,
                        end=end_date)

# Crude oil prices in USD
oil_data = yf.download('CL=F',
                        start=start_date,
                        end=end_date)

# Combine all commodities into a single dataframe.
# The series order MUST mirror the labels assigned below (three adjusted
# closes, then three volumes); pd.concat keeps the columns in list order,
# so any other order silently attaches the wrong label to the data.
df_commodities = pd.concat([gold_data['Adj Close'],
                            silver_data['Adj Close'],
                            oil_data['Adj Close'],
                            gold_data['Volume'],
                            silver_data['Volume'],
                            oil_data['Volume']], axis=1)

df_commodities.columns = ['GOLD_ADJ_CLOSE', 'SILVER_ADJ_CLOSE', 'OIL_ADJ_CLOSE', 'GOLD_VOLUME', 'SILVER_VOLUME', 'OIL_VOLUME']

# Optional: uncomment to persist the frame
# df_commodities.to_parquet('df_commodities.parquet.gzip', compression='gzip')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
# Download historical quotes for four currency pairs

# EUR/USD
eur_usd_data = yf.download('EURUSD=X', start=start_date, end=end_date)

# USD/JPY
usd_jpy_data = yf.download('JPY=X', start=start_date, end=end_date)

# GBP/USD
gbp_usd_data = yf.download('GBPUSD=X', start=start_date, end=end_date)

# USD/CNY
usd_cny_data = yf.download('CNY=X', start=start_date, end=end_date)

# Alternative kept for reference (not executed): one batched request
# df_currencies = yf.download(
#     tickers=["EURUSD=X", "JPY=X", "GBPUSD=X", "CNY=X"],
#     start=start_date,
#     end=end_date,
# )

# Merge the adjusted closes of all currency pairs column-wise and label them
df_currencies = pd.concat(
    [
        eur_usd_data['Adj Close'],
        usd_jpy_data['Adj Close'],
        gbp_usd_data['Adj Close'],
        usd_cny_data['Adj Close'],
    ],
    axis=1,
).set_axis(
    ['EUR_USD_ADJ_CLOSE', 'USD_JPY_ADJ_CLOSE', 'GBP_USD_ADJ_CLOSE', 'USD_CNY_ADJ_CLOSE'],
    axis=1,
)

# Optional: uncomment to persist the frame
# df_currencies.to_parquet('df_currencies.parquet.gzip', compression='gzip')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [5]:
# Download financial market index data

# CBOE VIX
vix_data = yf.download('^VIX', start=start_date, end=end_date)

# CBOE interest rate
cboe_interest_rate_data = yf.download('^TNX', start=start_date, end=end_date)

# 5 year treasury yield
treasury_yield_5yrs_data = yf.download('^FVX', start=start_date, end=end_date)

# 13 week treasury bill
treasury_bill_13wk_data = yf.download('^IRX', start=start_date, end=end_date)

# Russell 2000 index
russel_2000_data = yf.download('^RUT', start=start_date, end=end_date)

# iShares 20+ Year Treasury Bond ETF
ishares_20yr_data = yf.download('TLT', start=start_date, end=end_date)

# Alternative kept for reference (not executed): one batched request
# df_financial_ind = yf.download(
#     tickers=["^VIX", "^TNX", "^FVX", "^RUT", "TLT", "^IRX"],
#     start=start_date,
#     end=end_date,
# )

# Merge the financial indices column-wise; volume is kept only for ^RUT and TLT.
# The label list follows the series list position by position.
df_financial_ind = pd.concat(
    [
        vix_data['Adj Close'],
        cboe_interest_rate_data['Adj Close'],
        treasury_yield_5yrs_data['Adj Close'],
        treasury_bill_13wk_data['Adj Close'],
        russel_2000_data['Adj Close'],
        russel_2000_data['Volume'],
        ishares_20yr_data['Adj Close'],
        ishares_20yr_data['Volume'],
    ],
    axis=1,
).set_axis(
    [
        'VIX_ADJ_CLOSE',
        'CBOE_INTEREST_RATE_ADJ_CLOSE',
        'TREASURY_YIELD_5YRS_ADJ_CLOSE',
        'TREASURY_BILL_13WK_ADJ_CLOSE',
        'RUSSEL_2000_ADJ_CLOSE',
        'RUSSEL_2000_VOLUME',
        'ISHARES_20YR_ADJ_CLOSE',
        'ISHARES_20YR_VOLUME',
    ],
    axis=1,
)

# Optional: uncomment to persist the frame
# df_financial_ind.to_parquet('df_financial_ind.parquet.gzip', compression='gzip')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [6]:
# Download historical stock prices

# Tesla
tesla_data = yf.download('TSLA', start=start_date, end=end_date)

# AMD
amd_data = yf.download('AMD', start=start_date, end=end_date)

# Intel
intel_data = yf.download('INTC', start=start_date, end=end_date)

# Apple
apple_data = yf.download('AAPL', start=start_date, end=end_date)

# NVIDIA
nvidia_data = yf.download('NVDA', start=start_date, end=end_date)

# Meta
meta_data = yf.download('META', start=start_date, end=end_date)

# Google
google_data = yf.download('GOOG', start=start_date, end=end_date)

# Alternative kept for reference (not executed): one batched request
# df_stock = yf.download(
#     tickers=["TSLA", "AMD", "INTC", "AAPL", "NVDA", "META", "GOOG"],
#     start=start_date,
#     end=end_date,
# )

# Merge the stocks column-wise as (adjusted close, volume) pairs per company.
# The label list follows the series list position by position.
df_stocks = pd.concat(
    [
        tesla_data['Adj Close'],
        tesla_data['Volume'],
        amd_data['Adj Close'],
        amd_data['Volume'],
        intel_data['Adj Close'],
        intel_data['Volume'],
        apple_data['Adj Close'],
        apple_data['Volume'],
        nvidia_data['Adj Close'],
        nvidia_data['Volume'],
        meta_data['Adj Close'],
        meta_data['Volume'],
        google_data['Adj Close'],
        google_data['Volume'],
    ],
    axis=1,
).set_axis(
    [
        'TESLA_ADJ_CLOSE', 'TESLA_VOLUME',
        'AMD_ADJ_CLOSE', 'AMD_VOLUME',
        'INTEL_ADJ_CLOSE', 'INTEL_VOLUME',
        'APPLE_ADJ_CLOSE', 'APPLE_VOLUME',
        'NVIDIA_ADJ_CLOSE', 'NVIDIA_VOLUME',
        'META_ADJ_CLOSE', 'META_VOLUME',
        'GOOGLE_ADJ_CLOSE', 'GOOGLE_VOLUME',
    ],
    axis=1,
)

# Optional: uncomment to persist the frame
# df_stocks.to_parquet('df_stocks.parquet.gzip', compression='gzip')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [7]:
# Download BTC ETF data (uses the dedicated btc_etf_* date range)

# Grayscale Bitcoin Trust
gbtc_data = yf.download('GBTC', start=btc_etf_start_date, end=btc_etf_end_date)

# ARK 21Shares Bitcoin ETF
arkb_data = yf.download('ARKB', start=btc_etf_start_date, end=btc_etf_end_date)

# Bitwise Bitcoin ETF
bitb_data = yf.download('BITB', start=btc_etf_start_date, end=btc_etf_end_date)

# Fidelity Wise Origin Bitcoin Fund
fbtc_data = yf.download('FBTC', start=btc_etf_start_date, end=btc_etf_end_date)

# Invesco Galaxy Bitcoin ETF
btco_data = yf.download('BTCO', start=btc_etf_start_date, end=btc_etf_end_date)

# iShares Bitcoin Trust
ibit_data = yf.download('IBIT', start=btc_etf_start_date, end=btc_etf_end_date)

# VanEck Bitcoin Trust ETF
hodl_data = yf.download('HODL', start=btc_etf_start_date, end=btc_etf_end_date)

# ProShares Bitcoin Strategy ETF
bito_data = yf.download('BITO', start=btc_etf_start_date, end=btc_etf_end_date)

# Alternative kept for reference (not executed): one batched request
# df_btc_etfs = yf.download(
#     tickers=["GBTC", "ARKB", "BITB", "FBTC", "BTCO", "IBIT", "HODL", "BITO"],
#     start=btc_etf_start_date,
#     end=btc_etf_end_date,
# )

# Merge the BTC ETFs column-wise as (adjusted close, volume) pairs per fund.
# The label list follows the series list position by position.
df_btc_etf = pd.concat(
    [
        gbtc_data['Adj Close'],
        gbtc_data['Volume'],
        arkb_data['Adj Close'],
        arkb_data['Volume'],
        bitb_data['Adj Close'],
        bitb_data['Volume'],
        fbtc_data['Adj Close'],
        fbtc_data['Volume'],
        btco_data['Adj Close'],
        btco_data['Volume'],
        ibit_data['Adj Close'],
        ibit_data['Volume'],
        hodl_data['Adj Close'],
        hodl_data['Volume'],
        bito_data['Adj Close'],
        bito_data['Volume'],
    ],
    axis=1,
).set_axis(
    [
        'GBTC_ADJ_CLOSE', 'GBTC_VOLUME',
        'ARKB_ADJ_CLOSE', 'ARKB_VOLUME',
        'BITB_ADJ_CLOSE', 'BITB_VOLUME',
        'FBTC_ADJ_CLOSE', 'FBTC_VOLUME',
        'BTCO_ADJ_CLOSE', 'BTCO_VOLUME',
        'IBIT_ADJ_CLOSE', 'IBIT_VOLUME',
        'HODL_ADJ_CLOSE', 'HODL_VOLUME',
        'BITO_ADJ_CLOSE', 'BITO_VOLUME',
    ],
    axis=1,
)

# Optional: uncomment to persist the frame
# df_btc_etf.to_parquet('df_btc_etf.parquet.gzip', compression='gzip')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [8]:
# Download historical crypto prices

# BTC-USD
btc_usd_data = yf.download('BTC-USD', start=start_date, end=end_date)

# ETH-USD
eth_usd_data = yf.download('ETH-USD', start=start_date, end=end_date)

# USDT-USD
usdt_usd_data = yf.download('USDT-USD', start=start_date, end=end_date)

# USDC-USD
usdc_usd_data = yf.download('USDC-USD', start=start_date, end=end_date)

# DOGE-USD
doge_usd_data = yf.download('DOGE-USD', start=start_date, end=end_date)

# XRP-USD
xrp_usd_data = yf.download('XRP-USD', start=start_date, end=end_date)

# SOL-USD
sol_usd_data = yf.download('SOL-USD', start=start_date, end=end_date)

# Alternative kept for reference (not executed): one batched request
# df_crypto = yf.download(
#     tickers=["BTC-USD", "ETH-USD", "USDT-USD", "USDC-USD", "DOGE-USD", "XRP-USD", "SOL-USD"],
#     start=start_date,
#     end=end_date,
# )

# Merge the crypto series column-wise: the full OHLC/volume/adjusted-close set
# for BTC, then (adjusted close, volume) pairs for every other coin.
# The label list follows the series list position by position.
df_crypto_hist = pd.concat(
    [
        btc_usd_data['Open'],
        btc_usd_data['High'],
        btc_usd_data['Low'],
        btc_usd_data['Close'],
        btc_usd_data['Volume'],
        btc_usd_data['Adj Close'],
        eth_usd_data['Adj Close'],
        eth_usd_data['Volume'],
        usdt_usd_data['Adj Close'],
        usdt_usd_data['Volume'],
        usdc_usd_data['Adj Close'],
        usdc_usd_data['Volume'],
        doge_usd_data['Adj Close'],
        doge_usd_data['Volume'],
        xrp_usd_data['Adj Close'],
        xrp_usd_data['Volume'],
        sol_usd_data['Adj Close'],
        sol_usd_data['Volume'],
    ],
    axis=1,
).set_axis(
    [
        'BTC_OPEN', 'BTC_HIGH', 'BTC_LOW', 'BTC_CLOSE', 'BTC_VOLUME', 'BTC_ADJ_CLOSE',
        'ETH_ADJ_CLOSE', 'ETH_VOLUME',
        'USDT_ADJ_CLOSE', 'USDT_VOLUME',
        'USDC_ADJ_CLOSE', 'USDC_VOLUME',
        'DOGE_ADJ_CLOSE', 'DOGE_VOLUME',
        'XRP_ADJ_CLOSE', 'XRP_VOLUME',
        'SOL_ADJ_CLOSE', 'SOL_VOLUME',
    ],
    axis=1,
)

# Optional: uncomment to persist the frame
# df_crypto_hist.to_parquet('df_crypto_hist.parquet.gzip', compression='gzip')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [9]:
# Join every asset-class frame side by side. pd.concat aligns rows on the index
# using its default outer join, so rows present in only some frames get NaN elsewhere.
df_yfinance = pd.concat(
    [
        df_commodities,
        df_currencies,
        df_financial_ind,
        df_stocks,
        df_btc_etf,
        df_crypto_hist,
    ],
    axis=1,
)

# Optional: uncomment to persist the combined frame
# df_yfinance.to_parquet('df_yfinance.parquet.gzip', compression='gzip')

In [14]:
import pandas as pd
import yfinance as yf
from dotenv import load_dotenv
import os

def load_environment_variables():
    """
    Load the .env file and collect the date-range settings.

    Returns a dict with the keys 'start_date', 'end_date',
    'btc_etf_start_date' and 'btc_etf_end_date'; a value is None when the
    corresponding environment variable is not set.
    """
    load_dotenv()
    setting_names = ('start_date', 'end_date', 'btc_etf_start_date', 'btc_etf_end_date')
    return {name: os.getenv(name) for name in setting_names}

def download_data(tickers, start_date, end_date):
    """
    Fetch each ticker with yf.download over [start_date, end_date).

    Returns a dict mapping ticker -> downloaded result, in request order.
    A ticker whose download raises is reported on stdout and left out of
    the dict, so callers must not assume every requested key is present.
    """
    frames = {}
    for symbol in tickers:
        try:
            frame = yf.download(symbol, start=start_date, end=end_date)
            # Stored inside the try so a failure here is reported like a failed download
            frames[symbol] = frame
        except Exception as exc:
            print(f"Error downloading {symbol}: {exc}")
    return frames

def create_dataframe(data, columns):
    """
    Concatenate ``data`` column-wise and relabel the result with ``columns``.

    ``data`` is whatever pd.concat accepts (here: a list of Series/DataFrames);
    ``columns`` must supply exactly one label per resulting column, in order.
    """
    return pd.concat(data, axis=1).set_axis(columns, axis=1)

def _download_required(tickers, start_date, end_date, category):
    """
    Download ``tickers`` via download_data and fail loudly if any is missing.

    download_data omits a ticker whose download raised; without this check the
    omission only surfaces later as a bare ``KeyError: '<ticker>'``.
    """
    data = download_data(tickers, start_date, end_date)
    missing = [ticker for ticker in tickers if ticker not in data]
    if missing:
        raise KeyError(f"No data downloaded for {category} ticker(s): {', '.join(missing)}")
    return data


def main():
    """
    Download every asset class, merge everything into one frame and save it.

    The date ranges come from load_environment_variables(); the BTC ETFs use
    their own 'btc_etf_start_date' / 'btc_etf_end_date' range, every other
    category uses 'start_date' / 'end_date'.

    Returns:
        The combined dataframe, which is also written to
        'df_yfinance.parquet.gzip' (gzip-compressed Parquet).

    Raises:
        KeyError: if any requested ticker is absent from the download results.
    """
    env_vars = load_environment_variables()
    start, end = env_vars['start_date'], env_vars['end_date']

    # Define tickers for each category
    commodities = ['GC=F', 'SI=F', 'CL=F']
    currencies = ['EURUSD=X', 'JPY=X', 'GBPUSD=X', 'CNY=X']
    financial_indices = ['^VIX', '^TNX', '^FVX', '^RUT', 'TLT', '^IRX']
    stocks = ['TSLA', 'AMD', 'INTC', 'AAPL', 'NVDA', 'META', 'GOOG']
    btc_etfs = ['GBTC', 'ARKB', 'BITB', 'FBTC', 'BTCO', 'IBIT', 'HODL', 'BITO']
    cryptos = ['BTC-USD', 'ETH-USD', 'USDT-USD', 'USDC-USD', 'DOGE-USD', 'XRP-USD', 'SOL-USD']

    # Download data
    commodities_data = _download_required(commodities, start, end, 'commodities')
    currencies_data = _download_required(currencies, start, end, 'currencies')
    financial_indices_data = _download_required(financial_indices, start, end, 'financial indices')
    stocks_data = _download_required(stocks, start, end, 'stocks')
    btc_etfs_data = _download_required(btc_etfs, env_vars['btc_etf_start_date'],
                                       env_vars['btc_etf_end_date'], 'BTC ETFs')
    cryptos_data = _download_required(cryptos, start, end, 'cryptos')

    # Create dataframes. In every call the label list follows the series list
    # position by position: all adjusted closes first, then the volumes.
    df_commodities = create_dataframe([commodities_data[ticker]['Adj Close'] for ticker in commodities] +
                                      [commodities_data[ticker]['Volume'] for ticker in commodities],
                                      ['GOLD_ADJ_CLOSE', 'SILVER_ADJ_CLOSE', 'OIL_ADJ_CLOSE',
                                       'GOLD_VOLUME', 'SILVER_VOLUME', 'OIL_VOLUME'])

    df_currencies = create_dataframe([currencies_data[ticker]['Adj Close'] for ticker in currencies],
                                     ['EUR_USD_ADJ_CLOSE', 'USD_JPY_ADJ_CLOSE', 'GBP_USD_ADJ_CLOSE', 'USD_CNY_ADJ_CLOSE'])

    # Volume is kept only for ^RUT and TLT
    df_financial_ind = create_dataframe([financial_indices_data[ticker]['Adj Close'] for ticker in financial_indices] +
                                        [financial_indices_data['^RUT']['Volume'], financial_indices_data['TLT']['Volume']],
                                        ['VIX_ADJ_CLOSE', 'CBOE_INTEREST_RATE_ADJ_CLOSE', 'TREASURY_YIELD_5YRS_ADJ_CLOSE',
                                         'RUSSEL_2000_ADJ_CLOSE', 'ISHARES_20YR_ADJ_CLOSE', 'TREASURY_BILL_13WK_ADJ_CLOSE',
                                         'RUSSEL_2000_VOLUME', 'ISHARES_20YR_VOLUME'])

    df_stocks = create_dataframe([stocks_data[ticker]['Adj Close'] for ticker in stocks] +
                                 [stocks_data[ticker]['Volume'] for ticker in stocks],
                                 ['TESLA_ADJ_CLOSE', 'AMD_ADJ_CLOSE', 'INTEL_ADJ_CLOSE', 'APPLE_ADJ_CLOSE', 'NVIDIA_ADJ_CLOSE', 'META_ADJ_CLOSE', 'GOOGLE_ADJ_CLOSE',
                                  'TESLA_VOLUME', 'AMD_VOLUME', 'INTEL_VOLUME', 'APPLE_VOLUME', 'NVIDIA_VOLUME', 'META_VOLUME', 'GOOGLE_VOLUME'])

    df_btc_etf = create_dataframe([btc_etfs_data[ticker]['Adj Close'] for ticker in btc_etfs] +
                                  [btc_etfs_data[ticker]['Volume'] for ticker in btc_etfs],
                                  ['GBTC_ADJ_CLOSE', 'ARKB_ADJ_CLOSE', 'BITB_ADJ_CLOSE', 'FBTC_ADJ_CLOSE', 'BTCO_ADJ_CLOSE', 'IBIT_ADJ_CLOSE', 'HODL_ADJ_CLOSE', 'BITO_ADJ_CLOSE',
                                   'GBTC_VOLUME', 'ARKB_VOLUME', 'BITB_VOLUME', 'FBTC_VOLUME', 'BTCO_VOLUME', 'IBIT_VOLUME', 'HODL_VOLUME', 'BITO_VOLUME'])

    # BTC keeps its full set of price columns; every other coin contributes
    # only adjusted close and volume. Columns are labelled once, via the shared helper.
    btc_data = cryptos_data['BTC-USD'][['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
    other_crypto_data = [cryptos_data[ticker][['Adj Close', 'Volume']]
                         for ticker in cryptos[1:]]  # relies on 'BTC-USD' being first in `cryptos`
    df_crypto_hist = create_dataframe([btc_data] + other_crypto_data,
                                      ['BTC_OPEN', 'BTC_HIGH', 'BTC_LOW', 'BTC_CLOSE', 'BTC_ADJ_CLOSE', 'BTC_VOLUME',
                                       'ETH_ADJ_CLOSE', 'ETH_VOLUME', 'USDT_ADJ_CLOSE', 'USDT_VOLUME',
                                       'USDC_ADJ_CLOSE', 'USDC_VOLUME', 'DOGE_ADJ_CLOSE', 'DOGE_VOLUME',
                                       'XRP_ADJ_CLOSE', 'XRP_VOLUME', 'SOL_ADJ_CLOSE', 'SOL_VOLUME'])

    # Concatenate all dataframes into a single dataframe
    df_yfinance = pd.concat([df_commodities, df_currencies, df_financial_ind, df_stocks, df_btc_etf, df_crypto_hist], axis=1)

    df_yfinance.to_parquet('df_yfinance.parquet.gzip', compression='gzip')

    # Returned so the result can be inspected without re-reading the file
    return df_yfinance


# Entry point: run the pipeline when this code is executed directly
# (a notebook cell also runs with __name__ == "__main__").
if __name__ == "__main__":
    main()

[*********************100%%**********************]  1 of 1 completed


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%******