In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import yfinance as yf
import pylab
from datetime import datetime
from itertools import combinations
import warnings
warnings.filterwarnings("ignore")
import os
import sys
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.fetcher import DataCollector

In [2]:
collector = DataCollector(start="2000-01-01")
data = collector.run_full_collection()


=== Starting Data Collection ===

Downloading asset prices...
Downloading 13 assets from 2000-01-01 to 2026-01-02


[*********************100%***********************]  13 of 13 completed


Downloaded data for 13 assets with 6539 records each.

Downloading macro indicators...
Downloaded 9 macroeconomic data with 6872 records.

Preprocessing data...


In [4]:
data["macro"].tail()

Unnamed: 0_level_0,GDP,UNRATE,CPIAUCSL,FEDFUNDS,DGS10,DCOILWTICO,INDPRO,UMCSENT,VIXCLS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2025-12-24,31095.089,4.3,323.364,4.09,4.15,58.72,101.616,53.6,13.47
2025-12-26,31095.089,4.3,323.364,4.09,4.14,56.6,101.616,53.6,13.6
2025-12-29,31095.089,4.3,323.364,4.09,4.12,57.89,101.616,53.6,14.2
2025-12-30,31095.089,4.3,323.364,4.09,4.14,57.89,101.616,53.6,14.33
2025-12-31,31095.089,4.3,323.364,4.09,4.14,57.89,101.616,53.6,14.33


In [5]:
data["macro"].isnull().sum()/len(data["macro"])

GDP           0.0
UNRATE        0.0
CPIAUCSL      0.0
FEDFUNDS      0.0
DGS10         0.0
DCOILWTICO    0.0
INDPRO        0.0
UMCSENT       0.0
VIXCLS        0.0
dtype: float64

In [6]:
df = data["macro"]
df["month-year"] = df.index.to_period("M")
df["qtr-year"] = df.index.to_period("Q")
df["bi-annual-year"] = df.index.to_period("2Q")
df["year"] = df.index.to_period("Y")

In [7]:
df.head()

Unnamed: 0_level_0,GDP,UNRATE,CPIAUCSL,FEDFUNDS,DGS10,DCOILWTICO,INDPRO,UMCSENT,VIXCLS,month-year,qtr-year,bi-annual-year,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000-01-03,10660.465,4.1,170.0,5.73,6.58,25.56,91.8239,111.3,24.21,2000-01,2000Q1,2000Q1,2000
2000-01-04,10660.465,4.1,170.0,5.73,6.49,25.56,91.8239,111.3,27.01,2000-01,2000Q1,2000Q1,2000
2000-01-05,10660.465,4.1,170.0,5.73,6.62,24.65,91.8239,111.3,26.41,2000-01,2000Q1,2000Q1,2000
2000-01-06,10660.465,4.1,170.0,5.73,6.57,24.79,91.8239,111.3,25.73,2000-01,2000Q1,2000Q1,2000
2000-01-07,10660.465,4.1,170.0,5.73,6.52,24.79,91.8239,111.3,21.72,2000-01,2000Q1,2000Q1,2000


In [8]:
df_monthly = df.groupby(["month-year"]).mean()
df_qtrly = df.groupby(["qtr-year"]).mean()
df_biannually = df.groupby(["bi-annual-year"]).mean()
df_yearly = df.groupby(["year"]).mean()

In [9]:
df_monthly.nunique()/len(df_monthly)

GDP               0.227564
UNRATE            0.195513
CPIAUCSL          0.685897
FEDFUNDS          0.394231
DGS10             0.993590
DCOILWTICO        1.000000
INDPRO            0.708333
UMCSENT           0.525641
VIXCLS            0.993590
qtr-year          0.333333
bi-annual-year    0.333333
year              0.083333
dtype: float64

In [10]:
df_qtrly.nunique()/len(df_qtrly)

GDP               0.519231
UNRATE            0.942308
CPIAUCSL          1.000000
FEDFUNDS          0.903846
DGS10             1.000000
DCOILWTICO        1.000000
INDPRO            1.000000
UMCSENT           1.000000
VIXCLS            1.000000
month-year        1.000000
bi-annual-year    1.000000
year              0.250000
dtype: float64

In [11]:
df_biannually.nunique()/len(df_biannually)

GDP           0.519231
UNRATE        0.942308
CPIAUCSL      1.000000
FEDFUNDS      0.903846
DGS10         1.000000
DCOILWTICO    1.000000
INDPRO        1.000000
UMCSENT       1.000000
VIXCLS        1.000000
month-year    1.000000
qtr-year      1.000000
year          0.250000
dtype: float64

In [37]:
df_yearly.nunique()/len(df_yearly)

GDP               0.961538
UNRATE            1.000000
CPIAUCSL          1.000000
FEDFUNDS          1.000000
DGS10             1.000000
DCOILWTICO        1.000000
INDPRO            1.000000
UMCSENT           1.000000
VIXCLS            1.000000
month-year        1.000000
qtr-year          1.000000
bi-annual-year    1.000000
dtype: float64

In [38]:
df_yearly

Unnamed: 0_level_0,GDP,UNRATE,CPIAUCSL,FEDFUNDS,DGS10,DCOILWTICO,INDPRO,UMCSENT,VIXCLS,month-year,qtr-year,bi-annual-year
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000,10660.465,3.992063,172.199603,6.250595,6.029444,30.367579,92.538752,107.28373,23.315,2000-06,2000Q2,2000Q2
2001,10660.465,4.675403,176.9125,4.043347,5.019274,25.887097,89.901336,90.173387,25.749677,2001-06,2001Q2,2001Q2
2002,10902.125444,5.75873,179.814286,1.705873,4.606944,26.190198,89.968779,89.04127,27.29246,2002-06,2002Q2,2002Q2
2003,11433.101044,6.000397,183.482937,1.153095,4.016865,31.079881,91.089392,88.734921,21.982857,2003-06,2003Q2,2003Q2
2004,12183.15231,5.55754,188.710714,1.336706,4.270754,41.507976,93.482965,94.569841,15.480397,2004-06,2004Q2,2004Q2
2005,12937.799889,5.095238,195.288492,3.181429,4.290159,56.644048,96.616181,88.915476,12.807063,2005-06,2005Q2,2005Q2
2006,13142.642,4.623904,201.399203,4.923705,4.794622,66.051116,98.979297,86.685259,12.806534,2006-06,2006Q2,2006Q2
2007,13543.576757,4.550996,207.149028,5.044104,4.63243,72.386693,101.543151,84.821912,17.535936,2007-06,2007Q2,2007Q2
2008,14772.666854,5.726877,215.210798,2.05253,3.665771,99.671502,98.592055,63.449407,32.694862,2008-06,2008Q2,2008Q2
2009,14521.932544,9.061111,214.243246,0.154127,3.265595,61.950437,87.529336,67.089286,31.479008,2009-06,2009Q2,2009Q2


In [15]:
df_biannually.tail(50)

Unnamed: 0_level_0,GDP,UNRATE,CPIAUCSL,FEDFUNDS,DGS10,DCOILWTICO,INDPRO,UMCSENT,VIXCLS,month-year,qtr-year,year
bi-annual-year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013Q3,16953.838,7.234375,233.264875,0.083437,2.706562,105.830469,99.457366,83.13125,14.279687,2013-07,2013Q3,2013
2013Q4,17192.019,7.007812,233.945109,0.083594,2.743906,97.496719,100.231778,74.417187,14.232812,2013-10,2013Q4,2013
2014Q1,17192.019,6.9,234.1,0.08,2.76541,98.678197,100.3158,75.1,14.828852,2014-02,2014Q1,2014
2014Q2,17518.508,6.266667,236.768,0.09,2.620952,103.346825,102.253333,82.633333,12.738254,2014-05,2014Q2,2014
2014Q3,17804.228,6.134375,237.473062,0.09,2.499531,97.869687,102.835625,82.259375,13.072656,2014-07,2014Q3,2014
2014Q4,17912.079,5.665625,237.025063,0.100312,2.278125,73.21125,103.292275,89.203125,16.072344,2014-10,2014Q4,2014
2015Q1,17912.079,5.6,236.252,0.12,1.968852,48.485246,103.7044,93.6,16.564754,2015-02,2015Q1,2015
2015Q2,18279.784,5.428571,236.970413,0.123492,2.169365,57.854603,100.926563,94.319048,13.740159,2015-05,2015Q2,2015
2015Q3,18401.626,5.134375,237.858125,0.133281,2.223125,46.486094,101.069205,91.164062,19.307344,2015-07,2015Q3,2015
2015Q4,18435.137,5.0,237.742625,0.16125,2.191406,41.936563,99.846131,90.89375,17.033281,2015-11,2015Q4,2015


In [16]:
df_monthly.to_csv("data/processed/macro_monthly.csv")
df_qtrly.to_csv("data/processed/macro_quarterly.csv")

OSError: Cannot save file into a non-existent directory: 'data/processed'