In [1]:
#import libraries
import warnings
import yfinance as yf
import os
from datetime import datetime, timedelta
import json
import pandas as pd

warnings.filterwarnings('ignore')

## Data Loading and Preparation

In this analysis we choose to use the S&P 500 sectors.


In [2]:
# Hand-picked "selection2" universe. Each entry pairs a ticker with its asset
# class so the two parallel lists built below cannot drift out of alignment.
_selection2_members = [
    ('SPY', 'Equity'),
    ('GLD', 'Commodities'),
    ('SLV', 'Commodities'),
    ('DBA', 'Commodities'),
    ('DBB', 'Commodities'),
    ('CPER', 'Commodities'),
    ('CANE', 'Commodities'),
    ('TLH', 'Bonds'),
    ('ISTB', 'Bonds'),
    ('EMB', 'Bonds'),
    ('ILF', 'Equity'),
    ('INDA', 'Equity'),
    ('MCHI', 'Equity'),
    ('IEV', 'Equity'),
    ('EWJ', 'Equity'),
    ('EWA', 'Equity'),
    ('USO', 'Commodities'),
    ('BWX', 'Bonds'),
    ('CBON', 'Bonds'),
    ('VGSH', 'Bonds'),
]

universe_name = "selection2"
interval = '1d'  # daily bars
to_date = datetime.now()  # window ends at the moment the cell is run
from_date = pd.to_datetime("2012-01-01")  # fixed start of the window

# Split the pairs back into the two parallel lists used by the later cells.
universe_symbols = [ticker for ticker, _ in _selection2_members]
universe_asset_class = [asset_class for _, asset_class in _selection2_members]


In [3]:
# "selection3" universe: the symbols and their metadata are read from
# selection3.csv (semicolon-separated, header row, first column used as index).
universe_name = "selection3"
interval = '1d'  # daily frequency
to_date = datetime.now()  # window ends at the moment the cell is run
from_date = pd.to_datetime("2012-10-24")  # fixed start of the window

selection_df = pd.read_csv('selection3.csv', index_col=0, delimiter=';', header=0)
universe_symbols = selection_df.index.tolist()  # index values are the symbols
universe_asset_class = selection_df['asset_class'].tolist()

# The 'Tracker' column is optional: when it is absent, fall back to one empty
# string per symbol. Only the missing-column case (KeyError) is tolerated, so
# any unrelated failure still surfaces instead of being silently swallowed.
try:
    universe_tracker = selection_df['Tracker'].tolist()
except KeyError as e:
    print(f"Error occurred while reading 'Tracker' column: {e}")
    universe_tracker = ['']*len(universe_symbols)

In [4]:
# Download the price history of every universe symbol from Yahoo Finance.
print(f"Number of {universe_name} symbols loaded: {len(universe_symbols)}")

# The window bounds are passed to yfinance as 'YYYY-MM-DD' strings.
start_date = from_date.strftime('%Y-%m-%d')
end_date = to_date.strftime('%Y-%m-%d')
print(f"Downloading data from {start_date} to {end_date}...")

# All download arguments in one place; the values match the settings cell.
download_options = {
    'tickers': universe_symbols,
    'start': start_date,
    'end': end_date,
    'interval': interval,
    'group_by': 'ticker',   # the code below reads tickers from column level 0
    'auto_adjust': False,   # the code below expects a separate 'Adj Close' field
    'threads': True,
    'progress': False,
}
universe_data = yf.download(**download_options)
# Yahoo Finance field names -> lowercase names used by this project.
yahoo_to_project_names = {
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Adj Close': 'adjusted',
    'Volume': 'volume',
}

universe_data = pd.DataFrame(universe_data)

if isinstance(universe_data.columns, pd.MultiIndex):
    # Two-level columns: level 0 carries the ticker, level 1 the price field,
    # so only level 1 is renamed and the assets are counted on level 0.
    universe_data = universe_data.rename(columns=yahoo_to_project_names, level=1)
    asset_names = universe_data.columns.get_level_values(0).unique()
    downloaded_count = len(asset_names)
else:
    # Flat columns: rename the fields directly; every column counts as one.
    universe_data = universe_data.rename(columns=yahoo_to_project_names)
    downloaded_count = len(universe_data.columns)

print(f"Downloaded data for {downloaded_count} assets")

# Collect the Yahoo Finance metadata dictionary (`Ticker.info`) of every symbol
# and turn it into a DataFrame with one row per symbol.
universe_info = {}
for symbol in universe_symbols:
    try:
        universe_info[symbol] = yf.Ticker(symbol).info
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        # Keep an empty entry so the symbol still gets a (blank) row. Dropping
        # it would leave universe_info out of step with universe_symbols and
        # make later per-symbol lookups raise KeyError.
        universe_info[symbol] = {}
# Dict of dicts -> one column per symbol; transpose to get one row per symbol.
universe_info = pd.DataFrame(universe_info).T

# Persist the downloaded prices, the ticker metadata and the settings that
# produced them under <current working directory>/data/<universe_name>/.
universe_folder_path = os.path.join(os.getcwd(),'data', universe_name)

try:
    # exist_ok=True replaces the separate exists() check (no check-then-create
    # race); doing it inside the try reports a directory-creation failure the
    # same way as any other save error.
    os.makedirs(universe_folder_path, exist_ok=True)
    universe_data.to_csv(os.path.join(universe_folder_path, 'universe_data.csv'))
    universe_info.to_csv(os.path.join(universe_folder_path, 'universe_info.csv'))
    # Settings needed to reproduce or reload this universe later.
    universe_settings = {
        'universe_name': universe_name,
        'from_date': start_date,
        'to_date': end_date,
        'interval': interval,
        'universe_symbols': universe_symbols,
        'universe_asset_class': universe_asset_class
    }
    with open(os.path.join(universe_folder_path, 'universe_settings.json'), 'w') as f:
        json.dump(universe_settings, f, indent=4)
    print(f"Universe data and info saved in folder: {universe_folder_path}")
except Exception as e:
    # Best effort: a failed save is reported but does not stop the notebook.
    print(f"Error saving universe data: {e}")

Number of selection3 symbols loaded: 53
Downloading data from 2012-10-24 to 2025-11-03...
Downloaded data for 53 assets
Universe data and info saved in folder: c:\my-git\DataScience-novaIMS\APPM-individual\data\selection3


In [5]:
#download and save info data
close_data = universe_data.xs('adjusted', axis=1, level=1) if isinstance(universe_data.columns, pd.MultiIndex) else universe_data['adjusted']
info_data = universe_info[['shortName','longName', 'fundFamily', 'fundInceptionDate']]
summary_df = pd.DataFrame(columns=['Symbol', 'Name', 'Earliest', 'Latest','fundFamily', 'fundInceptionDate'])
for col in close_data.columns:
    col_data = close_data[col].dropna()
    summary_df = pd.concat([summary_df, pd.DataFrame({
        'Symbol': [col],
        'Name': [info_data.loc[col, 'shortName']],
        'Earliest': [col_data.index.min().date()],
        'Latest': [col_data.index.max().date()],
        'fundFamily': [info_data.loc[col, 'fundFamily']],
        'fundInceptionDate': [info_data.loc[col, 'fundInceptionDate']]
    })], ignore_index=True)

summary_df['fundInceptionDate'] = pd.to_datetime(summary_df['fundInceptionDate'], unit='s').dt.date
display(summary_df.sort_values(by='Earliest',ascending=False))
summary_df.to_csv(os.path.join(universe_folder_path, 'universe_summary.csv'), index=False, sep=';')

Unnamed: 0,Symbol,Name,Earliest,Latest,fundFamily,fundInceptionDate
0,EWT,iShares Inc iShares MSCI Taiwan,2012-10-24,2025-10-31,iShares,2000-06-20
27,AIA,iShares Asia 50 ETF,2012-10-24,2025-10-31,iShares,2007-11-13
29,EWG,iShares MSCI Germany Index Fund,2012-10-24,2025-10-31,iShares,1996-03-12
30,CORN,Teucrium Corn Fund ETV,2012-10-24,2025-10-31,Teucrium,2010-06-09
31,EWQ,iShares MSCI France Index Fund,2012-10-24,2025-10-31,iShares,1996-03-12
32,UCO,ProShares Ultra Bloomberg Crude,2012-10-24,2025-10-31,ProShares,2008-11-24
33,BIL,State Street SPDR Bloomberg 1-3,2012-10-24,2025-10-31,SPDR State Street Investment Management,2007-05-25
34,USO,United States Oil Fund,2012-10-24,2025-10-31,USCF Investments,2006-04-10
35,SOYB,Teucrium Soybean Fund ETV,2012-10-24,2025-10-31,Teucrium,2011-09-16
36,EWU,iShares MSCI,2012-10-24,2025-10-31,iShares,1996-03-12
