In [20]:
import os
import pandas as pd

In [21]:
def read_excel_files(folder_path, skip_folders):
    '''
    This function reads all excel files in a given folder and returns a list of dataframes.
    '''
    all_excel_files = []

    for root, dirs, files in os.walk(folder_path):

        dirs[:] = [d for d in dirs if d not in skip_folders]

        for file in files:    
            if file.endswith(".xlsx"):
                full_path = os.path.join(root, file)
                all_excel_files.append(full_path)
    return all_excel_files



folder_path = "../raw"
# Define subfolders to skip
skip_folders = ["cash", "risk_free", "benchmarks", "retired"]

all_excel_files = read_excel_files(folder_path, skip_folders)

print(f"Successfully read {len(all_excel_files)} excel files")
print(all_excel_files)

Successfully read 25 excel files
['../raw/bonds/em_bond_etf.xlsx', '../raw/bonds/jp_ult_sht_cor_bond_etf.xlsx', '../raw/bonds/ish_wld_cor_bond_etf.xlsx', '../raw/bonds/ubs_bbs_tips_bonds_etf.xlsx', '../raw/bonds/ish_glob_bond_etf.xlsx', '../raw/alternatives/gold_etf.xlsx', '../raw/alternatives/vici_reits_aim_etf.xlsx', '../raw/alternatives/aim_data_centr_etf.xlsx', '../raw/equities/min_vol/world_min_vol_etf.xlsx', '../raw/equities/min_vol/euro_min_vol_etf.xlsx', '../raw/equities/min_vol/snp_min_vol_etf.xlsx', '../raw/equities/min_vol/em_min_vol_etf.xlsx', '../raw/equities/us/spdr_world_tech.xlsx', '../raw/equities/us/amu_rai_us_etf.xlsx', '../raw/equities/em/artemis_em_etf.xlsx', '../raw/equities/other/bnp_aqua_etf.xlsx', '../raw/equities/other/gam_sus_em_etf.xlsx', '../raw/equities/other/bgf_sus_etf.xlsx', '../raw/equities/other/pic_env_etf.xlsx', '../raw/equities/uk_large_cap/uklarge_ishares_etf.xlsx', '../raw/equities/uk_large_cap/uklarge_inv_etf.xlsx', '../raw/equities/uk_large_cap

In [25]:
# Start with empty dataframe and fill from the loop
combined_df = pd.DataFrame()

for i in all_excel_files:
    
    # Read excel file
    df = pd.read_excel(i, index_col='Date', parse_dates=True)
    # Get ticker value
    ticker_value = df['Ticker'].dropna().iloc[0].strip()
    # Rename column
    df = df.rename(columns={'Last Price': ticker_value})
    # Clean dataframe, this code only keeps the last price column (now ticker)
    df_clean = df[[ticker_value]]
    # Get three years back and change order of columns
    three_years_back = pd.Timestamp.now() - pd.DateOffset(years=3)
    df1 = df_clean[df_clean.index >= three_years_back]
    df1 = df1.sort_index()

    combined_df = pd.concat([combined_df, df1], axis=1)
    

In [26]:
df_combined_nonan = combined_df.dropna()
print(f"Rows before: {len(combined_df)}")
print(f"Rows after: {len(df_combined_nonan)}")
print(f"Number of dates dropped: {len(combined_df) - len(df_combined_nonan)}")

# Save the dataframe to an excel file
df_combined_nonan.to_excel("etf_combined.xlsx")


Rows before: 777
Rows after: 672
Number of dates dropped: 105


In [27]:
def create_ticker_to_file_mapping(all_excel_files):
    ticker_map = {}
    
    for file_path in all_excel_files:
        try:
            df = pd.read_excel(file_path, index_col='Date', parse_dates=True)
            ticker_value = df['Ticker'].dropna().iloc[0]
            ticker_map[ticker_value] = file_path
        except Exception as e:
            print(f"Error reading {file_path}: {e}")
            continue
    
    return ticker_map

ticker_to_file_map = create_ticker_to_file_mapping(all_excel_files)

def find_ticker_file(ticker):
    file_path = ticker_to_file_map.get(ticker)
    if file_path:
        print(f"Found {ticker}: {file_path}")
        return file_path
    else:
        print(f"Ticker {ticker} not found")
        return None
    

ticker_of_interest = find_ticker_file("BNAMEXI LN")

Ticker BNAMEXI LN not found
