In [1]:
import os
import json
import pandas as pd
import numpy as np
import portfolio_utils as pfu
import time

# Root directory of the raw portfolio data: holds channel_ids.csv plus one
# sub-directory per run (each with a settings.json and per-channel CSV files).
portfolio_dir = "../data/portfolios/raw"

Because loading the portfolio data for all runs at once could exceed the available memory, we compute the statistics and performance metrics iteratively, one run and one portfolio at a time.

In [2]:
# Discover the run directories to iterate over. os.listdir returns entries in
# arbitrary order, so sort them to keep the processing order (and hence the
# row order of the saved stats file) reproducible across machines and re-runs.
run_names = sorted(
    entry
    for entry in os.listdir(portfolio_dir)
    if os.path.isdir(os.path.join(portfolio_dir, entry))
)

# Channel ids are taken from the "channel_ids" column of channel_ids.csv.
channel_ids = pd.read_csv(f"{portfolio_dir}/channel_ids.csv")["channel_ids"].values

# Load the benchmark return series, indexed by date.
benchmark_returns = pd.read_csv(
    "../data/asset_data/returns/benchmarks_returns.csv", sep=";"
).set_index("date")
spy_returns = benchmark_returns["SPY"]  # benchmark for excess return and beta
tbill_returns = benchmark_returns["3m_tbills"]  # benchmark for Sharpe and Sortino

In [3]:
# Compute one row of statistics per (run, channel) portfolio and save them all
# to ../data/portfolios/portfolio_stats.csv. Portfolios are loaded one at a
# time so that only a single portfolio's data is held in memory at once.

# Run settings copied verbatim from each run's settings.json into the stats row
# (order here determines the column order in the output file).
run_setting_keys = (
    "portfolio_type",
    "max_positions",
    "max_holding_period",
    "neutral_asset",
    "min_days_wait_after_upload",
)

pf_stats_list = []
for run_name in run_names:
    start_time = time.perf_counter()  # monotonic clock, suited for elapsed-time measurement
    print(f"Processing run: {run_name}")
    # load settings; the context manager guarantees the file handle is closed
    with open(os.path.join(portfolio_dir, run_name, "settings.json")) as settings_file:
        settings = json.load(settings_file)

    for channel_id in channel_ids:
        # load portfolio data
        try:
            pos_df_bt = pd.read_csv(f"{portfolio_dir}/{run_name}/{channel_id}_pos_bt.csv", sep=";", index_col=0)
            pos_df_at = pd.read_csv(f"{portfolio_dir}/{run_name}/{channel_id}_pos_at.csv", sep=";", index_col=0)
            trade_logs_df = pd.read_csv(f"{portfolio_dir}/{run_name}/{channel_id}_trade_logs.csv", sep=";")
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Expected case: skip if no trades (an empty trade log file has no
            # column names, which makes read_csv raise EmptyDataError).
            continue
        except Exception as exc:
            # Unexpected load failure: keep the best-effort behaviour of skipping
            # the channel, but report it instead of hiding it. Unlike a bare
            # `except:`, this lets KeyboardInterrupt stop the loop.
            print(f"  Skipping {channel_id} in {run_name}: could not load portfolio files ({exc!r})")
            continue
        # get portfolio object
        p = pfu.Portfolio(
            channel_id=channel_id,
            pos_df_bt=pos_df_bt,
            pos_df_at=pos_df_at,
            trade_logs_df=trade_logs_df,
            compute_settings=settings,
        )
        if not p.has_trades:  # skip if no trades
            continue
        # get portfolio stats
        stats = {"channel_id": channel_id, "run_name": run_name}
        ### run settings
        stats.update({key: settings[key] for key in run_setting_keys})
        ### portfolio stats (general)
        stats.update({
            "n_days_total_period": p.n_days_total_period,  # trading days
            "n_days_active_holdings": p.n_days_active_holdings,  # trading days
            "n_days_active_period": p.n_days_active_period,  # trading days
            "n_buys": p.n_buys,
            "n_sells": p.n_sells,
            "n_unique_positions": p.n_unique_positions,
            "n_buys_stocks": p.n_buys_stocks,
            "n_buys_cryptos": p.n_buys_cryptos,
            "n_buys_etfs": p.n_buys_etfs,
            "n_buys_commodities": p.n_buys_commodities,
        })
        # add performance stats, one set of columns per evaluation period
        for period in ["full", "active_period", "active_days"]:
            performance_stats = {
                f"total_return_{period}": p.get_total_return(period=period),
                f"total_excess_return_{period}": p.get_total_excess_return(bm_returns=spy_returns, period=period),
                f"sharpe_{period}": p.get_sharpe_ratio(bm_returns=tbill_returns, period=period),
                f"sortino_{period}": p.get_sortino_ratio(bm_returns=tbill_returns, daily_target_return=0, period=period),
                f"value_at_risk_{period}": p.get_value_at_risk(alpha=0.05, period=period),
                f"max_drawdown_{period}": p.get_max_drawdown(period=period),
                f"beta_{period}": p.get_beta(bm_returns=spy_returns, period=period),
            }
            stats.update(performance_stats)
        # add to list
        pf_stats_list.append(stats)
    print(f"Run finished in: {time.perf_counter()-start_time:.2f} seconds")
# create df and save to csv
pf_stats_df = pd.DataFrame(pf_stats_list)
pf_stats_df.to_csv("../data/portfolios/portfolio_stats.csv", index=False, sep=";")


Processing run: equal_weight_3m_tbills_hp126_wait1_pos5
Run finished in: 6.69 seconds
Processing run: equal_weight_3m_tbills_hp126_wait1_pos99999
Run finished in: 7.14 seconds
Processing run: equal_weight_3m_tbills_hp21_wait1_pos5
Run finished in: 6.40 seconds
Processing run: equal_weight_3m_tbills_hp21_wait1_pos99999
Run finished in: 6.67 seconds
Processing run: equal_weight_3m_tbills_hp252_wait1_pos5
Run finished in: 6.66 seconds
Processing run: equal_weight_3m_tbills_hp252_wait1_pos99999
Run finished in: 7.40 seconds
Processing run: equal_weight_3m_tbills_hp99999_wait1_pos5
Run finished in: 6.88 seconds
Processing run: equal_weight_3m_tbills_hp99999_wait1_pos99999
Run finished in: 8.62 seconds
Processing run: equal_weight_cash_hp126_wait1_pos5
Run finished in: 6.46 seconds
Processing run: equal_weight_cash_hp126_wait1_pos99999
Run finished in: 6.95 seconds
Processing run: equal_weight_cash_hp21_wait1_pos5
Run finished in: 7.04 seconds
Processing run: equal_weight_cash_hp21_wait1_pos

In [4]:
# Debugging aid: show the run and channel the stats loop above was last working
# on (relies on the loop variables still being defined in the kernel).
print(f"{run_name} {channel_id}")

equal_weight_cash_hp21_wait1_pos5 @belangp


In [4]:
# load portfolio stats
pf_stats_df = pd.read_csv("../data/portfolios/portfolio_stats.csv", sep=";")

# check info: DataFrame.info() writes its summary to stdout itself and returns
# None, so call it directly; wrapping it in print() would append a stray "None".
pf_stats_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5064 entries, 0 to 5063
Data columns (total 38 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   channel_id                         5064 non-null   object 
 1   run_name                           5064 non-null   object 
 2   portfolio_type                     5064 non-null   object 
 3   max_positions                      5064 non-null   int64  
 4   max_holding_period                 5064 non-null   int64  
 5   neutral_asset                      5064 non-null   object 
 6   min_days_wait_after_upload         5064 non-null   int64  
 7   n_days_total_period                5064 non-null   int64  
 8   n_days_active_holdings             5064 non-null   int64  
 9   n_days_active_period               5064 non-null   int64  
 10  n_buys                             5064 non-null   int64  
 11  n_sells                            5064 non-null   int64