## Import Required Libraries

We'll import all necessary Python libraries for financial data analysis, portfolio optimization, and visualization.

In [1]:
import warnings
import os
from datetime import datetime, timedelta
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyoff
from plotly.subplots import make_subplots
from IPython.display import display, Markdown

from Backtester.BacktestResults import TestResults

warnings.filterwarnings('ignore')

## Load backtest data

In [2]:
universe_name = "selection3";test_name = "GEM_LP"

test_path = f"data/{universe_name}/{test_name}"
test_results = TestResults(test_path)
strategies_list = test_results.list_strategies()
test_images_path = os.path.join(test_path, "images",test_name)
os.makedirs(test_images_path, exist_ok=True)

md = f"""
### Backtest Settings

- Test path: `{test_path}`
- Number of strategies: **{len(strategies_list)}**

```json
{json.dumps(test_results.test_settings, indent=2)}
```
### Strategies
```json
{json.dumps(strategies_list, indent=2)}
```
"""
display(Markdown(md))


### Backtest Settings

- Test path: `data/selection3/GEM_LP`
- Number of strategies: **15**

```json
{
  "universe_name": "selection3",
  "backtest_duration": 504,
  "lookback_periods": 127,
  "num_datasets": 100,
  "random_seed": 12,
  "num_assets": null,
  "test_name": "GEM_LP",
  "test_folder_path": "c:\\my-git\\DataScience-novaIMS\\APPM-individual\\data\\selection3\\GEM_LP"
}
```
### Strategies
```json
[
  "AAA1_noComission",
  "AAA1",
  "AAA2_noComission",
  "AAA2",
  "AAA3",
  "AAA4",
  "AAA_M1",
  "EqualWeight",
  "GEM1",
  "GEM2",
  "GEM3",
  "GEM4",
  "GEM5_noComission",
  "GEM5",
  "GEM_M1"
]
```


In [34]:
analyze_strategy = "GEM2"
dataset_name="dataset_51"
#analyze_by = "VWR"
#top_and_bottom = 5

In [35]:
strategy_images_path = os.path.join(test_images_path, analyze_strategy)
os.makedirs(strategy_images_path, exist_ok=True)

strategy_returns = test_results.strategies[analyze_strategy].get_datasets_returns(dataset_names=[dataset_name])
strategy_cum_returns = (1 + strategy_returns).cumprod()

datasets_info = test_results.datasets
dataset_data = test_results.get_datasets_data(datasets_filter_list=[dataset_name],column="adjusted")
portfolio_data = test_results.strategies[analyze_strategy].datasets[dataset_name].get_asset_values('portfolio')
portfolio_value_history = portfolio_data['portfolio_value']
dataset_adjusted = pd.DataFrame(dataset_data.get(dataset_name)).pct_change().cumsum() + 1
start_date = datasets_info[dataset_name]['start_date']
end_date = datasets_info[dataset_name]['end_date']

datasets_weights = test_results.strategies[analyze_strategy].get_datasets_weights()
wdf = datasets_weights.get(dataset_name)
asset_value_history = pd.DataFrame(test_results.strategies[analyze_strategy].datasets[dataset_name].get_asset_values('values'))
asset_value_history['Cash'] = portfolio_data['cash']

# Create subplots
fig = make_subplots(
    rows=3, cols=1,
    specs=[[{"type": "scatter"}], [{"type": "bar"}], [{"type": "bar"}]],
    vertical_spacing=0.08,
    row_heights=[0.5, 0.25, 0.25]
)

colors = px.colors.qualitative.Plotly

# Subplot 1: Cumulative Returns
x_dates = dataset_adjusted.index
sorted_cols = sorted(dataset_adjusted.columns)
ncols_total = len(sorted_cols)
color_cycle_main = list(np.tile(colors, int(np.ceil(ncols_total / len(colors)))))[:ncols_total]

for i, col in enumerate(sorted_cols):
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=dataset_adjusted[col],
        mode='lines',
        name=col,
        line=dict(width=1, color=color_cycle_main[i]),
        hovertemplate=f"{col}<br>Date: %{{x}}<br>Cumulative Return: %{{y:.4f}}<extra></extra>",
        legendgroup=col,
        showlegend=True
    ), row=1, col=1)

#fig.add_trace(go.Scatter(
#    x=portfolio_value_history.index,
#    y=portfolio_value_history.values / portfolio_value_history.values[0],
#    mode='lines',
#    name='Portfolio Value',
#    line=dict(color='red', width=2),
#    hovertemplate="Portfolio Value<br>Date: %{x}<br>Normalized Value: %{y:.4f}<extra></extra>",
#    legendgroup='Portfolio Value',
#    showlegend=True
#), row=1, col=1)
fig.add_trace(go.Scatter(
    x=x_dates,
    y=strategy_cum_returns[dataset_name],
    mode='lines',
    name='Total',
    line=dict(color='black', width=2),
    hovertemplate="Total<br>Date: %{x}<br>Cumulative Return: %{y:.4f}<extra></extra>"
))
# Subplot 2: Weights
if wdf is not None and not wdf.empty:
    sorted_weight_cols = sorted(wdf.columns)
    
    for col_name in sorted_weight_cols:
        # Find matching color from sorted_cols
        if col_name in sorted_cols:
            col_idx = sorted_cols.index(col_name)
            bar_color = color_cycle_main[col_idx]
        else:
            bar_color = 'gray'
        
        fig.add_trace(go.Bar(
            x=wdf.index,
            y=wdf[col_name],
            name=col_name,
            marker_color=bar_color,
            hovertemplate=f"{col_name}<br>Date: %{{x}}<br>Weight: %{{y:.6f}}<extra></extra>",
            legendgroup=col_name,
            showlegend=False
        ), row=2, col=1)

# Subplot 3: Asset Values
sorted_asset_cols = sorted(asset_value_history.columns)

for col_name in sorted_asset_cols:
    # Find matching color from sorted_cols or use special color for Cash
    if col_name == 'Cash':
        bar_color = 'lightgray'
        legend_group = 'Cash'
    elif col_name in sorted_cols:
        col_idx = sorted_cols.index(col_name)
        bar_color = color_cycle_main[col_idx]
        legend_group = col_name
    else:
        bar_color = 'gray'
        legend_group = col_name
    
    fig.add_trace(go.Bar(
        x=asset_value_history.index,
        y=asset_value_history[col_name],
        name=col_name,
        marker_color=bar_color,
        hovertemplate=f"{col_name}<br>Date: %{{x}}<br>Value: %{{y:.2f}}<extra></extra>",
        legendgroup=legend_group,
        showlegend=False
    ), row=3, col=1)

fig.update_xaxes(title_text='Date', row=1, col=1)
fig.update_xaxes(title_text='Date', row=2, col=1)
fig.update_xaxes(title_text='Date', row=3, col=1)
fig.update_yaxes(title_text='Cumulative Return', row=1, col=1)
fig.update_yaxes(title_text='Weight', range=[0, 1], row=2, col=1)
fig.update_yaxes(title_text='Asset Value ($)', row=3, col=1)

fig.update_layout(
    barmode='stack',
    height=1300,
    showlegend=True,
    legend=dict(
        orientation="v", 
        yanchor="top", 
        y=1, 
        xanchor="left", 
        x=1.02,
        itemsizing='constant',
        tracegroupgap=0
    )
)

pyoff.iplot(fig)


In [36]:
# Calculate Drawdown

running_max = strategy_cum_returns[dataset_name].cummax()
drawdown = (strategy_cum_returns[dataset_name] - running_max) / running_max
drawdown.index = pd.to_datetime(x_dates)
# Create drawdown chart
fig_dd = go.Figure()

# Add drawdown area
fig_dd.add_trace(go.Scatter(
    x=drawdown.index,
    y=drawdown * 100,  # Convert to percentage
    fill='tozeroy',
    name='Drawdown',
    line=dict(color='red', width=1),
    fillcolor='rgba(255, 0, 0, 0.3)',
    hovertemplate='Date: %{x}<br>Drawdown: %{y:.2f}%<extra></extra>'
))

# Add zero line
fig_dd.add_hline(y=0, line_dash="dash", line_color="gray", line_width=1)

# Calculate key drawdown statistics
max_drawdown = drawdown.min() * 100
max_dd_date = drawdown.idxmin()
current_drawdown = drawdown.iloc[-1] * 100

# Add annotations for max drawdown
fig_dd.add_annotation(
    x=max_dd_date,
    y=max_drawdown,
    text=f'Max DD: {max_drawdown:.2f}%',
    showarrow=True,
    arrowhead=2,
    arrowcolor='red',
    ax=0,
    ay=-40
)

fig_dd.update_layout(
    title=f'Drawdown Chart - {analyze_strategy} - {dataset_name}',
    xaxis_title='Date',
    yaxis_title='Drawdown (%)',
    height=500,
    showlegend=True,
    hovermode='x unified'
)

# Display statistics
display(Markdown(f"""
### Drawdown Statistics for {dataset_name}
- **Maximum Drawdown:** {max_drawdown:.2f}%
- **Max Drawdown Date:** {max_dd_date.strftime('%Y-%m-%d')}
- **Current Drawdown:** {current_drawdown:.2f}%
- **Average Drawdown:** {(drawdown[drawdown < 0].mean() * 100):.2f}%
"""))

pyoff.iplot(fig_dd)


### Drawdown Statistics for dataset_51
- **Maximum Drawdown:** -17.22%
- **Max Drawdown Date:** 2022-01-24
- **Current Drawdown:** -4.84%
- **Average Drawdown:** -5.99%


In [40]:
# print the basic dataset stats: final value, volatility, VWR

# Get the basic stats
dataset_perf = test_results.strategies[analyze_strategy].datasets[dataset_name].get_performance()
bt_perf = test_results.strategies[analyze_strategy].datasets[dataset_name].get_bt_performance()
final_value = test_results.strategies[analyze_strategy].datasets[dataset_name].final_value
volatility = dataset_perf.loc[dataset_name, 'annual_volatility'] if not dataset_perf.empty else np.nan
vwr = bt_perf.loc[dataset_name, 'VWR'] if not bt_perf.empty and 'VWR' in bt_perf.columns else np.nan

display(Markdown(f"### Basic Dataset Stats for {dataset_name} on strategy {analyze_strategy}"))
display(Markdown(f"- **Final Value:** ${final_value:,.2f}"))
display(Markdown(f"- **Annual Volatility:** {volatility:.4f}"))
display(Markdown(f"- **VWR (Variability-Weighted Return):** {vwr:.4f}"))


order_history = test_results.strategies[analyze_strategy].datasets[dataset_name].get_orders()
total_comm=order_history['commission'].sum()
avg_pnl=order_history['pnl'].mean()
display(Markdown(f"**Total Commissions for {dataset_name}:** ${total_comm:,.2f}"))
display(Markdown(f"**Average PnL for {dataset_name}:** ${avg_pnl:,.2f}"))
#display total number of orders and average size
total_orders = order_history.shape[0]
sell_orders = order_history[order_history['order_type'] == 'SELL']
buy_orders = order_history[order_history['order_type'] == 'BUY']


# calculate and display profit loss ratio
# Filter winning and losing trades (based on sell orders which realize P&L)
winning_trades = sell_orders[sell_orders['pnl'] > 0]
losing_trades = sell_orders[sell_orders['pnl'] < 0]

if len(winning_trades) > 0 and len(losing_trades) > 0:
    avg_win = winning_trades['pnl'].mean()
    avg_loss = abs(losing_trades['pnl'].mean())
    profit_loss_ratio = avg_win / avg_loss
    
    win_rate = len(winning_trades) / len(sell_orders) * 100
    loss_rate = len(losing_trades) / len(sell_orders) * 100
    
    display(Markdown(f"### Profit/Loss Ratio Analysis for {dataset_name}"))
    display(Markdown(f"- **Profit/Loss Ratio:** {profit_loss_ratio:.2f}:1"))
    display(Markdown(f"- **Average Winning Trade:** ${avg_win:,.2f}"))
    display(Markdown(f"- **Average Losing Trade:** ${avg_loss:,.2f}"))
    display(Markdown(f"- **Win Rate:** {win_rate:.1f}% ({len(winning_trades)} trades)"))
    display(Markdown(f"- **Loss Rate:** {loss_rate:.1f}% ({len(losing_trades)} trades)"))
    
    # Calculate APPT (Average Profitability Per Trade)
    appt = (win_rate/100 * avg_win) - (loss_rate/100 * avg_loss)
    display(Markdown(f"- **APPT (Average Profitability Per Trade):** ${appt:,.2f}"))
else:
    display(Markdown(f"**Note:** Insufficient data to calculate Profit/Loss Ratio"))

buy_orders = order_history[order_history['order_type'] == 'BUY']
sell_orders = order_history[order_history['order_type'] == 'SELL']
avg_order_size = buy_orders['executed_size'].mean()
display(Markdown(f"**Total Orders for {dataset_name}:** {total_orders}"))
display(Markdown(f"**Average Order Size for {dataset_name}:** {avg_order_size:,.2f}"))
# from sell_orders display the average pnl per asset
avg_pnl_sell = sell_orders.groupby('asset')['pnl'].sum()
avg_pnl_sell.sort_values(ascending=False, inplace=True)
display(Markdown(f"**Average PnL per Asset for {dataset_name}:**"))
display(avg_pnl_sell)

#plot an histogram of the order PnL
fig_pnl = px.histogram(order_history, x='pnl', nbins=50, title=f'Order PnL Distribution for {dataset_name}',
                       labels={'pnl': 'Profit and Loss ($)'})
fig_pnl.update_layout(height=500)
pyoff.iplot(fig_pnl)

#plot an histogram of the order PnL
fig_pnl = px.histogram(buy_orders, x='executed_size', nbins=50, title=f'Order Executed Size Distribution for {dataset_name}',
                       labels={'executed_size': 'Executed Size'})
fig_pnl.update_layout(height=500)
pyoff.iplot(fig_pnl)

### Basic Dataset Stats for dataset_51 on strategy GEM2

- **Final Value:** $124,420.59

- **Annual Volatility:** 0.1841

- **VWR (Variability-Weighted Return):** 5.8733

**Total Commissions for dataset_51:** $8,136.96

**Average PnL for dataset_51:** $41.50

### Profit/Loss Ratio Analysis for dataset_51

- **Profit/Loss Ratio:** 0.73:1

- **Average Winning Trade:** $341.15

- **Average Losing Trade:** $466.09

- **Win Rate:** 37.8% (230 trades)

- **Loss Rate:** 18.2% (111 trades)

- **APPT (Average Profitability Per Trade):** $43.89

**Total Orders for dataset_51:** 911

**Average Order Size for dataset_51:** 423.15

**Average PnL per Asset for dataset_51:**

asset
UGA     8307.007526
USO     7274.209370
DBE     4457.140941
DBB     3942.717239
CPER    3917.899218
SOYB    3351.594271
CORN    2961.266383
EWD     2671.221333
EWI     1737.719013
DBA     1422.709923
EWQ     1400.120609
EZU     1046.410954
EWJ      802.881554
TUR      703.360046
EWT      671.428955
EEM      536.630619
SPY      486.569946
CANE     455.129066
INDA     399.139633
BWX      337.960342
EWZ      327.963566
FEZ      277.849751
EWG      251.159500
EWA      173.409700
EMB       13.499451
URTH       6.309578
IEV      -71.130272
VGK      -76.380974
AIA     -127.373910
EWW     -132.941398
EPOL    -188.369625
ECH     -235.790782
IEF     -302.499138
EWY     -304.630569
GOVI    -530.679333
EWN     -557.813087
EWL     -584.889751
TLH     -622.521774
GLD     -660.040329
ILF     -668.827295
QQQ     -731.070053
WEAT    -795.924682
UNG     -958.657164
EWU     -960.715883
MCHI   -1093.981354
TLT    -1179.976242
GREK   -1280.865463
SLV    -1625.439575
ARGT   -1867.832993
UCO    -5646.6

In [None]:
#display(pd.DataFrame(portfolio_value_history))
#display(pd.DataFrame(order_history).sort_values('pnl', ascending=True).head(10))
#display(pd.DataFrame(order_history).sort_values('pnl', ascending=False).head(10))
uco_orders = order_history[(order_history['asset'] == 'UCO')] #  & (order_history['order_type'] == 'SELL')
display(pd.DataFrame(uco_orders).sort_values('executed_date', ascending=True))

Unnamed: 0_level_0,asset,order_type,status,created_date,created_price,created_size,executed_date,executed_price,executed_size,executed_value,commission,pnl
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
383,UCO,SELL,Completed,NoneType,17.7425,-1183.0,2021-06-08,17.615,-1183.0,20377.175451,20.838545,461.369278
447,UCO,SELL,Completed,NoneType,18.413375,-948.0,2021-07-19,17.844999,-948.0,19426.890289,16.917059,-2509.83094
612,UCO,SELL,Completed,NoneType,23.236999,-796.0,2021-11-03,23.236999,-796.0,19724.609825,18.496651,-1227.958517
620,UCO,SELL,Completed,NoneType,23.666875,-820.0,2021-11-10,23.666875,-820.0,19923.9505,19.406838,-517.112703
631,UCO,SELL,Completed,NoneType,22.393875,-785.0,2021-11-17,22.393875,-785.0,18645.712919,17.579192,-1066.520874
781,UCO,SELL,Completed,NoneType,31.43,-11.0,2022-02-15,29.445,-11.0,320.924992,0.323895,2.970005
796,UCO,SELL,Completed,NoneType,29.820499,-574.0,2022-02-25,29.820499,-574.0,16769.429573,17.116967,347.537094
809,UCO,SELL,Completed,NoneType,48.455002,-74.0,2022-03-09,43.0075,-74.0,2853.625,3.182555,328.929977
817,UCO,SELL,Completed,NoneType,33.400002,-436.0,2022-03-16,35.200001,-436.0,16813.25,15.3472,-1466.049667
378,UCO,SELL,Canceled,NoneType,16.855375,-1183.0,,,,,,
