In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path

In [3]:
import sys
sys.path.append("/dsmlp/home-fs04/19/019/riling/scalable_rl_portfolio_management")


In [4]:
from tics.tic_config import tics_176, tics_grouped
from agent.data_downloader import short_name_sha256

In [5]:
train_start_date= '2009-01-01'
train_end_date= '2020-07-01'
# test_start_date= '2020-07-01'
test_start_date = '2020-09-14'
test_end_date= '2021-10-01'

In [6]:
start = train_start_date
end = test_end_date

In [7]:
df_baseline = pd.read_csv(Path("acount_value") / "dow_jones_data.csv")[['close', 'date']].rename(columns={'close': 'account_value'})
df_manager = pd.read_csv(Path("acount_value") / "manager_2.csv")[['account_value', 'date']]
df_176 = pd.read_csv(Path("acount_value") / "176.csv")[['account_value', 'date']]

In [8]:
dfs = []
tics = []
for i in range(6):
    tics += tics_grouped[i]
    name = short_name_sha256('_'.join(tics_grouped[i]))
    vaulue_data_file = Path("acount_value") / f"{name}_{start}_{end}.csv"
    value_df = pd.read_csv(vaulue_data_file)[['account_value', 'date']].drop_duplicates()
    dfs.append(value_df)

In [9]:
def normalize_from(df: pd.DataFrame, start_date: str, end_date) -> pd.DataFrame:
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'])
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)
    
    df = df[df['date'] >= start]
    df = df[df['date'] <= end].sort_values('date')
    
    mask = df['date'] == start
    if not mask.any():
        raise ValueError(f"没有找到日期 {start_date} 的记录")
    V0 = df.loc[mask, 'account_value'].iloc[0]
    
    # 4. 归一化
    df['account_value'] = df['account_value'] / V0
    
    return df

In [13]:
df_baseline_normalized = normalize_from(df_baseline, "2009-03-17", test_end_date)
dfs_normalized = [normalize_from(df, "2009-03-17", test_end_date) for df in dfs]
# df_manager_normalized = normalize_from(df_manager, train_start_date, test_end_date)
# df_176_normalized = normalize_from(df_176, train_start_date, test_end_date)

In [None]:
import plotly.graph_objects as go
import plotly.express as px

def plot_account_value_comparison_plotly(
    models = None,
    model_labels = None,
    baseline = None,
    baseline_label = None,
    manager = None,
    manager_lable = None,
    x_col: str = 'date',
    y_col: str = 'account_value',
    title: str = "Pool Return Comparison"
) -> go.Figure:
    """
    用 Plotly 绘制多条模型的归一化账户价值（收益率）曲线及一条基准曲线，
    其中所有 y_col 已从 1 起归一化，代表收益率曲线。

    参数
    ----
    models : list[pd.DataFrame]
        要对比的模型列表，每个 DataFrame 至少包含 x_col 和 y_col 两列。
    model_labels : list[str]
        与 models 一一对应的图例标签。
    baseline : pd.DataFrame
        基准模型的 DataFrame，结构同上。
    baseline_label : str
        基准模型的图例标签。
    x_col : str, optional
        用作横轴的列名，默认为 'date'。
    y_col : str, optional
        用作纵轴的列名，默认为 'account_value'。
    title : str, optional
        图表标题，默认为 "Normalized Return Comparison"。
    """


    fig = go.Figure()

    if models is not None:
        color_sequence = px.colors.qualitative.Pastel

        for i, (df, label) in enumerate(zip(models, model_labels)):
            days = list(range(1, len(df) + 1))
            fig.add_trace(
                go.Scatter(
                    # x=df[x_col],
                    x=days,
                    y=df[y_col],
                    mode='lines',
                    name=label,
                    line=dict(color=color_sequence[i % len(color_sequence)], width=2),
                    opacity=0.6
                )
            )

    if manager is not None:
        days = list(range(1, len(manager) + 1))
        fig.add_trace(
            go.Scatter(
                # x=manager[x_col],
                x=days,
                y=manager[y_col],
                mode='lines',
                name=manager_lable,
                line=dict(color='red', width=4)
            )
        )

    if baseline is not None:
        days = list(range(1, len(baseline) + 1))
        fig.add_trace(
            go.Scatter(
                # x=baseline[x_col],
                x=days,
                y=baseline[y_col],
                mode='lines',
                name=baseline_label,
                line=dict(color='blue', width=4, dash='dash')
            )
        )

    fig.update_layout(
        width=1000,
        height=600,
        margin=dict(l=40, r=40, t=40, b=40),
        title=title,
        xaxis_title=x_col.capitalize(),
        yaxis_title="Total Return Rate",
        template="plotly_white",
        # legend=dict(title="Model")
    )

    return fig

In [15]:
fig1 = plot_account_value_comparison_plotly(
    models=dfs_normalized,
    model_labels=[f"Pool {i+1}" for i in range(len(dfs_normalized))],
    baseline=df_baseline_normalized,
    baseline_label="Dow Jones",
    manager=df_manager_normalized,
    manager_lable="Manager",
    title="Return Comparison"
)
fig1.show()

NameError: name 'df_manager_normalized' is not defined

In [22]:
fig2 = plot_account_value_comparison_plotly(
    models=dfs_normalized[2:2],
    model_labels=[f"Pool {i+1}" for i in range(len(dfs_normalized))][2:2],
    baseline=df_baseline_normalized,
    baseline_label="Dow Jones"
)
fig2.show()

In [56]:
fig3 = plot_account_value_comparison_plotly(
    baseline=df_baseline_normalized,
    baseline_label="Dow Jones",
    manager=df_manager_normalized,
    manager_lable="Manager",
    title="Manager Return Rate"
)
fig3.show()