In [None]:
# Performance Metrics (Monthly)
This notebook evaluates the classification approach to predicting returns, using ESS features in the AI/ML model.

import warnings
# Suppresses every warning category for the rest of the session, which also
# hides any pandas warnings raised by the cells below.
warnings.filterwarnings('ignore')

# Runs import_library.ipynb before anything else. This notebook never imports
# or defines pickle, pd, returns_movement, optimize_quantile,
# prediction_processing, performance, perf_format or xgb_monthly_combined
# itself, so they are presumably supplied by this run — TODO confirm.
%run import_library.ipynb

# Load the finalised monthly dataframe from disk.
# The context manager closes the file handle as soon as loading finishes
# (a bare open() call would leave it open for the whole session).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by this project's own pipeline.
with open('outputs/finalised_df/df_cn_month.pickle', "rb") as file:
    df = pickle.load(file)

# Independent copy of the loaded data, read again by the script-06
# pre-assignment cell further down.
for_testing_df = df.copy()

# Unique sedol identifiers, in order of first appearance in the data.
sedol_list = list(df.sedol.unique())
df

# Starting date per sedol: the first index label among that sedol's rows,
# taken in the dataframe's current row order.
sedol_dates = {
    code: list(df.loc[df['sedol'] == code].index)[0]
    for code in sedol_list
}

sedol_dates

### Various Assumptions Involved:
1. At the end of the trading day, the team will make a decision for the next day given the collated datasets for the day
2. Predicting the next day's price, and comparing the result against the actual closing price.

# model parameters
# These four values are passed unchanged into the tuning, prediction and
# performance calls in the cells below.

sentiment = False  # forwarded to optimize_quantile() and prediction_processing()
finalised_model = xgb_monthly_combined  # not defined in this notebook; presumably provided by import_library.ipynb — TODO confirm
frequency = "M"  # forwarded to optimize_quantile(), prediction_processing() and performance(); presumably "M" = monthly — confirm
# Rows whose index is < test_date are used for quantile tuning; rows >= test_date are the test set.
test_date = "2019-01-01" # sync this test date with script 04, where the train test split date is being stated

### Optimize Quantile
Purpose: To optimize the quantile for each individual firm based on the best excess returns. Note: this is not a general quantile for the financial sector; it is customised individually for each firm.

# run through sedol, companies specific. Note: Long run time (roughly 45 mins)
%time

quantile_dict = {}
predict_dict = {} # prediction results for companies

for i in sedol_list:    
    
    temp_df = df[(df['sedol'] == i)] # fitler and concat them
    temp_df.sort_index(axis=0, ascending=True, inplace=True)
    temp_df['returns_movement'] = temp_df.apply(lambda x: returns_movement(x), axis=1) # y true for the next week data

    # train test split - get the date
    train_df = temp_df[temp_df.index < test_date]
    test_df = temp_df[temp_df.index >= test_date]

    if train_df.empty: # if the df date is after test date
        best_performance_values = [0.9, 0.15, 0.5, 2.0]
        quantile_dict[i] = [best_performance_values, "nil"]
        
    else:
        # using train data to tune for optimal quantile
        best_performance_values, best_excess_return = optimize_quantile(train_df, i, finalised_model, sentiment, frequency, 0.90)
        quantile_dict[i] = [best_performance_values, best_excess_return]

    # best performance values based on excess returns tuning
    quantile = best_performance_values[0]
    ess_threshold = best_performance_values[1]
    returns_threshold = best_performance_values[2]
    news_spikes_threshold = best_performance_values[3]
    
    # using test data
    curr_df = prediction_processing(test_df, finalised_model, sentiment, frequency, quantile, ess_threshold, returns_threshold, news_spikes_threshold)
    predict_dict[i] = curr_df # for test dataset

### Classification Performance Metrics
To understand whether the timing of the price movements can be concluded from the sentiment movements.

quantile_dict

# Row labels of the per-firm metrics table.
# NOTE(review): assumes performance() returns its values in exactly this
# order — confirm against its definition.
metric_labels = [
    'Annualised Return (Buy Hold)',
    'Annualised Return (Sentiment)',
    'Annualised Excess Return',
    'Winrate',
    "Annualised Turnover",
    'Volatility (Buy Hold)',
    'Volatility (Sentiment)',
    'Max Drawdown (Buy Hold)',
    'Max Drawdown (Sentiment)',
    "Cumulative Return (Buy Hold)",
    "Cumulative Return (Sentiment)",
    'Sharpe Ratio',
    'F1 Score',
    "Precision",
]

# Built up one column per firm. Starting from None (instead of a counter)
# also guarantees that a table left over from an earlier run is never reused.
performance_df = None

# The loop variable is deliberately NOT called `df`, so the source dataframe
# loaded at the top of the notebook is not overwritten by this cell.
for s, predicted_df in predict_dict.items():  # s for sedol

    # creating the performance metrics for this firm
    firm_metrics = performance(predicted_df, s, frequency)

    if performance_df is None:  # first firm creates the table
        performance_df = pd.DataFrame(firm_metrics, index=metric_labels, columns=[s])
    else:  # later firms are appended as new columns
        performance_df[s] = firm_metrics

if performance_df is None:
    raise ValueError(
        "predict_dict is empty; run the quantile optimisation cell before "
        "computing performance metrics"
    )

performance_df = perf_format(performance_df)
performance_df

### Summary of Individual Firm Performance Metrics
Combine all the 30 holdings (listed in HK Exchange)

# absolute & cumulative returns chart
import plotly.graph_objects as go
import numpy as np


def _returns_figure(frame, entity_name, sentiment_col, buy_hold_col, label):
    """Build a two-line chart of the sentiment strategy vs. buy-and-hold.

    Args:
        frame: dataframe whose index is plotted on the x axis.
        entity_name: firm name used in the trace names and the title.
        sentiment_col: column of `frame` plotted as the sentiment strategy.
        buy_hold_col: column of `frame` plotted as the buy-hold strategy.
        label: y-axis title, also appended to the firm name in the chart title.

    Returns:
        The configured plotly Figure.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=frame.index, y=frame[sentiment_col], name=f"{entity_name} Sentiment Strategy"))
    fig.add_trace(go.Scatter(x=frame.index, y=frame[buy_hold_col], name=f"{entity_name} Buy Hold Strategy"))
    fig.update_layout(
        title=f"{entity_name} {label}",
        xaxis_title="Time Period",
        yaxis_title=label,
    )
    return fig


ldf_abs_returns_fig = []
ldf_cum_returns_fig = []

# The loop variable is deliberately NOT called `df`, so the source dataframe
# loaded at the top of the notebook is not overwritten by this cell.
for predicted_df in predict_dict.values():

    # .iloc[0] takes the first row by position; plain [0] on a Series whose
    # index is not integer-based relies on a deprecated positional fallback.
    entity_name = predicted_df['entity_name'].iloc[0]

    ldf_cum_returns_fig.append(
        _returns_figure(predicted_df, entity_name, 'return_mp_cum', 'return_bm_cum', "Cumulative Returns")
    )
    ldf_abs_returns_fig.append(
        _returns_figure(predicted_df, entity_name, 'return_mp', 'return_bm', "Absolute Returns")
    )

%matplotlib inline
import matplotlib.pyplot as plt
import ipywidgets as widgets

out1 = widgets.Output()
out2 = widgets.Output()
out3 = widgets.Output()

tab = widgets.Tab(children = [out1, out2, out3])
tab.set_title(0, 'Overview')
tab.set_title(1, 'Individual Firms (Cumulative Returns)')
tab.set_title(2, 'Individual Firms (Absolute Returns)')
display(tab)

with out1:
    final_performance_df = performance_df.copy().style.set_caption("Overview of Individual Firms Performance")
    display(final_performance_df)

with out2:
    for fig in ldf_cum_returns_fig:
        display(fig)
    
with out3:
    for fig in ldf_abs_returns_fig:
        display(fig)

### Pre-Assign Train Dataset For Script 06 Optimization

# sedol -> prediction results computed with that firm's tuned thresholds
predict_train_dict = {}

for sedol, tuned in quantile_dict.items():
    # tuned is [best_performance_values, best_excess_return or "nil"]
    best_performance_values = tuned[0]

    # Filter one firm. sort_index() returns a new frame, so the column added
    # below is written to an independent object rather than to a filtered
    # slice of for_testing_df that was mutated in place.
    temp_df = for_testing_df[for_testing_df['sedol'] == sedol].sort_index(axis=0, ascending=True)
    # y-true label for the following period.
    # NOTE(review): the original comment said "next week" although this
    # notebook runs with frequency = "M" — confirm against returns_movement().
    temp_df['returns_movement'] = temp_df.apply(returns_movement, axis=1)

    # best performance values based on excess returns tuning
    quantile = best_performance_values[0]
    ess_threshold = best_performance_values[1]
    returns_threshold = best_performance_values[2]
    news_spikes_threshold = best_performance_values[3]

    # NOTE(review): no date filter is applied here, so the frame passed in is
    # the firm's full history from for_testing_df, not only the rows before
    # test_date — confirm this is what script 06 expects from a "train" set.
    curr_df = prediction_processing(
        temp_df,
        finalised_model,
        sentiment,
        frequency,
        quantile,
        ess_threshold,
        returns_threshold,
        news_spikes_threshold,
    )
    predict_train_dict[sedol] = curr_df
