In [1]:
# Syntax error being tracked here: https://github.com/microsoft/vscode-jupyter/issues/6635
# If it's annoying, workaround here: https://stackoverflow.com/a/35597119/768439
%load_ext autoreload
%autoreload 2

# TODO: Use this to profile: https://stackoverflow.com/questions/45893768/how-do-i-find-out-what-parts-of-my-code-are-inefficient-in-python
%load_ext line_profiler

In [2]:
import sys
sys.path.append('../')

from src.serialization_lib import *
from src.data_types import *
from IPython.display import HTML, display, Markdown
from typing import List

import pdfkit as pdf

import os
import pandas as pd
import statistics
import datetime
import numpy as np
import matplotlib.pyplot as plt

pd.set_option('display.max_colwidth', 100)
pd.options.display.float_format = "{:,.2f}".format

In [3]:
INITIAL_PORTFOLIO_VALUE = 10000

BASE_METRIC = EvaluationMetric.EV_EBIT
TEST_METRIC = EvaluationMetric.P_B
STOCKS_UNIVERSE = StockUniverse.LARGE
PORTFOLIO_WEIGHT_STRATEGY = StockBasketWeightApproach.EQUAL_WEIGHTING

INITIAL_PORTFOLIO_VALUE = 10000
PORTFOLIO_SIZE = 30
REBALANCE_DAYS = 90

DATA_BASE_PATH = '/Volumes/SDCard/TipBackTest/'
DATA_PROCESSED_BASE_PATH = os.path.join(DATA_BASE_PATH, 'processed_data')
DATA_RESULT_BASE_PATH = os.path.join(DATA_BASE_PATH, 'results')

DEVELOPMENT = False
env = 'dev' if DEVELOPMENT else 'prod'

In [4]:
df_res_filename = get_feather_filename('df_res', BASE_METRIC, TEST_METRIC, REBALANCE_DAYS, PORTFOLIO_SIZE, STOCKS_UNIVERSE, env)
df_res_filename = os.path.join(DATA_PROCESSED_BASE_PATH, df_res_filename)
df_res = read_df_from_feather(df_res_filename)

# TODO: Need to fix the source code that generates these dataframes
if 'index' in df_res:
    df_res.rename(columns={'index': 'date'}, inplace=True)
    df_res.set_index('date', inplace=True)

df_debug_filename = get_feather_filename('df_debug', BASE_METRIC, TEST_METRIC, REBALANCE_DAYS, PORTFOLIO_SIZE, STOCKS_UNIVERSE, env)
df_debug_filename = os.path.join(DATA_PROCESSED_BASE_PATH, df_debug_filename)
df_debug = read_df_from_feather(df_debug_filename)

# TODO: Need to fix the source code that generates these dataframes
if 'index' in df_debug:
    df_debug.rename(columns={'index': 'date'}, inplace=True)
    df_debug.set_index('date', inplace=True)

In [5]:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/style.html
def color_per_cell(v):
    per = float(v if str(v)[-1] != '%' else str(v)[:-1])
    if per < -25:
        return "background-color: orangered"
    elif per >= -25 and per < 0:
        return "background-color: #ffcccb"
    elif per >= 0 and per < 25:
        return "background-color: #90ee90"
    elif per >= 25:
        return "background-color: green"

def display_two_decimal_places(val: float) -> str:
    return f'${val:,.2f}'

def display_date_only(d: datetime) -> str:
    return d.strftime('%Y-%m-%d')

def stock_rebalance_instances_to_df(rebalances_array: List[StockRebalanceInstance]) -> pd.DataFrame:
    vars = list(map(lambda f: f.name, dataclasses.fields(rebalances_array[0])))
    df = pd.DataFrame.from_records([[getattr(i, v) for v in vars] for i in rebalances_array], columns=['ticker', 'prev', 'curr'])
    df['ticker'] = df['ticker'].str.replace("'", "")
    return df

def per_stocks_up(rebalances_array: List[StockRebalanceInstance]) -> str:
    return f'{round(100 * sum([1 for reb_i in rebalances_array if (reb_i.curr_price > reb_i.prev_price)]) / len(rebalances_array), 2)}%'

def p_change(r: pd.Series) -> str:
    return f'{round(100 * (r.base_portfolio_curr_price - r.base_portfolio_prev_price ) / r.base_portfolio_prev_price, 2)}%'

def p_change2(r: pd.Series) -> str:
    return f'{round(100 * (r.curr - r.prev ) / r.prev, 2)}%'

def get_date_based_df(df_debug, date):
    df_date = df_debug.loc[date]
    stock_rebalance_instances = df_date['base_portfolio_per_ticker_data']
    df = stock_rebalance_instances_to_df(stock_rebalance_instances)
    tickers_closed = [str(s) for s in df_date['base_portfolio_tickers_closed']]
    df['up'] = df.apply(lambda r: r['curr'] > r['prev'], axis=1)
    df['per_change'] = df.apply(lambda r: p_change2(r), axis=1)
    df['did_close'] = df.apply(lambda r: "Closed" if str(r.ticker) in tickers_closed else "", axis=1)
    df = df.sort_values(by='per_change', key=lambda col: col.map(lambda v: float(v[:-1])))    
    summary_row = [
        'Summary / Total ',
        round(sum(df['prev']), 20),
        round(sum(df['curr']), 20),
        f"{round(100 * sum(df['up']) / len(df['up']))}%",
        f"{round(statistics.mean([float(p[:-1]) for p in df['per_change']]), 2)}%",
        ""
    ]
    df.loc[len(df.index)] = summary_row
    return df

df_debug['per_stock_up'] = df_debug.apply(lambda r: per_stocks_up(list(r.base_portfolio_per_ticker_data)), axis=1)
df_debug['portfolio_change'] = df_debug.apply(lambda r: p_change(r), axis=1)

df_debug_readable = df_debug.copy()
df_debug_readable['num_tickers_closed'] = df_debug_readable.apply(lambda r: len(r.base_portfolio_tickers_closed), axis=1)
df_debug_readable = df_debug_readable.drop(['base_portfolio_tickers_closed', 'base_portfolio_per_ticker_data', 'new_base_portfolio_per_ticker_data'], axis=1)
df_debug_readable.rename(columns={'base_portfolio_prev_price': 'prev', 'base_portfolio_curr_price': 'curr'}, inplace=True)

date_to_inspect = df_debug.index[7]
df_date = get_date_based_df(df_debug, date_to_inspect)

In [6]:
table_styles = [
        {
            'selector': 'caption',
            'props': [
                ('color', 'black'),
                ('font-size', '30px'),
                ('text-align', 'center'),
                ('padding-bottom', '30px')
            ]
        },
        {
            'selector': 'tr',
            'props': [
                ('border-bottom', '1px solid black'),
            ]
        },        
        {
            'selector': 'tr:hover',
            'props': [
                ('color', 'blue'),
                ('font-weight', 'bold')
            ]
        },                
        {
            'selector': 'td',
            'props': [
                ('padding', '15px'),
                ('text-align', 'center'),
            ]
        },                
        {
            'props': [
                ('background-color', 'white'),
            ]
        },
    ]

table_attributes = 'style="border-collapse:collapse"'

def save_and_display(html: str, filename: str):
    with open (filename, 'w') as f:
        f.write(html)
    pdf.from_string(html, filename.replace('.html', '.pdf'))
    display(HTML(html))


In [7]:
df_debug_with_style = df_debug_readable.rename(columns={
    'per_stock_up': '% stocks up',
    'portfolio_change': '% change',
    'num_tickers_closed': 'num closed'
    }).round(2).style \
    .format({
        'curr': display_two_decimal_places,
        'prev': display_two_decimal_places,
        'date': display_date_only,
        'prev_date': display_date_only,
        'curr_date': display_date_only,    
    }).set_caption(
        f"Return for \n" \
        f" for {str(STOCKS_UNIVERSE)}\n" \
        f" for {str(BASE_METRIC)}" \
        f" for rebalance period: {str(REBALANCE_DAYS)}" \
        f" for portfolio size: {str(PORTFOLIO_SIZE)}" \
    ).set_table_styles(table_styles) \
    .set_table_attributes(table_attributes)

results_filename_html = os.path.join(DATA_RESULT_BASE_PATH, f'{BASE_METRIC.file_friendly()} - Rebalance {REBALANCE_DAYS} for Portfolio Size {PORTFOLIO_SIZE}.html')
results_html = df_debug_with_style.applymap(color_per_cell, subset=['% stocks up', '% change']).hide_index().render()
save_and_display(results_html, results_filename_html)

In [None]:
date_str = datetime.datetime.strftime(date_to_inspect, '%Y-%m-%d')

df_with_style = df_date \
    .rename(columns={
        'per_change': '% change'
    }).style \
    .format({
        'curr': display_two_decimal_places,
        'prev': display_two_decimal_places,
    }) \
    .set_caption(
        f"Rebalance results" \
        f" on {date_str}" \
        f" for {str(STOCKS_UNIVERSE)}" \
        f" for {str(BASE_METRIC)}"
    ).set_table_styles(table_styles) \
    .set_table_attributes(table_attributes)

rebalance_day_filename_html = os.path.join(DATA_RESULT_BASE_PATH, f'{BASE_METRIC.file_friendly()} - {date_str} rebalance.html')
rebalance_day_html = df_with_style.applymap(color_per_cell, subset=['% change']).hide_index().render()
save_and_display(rebalance_day_html, rebalance_day_filename_html)

Loading pages (1/6)
Counting pages (2/6)                                               
Resolving links (4/6)                                                       
Loading headers and footers (5/6)                                           
Printing pages (6/6)
Done                                                                      


ticker,prev,curr,up,% change,did_close
LGHS,$1.09,$0.26,False,-76.15%,
CPHI,$0.37,$0.20,False,-45.95%,
BOPH,$0.85,$0.48,False,-43.53%,
CRTP,$0.70,$0.40,False,-42.86%,
CHBT,$2.90,$1.69,False,-41.72%,
VRNM,$3.45,$2.20,False,-36.23%,
MSN,$1.29,$1.06,False,-17.5%,
CVVT,$0.52,$0.43,False,-17.47%,
CMKG,$1.15,$0.97,False,-15.65%,
XUE,$2.58,$2.24,False,-13.07%,


In [None]:
def cagr(start: float, end: float, n: int) -> float:
    cagr = pow(end / start, 1 / n) - 1
    return cagr * 100

def value_growth(cagr: float, n: int) -> float:
    return 10000 * pow(1 + cagr / 100, n)    

def get_df_with_style(data: dict, caption: str) -> None:
    return pd.DataFrame(
        data, 
        columns = ['Time Period', 'CAGR', 'SD', '$10,000 grows to...']) \
    .style.set_caption(caption) \
    .format({
        "CAGR": "{:.2f}%",
        "SD": "{:.2f}%",
        '$10,000 grows to...': "${:,.2f}"
    }) \
    .set_table_styles(table_styles) \
    .set_table_attributes(table_attributes)

min_date = min(df_res.index)
max_date = max(df_res.index)

time_period_table_days = 365 * 1

tabulated_base_data = []
tabulated_test_data = []

period_start_date = min_date

idx_s = 0
idx_e = 0

for date, data in df_res.iterrows():
    idx_e += 1
    td = (date - period_start_date)
    if (td.days >= time_period_table_days - 5) and idx_s < len(df_res):
        n = round(td.days / 365)    
        df = df_res.iloc[idx_s:idx_e]
        
        prev_data = df_res.iloc[idx_s]
        curr_data = df_res.iloc[idx_e]
        
        period_start_dateiod = list(d.strftime('%Y-%m-%d') for d in [period_start_date, date])
        
        base_start_p = prev_data['base_price']
        base_end_p = curr_data['base_price']
        base_cagr = cagr(base_end_p, base_start_p, n)
        base_sd = np.std(df['base_price'].values)
        base_sd = base_sd / np.sum(df['base_price'].values) * 100
        base_growth = value_growth(base_cagr, n)
        
        tabulated_base_data.append([period_start_dateiod, base_cagr, base_sd, base_growth])
        
        test_start_p = prev_data['test_price']
        test_end_p = curr_data['test_price']
        test_cagr = cagr(test_start_p, test_end_p, n)
        test_sd = np.std(df['test_price'].values)
        test_sd = test_sd / np.sum(df['test_price'].values) * 100
        test_growth = value_growth(test_cagr, n)

        tabulated_test_data.append([period_start_dateiod, test_cagr, test_sd, test_growth])
        
        idx_s = idx_e + 1
        idx_e += 1
        period_start_date = date

base_df_with_style = get_df_with_style(tabulated_base_data, f'{str(BASE_METRIC)} - Summary.html')         
base_summary_filename_html = os.path.join(DATA_RESULT_BASE_PATH, f'{BASE_METRIC.file_friendly()} - Summary.html')
base_summary_html = base_df_with_style.applymap(color_per_cell, subset=['CAGR', 'SD']).render()
save_and_display(base_summary_html, base_summary_filename_html)

test_df_with_style = get_df_with_style(tabulated_test_data, f'{str(TEST_METRIC)} - Summary.html')         
test_summary_filename_html = os.path.join(DATA_RESULT_BASE_PATH, f'{TEST_METRIC.file_friendly()} - Summary.html')
test_summary_html = test_df_with_style.applymap(color_per_cell, subset=['CAGR', 'SD']).render()
save_and_display(test_summary_html, test_summary_filename_html)

Loading pages (1/6)
Counting pages (2/6)                                               
Resolving links (4/6)                                                       
Loading headers and footers (5/6)                                           
Printing pages (6/6)
Done                                                                      


Unnamed: 0,Time Period,CAGR,SD,"$10,000 grows to..."
0,"['2011-01-03', '2011-12-29']",41.79%,3.95%,"$14,179.43"
1,"['2011-12-29', '2013-03-22']",-25.62%,1.84%,"$7,438.17"
2,"['2013-03-22', '2014-03-18']",-4.32%,1.34%,"$9,568.13"
3,"['2014-03-18', '2015-03-13']",-1.60%,0.20%,"$9,839.61"
4,"['2015-03-13', '2016-03-07']",-14.81%,2.09%,"$8,519.34"
5,"['2016-03-07', '2017-03-02']",-21.27%,2.50%,"$7,872.96"
6,"['2017-03-02', '2018-05-25']",9.21%,1.92%,"$10,921.00"
7,"['2018-05-25', '2019-05-21']",-57.26%,6.96%,"$4,274.07"


Loading pages (1/6)
Counting pages (2/6)                                               
Resolving links (4/6)                                                       
Loading headers and footers (5/6)                                           
Printing pages (6/6)
Done                                                                      


Unnamed: 0,Time Period,CAGR,SD,"$10,000 grows to..."
0,"['2011-01-03', '2011-12-29']",-28.87%,4.58%,"$7,113.28"
1,"['2011-12-29', '2013-03-22']",21.47%,1.32%,"$12,146.53"
2,"['2013-03-22', '2014-03-18']",-25.47%,2.37%,"$7,452.55"
3,"['2014-03-18', '2015-03-13']",-32.70%,5.09%,"$6,730.17"
4,"['2015-03-13', '2016-03-07']",-14.19%,1.04%,"$8,581.25"
5,"['2016-03-07', '2017-03-02']",-10.18%,1.05%,"$8,982.02"
6,"['2017-03-02', '2018-05-25']",-28.08%,3.91%,"$7,192.24"
7,"['2018-05-25', '2019-05-21']",389.09%,14.49%,"$48,909.38"


In [None]:
all_html_files = [
    base_summary_filename_html,
    test_summary_filename_html,
    rebalance_day_filename_html,
    results_filename_html
]


In [None]:
from bs4 import BeautifulSoup

head = "<head>"
body = "<body>"

for filename in all_html_files:
    with open(filename, 'r') as f:
        data = BeautifulSoup(f.read())
        head += str(data.head()[0])
        body += str(data.body()[0])
        body += "<br><br>"

head += "</head>"
body += "</body>"
html = f"<html>{head}{body}</html>"

with open(os.path.join(DATA_RESULT_BASE_PATH, 'combined.html'), 'w') as f:
    f.write(html)