In [1]:
# Notebook-wide IPython extensions.
#
# The `%` magic lines below are reported as syntax errors by the editor; tracked here:
# https://github.com/microsoft/vscode-jupyter/issues/6635
# If it's annoying, a workaround is described here: https://stackoverflow.com/a/35597119/768439
#
# autoreload mode 2 re-imports modules before each cell runs, so edits to the
# local `src` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# TODO: Use line_profiler (loaded below) to profile hot spots, see:
# https://stackoverflow.com/questions/45893768/how-do-i-find-out-what-parts-of-my-code-are-inefficient-in-python
%load_ext line_profiler

In [2]:
import sys
sys.path.append('..')

import pandas as pd
import pprint
import os

from src.backtest import *
from src.backtest_helpers import *
from src.serialization_lib import *
from src.data_types import *

# Show full cell contents when rendering DataFrames (no column-width truncation).
pd.options.display.max_colwidth = None

# Shared pretty-printer for nested Python structures.
pp = pprint.PrettyPrinter(indent=4)

In [3]:
# Data
#
# Builds the merged daily metrics + adjusted-close price table, or loads it
# from the feather cache when one exists and FORCE_RECOMPUTE is off.

# SECURITY: API keys must not live in the notebook. They are read from the
# environment instead (None when unset). Any key that was previously committed
# in this file should be treated as leaked and rotated.
QUANDL_API_KEY_PROD = os.environ.get("QUANDL_API_KEY_PROD")
QUANDL_API_KEY_DEV = os.environ.get("QUANDL_API_KEY_DEV")

# Root of the data tree. Override with TIPBACKTEST_DATA_DIR on machines where
# the default volume is not mounted; the default keeps the original paths.
DATA_BASE_PATH = os.environ.get('TIPBACKTEST_DATA_DIR', '/Volumes/SDCard/TipBackTest')
DATA_RAW_BASE_PATH = os.path.join(DATA_BASE_PATH, 'raw_data')
DATA_PROCESSED_BASE_PATH = os.path.join(DATA_BASE_PATH, 'processed_data')
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(DATA_PROCESSED_BASE_PATH, exist_ok=True)

# FORCE_RECOMPUTE=True bypasses every on-disk cache guarded by this flag.
FORCE_RECOMPUTE = True
# DEVELOPMENT selects the smaller dev CSVs and the 'dev' cache-file suffix.
DEVELOPMENT = False
env = 'dev' if DEVELOPMENT else 'prod'
DAILY_DATA_FILE_NAME = os.path.join(DATA_PROCESSED_BASE_PATH, f"daily_data_{env}.feather")


if not os.path.exists(DAILY_DATA_FILE_NAME) or FORCE_RECOMPUTE:
    print("Recomputing daily data...")
    daily_metrics_filename = 'SHARADAR-DAILY.csv' if DEVELOPMENT else 'SHARADAR_DAILY_3_9ffd00fad4f19bbdec75c6e670d3df83.csv'
    daily_prices_filename = 'SHARADAR-SEP.csv' if DEVELOPMENT else 'SHARADAR_SEP_2_0bd2000858d1d8d1f48d4cdea5f8c9e2.csv'

    daily_metrics = pd.read_csv(os.path.join(DATA_RAW_BASE_PATH, daily_metrics_filename))
    daily_prices = pd.read_csv(os.path.join(DATA_RAW_BASE_PATH, daily_prices_filename))

    # Attach the adjusted close to each (date, ticker) metrics row; the inner
    # join drops rows that are missing from either file. Chained (no inplace)
    # so re-running the cell never mutates a previously displayed frame.
    daily_data = (
        daily_metrics
        .merge(daily_prices[['ticker', 'date', 'closeadj']], on=['date', 'ticker'], how='inner')
        .rename(columns={'closeadj': 'price'})
        .sort_values(by='date')
    )
    write_df_to_feather(daily_data, DAILY_DATA_FILE_NAME)
else:
    print("Reading daily data...")
    daily_data = read_df_from_feather(DAILY_DATA_FILE_NAME)

daily_data

Reading daily data...


Unnamed: 0,index,ticker,date,lastupdated,ev,evebit,evebitda,marketcap,pb,pe,ps,price
0,14551059,MHRCQ,2011-01-03,2021-05-30,482.6,-30.4,-65.7,430.2,6.2,-19.8,14.0,7.420
1,13461774,FFDF,2011-01-03,2019-12-13,20.7,11.3,9.0,14.4,0.8,12.0,2.0,10.073
2,13461775,FFEX,2011-01-03,2018-10-18,79.4,-3.9,-3053.8,81.6,1.0,-6.3,0.2,4.660
3,13461776,FFFD,2011-01-03,2018-10-18,50.9,17.2,12.1,22.6,0.5,13.7,1.2,16.618
4,13461777,FFG,2011-01-03,2018-10-18,1227.9,7.2,7.0,911.9,0.8,9.1,0.9,19.087
...,...,...,...,...,...,...,...,...,...,...,...,...
14551055,14534512,NGG,2021-05-28,2021-05-28,89444.2,24.3,15.5,50556.9,2.0,31.6,2.7,66.870
14551056,14534513,NGL,2021-05-28,2021-05-28,3734.4,-7.9,-27.8,308.7,0.3,-0.4,0.1,2.390
14551057,14534514,NGM,2021-05-28,2021-05-28,1010.3,-9.1,-9.7,1159.9,2.9,-10.5,,15.060
14551058,14534503,NFE,2021-05-28,2021-05-28,9671.8,-70.6,-97.6,8702.0,30.5,-41.1,16.6,42.100


In [4]:
# Prepare Inputs for Base + Test
INITIAL_PORTFOLIO_VALUE = 10000
PORTFOLIO_SIZE = 30
REBALANCE_DAYS = 90

BASE_METRIC = EvaluationMetric.EV_EBIT
TEST_METRIC = EvaluationMetric.P_B
STOCKS_UNIVERSE = StockUniverse.LARGE
PORTFOLIO_WEIGHT_STRATEGY = StockBasketWeightApproach.EQUAL_WEIGHTING

# Cache locations for the pre-sorted frames, built once so the existence
# check, the write and the read can never drift apart.
# NOTE: the file names encode only `env`, not BASE_METRIC / TEST_METRIC, so
# after changing either metric set FORCE_RECOMPUTE = True to avoid reading a
# cache that was sorted by a different metric.
DAILY_DATA_BASE_SORTED_FILE_NAME = os.path.join(DATA_PROCESSED_BASE_PATH, f'daily_data_base_sorted_{env}.feather')
DAILY_DATA_TEST_SORTED_FILE_NAME = os.path.join(DATA_PROCESSED_BASE_PATH, f'daily_data_test_sorted_{env}.feather')

# Both files are read in the cached branch, so both must exist; checking only
# the base file would crash on read when the test file is missing.
sorted_cache_complete = (
    os.path.exists(DAILY_DATA_BASE_SORTED_FILE_NAME)
    and os.path.exists(DAILY_DATA_TEST_SORTED_FILE_NAME)
)

if not sorted_cache_complete or FORCE_RECOMPUTE:
    print("Recomputing sorted daily data...")
    # Optimization to avoid sorting every time
    daily_data_base_sorted = sort_df_by_metric(daily_data, BASE_METRIC)
    daily_data_test_sorted = sort_df_by_metric(daily_data, TEST_METRIC)

    daily_data_base_sorted.to_feather(DAILY_DATA_BASE_SORTED_FILE_NAME)
    daily_data_test_sorted.to_feather(DAILY_DATA_TEST_SORTED_FILE_NAME)
else:
    print("Reading sorted daily data...")
    daily_data_base_sorted = read_df_from_feather(DAILY_DATA_BASE_SORTED_FILE_NAME)
    daily_data_test_sorted = read_df_from_feather(DAILY_DATA_TEST_SORTED_FILE_NAME)

Reading sorted daily data...


KeyboardInterrupt: 

In [None]:
# Run the base-vs-test backtest with the inputs prepared above; the result
# object is kept in `back_test_result` for the cells that follow.
back_test_result = compute_backtest_dfs(
    # metrics under comparison
    BASE_METRIC, TEST_METRIC,
    # portfolio construction settings
    STOCKS_UNIVERSE, PORTFOLIO_WEIGHT_STRATEGY,
    REBALANCE_DAYS, PORTFOLIO_SIZE, INITIAL_PORTFOLIO_VALUE,
    # unsorted frame plus its two pre-sorted variants
    daily_data, daily_data_base_sorted, daily_data_test_sorted,
    save_to_disk=True,
    env=env,
)

In [None]:
# Persist both result frames under DATA_PROCESSED_BASE_PATH. Each entry maps
# the file-name prefix to the attribute of `back_test_result` holding the frame.
for prefix, frame_attr in (('df_debug', 'df_debug'), ('df_res', 'df')):
    filename = os.path.join(
        DATA_PROCESSED_BASE_PATH,
        get_feather_filename(prefix, BASE_METRIC, TEST_METRIC, REBALANCE_DAYS, PORTFOLIO_SIZE, STOCKS_UNIVERSE, env),
    )
    # Look the frame up only now, so the first file is written before the
    # second attribute is touched (same order as writing them one by one).
    write_df_to_feather(getattr(back_test_result, frame_attr), filename)