In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import matplotlib.pyplot as plt
from jupyterthemes import jtplot
import plotly.express as px
import plotly.io as pio

# Apply the jupyterthemes "monokai" plotting style (notebook context, axis
# ticks enabled) with a default figure size of 40 x 10.
jtplot.style(theme="monokai", context="notebook", ticks=True,  figsize=(40, 10))

In [2]:
# Same style call as the one at the end of the import cell, repeated in its
# own cell.
# NOTE(review): presumably re-applied because the settings did not stick when
# set in the import cell — confirm, and drop this cell if it is redundant.
jtplot.style(theme="monokai", context="notebook", ticks=True,  figsize=(40, 10))

In [3]:
# Trading-pair symbols used in this analysis (presumably the ten pairs with
# the largest market capitalization — verify against the data source).
TOP_10_CAPITALIZATION = [
    'btcusd',
    'ethusd',
    'eosusd',
    'ltcusd',
    'xrpusd',
    'babusd',
    'xmrusd',
    'neousd',
    'iotusd',
    "dshusd",
]


def timestamp2datetime(x):
    """Convert a millisecond epoch timestamp to a naive local ``datetime``.

    ``x`` is truncated to an integer first, then scaled from milliseconds
    to the seconds expected by ``datetime.fromtimestamp``.
    """
    seconds = int(x) / 1000.0
    return datetime.fromtimestamp(seconds)

In [7]:
# Read the trading-returns CSV and parse the open/close timestamps into
# datetimes while loading. No index column is set and the file is read
# uncompressed.
# NOTE(review): the frame is named "logistic" but the path points at the
# "forest" results directory — confirm which model this file belongs to.
trading_logistic_df = pd.read_csv(
    "../results/forest/returns/trading_returns_no_volume_future_2state_movement_120min_57.5_threshold.csv",
    parse_dates=["open_time", "close_time"],
    infer_datetime_format=True,
    sep=",",
)
print(trading_logistic_df.shape)
trading_logistic_df.head()

(13770, 12)


Unnamed: 0,position_num,type,pair,open_row_num,open_time,open_price,duration,close_row_num,close_time,close_price,close_return,transaction_cost
0,1,long,btcusd,3,2019-11-01 00:03:00,9149.2,120,123,2019-11-01 02:03:00,9163.7,0.001585,0
1,1,long,babusd,921,2019-11-01 15:21:00,273.48,120,1041,2019-11-01 17:21:00,275.785,0.008428,0
2,1,long,babusd,3572,2019-11-03 11:32:00,287.07,122,3694,2019-11-03 13:34:00,288.7,0.005678,0
3,1,long,neousd,5873,2019-11-05 01:53:00,11.1485,123,5996,2019-11-05 03:56:00,11.2565,0.009687,0
4,1,long,babusd,9279,2019-11-07 10:39:00,292.0,126,9405,2019-11-07 12:45:00,291.7,-0.001027,0


In [None]:
# Histogram of per-trade close returns, keeping only rows whose
# close_return is greater than -0.06.
fig = px.histogram(
    trading_logistic_df[trading_logistic_df["close_return"].gt(-0.06)],
    x="close_return",
)

# Two full-height vertical guide lines on the x axis, at -0.003 and +0.003.
# yref="paper" makes y0/y1 span the plot area regardless of the bar heights.
fig.update_layout(shapes=[
    dict(type="line", yref="paper", y0=0, y1=1, xref="x", x0=x_pos, x1=x_pos)
    for x_pos in (-0.003, 0.003)
])

In [8]:
def get_total_return(returns_df, costs_bps=None):
    """Average compounded return per trade type for several transaction costs.

    For every cost level each trade's growth factor is
    ``1 + close_return - cost``. Growth factors are multiplied within each
    ``(position_num, type)`` group, 1 is subtracted to turn the product back
    into a return, and the per-position results are averaged for each ``type``.

    Parameters
    ----------
    returns_df : pandas.DataFrame
        Must contain the columns ``position_num``, ``type`` and
        ``close_return``. The frame is left unmodified.
    costs_bps : iterable of float, optional
        Per-trade costs expressed as fractions (``0.0005`` is 5 bps).
        ``None`` or an empty iterable selects the default grid of
        0 to 40 bps in 5 bps steps.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``type``, with one ``total_return_<n>bps`` column per cost.
    """
    # None default instead of a shared mutable list; materialise once so that
    # generators and arrays can be iterated more than once below.
    costs = list(costs_bps) if costs_bps is not None else []
    if not costs:
        costs = [step * 0.0005 for step in range(9)]

    # round() rather than int(): int() truncates, so a product such as
    # 0.0029 * 10**4 == 28.999999999999996 would be mislabelled as 28bps.
    bps_labels = [int(round(float(cost) * 10**4)) for cost in costs]

    # Build the growth factors in a separate frame so the caller's DataFrame
    # does not silently gain helper columns.
    growth_df = returns_df[["position_num", "type"]].copy()
    for cost, bps in zip(costs, bps_labels):
        growth_df[f"return_growth_rate_{bps}bps"] = 1 + returns_df["close_return"] - cost

    # Compound the growth factors within each position, separately for its
    # long and short legs, then convert the product back into a return.
    position_returns_df = (
        growth_df
        .groupby(["position_num", "type"])
        .agg(
            **{
                f"total_return_{bps}bps": (f"return_growth_rate_{bps}bps", "prod")
                for bps in bps_labels
            }
        )
        - 1
    )

    # Average the per-position total returns for each trade type.
    return position_returns_df.groupby(level="type").agg("mean")

In [9]:
# Mean compounded return per trade type, using the function's default
# transaction-cost grid (no costs_bps passed).
get_total_return(trading_logistic_df)

Unnamed: 0_level_0,total_return_0bps,total_return_5bps,total_return_10bps,total_return_15bps,total_return_20bps,total_return_25bps,total_return_30bps,total_return_35bps,total_return_40bps
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
long,0.175837,0.133204,0.092096,0.05246,0.014243,-0.022603,-0.058129,-0.09238,-0.125402
short,0.417635,0.311775,0.213773,0.123049,0.039067,-0.038673,-0.110632,-0.177235,-0.238881


In [None]:
# Transaction-cost levels as fractions: 0 to 40 bps in 5 bps steps.
# Alternative, non-uniform grid kept for reference:
# costs_bps = [ 0, 0.001, 0.0015, 0.002, 0.003, 0.0035, 0.004 ]
costs_bps = [step * 0.0005 for step in range(9)]

# Add one growth-factor column per cost level directly onto the trades frame:
# growth = 1 + close_return - cost.
for cost in costs_bps:
    trading_logistic_df[f"return_growth_rate_{ int( cost * 10**4 ) }bps"] = (
        trading_logistic_df["close_return"] + 1 - cost
    )

trading_logistic_df.head()

In [None]:
# Compound the growth factors of all trades within each (position_num, type)
# group, then subtract 1 to express the product as a total return.
position_returns_df = (
    trading_logistic_df
    .groupby(["position_num", "type"])
    .agg(
        **{
            f"total_return_{ int( cost * 10**4 ) }bps": (
                f"return_growth_rate_{ int( cost * 10**4 ) }bps",
                "prod",
            )
            for cost in costs_bps
        }
    )
    .sub(1)
)
position_returns_df.head()

In [None]:
# Average the per-position total returns for each trade type.
position_returns_df.groupby(level="type").mean()