In [1]:
# Emits an HTML <script> as the cell output. The script defines code_toggle(),
# which hides every `div.input` element while `code_show` is true and shows
# them otherwise, then flips the flag. It is registered to run once when the
# document is ready, so the code inputs start hidden (code_show starts as true).
# NOTE(review): the script relies on `$` (jQuery) being available in the page
# that renders the notebook -- confirm for the target front-end.
from IPython.display import HTML
print('Disabling code cells ...')
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>''')

Disabling code cells ...


In [2]:
# (Re)load the line_profiler IPython extension.
%reload_ext line_profiler
import pandas as pd
import json
import numpy as np
import warnings
from typing import Optional, List, Union, Callable
from datetime import datetime, timedelta
import pytz
# Silences every warning for the rest of the notebook, including pandas
# deprecation and chained-assignment warnings.
warnings.filterwarnings("ignore")
import cufflinks as cf
# Global cufflinks configuration used by every .iplot() call below.
cf.set_config_file(offline=True, offline_show_link=False, theme='white', sharing='secret')
# Colour list passed as `colors=` to the charts.
cf_colors = ['#3F93CA', '#DB3A34', '#466365', '#FFCAB1', '#C0E5C8']
print('Importing modules ...')

Importing modules ...


In [3]:
# Read the JSON dataset into the notebook-level `data` object that
# make_results() falls back to when no positions are passed in.
with open('../input/main-test-daniel-dataset-json/test.json', 'r', encoding='utf-8') as dataset_file:
    data = json.load(dataset_file)
print('Loading dataset ...')

Loading dataset ...


In [4]:
def plot_pos(p, sl=None, tp=None, hline=None):
    """Plot a position's candles with its stop-loss and take-profit zones.

    Parameters
    ----------
    p : dict
        Position. Reads ``p['rates']`` (candles with a ``time`` column),
        ``p['entry']['price']``, ``p['entry']['execution']['time']`` and,
        when ``sl`` is omitted, ``p['sl']['price']``.
    sl : float, optional
        Stop-loss level to draw; defaults to ``p['sl']['price']``.
    tp : float, optional
        Take-profit level to draw; defaults to the first price returned by
        ``get_position_tps(p)``.
    hline : optional
        Extra shape appended to the chart when it is not None.

    Returns
    -------
    Whatever ``DataFrame.iplot`` returns for the candle chart.
    """
    if tp is None:
        tp = get_position_tps(p)[0]
    if sl is None:
        sl = p['sl']['price']

    # Positions loaded from JSON carry `rates` as a plain list until
    # make_results() has processed them; build a frame locally so plotting
    # works either way, without mutating the position.
    rates = p['rates'] if isinstance(p['rates'], pd.DataFrame) else pd.DataFrame(p['rates'])

    entry_price = p['entry']['price']
    open_ = pd.to_datetime(p['entry']['execution']['time'])
    # Both zones are drawn one hour wide, starting at the entry execution time.
    close_ = open_ + timedelta(hours=1)

    def _zone(level, color):
        # Rectangle spanning from the entry price to `level`.
        return cf.tools.get_shape(x0=open_, x1=close_, yref='close', y0=level, y1=entry_price,
                                  kind='rect', fill=True, opacity=0.3, color=color, width=0.1)

    shapes = [s for s in [_zone(sl, 'red'), _zone(tp, 'green'), hline] if s is not None]
    candles = rates.set_index(pd.to_datetime(rates['time']))
    return candles.iplot(kind='candle', shapes=shapes, width=0.1)
print('Defining plot_pos() function ...')

Defining plot_pos() function ...


In [5]:
def _candle_mean(candle: Union[pd.Series, dict]) -> float:
    avg = candle["low"] + (candle["high"] - candle["low"]) / 2
    return round(avg, 6)
print('Defining _candle_mean() function ...')

Defining _candle_mean() function ...


In [6]:
def _get_executed_orders(p: dict, ignore: List[str] = None):
    
    if ignore is None:
        ignore = list()
        
    executed = [o for o in p['orders'] if o.get('execution') != dict()]
    sorted_executed = sorted(executed, key=lambda x: pd.to_datetime(x['execution']['time']))
    return [e for e in sorted_executed if e['name'] not in ignore]
print('Defining _get_executed_orders() function ...')

Defining _get_executed_orders() function ...


In [7]:
def _determine_position_result(p: dict, partials: List[float], ignore: List[str]):
    """Compute the outcome of a position, in multiples of its stop distance (R).

    Executed orders are walked in execution order, pairing the i-th event
    with the i-th entry of ``partials`` (the fraction of the position closed
    at that event).

    Parameters
    ----------
    p : dict
        Position. Reads ``side``, ``orders``, ``rates``, ``sl_delta`` and
        ``entry['execution']['price']``. Side effect: ``p['rates']`` is
        converted to a DataFrame in place.
    partials : list of float
        Fractions of the position closed at successive events.
    ignore : list of str
        Order names to leave out of the event list.

    Returns
    -------
    tuple
        ``(close_time, result_in_R, result_type)``. ``result_type`` is the
        name of the first executed event, or ``'EOP'`` when no order was
        executed (the position is then valued at the midpoint of the last
        candle).
    """
    SIDE = 1 if p['side'].upper() == 'BUY' else -1

    # get events (orders) in execution order
    events = _get_executed_orders(p, ignore=ignore)

    # make sure the rates attribute is a DataFrame
    if not isinstance(p['rates'], pd.DataFrame):
        p['rates'] = pd.DataFrame(p['rates'])

    entry_price = p['entry']['execution']['price']

    if len(events) == 0:
        last_candle = p['rates'].iloc[-1]
        r = SIDE * (_candle_mean(last_candle) - entry_price) / p['sl_delta']
        return (pd.to_datetime(last_candle.time, utc=True), r, 'EOP')

    result = 0
    # The result type is always the first executed event. Seeding `close`
    # here as well keeps both names bound if `partials` is empty.
    result_type = events[0]['name']
    close = pd.to_datetime(events[0]['execution']['time'], utc=True)

    # zip stops at the shorter sequence: events beyond the configured
    # partials (and partials beyond the executed events) are not processed.
    for i, (partial, event) in enumerate(zip(partials, events)):
        name = event['name'].upper()
        close = pd.to_datetime(event['execution']['time'], utc=True)
        r = SIDE * (event['execution']['price'] - entry_price) / p['sl_delta']

        # Closing events
        if name in ('SL', 'SL_TO_BE'):
            # If it is the first event record the loss; otherwise the
            # remaining part is assumed to close at breakeven, so whatever
            # was banked by earlier partials stands.
            if i == 0:
                result = r
            break
        if name == 'CLOSE':
            # A manual close exits everything that is still open. It is
            # added on top of what earlier partials already banked.
            # (The original compared against the list ['CLOSE'], so this
            # branch never ran.)
            result += r * (1 - sum(partials[:i]))
            break

        # Any other event closes `partial` of the position at this price.
        result += r * partial

    return (close, result, result_type)
print('Defining _determine_position_result() function ...')

Defining _determine_position_result() function ...


In [8]:
def get_position_tps(p):
    """Return the distinct prices of the position's 'TP' orders, ascending.

    Order names are matched case-insensitively. The ordering is numeric and
    does not depend on the position's side.
    """
    tp_prices = {order['price'] for order in p['orders'] if order['name'].upper() == 'TP'}
    return sorted(tp_prices)
print('Defining get_position_tps() function ...')

Defining get_position_tps() function ...


In [9]:
def find_best_sl(p, for_tp=0, tp=None):
    """Find the tightest stop-loss price that would have let ``tp`` be reached.

    The stop is placed one ``spread`` beyond the worst adverse price seen
    after the entry and before the first candle that touches ``tp``.

    Parameters
    ----------
    p : dict
        Position. Reads ``rates`` (converted to a DataFrame in place if
        needed), ``side``, ``symbol['info']['trade_tick_size']`` and
        ``entry['execution']['time']``.
    for_tp : int
        Index into ``get_position_tps(p)`` used when ``tp`` is not given.
        If that index does not exist, the last TP is used.
    tp : float, optional
        Explicit take-profit price; overrides ``for_tp``.

    Returns
    -------
    float or None
        The new stop-loss PRICE. None when ``tp`` was never touched (the
        direction was simply wrong), when there is no candle between entry
        and the TP hit, or when the position has no TP at all.
    """
    if not isinstance(p['rates'], pd.DataFrame):
        p['rates'] = pd.DataFrame(p['rates'])

    if tp is None:
        tps = get_position_tps(p)
        if not tps:
            return None
        try:
            tp = tps[for_tp]
        except IndexError:
            # Indexing a list raises IndexError (the original caught
            # AttributeError, so this fallback never triggered).
            tp = tps[-1]

    bound = p['rates']
    SIDE = 1 if p['side'].upper() == 'BUY' else -1
    spread = 20 * p['symbol']['info']['trade_tick_size'] # let's just say 2 pips
    entry_hit = p['entry']['execution']['time']

    # Candles that touch the take-profit level.
    tp_hit = (bound.high >= tp) if SIDE == 1 else (bound.low <= tp)

    # If no candle ever hits the tp there is no sl that could have saved
    # the trade: the direction was just wrong.
    if not tp_hit.any():
        return None

    # True for every candle strictly before the first TP touch: the cumsum
    # becomes non-zero at the first hit and stays non-zero afterwards.
    window = ~tp_hit.cumsum().astype(bool)
    candidates = (bound.time > entry_hit) & window

    if SIDE == 1:
        new_sl = bound.loc[candidates, 'low'].min() - spread
    else:
        new_sl = bound.loc[candidates, 'high'].max() + spread

    # An empty selection yields NaN from min()/max().
    return None if pd.isnull(new_sl) else new_sl
print('Defining find_best_sl() function ...')

Defining find_best_sl() function ...


In [10]:
def find_best_tp(p, sl=None):
    """Find the best take-profit price reachable before ``sl`` would be hit.

    Parameters
    ----------
    p : dict
        Position. Reads ``rates`` (converted to a DataFrame in place if
        needed), ``side``, ``symbol['info']['trade_tick_size']``,
        ``entry['execution']['time']`` / ``['price']`` and, when ``sl`` is
        omitted, ``sl['price']``.
    sl : float, optional
        Stop-loss price to test; defaults to the position's own stop.

    Returns
    -------
    float or None
        The new take-profit PRICE: the most favourable price seen after the
        entry and before the first candle that touches the stop, moved one
        ``spread`` back towards the entry. None when the trade went straight
        into loss: no candle lies between entry and the stop hit, the rates
        are empty, or the best price is on the wrong side of the entry.
    """
    if not isinstance(p['rates'], pd.DataFrame):
        p['rates'] = pd.DataFrame(p['rates'])

    if sl is None:
        sl = p['sl']['price']

    bound = p['rates']
    # No candles at all: nothing could have been reached.
    if len(bound) == 0:
        return None

    SIDE = 1 if p['side'].upper() == 'BUY' else -1
    spread = 20 * p['symbol']['info']['trade_tick_size'] # let's just say 2 pips
    entry_hit = p['entry']['execution']['time']

    # Candles that touch the stop, with half a spread of tolerance.
    if SIDE == 1:
        sl_hit_cond = (bound.low <= sl + spread/2)
    else:
        sl_hit_cond = (bound.high >= sl - spread/2)

    # sl_hit_cond looks like 0,0,0,0,1,1,1,0,1,0,...; its cumsum turns
    # non-zero at the first hit and stays non-zero, so casting to bool gives
    # False,...,False,True,True,... and the inverse selects every candle
    # before the first stop hit.
    window = ~sl_hit_cond.cumsum().astype(bool)
    candidates = (bound.time > entry_hit) & window

    if SIDE == 1:
        new_tp = bound.loc[candidates, 'high'].max() - spread
    else:
        new_tp = bound.loc[candidates, 'low'].min() + spread

    # An empty selection yields NaN; report it as "no TP" instead of leaking
    # NaN to the callers, which only test for None.
    if pd.isnull(new_tp):
        return None

    # If the tp is on the losing side of the entry, the trade went straight
    # to the sl, so return None.
    if SIDE * (new_tp - p['entry']['execution']['price']) < 0:
        return None
    return new_tp
print('Defining find_best_tp() function ...')

Defining find_best_tp() function ...


In [11]:
def find_absolute_best_params(p, previous_best: tuple):
    """Iteratively refine an (sl, tp) pair and return the best one found.

    Starting from ``previous_best``, the function alternates
    ``find_best_tp`` (best TP for the current SL) and ``find_best_sl``
    (tightest SL for that TP) for at most 100 rounds, recording each
    complete pair. It stops early once two consecutive pairs are equal
    within 1e-7.

    Parameters
    ----------
    p : dict
        Position; reads ``entry['execution']['price']`` and is forwarded to
        ``find_best_sl`` / ``find_best_tp``.
    previous_best : tuple
        ``(sl_price, tp_price)`` seed; either element may be None.

    Returns
    -------
    tuple
        The recorded pair with the smallest ``|sl - entry| / |tp - entry|``
        ratio, or ``(None, None)`` when no complete pair exists.
    """
    entry = p['entry']['execution']['price']
    pairs = [previous_best]

    better_sl = previous_best[0]
    for i in range(100):
        sl_at_start = better_sl
        if better_sl is not None:
            better_tp = find_best_tp(p, sl=better_sl)
        else:
            better_tp = previous_best[1]
            better_sl = find_best_sl(p, tp=better_tp)

        if better_tp is None or better_sl is None:
            # Nothing to record this round. If the SL did not change either,
            # every further round would repeat exactly the same calls (the
            # helpers only depend on `p` and their arguments), so stop
            # instead of burning the remaining iterations.
            if better_sl is sl_at_start:
                break
            continue

        commit = (better_sl, better_tp)
        previous = pairs[-1]
        pairs.append(commit)
        # When the results are equal, stop iterating. The previous entry can
        # be the seed pair, which may contain None and must not be
        # subtracted from.
        if (i != 0 and
                all(value is not None for value in previous) and
                abs(commit[0] - previous[0]) < 1e-7 and
                abs(commit[1] - previous[1]) < 1e-7):
            break
        better_sl = find_best_sl(p, tp=better_tp)

    complete = [pair for pair in pairs if None not in pair]

    if len(complete) == 0:
        return (None, None)

    # Smallest risk (SL distance) per unit of reward (TP distance) wins;
    # min() returns the first such pair, like sorted(...)[0] did.
    return min(complete, key=lambda x: abs(x[0] - entry) / abs(x[1] - entry))
print('Defining find_absolute_best_params() function ...')

Defining find_absolute_best_params() function ...


In [12]:
def make_results(given: List['Position'] = None,
                 partials: List[float] = None,
                 ignore: List[str] = None,
                 cleanse: Union[bool, Callable, None] = True,
                 only: Optional[list] = None):
    """Build the per-position results table.

    Parameters
    ----------
    given : list of positions, optional
        Positions to evaluate; defaults to a shallow copy of the
        notebook-level ``data``. Positions are mutated: ``rates`` becomes a
        DataFrame, ``rates['time']`` may be converted to datetimes and
        ``entry['execution']['time']`` is parsed into a ``datetime``.
    partials : list of float, optional
        Fractions closed at successive events (default ``[1]``).
    ignore : list of str, optional
        Order names excluded when determining the result.
    cleanse : bool, callable or None
        ``True`` applies the built-in filter (drops result > 4, sl_pips > 50,
        sl_pips < 5 and every trade opened on 2021-12-22). A callable
        receives the full frame and returns the filtered one. ``None`` or
        ``False`` disables filtering.
    only : list of str, optional
        Regex fragments; only columns matching any of them are returned.
        Also controls which expensive columns are computed: the ``better_*``
        columns need an entry containing 'best' or 'better', the ``best_*``
        columns need an entry containing 'best'.

    Returns
    -------
    pandas.DataFrame
        Indexed by open time (UTC), sorted, restricted to the ``only``
        columns.
    """
    if given is None:
        given = data[:]

    if partials is None:
        partials = [1]

    if ignore is None:
        ignore = list()

    if cleanse is True:
        def base_filter(df: pd.DataFrame) -> pd.DataFrame:
            df = df.drop(df[df.result>4].index, axis=0)
            df = df.drop(df[df.sl_pips>50].index, axis=0)
            df = df.drop(df[df.sl_pips<5].index, axis=0)
            # Outlier day. A mask is used instead of df.loc['2021-12-22'],
            # which raises KeyError when no trade falls on that date
            # (e.g. on a subset, or after the drops above).
            outlier_day = pd.Timestamp('2021-12-22', tz=df.index.tz)
            df = df[df.index.normalize() != outlier_day]
            return df
        cleanse = base_filter
    elif cleanse is False:
        # `False` used to fall through and be called like a function.
        cleanse = None

    wants_better = only is None or any([x in y for x in ('best', 'better') for y in only])
    wants_best = only is None or any(['best' in x for x in only])

    results = list()
    for p in given:
        res = _determine_position_result(p, partials=partials, ignore=ignore)
        entry = p['entry']['execution']['price']
        entry_time = p['entry']['execution']['time']
        if not isinstance(p['entry']['execution']['time'], datetime):
            p['entry']['execution']['time'] = datetime.fromisoformat(entry_time)

        open_ = pd.to_datetime(p['time'], utc=True)
        close_ = pd.to_datetime(res[0], utc=True)

        # Reset per position so nothing leaks over from the previous one.
        better_sl = None
        better_tp = None
        better_sl_R = None
        better_tp_R = None
        if wants_better:
            if not pd.api.types.is_datetime64_any_dtype(p['rates']['time'].dtype):
                p['rates']['time'] = pd.to_datetime(p['rates']['time'])
            # finds better sl for tp0
            better_sl = find_best_sl(p, for_tp=0)
            # finds better tp for current sl
            better_tp = find_best_tp(p)
            # Either helper can return None independently of the other. A
            # shared try/except used to drop a valid better_tp_R whenever
            # better_sl was None.
            if better_sl is not None:
                better_sl_R = abs(better_sl-entry)/p['sl_delta']
            if better_tp is not None:
                better_tp_R = abs(better_tp-entry)/p['sl_delta']

        best_sl_R = None
        best_tp_R = None
        if wants_best:
            # absolute best R possible (very expensive)
            x = find_absolute_best_params(p, (better_sl, better_tp))
            # find_absolute_best_params can return (None, None)
            if x[0] is not None and x[1] is not None:
                best_sl_R = abs(x[0] - entry) / p['sl_delta'] # compared to original sl
                best_tp_R = abs(x[1] - entry) / abs(x[0] - entry) # compared to best_sl

        results.append(dict(open=open_,
                            close=close_,
                            symbol=p['symbol']['name'],
                            side=p['side'],
                            # stop distance in ticks, divided by 10
                            sl_pips=p['sl_delta']/p['symbol']['info']['trade_tick_size']/10,
                            result=float(res[1]),
                            result_type=str(res[2]),
                            better_sl_R=better_sl_R,
                            better_tp_R=better_tp_R,
                            best_sl_R=best_sl_R,
                            best_tp_R=best_tp_R))

    df = pd.DataFrame(results).set_index('open', drop=True).sort_index()
    # Filtering runs on the full frame, before the column selection below.
    if cleanse is not None:
        df = cleanse(df)

    if only is None:
        only = df.columns.tolist()
    relevant_cols = df.columns[df.columns.str.contains('|'.join(only))].tolist()

    return df[relevant_cols]
print('Defining make_results() function ...')

Defining make_results() function ...


In [13]:
def merge(data: dict, mode='mean'):
    """Align several datetime-indexed frames on a one-minute grid, side by side.

    Parameters
    ----------
    data : dict
        Maps a label to a DataFrame with a datetime index.
    mode : str
        Name of the resampler aggregation applied per minute
        (``'mean'``, ``'sum'``, ...). With the default, rows that share a
        minute are averaged.

    Returns
    -------
    pandas.DataFrame
        One column per input column, suffixed with ``_<label>``. Minutes
        where every column is NaN are dropped; the index is sorted.
    """
    # 'min' is the minute alias accepted by every pandas version; the
    # single-letter 'T' alias is deprecated in recent releases.
    per_minute = [getattr(df.resample('min'), mode)() for df in data.values()]
    labelled = [df.add_suffix(f'_{name}') for name, df in zip(data.keys(), per_minute)]
    return pd.concat(labelled, axis=1).dropna(how='all').sort_index()
print('Defining merge() function ...')

Defining merge() function ...


***
# Analisi Risultati Daniel Savage FX
## 23 6451 - Le basi
Partiamo ipotizzando di seguire "alla lettera" (ammesso un certo margine di errore) i segnali e iniziamo con lo studiare le due varianti più importanti:
- Alla lettera, chiudendo l'intera posizione a TP1
- Alla lettera, chiudendo il 60% della posizione a TP1 e il 40% a TP2

In [14]:
# Baseline result tables built from the loaded positions:
#  - df_60_40: partials [0.6, 0.4] (60% closed at the first event, 40% at the second)
#  - df_100:   partials [1]        (everything closed at the first event)
# `only` keeps the columns whose name matches 'close' or 'result'.
df_60_40 = make_results(data[:], partials=[0.6, 0.4], only=['close', 'result'])
df_100 = make_results(data[:], partials=[1], only=['close', 'result'])

In [15]:
# Re-index both tables by close time, so results are ordered by when they
# were realised rather than by when the trade was opened.
df_60_40_by_close = df_60_40.set_index('close').sort_index()
df_100_by_close = df_100.set_index('close').sort_index()

# merge() puts both variants on a shared one-minute grid (default mode
# 'mean'), suffixing the columns with the variant label; the running total
# of each column is then plotted.
merged = merge({'60_40': df_60_40_by_close, '100': df_100_by_close})   
merged.cumsum().iplot(kind='scatter', connectgaps=True, interpolation='hv', 
                      title='Risultato cumulativo (R) [by Close]', width=2, legend='top',
                      yTitle='R', fill=True, zerolinecolor='grey', opacity=1, colors=cf_colors)

In [16]:
# Sum the merged results per month and plot them as bars.
# NOTE: `df` is a scratch name that later cells overwrite.
df = merged.groupby(pd.Grouper(freq='M')).sum().reset_index()
# Replace the timestamp with a 'Mon YYYY' label for the x axis.
df['close'] = df.close.dt.strftime('%b %Y')
df.iplot(kind='bar', x='close', opacity=1, title='Risultato mensile (R) [by Close]',
         yTitle='R', zerolinecolor='grey', colors=cf_colors, xTitle='month', legend='top')

In [17]:
# Sum the merged results per week (weekly bins anchored on Monday, 'W-MON')
# and plot them as bars.
df = merged.groupby(pd.Grouper(freq='W-MON')).sum().reset_index()
# x-axis label: week number and two-digit year.
df['close'] = df.close.dt.strftime('%W-%y')
df.iplot(kind='bar', x='close', opacity=1, title='Risultato settimanale (R) [by Close]',
         yTitle='R', zerolinecolor='grey', colors=cf_colors, xTitle='week number', legend='top')

In [18]:
# Daily totals of the merged results.
df = merged.groupby(pd.Grouper(freq='D')).sum()
# Mask zero totals to NaN and drop the days where every column is NaN,
# i.e. keep only days with a non-zero result in at least one variant.
df = df[df!=0].dropna(how='all').reset_index()
# Calendar days between the first and the last remaining day
# (column 0 is the former index, used as `df.close` below).
n_days = (df.iloc[-1, 0] - df.iloc[0, 0]).days
df['close'] = df.close.dt.strftime('%d %b %Y')
# The title reports len(df)/n_days and len(df)/(n_days*.7).
# NOTE(review): the 0.7 factor presumably approximates the share of trading
# days in a calendar period (~5/7) -- confirm.
df.iplot(kind='bar', x='close', opacity=1, yTitle='R', zerolinecolor='grey',
         title=f'Risultato giornaliero (R) [by Close] [non_zero={len(df)}/{n_days}] [worked={len(df)/(n_days*.7):.0%}]',
         colors=cf_colors, xTitle='date', legend='top')

### Per questi prossimi grafici non è necessario analizzare la differenza fra le due versioni

In [19]:
# df_100 is indexed by open time, so this counts the non-null values of each
# column per opening day.
df = df_100.groupby(pd.Grouper(freq='D')).count()
# Zero counts become NaN and days without any count are dropped.
df = df[df!=0].dropna(how='all')
# Histogram of the per-day counts of the 'result' column.
df['result'].iplot(kind='hist', histnorm='probability', colors=cf_colors, opacity=1, xTitle='trades',
                   title='Distribuzione del numero di trades in 1 giorno (frequenza) [by Open]',
                   yTitle='probability', bargroupgap=0.5, linecolor='grey', zerolinecolor='grey')

In [20]:
# Depends on `df` left behind by the previous cell (per-day counts): the
# cell must be run right after it. Here 'result' is overwritten with the
# daily R total, while 'close' still holds the per-day count of close values
# (presumably the number of trades opened that day -- assumes every row has
# a close time).
df['result'] = df_100.groupby(pd.Grouper(freq='D')).sum()['result']
# Group days by that count and sum their R; .loc[1:] label-slices the
# resulting count index from 1 upward.
df.groupby('close').sum().loc[1:, 'result'].iplot(kind='bar', opacity=1, colors=cf_colors,
                                                  title='Risultato cumulativo in base al numero di trade giornalieri (R) [by Open]',
                                                  yTitle='R', zerolinecolor='grey')

***
## Limite al numero di trades giornalieri
È chiaro che il numero di trades giornalieri influisce sul risultato della strategia:
 - Proviamo a limitare il numero massimo di operazioni giornaliere
 - Paragonerò la strategia 60_40 visto che si è rivelata più profittevole

In [21]:
def reduce_n_trades_max(df, max_trades):
    """Keep at most ``max_trades`` rows per calendar day, in row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime index (days are derived from the index).
        It is not modified.
    max_trades : int
        Maximum number of rows kept for each day.

    Returns
    -------
    pandas.DataFrame
        The first ``max_trades`` rows of every day, with the original columns.
    """
    # Work on an explicit copy. Assigning a column to the `df[:]` slice is
    # the slice-then-assign pattern pandas warns about.
    capped = df.copy()
    capped['counter'] = 1
    # Running count of the rows within each day: transform() returns a
    # result aligned with the original rows, here the per-day cumulative sum
    # of the column of ones (1, 2, 3, ... restarting every day).
    capped['counter'] = capped[['counter']].groupby(pd.Grouper(freq='D')).transform(lambda x: x.cumsum())
    return capped[capped.counter <= max_trades].drop('counter', axis=1)

# Cap the 60/40 table (indexed by open time) at 2, 3 and 4 trades per day,
# then re-index each capped table by close time.
max_2_60_40_by_close = reduce_n_trades_max(df_60_40, 2).set_index('close').sort_index()
max_3_60_40_by_close = reduce_n_trades_max(df_60_40, 3).set_index('close').sort_index()
max_4_60_40_by_close = reduce_n_trades_max(df_60_40, 4).set_index('close').sort_index()
# Overwrites the notebook-level `merged` with the capped variants next to
# the uncapped one ('60_40_old').
merged = merge({'60_40_max_2': max_2_60_40_by_close,
                '60_40_max_3': max_3_60_40_by_close,
                '60_40_max_4': max_4_60_40_by_close,
                '60_40_old': df_60_40_by_close})

In [22]:
# Running total of every variant currently held in `merged`.
merged.cumsum().iplot(kind='scatter', connectgaps=True, interpolation='hv', 
                      title='Risultato cumulativo (R) [by Close]', width=2, legend='top',
                      yTitle='R', fill=True, zerolinecolor='grey', opacity=1, colors=cf_colors)

*È chiaro che la combinazione migliore è **60_40** con **max 3 trade al giorno**, quindi proseguiamo ad analizzare quella.*

In [23]:
# Narrow `merged` down to the 3-trades-per-day variant and the uncapped one.
merged = merge({'60_40_max_3': max_3_60_40_by_close,
                '60_40_old': df_60_40_by_close})

In [24]:
# Same daily breakdown as the earlier daily chart, now for the two variants
# in `merged`, drawn as one subplot per variant.
df = merged.groupby(pd.Grouper(freq='D')).sum()
# Mask zero totals to NaN and drop the days where every column is NaN.
df = df[df!=0].dropna(how='all').reset_index()
# Calendar days between the first and the last remaining day.
n_days = (df.iloc[-1, 0] - df.iloc[0, 0]).days
df['close'] = df.close.dt.strftime('%d %b %Y')
# NOTE(review): the 0.7 factor in the title presumably approximates the
# share of trading days (~5/7) -- confirm.
df.iplot(kind='bar', x='close', opacity=1, yTitle='R', zerolinecolor='grey', subplots=True, subplot_titles=['60_40_max_3', '60_40_old'],
         title=f'Risultato giornaliero (R) [by Close] [non_zero={len(df)}/{n_days}] [worked={len(df)/(n_days*.7):.0%}]',
         colors=cf_colors, xTitle='date', legend=False)

### Ora che il rischio massimo giornaliero è stato limitato a 3R, possiamo pensare di aumentare il rischio individuale per trade

In [25]:
# Variant with every result of the capped table multiplied by 1.5, built on
# a copy so the 1x table is left untouched.
max_3_60_40_risk_1_5_by_close = max_3_60_40_by_close.copy()
max_3_60_40_risk_1_5_by_close['result'] *= 1.5
# Overwrites `merged` with the three variants to compare.
merged = merge({'60_40_max_3': max_3_60_40_by_close,
                '60_40_old': df_60_40_by_close,
                '60_40_max_3 (r: 1.5x)': max_3_60_40_risk_1_5_by_close})

In [26]:
# Monthly totals for the three variants currently held in `merged`.
df = merged.groupby(pd.Grouper(freq='M')).sum().reset_index()
# Replace the timestamp with a 'Mon YYYY' label for the x axis.
df['close'] = df.close.dt.strftime('%b %Y')
df.iplot(kind='bar', x='close', opacity=1, title='Risultato mensile (R) [by Close]',
         yTitle='R', zerolinecolor='grey', colors=cf_colors, xTitle='month', legend='top')

## Prime conclusioni

Come possiamo vedere, questa operazione ci porta un beneficio in **13 casi su 16**, mentre paghiamo le conseguenze del rischio maggiorato solo in 2 casi: Novembre 2020 e Novembre 2021.

La domanda a cui stiamo cercando una risposta tuttavia è: basta questa performance per passare la Challenge FTMO? 

La risposta (grossolana) è, in questo caso: *solo 3 volte su 16*. Avremmo passato la Challenge a Marzo 2021, Dicembre 2021 e Gennaio 2022.

In altri 8 casi non l'avremmo passata e non l'avremmo persa, mentre in 2 casi l'avremmo persa.

***
## Risk Management Dinamico

In [27]:
def make_streak_df(df, by='open'):
    """Return the signed length of the current win/loss streak for each trade.

    Trades are ordered by the ``by`` column. A result >= 0 counts as a win,
    anything else as a loss. The value is +k on the k-th consecutive win and
    -k on the k-th consecutive loss.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``by`` column and a ``result`` column.
    by : str
        Column used to order the trades.

    Returns
    -------
    pandas.DataFrame
        Single column ``streak`` with a fresh 0..n-1 index.
    """
    outcomes = df.set_index(by).sort_index()['result']

    # 1 for a win, 0 for a loss -- and the mirrored flag for losses.
    won = outcomes.apply(lambda r: 1 if r >= 0 else 0)
    lost = won.apply(lambda flag: 1 if flag == 0 else 0)

    # A new run starts whenever the flag differs from the previous row.
    win_runs = (won != won.shift()).cumsum()
    loss_runs = (lost != lost.shift()).cumsum()

    # Within a run the cumulative sum counts consecutive wins (or losses);
    # it stays at 0 throughout runs of the opposite kind.
    streak_pos = won.groupby(win_runs).cumsum()
    streak_neg = lost.groupby(loss_runs).cumsum()

    streak = (streak_pos - streak_neg).rename('streak')
    return streak.to_frame().reset_index(drop=True)

***