In [1]:
!pip install --proxy http://proxyaws-qvr.pole-emploi.intra:8080 yfinance ta bayesian-optimization

Collecting yfinance
  Downloading yfinance-0.2.18-py2.py3-none-any.whl (60 kB)
[K     |████████████████████████████████| 60 kB 5.0 MB/s eta 0:00:011
[?25hCollecting ta
  Downloading ta-0.10.2.tar.gz (25 kB)
Collecting bayesian-optimization
  Downloading bayesian_optimization-1.4.3-py3-none-any.whl (18 kB)
Collecting pytz>=2022.5
  Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)
[K     |████████████████████████████████| 502 kB 5.1 MB/s eta 0:00:01
[?25hCollecting frozendict>=2.3.4
  Downloading frozendict-2.3.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)
[K     |████████████████████████████████| 114 kB 10.1 MB/s eta 0:00:01
Collecting multitasking>=0.0.7
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Collecting lxml>=4.9.1
  Downloading lxml-4.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (7.1 MB)
[K     |████████████████████████████████| 7.1 MB 5.2 MB/s eta 0:00:01
Collecting beautifulsoup4>=4.11.1
  Do

In [18]:
import pandas as pd, numpy as np
import itertools
import seaborn as sns
import yfinance as yf
import multiprocessing as mp
import time
import logging

from copy import copy
import statistics as stats
import math
from functools import reduce

from ta.volume import MFIIndicator
from ta.volatility import AverageTrueRange
from ta.trend import STCIndicator
from ta.trend import EMAIndicator

from bayes_opt import BayesianOptimization

# Génération du fichier source


In [3]:
from ibm_watson_studio_lib import access_project_or_space
wslib = access_project_or_space()

df_historical_data = pd.read_csv(wslib.mount.get_data_path('NDX_1985.csv'))
df_historical_data.reset_index(inplace=True)
df_historical_data.drop(columns=["index"], inplace=True)
df_historical_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1985-10-01,110.620003,112.160004,110.565002,112.139999,112.139999,153160000
1,1985-10-02,112.139999,112.540001,110.779999,110.824997,110.824997,164640000
2,1985-10-03,110.839996,111.184998,110.120003,110.870003,110.870003,147300000
3,1985-10-04,110.870003,110.870003,109.855003,110.074997,110.074997,147900000
4,1985-10-07,110.074997,110.135002,108.175003,108.199997,108.199997,128640000


In [4]:
df_historical_data.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [5]:
# Test si aucune ligne manquante
test_list = [champ == 0 for champ in df_historical_data.isnull().sum()]

# Si toutes les colonnes sont True, résultat = True
notnull = all(i for i in test_list)
print("Aucune ligne vide détectée, pour l'ensemble des colonnes : ", notnull)

Aucune ligne vide détectée, pour l'ensemble des colonnes :  True


In [6]:
df_historical_data["Open"] = df_historical_data.Open.apply(lambda x: round(x,2))
df_historical_data["High"] = df_historical_data.High.apply(lambda x: round(x,2))
df_historical_data["Low"] = df_historical_data.Low.apply(lambda x: round(x,2))
df_historical_data["Close"] = df_historical_data.Close.apply(lambda x: round(x,2))

In [7]:
df_historical_data.tail(1)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
9474,2023-05-04,13014.07,13064.02,12938.45,12982.48,12982.480469,4745780000


In [8]:
df_historical_data.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

# Génération des combinaisons de paramètres

In [8]:
# Paramètres de sortie
l_stc_sb = [20,25]
l_ema = [e for e in range(40,130,10)]
l_STC_length = [s for s in range(40,130,10)]
l_STC_slow_length = [s for s in range(40,120,10)]
l_STC_fast_length = [s for s in range(15,35,5)]
l_mfi_seuil = [m for m in range(50,58,2)]
l_mfi_period = [m for m in range(12,18,2)]
l_at_l = [14,36]
l_at_m = [0.2,1.0,1.4]
l_rr_ratio = [3,4,5]
l_ATR_sl_l = [13,36]
l_ATR_sl = [2,3]

In [9]:
all_params = [l_stc_sb, l_ema, l_STC_length, l_STC_slow_length, l_STC_fast_length, l_mfi_seuil, l_mfi_period, l_at_l, l_at_m, l_rr_ratio, l_ATR_sl_l, l_ATR_sl]
combinaisons_raw = list(itertools.product(*all_params))

In [10]:
len(combinaisons_raw)

4478976

### Combinaisons affinées

In [11]:
combinaisons = []
for i in range(0,len(combinaisons_raw)-1):
    # Garde uniquement si si STC slow length <= STC length ou STC Fast length < STC Slow length
    if (combinaisons_raw[i][3] <= combinaisons_raw[i][2]) & (combinaisons_raw[i][4] < combinaisons_raw[i][3]):
        combinaisons.append(combinaisons_raw[i])

In [12]:
print("Longuer de la liste originelle : {}\nLongueur de la liste épurée : {}".format(len(combinaisons_raw), len(combinaisons)))

Longuer de la liste originelle : 4478976
Longueur de la liste épurée : 2737151


In [13]:
del combinaisons_raw
#del combinaisons

# Fonctions

## Indicateurs Techniques

#### Alphatrend

In [9]:
# Trend indicator, équivalent de l'affichage couleur
def trend_indicator(trend):
    if trend > 0 :
        # Uptrend
        x = 1
    elif trend < 0 :
        # Downtrend
        x = -1
    else :
        # Range
        x = 0
    return x

In [10]:
# Defintion fonction
def generate_alphatrend(df_in, mfi_p, mfi_seuil, atr_l, m):
    '''Paramètres d'entrée : longueur MFI, longueur ATR, multiplier
    Retourne les colonnes Alphatrend, Alphatrend +2, Trend (position AT1 / AT2)
    :mfi_p = période MFI servant à délimiter up/down de l'alphatrend
    :mfi_seuil = période MFI pour recherche crossover, détermine uptrend ou downtrend'''

    df = df_in.copy()

    # Colonnes MFI
    s_mfi = MFIIndicator(high=df.High, low=df.Low, close=df.Close, volume=df.Volume, window=mfi_p).money_flow_index()
    df["MFI_ref"] = s_mfi

    # Colonne ATR
    s_atr = AverageTrueRange(high=df.High, low=df.Low, close=df.Close, window=atr_l).average_true_range()
    df["ATR"] = s_atr

    # Lignes UpT et DownT
    df["UpT_support"] = df["Low"] - df["ATR"] * m
    df["DownT_support"] = df["High"] + df["ATR"] * m

    # Suppression des lignes sans signal, en début de DataFrame
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    # ===============================================
    # Calcul Alphatrend, en tant que série
    
    Alphatrend = [0]

    for i in range (1, df.shape[0]):
        # Cas Uptrend
        if df.at[i,"MFI_ref"] >= mfi_seuil :
            if df.at[i,"UpT_support"] < Alphatrend[-1] :
                # Flat
                Alphatrend.append(Alphatrend[-1])
            else :
                # Trailing stop loss Up
                Alphatrend.append(df.at[i,"UpT_support"])

        # Cas Downtrend, MFI < 50
        else :
            if df.at[i,"DownT_support"] > Alphatrend[-1] :
                # Flat
                Alphatrend.append(Alphatrend[-1])
            else :
                # Trailing stop loss Down
                Alphatrend.append(df.at[i,"DownT_support"])

    # ===============================================
    # Ajout des lignes k1 et k2 en tant que colonnes
    
    if df.shape[0] == len(Alphatrend):
        df["Alphatrend_k1"] = pd.Series(Alphatrend).apply(lambda x: round(x,2))
        # Ligne k2 décalée de 2j
        Alphatrend2 = df["Alphatrend_k1"].shift(periods=2, fill_value=0)
        df["Alphatrend_k2"] = pd.Series(Alphatrend2).apply(lambda x: round(x,2))
        # Trend
        df["Trend"] = df.Alphatrend_k1 - df.Alphatrend_k2
        df["Trend"] = df["Trend"].apply(trend_indicator)
    else :
        print("Erreur lors de la génération des lignes Alphatrend")

    # ===============================================
    # Génération des signaux Achat / Vente

    # On isole tous les index non neutres, où AT1 != AT2, à la hausse (1) comme à la baisse (-1)
    s_trend = df["Trend"].loc[df["Trend"]!=0]
    s_trend_diff = s_trend - s_trend.shift(1)

    buy_signal_indexes = s_trend_diff[s_trend_diff == 2].index
    sell_signal_indexes = s_trend_diff[s_trend_diff == -2].index

    df["Signal"] = 0
    df.loc[buy_signal_indexes,"Signal"] = 1
    df.loc[sell_signal_indexes,"Signal"] = -1

    # ===============================================
    # Sélection des colonnes suffisantes
    df = df[["Date","Alphatrend_k1","Alphatrend_k2","Trend","Signal"]]
    
    return df

#### STC & EMA

In [11]:
def generate_STC_and_EMA(df_in, stc_length, fast_length, slow_length, ema_period):
  
    df = df_in[["Date","Close"]].copy()

    s_stc = STCIndicator(close=df.Close, window_slow=slow_length, window_fast=fast_length, cycle=stc_length).stc()
    s_ema = EMAIndicator(close=df.Close, window=ema_period).ema_indicator()

    df["STC"] = round(s_stc,2)
    df["EMA"] = round(s_ema,2)

    df.drop(columns=["Close"], inplace=True)

    return df

#### ATR sortie & Merge tous indicateurs techniques

In [12]:
def merge_technical_indicators(df_in, atr_l, df1, df2, date_min="1998-01-01"):
    ''' Fusionne les DataFrames d'indicateurs techniques, 
    ajoute également date_min au format 'yyyy-mm-dd' pour fixer le début du Backtesting'''

    df_essentials = df_in.copy()

    # Ajout de la colonne ATR qui servira plus tard dans le calcul de la sortie.
    s_atr = AverageTrueRange(high=df_essentials.High, low=df_essentials.Low, close=df_essentials.Close, window=atr_l).average_true_range()
    df_essentials["ATR"] = pd.Series(s_atr).apply(lambda x: round(x,2))

    # Réduction au strict nécessaire pour les colonnes
    df_essentials = df_essentials[["Date","Open","Close","ATR"]].copy()

    # Merge des 3
    data_frames = [df_essentials, df1, df2]
    df_merged = reduce(lambda  left,right: pd.merge(left,right, on=['Date'], how='left'), data_frames)

    # Réduction de la fenêtre de tests à partir de la date_min
    df_merged = df_merged.loc[df_merged["Date"] >= date_min]

    df_merged.reset_index(drop=True,inplace=True)

    return df_merged

## Entrées / Sorties 

#### Détection des entrées

Stratégie :<br>
<li>Entreée : Buy signal + Prix > EMA + STC < seuil(25)</li>
<li>Sortie : Sell signal + Prix < EMA + STC > seuil(75)</li>

#### Sorties, valeurs & aggrégation

In [13]:
def generate_exit_vars(df_in, entry_index, atr_sl, rr_ratio):
    ''' Obtient l'Open de l'index d'entrée.
    Puis génère la valeur du Stop Loss et Take Profit'''

    df = df_in.copy()

    valeur_entree = df.at[entry_index,"Open"]
    date_entree = df.at[entry_index,"Date"]
    atr_reference = df.at[entry_index,"ATR"]

    # Ajustés pour Stratégie Short
    stop_loss = valeur_entree + atr_sl * atr_reference
    take_profit = valeur_entree - rr_ratio * (atr_sl * atr_reference)

    return date_entree, valeur_entree, stop_loss, take_profit

In [14]:
def generate_exit_vars_Long(df_in, entry_index, atr_sl, rr_ratio):
    ''' Obtient l'Open de l'index d'entrée.
    Puis génère la valeur du Stop Loss et Take Profit'''

    df = df_in.copy()

    valeur_entree = df.at[entry_index,"Open"]
    date_entree = df.at[entry_index,"Date"]
    atr_reference = df.at[entry_index,"ATR"]

    # Ajustés pour Stratégie Long
    stop_loss = valeur_entree - atr_sl * atr_reference
    take_profit = valeur_entree + rr_ratio * (atr_sl * atr_reference)

    return date_entree, valeur_entree, stop_loss, take_profit

In [15]:
def return_SL_or_TP_index(df_in, entry_index, stop_loss, take_profit) :
    '''Serie des valeurs close entre entrée et fin du DataFrame.
    Si Close > Stop loss, dans le cas d'un Short, alors touché.
    On cherche alors l'index de la première valeur >=0, si il existe'''

    df = df_in.copy()

    last_line = max(df.index)

    # Recherche index Stop Loss, ou index dernière ligne df
    # SHORT : Stop Loss touché si Close >= valeur SL
    close_series = (df.loc[entry_index:last_line,"Close"] - stop_loss) >= 0
    if len(close_series[ close_series == True ]) > 0 :
        sl_index = close_series[ close_series == True ].index[0]
    else :
        # Pour la dernière entrée, si rien n'est touché, alors la dernière ligne fera office de sortie
        sl_index = last_line

    # Recherche indexTake Profit
    # SHORT : Take Profit atteint si Close <= valeur TP
    close_series = (take_profit - df.loc[entry_index:last_line,"Close"]) >= 0
    if len(close_series[ close_series == True ]) > 0 :
        tp_index = close_series[ close_series == True ].index[0]
    else :
        tp_index = last_line

    # Sélection du 1er index touché : Stop Loss ou Take Profit
    lowest_index = min(sl_index, tp_index)

    # Pointeur des valeurs Date sortie & Prix de clôture dans le DataFrame indicateurs techniques
    exit_date = df.at[lowest_index,"Date"]
    exit_value = df.at[lowest_index,"Close"]

    return exit_date, exit_value

In [16]:
def return_SL_or_TP_index_Long(df_in, entry_index, stop_loss, take_profit) :
    '''Serie des valeurs close entre entrée et fin du DataFrame.
    Si Close > Stop loss, dans le cas d'un Short, alors touché.
    On cherche alors l'index de la première valeur >=0, si il existe'''

    df = df_in.copy()

    last_line = max(df.index)

    # Recherche index Stop Loss, ou index dernière ligne df
    # LONG : Stop Loss touché si Close <= valeur SL
    close_series = (df.loc[entry_index:last_line,"Close"] - stop_loss) <= 0
    if len(close_series[ close_series == True ]) > 0 :
        sl_index = close_series[ close_series == True ].index[0]
    else :
        # Pour la dernière entrée, si rien n'est touché, alors la dernière ligne fera office de sortie
        sl_index = last_line

    # Recherche indexTake Profit
    # LONG : Take Profit atteint si Close >= valeur TP
    close_series = (take_profit - df.loc[entry_index:last_line,"Close"]) <= 0
    if len(close_series[ close_series == True ]) > 0 :
        tp_index = close_series[ close_series == True ].index[0]
    else :
        tp_index = last_line

    # Sélection du 1er index touché : Stop Loss ou Take Profit
    lowest_index = min(sl_index, tp_index)

    # Pointeur des valeurs Date sortie & Prix de clôture dans le DataFrame indicateurs techniques
    exit_date = df.at[lowest_index,"Date"]
    exit_value = df.at[lowest_index,"Close"]

    return exit_date, exit_value

# Strategy as Class

In [17]:
class Strat_AT_STC_EMA:
  
    def __init__(self, p_ema=200, p_AT_m=1, p_AT_l=14, p_AT_mfi_l = 14, p_AT_mfi_s = 50, p_STC_l=80, p_STC_slow_l=50, p_STC_fast_l=27, p_STC_b=25, p_STC_h=75, p_ATR_SL_l = 14, p_ATR_SL = 2, p_RR_ratio = 3, p_leverage=1):
        self.ema_l = p_ema
        self.at_m = p_AT_m
        self.at_l = p_AT_l
        self.at_mfi_l = p_AT_mfi_l
        self.at_mfi_s = p_AT_mfi_s
        self.stc_l = p_STC_l
        self.stc_s_l = p_STC_slow_l
        self.stc_f_l = p_STC_fast_l
        self.stc_seuil_b = p_STC_b
        self.stc_seuil_h = p_STC_h
        self.ATR_SL_l = p_ATR_SL_l
        self.ATR_SL = p_ATR_SL
        self.RR_ratio = p_RR_ratio
        self.leverage = p_leverage

  
    def make_technical_indicators(self, df_source):
        df_AT = generate_alphatrend(df_source, mfi_p=self.at_mfi_l, mfi_seuil=self.at_mfi_s, atr_l=self.at_l, m=self.at_m)
        df_STC_EMA = generate_STC_and_EMA(df_source, stc_length=self.stc_l, fast_length=self.stc_f_l, slow_length=self.stc_s_l, ema_period=self.ema_l)
        df_Technical_Indicators = merge_technical_indicators(df_source, self.ATR_SL_l, df_AT, df_STC_EMA)
        return df_Technical_Indicators


    def get_entries_signals(self, df_in):
        ''' Nécessite en entrée le DataFrame avec indicateurs techniques.
        L'enrichit avec signaux Entrée Long (1) et Entrée Short (-1).'''

        df_IT = df_in.copy()

        # Valeur 3 pour signaux d'entrée valides
        df_IT["Buy_entry"] = np.sign(df_IT.Close - df_IT.EMA) + df_IT.Signal + np.sign(self.stc_seuil_b - df_IT.STC)
        # Valeur -3 pour signaux d'entrée valides
        # Attention / par deux signaux négatifs -> positif, d'où l'inversion sur un seul champ
        df_IT["Sell_entry"] = np.sign(df_IT.Close - df_IT.EMA) + df_IT.Signal + np.sign(self.stc_seuil_h - df_IT.STC)

        # Conversion en np array
        arr_buy_entry = df_IT["Buy_entry"].to_numpy()
        # np.where(condition, vrai, sinon)
        df_IT["Buy_entry"] = np.where(arr_buy_entry==3, 1, 0)

        arr_sell_entry = df_IT["Sell_entry"].to_numpy()
        df_IT["Sell_entry"] = np.where(arr_sell_entry==-3.0, -1, 0)

        # Agrégation des deux types de signaux.
        df_IT["Entry"] = df_IT["Sell_entry"] + df_IT["Buy_entry"]
        df_IT.drop(columns=["Buy_entry","Sell_entry"], inplace=True)

        return df_IT

  
    def apply_short_strategy(self, df_entries):
        ''' Entrée : DataFrame avec indicateurs techniques généré par get_entries_signals
        Sortie : DataFrame avec entrée - sortie, valeur & date'''

        df = df_entries.copy()
        d_entrees_sorties = {
            "date_entree" : [],
            "valeur_entree" : [],
            "date_sortie" : [],
            "valeur_sortie" : []
        }

        # Liste des entrées : -1 pour signal Short
        short_entries_indexes = df.loc[ df["Entry"]==-1 ].index
        # Ajout d'une unité pour entrée le lendemain du signal confirmé et clos
        short_entries_indexes += 1
        short_entries_indexes = short_entries_indexes.to_list()
        
        # Correction : Si la liste d'entrée est vide -> donne 1 seule entrée, celle de la date du jour
        if not short_entries_indexes :
            short_entries_indexes.append(max(df.index))
        
        # Attention, avec le +=1, si signal le dernier jour on est out of bounds de la liste. Correction
        if short_entries_indexes[-1] > max(df.index):
            short_entries_indexes[-1] = max(df.index)

        # Application des deux fonctions précédentes pour recherche Date + Valeur, d'entrée et sortie
        for entry in short_entries_indexes:
            # Calcul stop loss, take profit, et stocke Date + Open correspondants à l'index d'entrée passé dans la fonction
            entry_date, entry_price, sl, tp = generate_exit_vars(df, entry, self.ATR_SL, self.RR_ratio)
            # Détetion de Date + Close de la sortie, identifée avec Take Profit et stop loss trouvés ligne précédente
            exit_date, exit_price = return_SL_or_TP_index(df, entry, sl, tp)

            # Ajout des résultats dans le dictionnaire
            d_entrees_sorties["date_entree"].append(entry_date)
            d_entrees_sorties["valeur_entree"].append(entry_price)
            d_entrees_sorties["date_sortie"].append(exit_date)
            d_entrees_sorties["valeur_sortie"].append(exit_price)

        df_es = pd.DataFrame(d_entrees_sorties)
        return df_es
  

    def performance_metrics_short(self, df_es):
        ''' Sur la base du DataFrame Entrées/Sorties, génère les performances pour chaque trade, l'équity curve base 1000,
        le Win Rate, Expectancy Ratio, Expectancy, Profit Factor, Gain & Perte moyenne, et copie l'état des paramètres'''

        df = df_es.copy()

        try : 
            # Performance en % pour chaque trade pris (par ligne). Signe - car stratégie Short
            s_perf = round( -((df["valeur_sortie"]-df["valeur_entree"])/df["valeur_entree"]), 3)
            a_perf = np.array(s_perf)

            # Win Rate
            count_won = len( a_perf[a_perf > 0] )
            count_lost = len(a_perf) - count_won
            win_rate = round( count_won/len(a_perf) ,2)

            # Reward-to-Risk Ratio X Win Ratio - Loss Ratio = Expectancy Ratio
            expectancy_ratio = round( self.RR_ratio * count_won/len(a_perf) - count_lost/len(a_perf) ,2)

            # Equity Curve, base 1000
            l_perf = s_perf.to_list()
            perf_nette_b1000 = [round( (999*(1+x*self.leverage)-1000)*0.99 ,2) for x in l_perf]

            # Gain & perte moyenne (arrondi entier)
            arr_b1000 = np.array(perf_nette_b1000)
            moyenne_gains_nets = int( np.mean(arr_b1000[ arr_b1000>0 ]) )
            moyenne_pertes_nettes = int( np.mean(arr_b1000[ arr_b1000<0 ]) )

            # Expectancy (arrondi entier)
            expectancy = int( (count_won/len(a_perf) * moyenne_gains_nets) - (count_lost/len(a_perf) * moyenne_pertes_nettes) )

            # Profit Factor : sommes des gains / somme des pertes
            profit_factor = round( np.sum(arr_b1000[ arr_b1000>0 ]) / np.sum( np.abs(arr_b1000[ arr_b1000<0 ]) ) ,2)
            
            # Agrégation des résultats si DataFrame ES non nul
            row = [win_rate, moyenne_gains_nets, moyenne_pertes_nettes, expectancy, expectancy_ratio, profit_factor, perf_nette_b1000]
        
        except:
            # Si valeurs Nan ou autre générant une erreur dans les métriques
            row = [0, 0, 0, 0, 0, 0, [0]]

        # Tous les attributs de l'object au moment du test
        #d_attr = x.__dict__
        d_attr = self.__dict__
        row.append(d_attr)

        return row
    
    
    def apply_long_strategy(self, df_entries):
        ''' Entrée : DataFrame avec indicateurs techniques généré par get_entries_signals
        Sortie : DataFrame avec entrée - sortie, valeur & date'''

        df = df_entries.copy()
        d_entrees_sorties = {
            "date_entree" : [],
            "valeur_entree" : [],
            "date_sortie" : [],
            "valeur_sortie" : []
        }

        # Liste des entrées : +1 pour signal Long
        short_entries_indexes = df.loc[ df["Entry"]==1 ].index
        # Ajout d'une unité pour entrée le lendemain du signal confirmé et clos
        short_entries_indexes += 1
        short_entries_indexes = short_entries_indexes.to_list()
        
        # Correction : Si la liste d'entrée est vide -> donne 1 seule entrée, celle de la date du jour
        if not short_entries_indexes :
            short_entries_indexes.append(max(df.index))
        
        # Correction : avec le +=1, si signal le dernier jour on est out of bounds de la liste
        if short_entries_indexes[-1] > max(df.index) :
            short_entries_indexes[-1] = max(df.index)

        # Application des deux fonctions précédentes pour recherche Date + Valeur, d'entrée et sortie
        for entry in short_entries_indexes:
            # Calcul stop loss, take profit, et stocke Date + Open correspondants à l'index d'entrée passé dans la fonction
            entry_date, entry_price, sl, tp = generate_exit_vars_Long(df, entry, self.ATR_SL, self.RR_ratio)
            # Détetion de Date + Close de la sortie, identifée avec Take Profit et stop loss trouvés ligne précédente
            exit_date, exit_price = return_SL_or_TP_index_Long(df, entry, sl, tp)

            # Ajout des résultats dans le dictionnaire
            d_entrees_sorties["date_entree"].append(entry_date)
            d_entrees_sorties["valeur_entree"].append(entry_price)
            d_entrees_sorties["date_sortie"].append(exit_date)
            d_entrees_sorties["valeur_sortie"].append(exit_price)

        df_es = pd.DataFrame(d_entrees_sorties)
        return df_es
    
    
    def performance_metrics_long(self, df_es):
        ''' Sur la base du DataFrame Entrées/Sorties, génère les performances pour chaque trade, l'équity curve base 1000,
        le Win Rate, Expectancy Ratio, Expectancy, Profit Factor, Gain & Perte moyenne, et copie l'état des paramètres'''

        df = df_es.copy()

        try :
            # Performance en % pour chaque trade pris (par ligne). Signe - car stratégie Short
            s_perf = round( ((df["valeur_sortie"]-df["valeur_entree"])/df["valeur_entree"]), 3)
            a_perf = np.array(s_perf)

            # Win Rate
            count_won = len( a_perf[a_perf > 0] )
            count_lost = len(a_perf) - count_won
            win_rate = round( count_won/len(a_perf) ,2)

            # Reward-to-Risk Ratio X Win Ratio - Loss Ratio = Expectancy Ratio
            expectancy_ratio = round( self.RR_ratio * count_won/len(a_perf) - count_lost/len(a_perf) ,2)

            # Equity Curve, base 1000
            l_perf = s_perf.to_list()
            # Commission de 1% en entrée et en sortie
            perf_nette_b1000 = [round( (999*(1+x*self.leverage)-1000)*0.99 ,2) for x in l_perf]

            # Gain & perte moyenne (arrondi entier)
            arr_b1000 = np.array(perf_nette_b1000)
            moyenne_gains_nets = int( np.mean(arr_b1000[ arr_b1000>0 ]) )
            moyenne_pertes_nettes = int( np.mean(arr_b1000[ arr_b1000<0 ]) )

            # Expectancy (arrondi entier)
            expectancy = int( (count_won/len(a_perf) * moyenne_gains_nets) - (count_lost/len(a_perf) * moyenne_pertes_nettes) )

            # Profit Factor : sommes des gains / somme des pertes
            profit_factor = round( np.sum(arr_b1000[ arr_b1000>0 ]) / np.sum( np.abs(arr_b1000[ arr_b1000<0 ]) ) ,2)
            
            # Agrégation des résultats si DataFrame ES non nul
            row = [win_rate, moyenne_gains_nets, moyenne_pertes_nettes, expectancy, expectancy_ratio, profit_factor, perf_nette_b1000]
        
        except:
            # Si valeurs Nan ou autre générant une erreur dans les métriques
            row = [0, 0, 0, 0, 0, 0, [0]]

        # Tous les attributs de l'object au moment du test
        #d_attr = x.__dict__
        d_attr = self.__dict__
        row.append(d_attr)

        return row
    

## Test multiple, en parrallèle

In [23]:
print("Number of processors: ", mp.cpu_count())

Number of processors:  64


#### Optimisation short

In [28]:
def mesure_performance(combinaison):
    try : 
        # nouvel objet
        backtest = Strat_AT_STC_EMA()

        # Attribution meilleurs paramètres fixes optimisés Short
        backtest.ATR_SL_l = 26
        backtest.RR_ratio = 5
        backtest.at_m = 0.2
        backtest.at_l = 33

        # Attribution des paramètres variables de test
        backtest.stc_seuil_h = combinaison[0]
        backtest.ema_l = combinaison[1]
        backtest.stc_l = combinaison[2]
        backtest.stc_s_l = combinaison[3]
        backtest.stc_f_l = combinaison[4]
        backtest.at_mfi_s = combinaison[5]
        backtest.at_mfi_l = combinaison[6]

        df_indicateurs_techniques = backtest.make_technical_indicators(df_historical_data)
        df_signaux = backtest.get_entries_signals(df_indicateurs_techniques)
        df_entrees_sorties = backtest.apply_short_strategy(df_signaux)

        ligne_resultat = backtest.performance_metrics_short(df_entrees_sorties)
        return ligne_resultat
    
    except Exception:
        logging.exception("f(%r) failed" % (args,))

In [29]:
st = time.time()

if __name__=="__main__":
    pool = mp.Pool(mp.cpu_count())
    results = pool.map(mesure_performance, [combi for combi in combinaisons])
    pool.close()
    #pool.join()

et = time.time()

#### Optimisation Long

In [31]:
def mesure_performance(combinaison):
    try : 
        # nouvel objet
        backtest = Strat_AT_STC_EMA()

        # Attribution meilleurs paramètres fixes optimisés Short


        # Attribution des paramètres variables de test
        backtest.stc_seuil_b = combinaison[0]
        backtest.ema_l = combinaison[1]
        backtest.stc_l = combinaison[2]
        backtest.stc_s_l = combinaison[3]
        backtest.stc_f_l = combinaison[4]
        backtest.at_mfi_s = combinaison[5]
        backtest.at_mfi_l = combinaison[6]
        backtest.at_l = combinaison[7]
        backtest.at_m = combinaison[8]
        backtest.RR_ratio = combinaison[9]
        backtest.ATR_SL_l = combinaison[10]
        backtest.ATR_SL = combinaison[11]
    
        # Calculs
        df_indicateurs_techniques = backtest.make_technical_indicators(df_historical_data)
        df_signaux = backtest.get_entries_signals(df_indicateurs_techniques)
        df_entrees_sorties = backtest.apply_long_strategy(df_signaux)

        ligne_resultat = backtest.performance_metrics_long(df_entrees_sorties)
        return ligne_resultat
    
    except Exception:
        logging.exception("f(%r) failed" % (args,))

In [None]:
st = time.time()

if __name__=="__main__":
    pool = mp.Pool(48)
    results = pool.map(mesure_performance, [combi for combi in combinaisons])
    pool.close()
    #pool.join()

et = time.time()

#### Ajout colonnes supplémentaires, nettoyage & export

In [None]:
elapsed_time = (et - st)/60
print('Execution time: {} minutes'.format(int(elapsed_time)))

In [33]:
df_optimisation_short = pd.DataFrame(results, columns = ["win_rate", "moyenne_gains_nets", "moyenne_pertes_nettes", "expectancy", "expectancy_ratio", "profit_factor", "equity_curve", "parametres"])
df_optimisation_short.shape

(54, 8)

In [34]:
# On écarte les trades supposés non profitables
df_optimisation_short_best_only = df_optimisation_short.loc[ df_optimisation_short["profit_factor"]>2 ]

In [37]:
def get_nb_trades(liste_trades):
    '''déduit le nombre de trades de la longueur de la liste equity_curve'''
    return len(liste_trades)

In [37]:
s_nb_trades = df_optimisation_short_best_only["equity_curve"].apply(get_nb_trades)
df_optimisation_short_best_only["nb_trades"] = s_nb_trades

In [37]:
df_optimisation_short_best_only["expected_gain"] = df_optimisation_short_best_only["nb_trades"] * df_optimisation_short_best_only["expectancy"]

In [38]:
df_optimisation_short_best_only = df_optimisation_short_best_only.sort_values(by=["expected_gain","profit_factor"], ascending=False)

In [39]:
df_optimisation_short_best_only.shape

(54, 10)

In [40]:
wslib = access_project_or_space()
asset = wslib.save_data("Long_wide.csv", data=str.encode(df_optimisation_short_best_only.to_csv(header=True, index=False)), overwrite=True)
wslib.show(asset)

{'name': 'Bayes_params_detailed_performance.csv',
 'asset_type': 'data_asset',
 'asset_id': '1b841d20-70ea-4aab-95c6-9a00eb31f796',
 'attachment_id': 'b8c6eaf9-d4da-48c2-81e9-c346af44644b',
 'filepath': 'Bayes_params_detailed_performance.csv',
 'data_size': None,
 'mime': 'text/csv',
 'summary': ['created or overwritten file',
             'created data asset',
             'created attachment']}


# Test unitaire

In [19]:
x = Strat_AT_STC_EMA()
# Attribution meilleurs paramètres fixes optimisés Short
x.at_m = 0.2
x.stc_f_l = 25
x.at_l = 36
x.ema_l = 200
x.stc_l = 80
x.stc_s_l = 50

In [20]:
# Attribution des paramètres variables de test
x.ATR_SL_l = 14
x.ATR_SL = 2
x.RR_ratio = 5
x.stc_seuil_b = 25
x.stc_seuil_h = 75

In [21]:
print(x.__dict__)

{'ema_l': 200, 'at_m': 0.2, 'at_l': 36, 'at_mfi_l': 14, 'at_mfi_s': 50, 'stc_l': 80, 'stc_s_l': 50, 'stc_f_l': 25, 'stc_seuil_b': 25, 'stc_seuil_h': 75, 'ATR_SL_l': 14, 'ATR_SL': 2, 'RR_ratio': 5, 'leverage': 1}


In [22]:
df_indicateurs_techniques = x.make_technical_indicators(df_historical_data)
df_signaux = x.get_entries_signals(df_indicateurs_techniques)
df_entrees_sorties = x.apply_long_strategy(df_signaux)

ligne_resultat = x.performance_metrics_long(df_entrees_sorties)

In [None]:
ligne_resultat

# Optimisation Bayesienne

In [19]:
def best_Long(p_ema, p_AT_m, p_AT_l, p_AT_mfi_l, p_AT_mfi_s, p_STC_l, p_STC_slow_l, p_STC_fast_l, p_STC_b, p_ATR_SL_l, p_ATR_SL, p_RR_ratio):
    '''Meilleur Profit Factor pour stratégie Long.
    Tous paramètres sauf Leverage'''
    try :
        # nouvel objet
        bayes = Strat_AT_STC_EMA()

        # Attribution des paramètres
        bayes.ema_l = int(p_ema)
        bayes.at_m = round(p_AT_m ,1)
        bayes.at_l = int(p_AT_l)
        bayes.at_mfi_l = int(p_AT_mfi_l)
        bayes.at_mfi_s =  int(p_AT_mfi_s)
        bayes.stc_l = int(p_STC_l)
        bayes.stc_s_l = int(p_STC_slow_l)
        bayes.stc_f_l = int(p_STC_fast_l)
        bayes.stc_seuil_b = int(p_STC_b)
        bayes.ATR_SL_l = int(p_ATR_SL_l)
        bayes.ATR_SL = round(p_ATR_SL, 1)
        bayes.RR_ratio = round(p_RR_ratio, 1)
   
        # Calculs
        df_indicateurs_techniques = bayes.make_technical_indicators(df_historical_data)
        df_signaux = bayes.get_entries_signals(df_indicateurs_techniques)
        df_entrees_sorties = bayes.apply_long_strategy(df_signaux)

        ligne_resultat = bayes.performance_metrics_long(df_entrees_sorties)
        # Profit Factor
        pf = ligne_resultat[5]
        # Expected Gain
        eg = len(ligne_resultat[6]) * ligne_resultat[3]        
                
        #return pf
        return eg
    
    except Exception:
        logging.exception("f(%r) failed" % (args,))

In [21]:
# Bounded region of parameter space
pbounds = {'p_ema' : (20,130), 
           'p_AT_m' : (0.2,1.6), 
           'p_AT_l' : (6,40), 
           'p_AT_mfi_l' : (10,20), 
           'p_AT_mfi_s' : (50,58), 
           'p_STC_l' : (50,140), 
           'p_STC_slow_l' : (40,130), 
           'p_STC_fast_l' : (10,40), 
           'p_STC_b' : (15,35), 
           'p_ATR_SL_l' : (6,40), 
           'p_ATR_SL' : (1,4), 
           'p_RR_ratio' : (2,6) 
           }

In [22]:
optimizer = BayesianOptimization(
    f=best_Long,
    pbounds=pbounds,
    random_state=1,
    verbose=1
)

In [23]:
optimizer.maximize(
    init_points=10,
    n_iter=100,
)

|   iter    |  target   | p_ATR_SL  | p_ATR_... |  p_AT_l   |  p_AT_m   | p_AT_m... | p_AT_m... | p_RR_r... |  p_STC_b  | p_STC_... |  p_STC_l  | p_STC_... |   p_ema   |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [95m3        [0m | [95m7.198e+03[0m | [95m3.629    [0m | [95m36.42    [0m | [95m8.892    [0m | [95m0.2547   [0m | [95m11.7     [0m | [95m57.03    [0m | [95m2.393    [0m | [95m23.42    [0m | [95m38.74    [0m | [95m97.98    [0m | [95m102.3    [0m | [95m54.71    [0m |
| [95m24       [0m | [95m1.001e+04[0m | [95m3.843    [0m | [95m31.67    [0m | [95m17.49    [0m | [95m0.2308   [0m | [95m12.26    [0m | [95m56.96    [0m | [95m4.509    [0m | [95m17.86    [0m | [95m28.85    [0m | [95m95.67    [0m | [95m121.8    [0m | [95m120.2    [0m |
| [95m28       [0m | [95m1.106e+04[0m | [95m4.0      [0m | [9

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


| [95m61       [0m | [95m2.136e+04[0m | [95m4.0      [0m | [95m23.87    [0m | [95m6.0      [0m | [95m0.2      [0m | [95m10.0     [0m | [95m58.0     [0m | [95m6.0      [0m | [95m35.0     [0m | [95m40.0     [0m | [95m50.0     [0m | [95m130.0    [0m | [95m43.85    [0m |


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


| [95m80       [0m | [95m2.221e+04[0m | [95m4.0      [0m | [95m40.0     [0m | [95m6.0      [0m | [95m0.2      [0m | [95m10.0     [0m | [95m58.0     [0m | [95m6.0      [0m | [95m35.0     [0m | [95m40.0     [0m | [95m50.0     [0m | [95m110.2    [0m | [95m36.18    [0m |
| [95m82       [0m | [95m2.45e+04 [0m | [95m4.0      [0m | [95m40.0     [0m | [95m6.0      [0m | [95m0.2      [0m | [95m10.0     [0m | [95m58.0     [0m | [95m6.0      [0m | [95m35.0     [0m | [95m40.0     [0m | [95m50.0     [0m | [95m109.4    [0m | [95m22.87    [0m |


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)




In [24]:
print(optimizer.max)

{'target': 24503.0, 'params': {'p_ATR_SL': 4.0, 'p_ATR_SL_l': 40.0, 'p_AT_l': 6.0, 'p_AT_m': 0.2, 'p_AT_mfi_l': 10.0, 'p_AT_mfi_s': 58.0, 'p_RR_ratio': 6.0, 'p_STC_b': 35.0, 'p_STC_fast_l': 40.0, 'p_STC_l': 50.0, 'p_STC_slow_l': 109.44226754080721, 'p_ema': 22.870218673115115}}


## Optimisations par fonction d'acquisiton

In [25]:
from bayes_opt import UtilityFunction

In [26]:
help(optimizer.space)

Help on TargetSpace in module bayes_opt.target_space object:

class TargetSpace(builtins.object)
 |  TargetSpace(target_func, pbounds, constraint=None, random_state=None, allow_duplicate_points=False)
 |  
 |  Holds the param-space coordinates (X) and target values (Y)
 |  Allows for constant-time appends while ensuring no duplicates are added
 |  
 |  Example
 |  -------
 |  >>> def target_func(p1, p2):
 |  >>>     return p1 + p2
 |  >>> pbounds = {'p1': (0, 1), 'p2': (1, 100)}
 |  >>> space = TargetSpace(target_func, pbounds, random_state=0)
 |  >>> x = space.random_points(1)[0]
 |  >>> y = space.register_point(x)
 |  >>> assert self.max_point()['max_val'] == y
 |  
 |  Methods defined here:
 |  
 |  __contains__(self, x)
 |  
 |  __init__(self, target_func, pbounds, constraint=None, random_state=None, allow_duplicate_points=False)
 |      Parameters
 |      ----------
 |      target_func : function
 |          Function to be maximized.
 |      
 |      pbounds : dict
 |          Dic

<b> Description des fonctions d'optimisation </b>
<p>Choosing the most suitable acquisition function depends on the specific characteristics of your optimization problem. <br>Given that you have an enormous number of combinations (over a trillion), exploration is likely crucial in the initial stages to find promising regions. <br>Therefore, starting with EI or UCB could be beneficial as they inherently balance exploration and exploitation.</p>

<p>EI, being a well-rounded acquisition function, is often a popular choice. <br>It performs well in a variety of scenarios, striking a balance between exploration and exploitation. <br>If your objective function is relatively smooth and free from many local optima, EI might be a good starting point.</p>

<p>UCB, on the other hand, can be more aggressive in exploring uncertain regions. <br>If your objective function is highly noisy or has a complex landscape with <b>many local optima</b>, UCB might help in discovering diverse regions and avoiding premature convergence.</p>

### Par défaut

In [28]:
optimizer_expl = BayesianOptimization(
    f = best_Long,
    pbounds = pbounds,
    random_state = 7,
    verbose = 1,
)

In [28]:
optimizer_expl.maximize(
    init_points = 50,
    n_iter = 1500,
)

Maximum obtenu après 1000 itérations ?!

In [29]:
print(optimizer_expl.max)

{'target': 29859.0, 'params': {'p_ATR_SL': 4.0, 'p_ATR_SL_l': 13.775354832670509, 'p_AT_l': 17.14162730146001, 'p_AT_m': 0.2, 'p_AT_mfi_l': 10.0, 'p_AT_mfi_s': 58.0, 'p_RR_ratio': 6.0, 'p_STC_b': 35.0, 'p_STC_fast_l': 22.307464024675635, 'p_STC_l': 62.12246263882115, 'p_STC_slow_l': 49.03818252282128, 'p_ema': 20.0}}


In [30]:
l_res_default = optimizer_expl.space.res()

In [32]:
df_default = pd.DataFrame(l_res_default, columns=["target","params"])\
            .sort_values(by=["target"], ascending=False)\
            .reset_index(drop=True)

Unnamed: 0,target,params
0,29859.0,"{'p_ATR_SL': 4.0, 'p_ATR_SL_l': 13.77535483267..."


In [32]:
df_default.head(1)

Unnamed: 0,target,params
0,29859.0,"{'p_ATR_SL': 4.0, 'p_ATR_SL_l': 13.77535483267..."


### Upper Confidence Bound

UCB/EXPLORATION

In [40]:
optimizer_UCB_exploration = BayesianOptimization(
    f = best_Long,
    pbounds = pbounds,
    random_state = 2020,
    verbose = 1,
)

In [45]:
'''Common Range: 1e-6 to 1e-2
Explanation: The alpha parameter controls the amount of noise in the observed data. 
Higher values indicate higher noise levels, while lower values indicate less noise.'''
optimizer_UCB_exploration.set_gp_params(alpha=1e-4, kernel=None, n_restarts_optimizer=5)

In [46]:
# Valeur Kappa 10 = maximum Exploration 
# (0.1 = maximum Exploitation)
acquisition_function = UtilityFunction(kind="ucb", kappa=10)

In [47]:
optimizer_UCB_exploration.maximize(
    init_points = 50,
    n_iter = 1500,
    acquisition_function=acquisition_function,
)

In [79]:
l_res_ucb_exploration = optimizer_UCB_exploration.space.res()

In [81]:
df_ucb_exploration = pd.DataFrame(l_res_ucb_exploration, columns=["target","params"])\
            .sort_values(by=["target"], ascending=False)\
            .reset_index(drop=True)

In [81]:
df_ucb_exploration.head(1)

UCB /EXPLOITATION

In [40]:
optimizer_UCB_exploitation = BayesianOptimization(
    f = best_Long,
    pbounds = pbounds,
    random_state = 51,
    verbose = 1,
    allow_duplicate_points = True,
)

In [45]:
'''Common Range: 1e-6 to 1e-2
Explanation: The alpha parameter controls the amount of noise in the observed data. 
Higher values indicate higher noise levels, while lower values indicate less noise.'''
optimizer_UCB_exploitation.set_gp_params(alpha=1e-4, kernel=None, n_restarts_optimizer=5)

In [46]:
# Valeur Kappa : 10 = maximum Exploration 
# 0.1 = maximum Exploitation
acquisition_function = UtilityFunction(kind="ucb", kappa=0.5)

In [47]:
st1 = time.time()

optimizer_UCB_exploitation.maximize(
    init_points = 100,
    n_iter = 1500,
    acquisition_function = acquisition_function,
)

et1 = time.time()

In [78]:
elapsed_time = (et1 - st1)/60
print('Execution time: {} minutes'.format(int(elapsed_time)))

In [79]:
l_res_ucb_exploitation = optimizer_UCB_exploitation.space.res()

In [81]:
df_ucb_exploitation = pd.DataFrame(l_res_ucb_exploitation, columns=["target","params"])\
            .sort_values(by=["target"], ascending=False)\
            .reset_index(drop=True)

In [81]:
df_ucb_exploitation.head(1)

In [52]:
df_ucb = pd.concat([df_ucb_exploration, df_ucb_exploitation])
df_ucb = pd.concat([df_ucb, df_ucb_exploitation, df_default])

### Probability of Improvement

POI/EXPLOITATION

In [68]:
optimizer_POI_exploitation = BayesianOptimization(
    f = best_Long,
    pbounds = pbounds,
    random_state = 64,
    verbose = 1,
    allow_duplicate_points = True,
)

In [71]:
# exploration xi=0.1
# exploitation xi=1e-4
acquisition_function = UtilityFunction(kind="poi", xi=1e-3)

|   iter    |  target   | p_ATR_SL  | p_ATR_... |  p_AT_l   |  p_AT_m   | p_AT_m... | p_AT_m... | p_RR_r... |  p_STC_b  | p_STC_... |  p_STC_l  | p_STC_... |   p_ema   |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [95m2        [0m | [95m7.107e+03[0m | [95m2.746    [0m | [95m11.7     [0m | [95m6.995    [0m | [95m0.4922   [0m | [95m10.23    [0m | [95m55.08    [0m | [95m2.898    [0m | [95m22.97    [0m | [95m28.59    [0m | [95m50.18    [0m | [95m43.21    [0m | [95m61.45    [0m |
| [95m9        [0m | [95m1.085e+04[0m | [95m3.6      [0m | [95m22.29    [0m | [95m33.45    [0m | [95m0.2327   [0m | [95m16.02    [0m | [95m50.62    [0m | [95m4.177    [0m | [95m15.98    [0m | [95m32.2     [0m | [95m95.27    [0m | [95m44.24    [0m | [95m22.49    [0m |


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[91mData point [ 3.60047157 22.29117055 33.44694065  0.23270928 16.02398954 50.61865942
  4.17680021 15.9839305  32.19829091 95.27178591 44.23664743 22.48960214] is not unique. 1 duplicates registered. Continuing ...[0m
| [95m104      [0m | [95m1.252e+04[0m | [95m3.944    [0m | [95m33.94    [0m | [95m6.011    [0m | [95m0.4121   [0m | [95m18.15    [0m | [95m50.44    [0m | [95m5.492    [0m | [95m24.07    [0m | [95m33.19    [0m | [95m76.54    [0m | [95m121.7    [0m | [95m48.07    [0m |
| [95m158      [0m | [95m1.295e+04[0m | [95m3.87     [0m | [95m8.165    [0m | [95m21.97    [0m | [95m0.409    [0m | [95m11.47    [0m | [95m56.79    [0m | [95m4.308    [0m | [95m21.6     [0m | [95m37.4     [0m | [95m56.68    [0m | [95m85.94    [0m | [95m58.42    [0m |
| [95m275      [0m | [95m1.382e+04[0m | [95m3.959    [0m | [95m34.53    [0m | [95m6.189    [0m | [95m0.5632   [0m | [95m12.36    [0m | [95m51.71    [0m | [95m5.804    [

In [70]:
optimizer_POI_exploitation.set_gp_params(alpha=1e-3, kernel=None, n_restarts_optimizer=5)

In [71]:
optimizer_POI_exploitation.maximize(
    init_points=100,
    n_iter=1500,
    acquisition_function=acquisition_function
)

|   iter    |  target   | p_ATR_SL  | p_ATR_... |  p_AT_l   |  p_AT_m   | p_AT_m... | p_AT_m... | p_RR_r... |  p_STC_b  | p_STC_... |  p_STC_l  | p_STC_... |   p_ema   |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [95m2        [0m | [95m7.107e+03[0m | [95m2.746    [0m | [95m11.7     [0m | [95m6.995    [0m | [95m0.4922   [0m | [95m10.23    [0m | [95m55.08    [0m | [95m2.898    [0m | [95m22.97    [0m | [95m28.59    [0m | [95m50.18    [0m | [95m43.21    [0m | [95m61.45    [0m |
| [95m9        [0m | [95m1.085e+04[0m | [95m3.6      [0m | [95m22.29    [0m | [95m33.45    [0m | [95m0.2327   [0m | [95m16.02    [0m | [95m50.62    [0m | [95m4.177    [0m | [95m15.98    [0m | [95m32.2     [0m | [95m95.27    [0m | [95m44.24    [0m | [95m22.49    [0m |


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[91mData point [ 3.60047157 22.29117055 33.44694065  0.23270928 16.02398954 50.61865942
  4.17680021 15.9839305  32.19829091 95.27178591 44.23664743 22.48960214] is not unique. 1 duplicates registered. Continuing ...[0m
| [95m104      [0m | [95m1.252e+04[0m | [95m3.944    [0m | [95m33.94    [0m | [95m6.011    [0m | [95m0.4121   [0m | [95m18.15    [0m | [95m50.44    [0m | [95m5.492    [0m | [95m24.07    [0m | [95m33.19    [0m | [95m76.54    [0m | [95m121.7    [0m | [95m48.07    [0m |
| [95m158      [0m | [95m1.295e+04[0m | [95m3.87     [0m | [95m8.165    [0m | [95m21.97    [0m | [95m0.409    [0m | [95m11.47    [0m | [95m56.79    [0m | [95m4.308    [0m | [95m21.6     [0m | [95m37.4     [0m | [95m56.68    [0m | [95m85.94    [0m | [95m58.42    [0m |
| [95m275      [0m | [95m1.382e+04[0m | [95m3.959    [0m | [95m34.53    [0m | [95m6.189    [0m | [95m0.5632   [0m | [95m12.36    [0m | [95m51.71    [0m | [95m5.804    [

In [73]:
# Obtention des résultats des itérations
l_poi_exploitation = optimizer_POI_exploitation.space.res()

# Conversion en df + tri ordre décroissant
df_poi_exploitation = pd.DataFrame(l_poi_exploitation, columns=["target","params"])\
            .sort_values(by=["target"], ascending=False)\
            .reset_index(drop=True)

Unnamed: 0,target,params
0,15440.0,"{'p_ATR_SL': 3.607086312033497, 'p_ATR_SL_l': ..."


In [73]:
df_poi_exploitation.head(1)

Unnamed: 0,target,params
0,15440.0,"{'p_ATR_SL': 3.607086312033497, 'p_ATR_SL_l': ..."


#### Agrégation et export

POI/EXPLORATION

In [74]:
optimizer_POI_exploration = BayesianOptimization(
    f = best_Long,
    pbounds = pbounds,
    random_state = 1664,
    verbose = 1,
    allow_duplicate_points = True,
)

In [75]:
optimizer_POI_exploration.set_gp_params(alpha=1e-3, kernel=None, n_restarts_optimizer=5)

In [76]:
# Prefer exploration (xi=0.1)
# exploitation xi=1e-4
acquisition_function_poi = UtilityFunction(kind="poi", xi=0.1)

In [77]:
st2 = time.time()

optimizer_POI_exploration.maximize(
    init_points = 50,
    n_iter = 1500,
    acquisition_function = acquisition_function_poi,
)

et2 = time.time()

|   iter    |  target   | p_ATR_SL  | p_ATR_... |  p_AT_l   |  p_AT_m   | p_AT_m... | p_AT_m... | p_RR_r... |  p_STC_b  | p_STC_... |  p_STC_l  | p_STC_... |   p_ema   |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [95m4        [0m | [95m7.164e+03[0m | [95m3.983    [0m | [95m13.94    [0m | [95m31.06    [0m | [95m0.8496   [0m | [95m15.8     [0m | [95m51.54    [0m | [95m5.952    [0m | [95m21.67    [0m | [95m14.54    [0m | [95m123.2    [0m | [95m57.51    [0m | [95m104.7    [0m |
| [95m5        [0m | [95m9.558e+03[0m | [95m3.236    [0m | [95m39.17    [0m | [95m17.69    [0m | [95m1.017    [0m | [95m18.87    [0m | [95m51.02    [0m | [95m4.368    [0m | [95m33.66    [0m | [95m21.44    [0m | [95m67.26    [0m | [95m59.55    [0m | [95m29.65    [0m |
[91mData point [ 3.23584487 39.16510926 17.69355741  1.01734503 18.

In [78]:
elapsed_time = (et2 - st2)/60
print('Execution time: {} minutes'.format(int(elapsed_time)))

In [79]:
l_poi_exploration = optimizer_POI_exploration.space.res()

In [81]:
df_poi_exploration = pd.DataFrame(l_poi_exploration, columns=["target","params"])\
            .sort_values(by=["target"], ascending=False)\
            .reset_index(drop=True)

In [81]:
df_poi_exploration.head(1)

Merge

In [82]:
df_poi = pd.concat([df_poi_exploitation, df_poi_exploration])
df_bayes_concat = pd.concat([df_poi, df_ucb])

In [83]:
wslib = access_project_or_space()
asset = wslib.save_data("Bayes_expl_EG.csv", data=str.encode(df_bayes_concat.to_csv(header=True, index=False)), overwrite=True)
wslib.show(asset)

## Test Unitaire fonctions adaptées Bayes

In [None]:
st = time.time()
a, b = best_Long(p_ema=80, p_AT_m=0.2, p_AT_l=14, p_AT_mfi_l=12, p_AT_mfi_s=54, p_STC_l=100, p_STC_slow_l=90, p_STC_fast_l=15, p_STC_b=20, p_ATR_SL_l=36, p_ATR_SL=3, p_RR_ratio=3)
et = time.time()
elapsed_time = (et - st)

In [39]:
print("Profit Factor : {}\nExpected Gain : {}\nDurée : {:.2f}s".format(a, b, elapsed_time))

Profit Factor : 6.51
Expected Gain : 6968
Durée : 0.55s


## Performance détaillée sur la base des meilleures combinaisons de paramètres obtenus

In [91]:
# Unnest de la colonne params
df_bayes_unnested = df_bayes_concat["params"].apply(pd.Series)

In [92]:
# jointure sur l'index pour retrouver "target"
df_bayes_unnested = df_bayes_concat.join(df_bayes_unnested)

# suppression de l'ancienne colonne params au format json
df_bayes_unnested.drop(columns=["params"],inplace=True)

df_bayes_unnested.sort_values(by=["target"], ascending=False, inplace=True)

In [93]:
df_bayes_unnested.head(1)

Unnamed: 0,target,p_ATR_SL,p_ATR_SL_l,p_AT_l,p_AT_m,p_AT_mfi_l,p_AT_mfi_s,p_RR_ratio,p_STC_b,p_STC_fast_l,p_STC_l,p_STC_slow_l,p_ema
0,29859.0,4.0,13.775355,17.141627,0.2,10.0,58.0,6.0,35.0,22.307464,62.122463,49.038183,20.0


In [94]:
# Réorganisation des colonnes pour correspondre à l'ordre de la classe
df_bayes_unnested = df_bayes_unnested[["target","p_ema","p_AT_l","p_AT_m","p_AT_mfi_l","p_AT_mfi_s","p_STC_l","p_STC_slow_l","p_STC_fast_l","p_STC_b","p_ATR_SL_l","p_ATR_SL","p_RR_ratio"]]

In [95]:
l_params = []

for i in range(0, df_bayes_unnested.shape[0]):
    # Conversion de la ligne en liste de paramètres
    ligne = df_bayes_unnested.iloc[i,1:].to_list()
    l_params.append(ligne)

In [98]:
def mesure_performance_bayes(combinaison):
    try : 
        # nouvel objet
        bayes = Strat_AT_STC_EMA()

        # Attribution des paramètres variables de test
        bayes.ema_l = int(combinaison[0])
        bayes.at_l = int(combinaison[1])
        bayes.at_m = combinaison[2]
        bayes.at_mfi_l = int(combinaison[3])
        bayes.at_mfi_s = int(combinaison[4])
        bayes.stc_l = int(combinaison[5])
        bayes.stc_s_l = int(combinaison[6])
        bayes.stc_f_l = int(combinaison[7])
        bayes.stc_seuil_b = int(combinaison[8])
        bayes.ATR_SL_l = int(combinaison[9])
        bayes.ATR_SL = combinaison[10]
        bayes.RR_ratio = combinaison[11]
    
        # Calculs
        df_indicateurs_techniques = bayes.make_technical_indicators(df_historical_data)
        df_signaux = bayes.get_entries_signals(df_indicateurs_techniques)
        df_entrees_sorties = bayes.apply_long_strategy(df_signaux)

        ligne_resultat = bayes.performance_metrics_long(df_entrees_sorties)
        return ligne_resultat
    
    except Exception:
        logging.exception("f(%r) failed" % (args,))

In [99]:
resultat = []

for combi in l_params :
    resultat.append(mesure_performance_bayes(combi))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [100]:
df_perf_bayes = pd.DataFrame(resultat, columns = ["win_rate", "moyenne_gains_nets", "moyenne_pertes_nettes", "expectancy", "expectancy_ratio", "profit_factor", "equity_curve", "parametres"])
df_perf_bayes.shape

(56938, 8)

In [101]:
# On écarte les trades supposés non profitables
df_perf_bayes_best_only = df_perf_bayes.loc[ df_perf_bayes["profit_factor"]>2 ]

In [102]:
def get_nb_trades(liste_trades):
    '''déduit le nombre de trades de la longueur de la liste equity_curve'''
    return len(liste_trades)

In [103]:
s_nb_trades = df_perf_bayes_best_only["equity_curve"].apply(get_nb_trades)
df_perf_bayes_best_only["nb_trades"] = s_nb_trades

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_perf_bayes_best_only["nb_trades"] = s_nb_trades


In [104]:
df_perf_bayes_best_only["expected_gain"] = df_perf_bayes_best_only["nb_trades"] * df_perf_bayes_best_only["expectancy"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_perf_bayes_best_only["expected_gain"] = df_perf_bayes_best_only["nb_trades"] * df_perf_bayes_best_only["expectancy"]


In [105]:
df_perf_bayes_best_only = df_perf_bayes_best_only.sort_values(by=["expected_gain","profit_factor"], ascending=False)

In [106]:
df_perf_bayes_best_only.shape

(39936, 10)

In [107]:
wslib = access_project_or_space()
asset = wslib.save_data("Bayes_params_detailed_performance_EG.csv", data=str.encode(df_perf_bayes_best_only.to_csv(header=True, index=False)), overwrite=True)
wslib.show(asset)

{'name': 'Bayes_params_detailed_performance_EG.csv',
 'asset_type': 'data_asset',
 'asset_id': '4aca7051-8706-497d-a66f-6d434610d5f1',
 'attachment_id': '1d0a8552-db7c-4ab2-98e9-f9cfd24abe35',
 'filepath': 'Bayes_params_detailed_performance_EG.csv',
 'data_size': None,
 'mime': 'text/csv',
 'summary': ['created or overwritten file',
             'created data asset',
             'created attachment']}


# Tests fonctionnels

Transformation de la colonne params dict en colonnes

In [55]:
df_ucb.head()

Unnamed: 0,target,params
0,20152.0,"{'p_ATR_SL': 3.9731278546206505, 'p_ATR_SL_l':..."
1,18792.0,"{'p_ATR_SL': 3.622195555830843, 'p_ATR_SL_l': ..."
2,17280.0,"{'p_ATR_SL': 3.740846730186993, 'p_ATR_SL_l': ..."
3,16432.0,"{'p_ATR_SL': 3.557398383916127, 'p_ATR_SL_l': ..."
4,16344.0,"{'p_ATR_SL': 3.9709824266133875, 'p_ATR_SL_l':..."


In [56]:
type(df_ucb.iloc[0,1])

dict

In [62]:
df_unnested = df_ucb["params"].apply(pd.Series)

In [66]:
df_ucb_unnested = df_ucb.join(df_unnested)
df_ucb_unnested.drop(columns=["params"],inplace=True)
df_ucb_unnested.sort_values(by=["target"], ascending=False, inplace=True)

In [67]:
df_ucb_unnested.head(1)

Unnamed: 0,target,p_ATR_SL,p_ATR_SL_l,p_AT_l,p_AT_m,p_AT_mfi_l,p_AT_mfi_s,p_RR_ratio,p_STC_b,p_STC_fast_l,p_STC_l,p_STC_slow_l,p_ema
0,29859.0,4.0,13.775355,17.141627,0.2,10.0,58.0,6.0,35.0,22.307464,62.122463,49.038183,20.0
