# H_22082024
# Analisis del volumen profile para encontrar zonas de alta probabilidad y como se comporta el precio en esas zonas.

In [98]:
import pandas as pd
import numpy as np
import MetaTrader5 as mt5
import pytz
from datetime import datetime
import matplotlib.pyplot as plt
import json
import plotly.express as px
import plotly.graph_objects as go

In [99]:
# Variables

STD_MULTIPLIER = 1
ticker = "SI_Z"
start_dt = "2021-05-25"
end_dt = "2024-06-28"
minute_data = "SI_Z_M1_200912010000_202406062259.csv"
daily_data = "SI_Z_Daily_200912010000_202406060000.csv"

## Recopilación de datos, manipulación y limpieza

In [100]:
def data_from_mt5():
    # connect to MetaTrader 5
    if not mt5.initialize():
        print("initialize() failed")
        mt5.shutdown()

    # set time zone to UTC
    timezone = pytz.timezone("Etc/UTC")
    # create 'datetime' objects in UTC time zone to avoid the implementation of a local time zone offset
    utc_from = datetime.strptime(start_dt, "%Y-%m-%d")
    utc_to = datetime.strptime(end_dt, "%Y-%m-%d")#datetime(2024, 8, 1, tzinfo=timezone)
    # request AUDUSD ticks within 11.01.2020 - 11.01.2020
    ohlcv = mt5.copy_rates_range(ticker, mt5.TIMEFRAME_M1, utc_from, utc_to)
    daily_ohlcv = mt5.copy_rates_range(ticker, mt5.TIMEFRAME_D1, utc_from, utc_to)
    print(ohlcv)

    mt5.shutdown()

    df = pd.DataFrame(ohlcv)
    df['time']=pd.to_datetime(df['time'], unit='s')

    # adaptamos el dataframe D1 para luego hacer los analisis de las sesiones
    daily_df = pd.DataFrame(daily_ohlcv)
    daily_df['time']=pd.to_datetime(daily_df['time'], unit='s')

    df = df.set_index('time')
    del df['real_volume']
    del df['spread']

    daily_df = daily_df.set_index('time')
    del daily_df['real_volume']
    del daily_df['spread']

    return df, daily_df

def data_from_csv():
    df = pd.read_csv('C:/Users/iamfr/AlgoTrading/DATA/'+minute_data, sep='\t')
    df['<DATE>'] = pd.to_datetime(df['<DATE>'] + ' ' + df['<TIME>'])
    del df['<TIME>']
    del df['<VOL>']
    del df['<SPREAD>']
    df.columns = ['time', 'open','high', 'low', 'close', 'tick_volume']
    df = df.set_index('time')

    daily_df = pd.read_csv('C:/Users/iamfr/AlgoTrading/DATA/'+daily_data, sep='\t')
    #df['<DATE>'] = pd.to_datetime(df['<DATE>'] + ' ' + df['<TIME>'])
    #del df['<TIME>']
    del daily_df['<VOL>']
    del daily_df['<SPREAD>']
    daily_df.columns = ['time', 'open','high', 'low', 'close', 'tick_volume']
    daily_df = daily_df.set_index('time')
    #daily_df = df.resample('1B').agg({'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'tick_volume': 'sum'})

    return df, daily_df

df, daily_df = data_from_csv()

df

Unnamed: 0_level_0,open,high,low,close,tick_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-12-01 00:00:00,23.00,23.00,22.99,23.00,22
2009-12-01 00:01:00,23.01,23.02,23.00,23.00,67
2009-12-01 00:02:00,23.01,23.01,23.00,23.01,43
2009-12-01 00:03:00,23.01,23.02,23.01,23.01,13
2009-12-01 00:04:00,23.01,23.03,23.01,23.03,57
...,...,...,...,...,...
2024-06-06 22:55:00,31.30,31.30,31.29,31.29,42
2024-06-06 22:56:00,31.29,31.29,31.29,31.29,36
2024-06-06 22:57:00,31.29,31.29,31.29,31.29,21
2024-06-06 22:58:00,31.29,31.30,31.28,31.30,62


In [101]:
def create_market_profile(data, getPOC=True):
    profile = data.groupby('close')['tick_volume'].sum().reset_index()
    total_volume = profile['tick_volume'].sum()
    profile['volume_cumsum'] = profile['tick_volume'].cumsum()

    value_area_cutoff = total_volume * 0.70
    value_area_df = profile[profile['volume_cumsum'] <= value_area_cutoff]
    POC = 0
    if getPOC:
        POC = profile.loc[profile['tick_volume'].idxmax(), 'close']
    else:
        POC = profile.loc[profile['tick_volume'].idxmin(), 'close']

    return profile, value_area_df, POC

def plot_market_profile(profile, value_area_df, POC):
    plt.figure(figsize=(10, 6))
    plt.barh(profile['close'], profile['tick_volume'], color='blue', edgecolor='black')
    plt.barh(value_area_df['close'], value_area_df['tick_volume'], color='green', edgecolor='black')
    plt.axhline(POC, color='red', linestyle='--', label=f'POC: {POC}')

    plt.xlabel('Volume')
    plt.ylabel('Price')
    plt.title(f'Market Profile')
    plt.legend()
    plt.show()

In [102]:
def get_no_fair_range_zone(df, threshold):
    # Crear una máscara booleana para identificar dónde 'tick_volume' es inferior al umbral
    df.sort_values(by=['close'])
    mask = df['tick_volume'] < threshold

    # Encontrar los índices donde empieza y termina cada zona
    rangos = []
    inicio = None
    rsize = 0
    rango_max = []

    for i in range(len(df)):
        if mask[i]:
            if inicio is None:  # Se inicia una nueva zona
                inicio = i
        else:
            if inicio is not None:  # Se cierra la zona actual
                rangos.append([inicio, i - 1])
                inicio = None

    # Si la última zona no se cierra explícitamente en el bucle
    if inicio is not None:
        rangos.append([inicio, len(df) - 1])
                 
    for rango in rangos:
        if rsize <= (rango[1] - rango[0]):  
              rsize = rango[1] - rango[0]
              rango_max = rango

    if len(rangos) < 1:
        return False
    return [df.loc[rango_max[0], 'close'], df.loc[rango_max[1], 'close']]

def get_max_vol_zone(df, threshold):
    # Crear una máscara booleana para identificar dónde 'tick_volume' es inferior al umbral
    df.sort_values(by=['close'])
    mask = df['tick_volume'] > threshold

    # Encontrar los índices donde empieza y termina cada zona
    rangos = []
    inicio = None
    rsize = 0
    rango_max = []

    for i in range(len(df)):
        if mask[i]:
            if inicio is None:  # Se inicia una nueva zona
                inicio = i
        else:
            if inicio is not None:  # Se cierra la zona actual
                rangos.append([inicio, i - 1])
                inicio = None

    # Si la última zona no se cierra explícitamente en el bucle
    if inicio is not None:
        rangos.append([inicio, len(df) - 1])
                 
    for rango in rangos:
        if rsize <= (rango[1] - rango[0]):  
              rsize = rango[1] - rango[0]
              rango_max = rango

    if len(rangos) < 1:
        return False
    return [df.loc[rango_max[0], 'close'], df.loc[rango_max[1], 'close']]

In [103]:
# iteramos cada dia, y cada zona
def get_no_fair_zone_by_day(df):
    out = []

    for index1, day in df.groupby(df.index.date):

        profile, value_area_df, MIN = create_market_profile(day, False)
        mean = profile['tick_volume'].mean()
        stddev = profile['tick_volume'].std()
        threshold = mean + stddev * STD_MULTIPLIER

        no_fair_value_zone = get_no_fair_range_zone(profile, threshold)

        if no_fair_value_zone == False:
            output = {
                "time": index1,
                "min_zone_high": np.nan,
                "min_zone_low": np.nan,
                "MIN": np.nan,
            }
            out.append(output)
        else:
            output = {
                "time": index1,
                "min_zone_high": no_fair_value_zone[1],
                "min_zone_low": no_fair_value_zone[0],
                "MIN": MIN,
            }
            out.append(output)

    return out

# iteramos cada dia, y cada zona
def get_max_vol_zone_by_day(df):
    out = []

    for index1, day in df.groupby(df.index.date):

        profile, value_area_df, POC = create_market_profile(day)
        mean = profile['tick_volume'].mean()
        stddev = profile['tick_volume'].std()
        threshold = mean + stddev * STD_MULTIPLIER

        max_value_zone = get_max_vol_zone(profile, threshold)

        if max_value_zone == False:
            output = {
                "time": index1,
                "max_zone_high": np.nan,
                "max_zone_low": np.nan,
                "POC": np.nan,
            }
            out.append(output)
        else:
            output = {
                "time": index1,
                "max_zone_high": max_value_zone[1],
                "max_zone_low": max_value_zone[0],
                "POC": POC,
            }
            out.append(output)

    return out

## Analisis de datos

In [104]:
#zones = get_no_fair_zone_by_day(df)
max_zones = get_max_vol_zone_by_day(df)
df_max_zones = pd.DataFrame(max_zones)
#df_zones['time'].astype('datetime64[ns]')
df_max_zones = df_max_zones.set_index('time')
df_max_zones.index.astype('datetime64[ns]')

#zones = get_no_fair_zone_by_day(df)
min_zones = get_no_fair_zone_by_day(df)
df_min_zones = pd.DataFrame(min_zones)
#df_zones['time'].astype('datetime64[ns]')
df_min_zones = df_min_zones.set_index('time')
df_min_zones.index.astype('datetime64[ns]')

main_df_2 = pd.merge(df_max_zones, df_min_zones, on=daily_df.index)
main_df_2 = main_df_2.set_index('key_0')
main_df = pd.merge(daily_df, main_df_2, on=daily_df.index)
#daily_df.join(df_zones, on=daily_df.index , how='inner')
main_df = main_df.set_index('key_0')
main_df.dropna()
main_df

#main_df.to_csv( TICKER+'_H22082024_Output_Data.csv', index=True)

Unnamed: 0_level_0,open,high,low,close,tick_volume,max_zone_high,max_zone_low,POC,min_zone_high,min_zone_low,MIN
key_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2009.12.01,23.00,24.26,22.99,24.09,33033,23.64,23.61,23.63,23.53,23.31,23.42
2009.12.02,24.09,24.45,23.94,24.20,32991,24.29,24.29,24.29,24.43,24.30,23.95
2009.12.03,24.20,24.49,23.67,23.87,37566,24.18,24.12,24.14,24.47,24.30,24.46
2009.12.04,23.87,24.06,23.20,23.33,42983,23.29,23.24,23.40,24.04,23.61,23.78
2009.12.05,23.34,23.39,22.97,23.27,10580,23.31,23.29,23.29,23.20,22.98,23.03
...,...,...,...,...,...,...,...,...,...,...,...
2024.05.31,31.19,31.79,30.20,30.39,226013,30.44,30.39,31.22,31.05,30.49,30.81
2024.06.03,30.41,30.82,29.81,30.74,176930,30.54,30.41,30.51,30.40,29.85,29.95
2024.06.04,30.77,30.90,29.38,29.49,205818,29.56,29.51,29.53,30.89,29.98,30.85
2024.06.05,29.50,30.05,29.39,30.00,151927,29.60,29.54,29.58,29.93,29.70,30.04


### SETUP_1. Cierre de rango en la siguiente sesión

Analizaremos si en la siguiente sesión, el precio cierra el rango del dia anterior.

In [105]:
def num1_next_session_close_zone(df):
    results = []

    for i in range(len(df.index)-1):
        if i == 0: pass
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        zone_high = df.iloc[i]['max_zone_high']
        zone_low = df.iloc[i]['max_zone_low']

        # Comprobar si se cumple la condición
        if zone_high <= high and zone_high >= low and zone_low >= low and zone_low <= high:
            results.append(True)  # Cumple la condición
        else:
            results.append(False)  # No cumple la condición

    # Agregar un 0 adicional al final para igualar el tamaño de la columna con el dataframe original
    results.append(False)

    # Crear una nueva columna en el dataframe con los resultados
    df['SETUP_1'] = results

    return df['SETUP_1'].value_counts(normalize=True).mul(100).astype(str)+'%'

print("Porcentaje de cierre de la zona: ", num1_next_session_close_zone(main_df))

Porcentaje de cierre de la zona:  SETUP_1
True      71.72318973418882%
False    28.276810265811182%
Name: proportion, dtype: object


### SETUP_2. Cierre de medio rango en la siguiente sesion

Analizaremos si en la siguiente sesión, el precio cierra la midat del rango.

In [106]:
def num2_next_session_close_half_zone(df):
    results = []

    for i in range(len(df.index) - 1):
        if i == 0: pass
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        zone_high = df.iloc[i]['max_zone_high']
        zone_low = df.iloc[i]['max_zone_low']
        zone_mid = ((zone_high - zone_low) / 2) + zone_low

        if apertura >= zone_high:
            # Mitad superior
            if zone_high <= high and zone_high >= low and zone_mid >= low and zone_mid <= high:
                results.append(True)
            else:
                results.append(False)
        elif apertura <= zone_low:
            # Mitad inferior
            if zone_mid <= high and zone_mid >= low and zone_low >= low and zone_low <= high:
                results.append(True)
            else:
                results.append(False)
        else:
            results.append(False)
    # Agregar un 0 adicional al final para igualar el tamaño de la columna con el dataframe original
    results.append(False)

    # Crear una nueva columna en el dataframe con los resultados
    df['SETUP_2'] = results

    return df['SETUP_2'].value_counts(normalize=True).mul(100).astype(str)+'%'

print("Porcentaje de cierre de media zona: ", num2_next_session_close_half_zone(main_df))

Porcentaje de cierre de media zona:  SETUP_2
True     66.54445462878094%
False    33.45554537121907%
Name: proportion, dtype: object


### SETUP_3. Desde apertura a extremo de zona

Analisis de la cantidad de veces que el precio recorre el rango entre la apertura de la sesion y un valor extremo.

Depende de si la apertura es por debajo o per encima de la zona de alta capitalización, buscaremos largos o cortos.

In [107]:
def num3_from_open_to_range_max_volume(df):
    results = []

    for i in range(len(df.index) - 1):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        zone_high = df.iloc[i]['max_zone_high']
        zone_low = df.iloc[i]['max_zone_low']

        if apertura >= zone_high:
            # Bajista
            if zone_low <= high and zone_low >= low:
                results.append(True)
            else:
                results.append(False)
        elif apertura <= zone_low:
            # Alcista
            if zone_high <= high and zone_high >= low:
                results.append(True)
            else:
                results.append(False)
        else: 
            results.append(False)

    # Agregar un 0 adicional al final para igualar el tamaño de la columna con el dataframe original
    results.append(False)

    # Crear una nueva columna en el dataframe con los resultados
    df['SETUP_3'] = results

    return df['SETUP_3'].value_counts(normalize=True).mul(100).astype(str)+'%'

print("Porcentaje de Apertura-Extremo: ", num3_from_open_to_range_max_volume(main_df))

Porcentaje de Apertura-Extremo:  SETUP_3
True     65.14665444546289%
False    34.85334555453712%
Name: proportion, dtype: object


### SETUP_4. Recorrido a POC anterior session desde apertura 

Analisis de la cantidad de veces que se testea el valor POC de la anterior sesion.

In [108]:
def num4_POC_test(df):
    results = []

    for i in range(len(df.index) - 1):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        poc = df.iloc[i]['POC']

        if apertura >= poc:
            # Bajista
            if poc <= high and poc >= low:
                results.append(True)
            else:
                results.append(False)

        elif apertura <= poc:
            # Alcista
            if poc <= high and poc >= low:
                results.append(True)
            else:
                results.append(False)
        else: 
            results.append(False)

    # Agregar un 0 adicional al final para igualar el tamaño de la columna con el dataframe original
    results.append(False)

    # Crear una nueva columna en el dataframe con los resultados
    df['SETUP_4'] = results

    return df['SETUP_4'].value_counts(normalize=True).mul(100).astype(str)+'%'

print("Porcentaje de test POC: ", num4_POC_test(main_df))

Porcentaje de test POC:  SETUP_4
True     74.47296058661779%
False    25.52703941338222%
Name: proportion, dtype: object


### Nivel de mayor probabilidad estadistica: SETUP_4

A continuación, sacaremos más estadisticas que complementarán el estudio.

In [109]:
def cummulative_not_setup4_true(df):
    results = []
    tmp = 0

    for index, row in df.iterrows():
        if row['SETUP_4'] == False:
            tmp = tmp + 1
        else:
            results.append(tmp)
            tmp = 0

    nparr = np.array(results)

    return nparr.max()

print("Maximos fallos consecutivos de SETUP 4: ", cummulative_not_setup4_true(main_df))

Maximos fallos consecutivos de SETUP 4:  5


In [110]:
def setup4_range_stddev(df):
    df['SETUP_4_range'] = abs(df['POC'] - df['open'])

    true_df = df.query('SETUP_4 == True')
    tmean = true_df['SETUP_4_range'].mean()
    tstddev = true_df['SETUP_4_range'].std()

    false_df = df.query('SETUP_4 == False')
    fmean = false_df['SETUP_4_range'].mean()
    fstddev = false_df['SETUP_4_range'].std()

    return tmean, fmean, tstddev, fstddev

print("Datos de rango MEAN, STDDEV\n", setup4_range_stddev(main_df))

Datos de rango MEAN, STDDEV
 (0.23362461538461535, 0.24350766456266898, 0.3218254059018542, 0.37992880672735735)


In [111]:
main_df

Unnamed: 0_level_0,open,high,low,close,tick_volume,max_zone_high,max_zone_low,POC,min_zone_high,min_zone_low,MIN,SETUP_1,SETUP_2,SETUP_3,SETUP_4,SETUP_4_range
key_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2009.12.01,23.00,24.26,22.99,24.09,33033,23.64,23.61,23.63,23.53,23.31,23.42,False,False,False,False,0.63
2009.12.02,24.09,24.45,23.94,24.20,32991,24.29,24.29,24.29,24.43,24.30,23.95,True,True,True,True,0.20
2009.12.03,24.20,24.49,23.67,23.87,37566,24.18,24.12,24.14,24.47,24.30,24.46,False,False,False,False,0.06
2009.12.04,23.87,24.06,23.20,23.33,42983,23.29,23.24,23.40,24.04,23.61,23.78,True,True,True,False,0.47
2009.12.05,23.34,23.39,22.97,23.27,10580,23.31,23.29,23.29,23.20,22.98,23.03,True,True,True,True,0.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024.05.31,31.19,31.79,30.20,30.39,226013,30.44,30.39,31.22,31.05,30.49,30.81,True,False,False,False,0.03
2024.06.03,30.41,30.82,29.81,30.74,176930,30.54,30.41,30.51,30.40,29.85,29.95,True,True,True,True,0.10
2024.06.04,30.77,30.90,29.38,29.49,205818,29.56,29.51,29.53,30.89,29.98,30.85,True,True,True,True,1.24
2024.06.05,29.50,30.05,29.39,30.00,151927,29.60,29.54,29.58,29.93,29.70,30.04,False,False,False,False,0.08


In [112]:
def low_prob_MIN(df):
    counter = 0

    for i in range(len(df.index) - 2):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        poc = df.iloc[i]['MIN']

        if high >= poc >= low:
            counter = counter + 1

    return (counter / (len(df.index) - 1)) * 100

def low_prob_YL(df):
    counter = 0

    for i in range(len(df.index) - 2):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        yl = df.iloc[i]['low']

        if high >= yl >= low:
            counter = counter + 1

    return (counter / (len(df.index) - 1)) * 100

def low_prob_YH(df):
    counter = 0

    for i in range(len(df.index) - 2):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        yh = df.iloc[i]['high']

        if high >= yh >= low:
            counter = counter + 1

    return (counter / (len(df.index) - 1)) * 100

def low_prob_YO(df):
    counter = 0

    for i in range(len(df.index) - 2):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        yo = df.iloc[i]['open']

        if high >= yo >= low:
            counter = counter + 1

    return (counter / (len(df.index) - 1)) * 100


print("Porcentaje de test MIN Volume: ", low_prob_MIN(main_df))
print("Porcentaje de test YL: ", low_prob_YL(main_df))
print("Porcentaje de test YH: ", low_prob_YH(main_df))
print("Porcentaje de test YO: ", low_prob_YO(main_df))

Porcentaje de test MIN Volume:  49.3696997478799
Porcentaje de test YL:  46.45885858354343
Porcentaje de test YH:  49.62181984872794
Porcentaje de test YO:  53.564061425624566


In [113]:
percentiles_df = main_df.copy()

del percentiles_df['tick_volume']
del percentiles_df['max_zone_high']
del percentiles_df['max_zone_low']
del percentiles_df['min_zone_high']
del percentiles_df['min_zone_low']
del percentiles_df['MIN']
del percentiles_df['SETUP_1']
del percentiles_df['SETUP_2']
del percentiles_df['SETUP_3']
del percentiles_df['SETUP_4']
del percentiles_df['SETUP_4_range']

def POC_percentiles(df):
    perc20 = []
    perc40 = []
    perc60 = []
    perc80 = []
    perc90 = []
    
    for i in range(len(df.index) - 1):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        poc = df.iloc[i]['POC']
        decime = abs(poc - apertura) / 10

        if apertura >= poc:
            # Bajista
            if apertura-(decime*2) <= high and apertura-(decime*2) >= low:
                perc20.append(True)
            else:
                perc20.append(False)

            if apertura-(decime*4) <= high and apertura-(decime*4) >= low:
                perc40.append(True)
            else:
                perc40.append(False)

            if apertura-(decime*6) <= high and apertura-(decime*6) >= low:
                perc60.append(True)
            else:
                perc60.append(False)

            if apertura-(decime*8) <= high and apertura-(decime*8) >= low:
                perc80.append(True)
            else:
                perc80.append(False)

            if apertura-(decime*9) <= high and apertura-(decime*9) >= low:
                perc90.append(True)
            else:
                perc90.append(False)

        elif apertura <= poc:
            # Alcista
            if apertura+(decime*2) <= high and apertura+(decime*2) >= low:
                perc20.append(True)
            else:
                perc20.append(False)

            if apertura+(decime*4) <= high and apertura+(decime*4) >= low:
                perc40.append(True)
            else:
                perc40.append(False)

            if apertura+(decime*6) <= high and apertura+(decime*6) >= low:
                perc60.append(True)
            else:
                perc60.append(False)

            if apertura+(decime*8) <= high and apertura+(decime*8) >= low:
                perc80.append(True)
            else:
                perc80.append(False)

            if apertura+(decime*9) <= high and apertura+(decime*9) >= low:
                perc90.append(True)
            else:
                perc90.append(False)
        else: 
            perc20.append(False)
            perc40.append(False)
            perc60.append(False)
            perc80.append(False)
            perc90.append(False)

    # Agregar un 0 adicional al final para igualar el tamaño de la columna con el dataframe original
    perc20.append(False)
    perc40.append(False)
    perc60.append(False)
    perc80.append(False)
    perc90.append(False)

    # Crear una nueva columna en el dataframe con los resultados
    df['POC 20%'] = perc20
    df['POC 40%'] = perc40
    df['POC 60%'] = perc60
    df['POC 80%'] = perc80
    df['POC 90%'] = perc90

    p20 = df['POC 20%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p40 = df['POC 40%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p60 = df['POC 60%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p80 = df['POC 80%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p90 = df['POC 90%'].value_counts(normalize=True).mul(100).astype(str)+'%'

    return p20, p40, p60, p80, p90

print("Porcentaje de test POC: ", POC_percentiles(percentiles_df))

Porcentaje de test POC:  (POC 20%
True     93.30889092575619%
False    6.691109074243813%
Name: proportion, dtype: object, POC 40%
True     87.76351970669111%
False    12.23648029330889%
Name: proportion, dtype: object, POC 60%
True       82.4243813015582%
False    17.575618698441797%
Name: proportion, dtype: object, POC 80%
True     77.86434463794684%
False    22.13565536205316%
Name: proportion, dtype: object, POC 90%
True     75.59578368469295%
False    24.40421631530706%
Name: proportion, dtype: object)


In [114]:
#main_df.to_csv("OUTPUT.csv")

## Relación entre el Point Of Control (POC) y la zona de mayor transito de ticks (PHF).

In [115]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from datetime import timedelta

In [116]:
# connect to MetaTrader 5
if not mt5.initialize():
    print("initialize() failed")
    mt5.shutdown()

main_df = main_df.loc[start_dt:]
main_df['PHF'] = np.NaN # Create new column to allocate data
# iterate main_df for adding zone to data
for index, row in main_df.iterrows():
    dt1 = datetime.strptime(index.replace('.', '-'), "%Y-%m-%d")
    dt2 = dt1 + timedelta(hours=23, minutes=59, seconds=59) 
    
    ticks = mt5.copy_ticks_range(ticker, dt1, dt2, mt5.COPY_TICKS_ALL)
    # create DataFrame out of the obtained data
    df = pd.DataFrame(ticks)
    # convert time in seconds into the datetime format
    df['time']=pd.to_datetime(df['time'], unit='s')

    df = df.set_index('time')
    del df['last']
    del df['flags']
    del df['volume']
    del df['time_msc']
    del df['volume_real']

    flag_full_session = True #Flag for analising full day, not only the opening
    zone_output = 0 # Output from K-Means algorithm
    for index1, day in df.groupby(df.index.date):
        # Iterating each session
        start_session = pd.to_datetime(index1.strftime('%Y-%m-%d') + ' ' + '15:30:00')
        end_session = pd.to_datetime(index1.strftime('%Y-%m-%d') + ' ' + '16:00:00')

        # DF contains the session to analise.
        if flag_full_session:
            refdf = day
        else:
            refdf = day.loc[start_session:end_session]
        
        # create a MinMaxScaler object
        scaler = MinMaxScaler()

        # fit and transform the data
        normalized_data = scaler.fit_transform(refdf)

        # create a new DataFrame with the normalized data
        ndf = pd.DataFrame(normalized_data, columns=refdf.columns)

        ndf['ask'] = ndf['ask'].fillna(ndf['bid'])
        ndf['bid'] = ndf['bid'].fillna(ndf['ask'])

        ndf_to_matrix = ndf.values
        # Preparing data for clustering: Normalize time and price to have similar scales
        X_time = np.linspace(0, 1, len(ndf_to_matrix)).reshape(-1, 1)
        X_price = (ndf['ask'].values - np.min(ndf['ask'])) / (np.max(ndf['ask']) - np.min(ndf['ask']))
        X_cluster = np.column_stack((X_time, X_price))

        # Applying KMeans clustering
        num_clusters = 1
        kmeans = KMeans(n_clusters=num_clusters)
        kmeans.fit(ndf_to_matrix)

        # Extract cluster centers and rescale back to original price range
        cluster_centers = kmeans.cluster_centers_[:, 1] * (np.max(refdf['ask']) - np.min(refdf['ask'])) + np.min(refdf['ask'])
        
        zones = cluster_centers.tolist()
        #output = {
        #    "date": index1.strftime('%Y-%m-%d'),
        #    "zones": zones,
        #}
        zone_output = cluster_centers

    # Add output to main_df
    main_df.loc[index, 'PHF'] = zone_output

mt5.shutdown()
main_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  main_df['PHF'] = np.NaN # Create new column to allocate data
  X_price = (ndf['ask'].values - np.min(ndf['ask'])) / (np.max(ndf['ask']) - np.min(ndf['ask']))
  X_price = (ndf['ask'].values - np.min(ndf['ask'])) / (np.max(ndf['ask']) - np.min(ndf['ask']))


Unnamed: 0_level_0,open,high,low,close,tick_volume,max_zone_high,max_zone_low,POC,min_zone_high,min_zone_low,MIN,SETUP_1,SETUP_2,SETUP_3,SETUP_4,SETUP_4_range,PHF
key_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021.01.01,27.21,27.30,27.05,27.23,6445,27.18,27.18,27.13,27.30,27.19,27.30,False,False,False,False,0.08,0.000000
2021.01.04,27.49,28.49,27.45,28.05,83209,28.27,28.23,28.41,28.11,27.55,27.74,True,True,True,True,0.92,0.000000
2021.01.05,28.06,28.44,27.63,28.35,76410,28.22,28.18,28.20,28.07,27.71,28.41,True,True,True,True,0.14,0.000000
2021.01.06,28.36,28.86,27.41,27.73,119269,28.23,28.17,28.20,28.84,28.39,28.79,True,True,True,True,0.16,0.000000
2021.01.07,27.73,28.23,27.69,27.96,76402,27.93,27.91,27.87,28.22,28.01,27.70,True,True,True,True,0.14,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024.05.31,31.19,31.79,30.20,30.39,226013,30.44,30.39,31.22,31.05,30.49,30.81,True,False,False,False,0.03,31.948410
2024.06.03,30.41,30.82,29.81,30.74,176930,30.54,30.41,30.51,30.40,29.85,29.95,True,True,True,True,0.10,31.309603
2024.06.04,30.77,30.90,29.38,29.49,205818,29.56,29.51,29.53,30.89,29.98,30.85,True,True,True,True,1.24,30.904280
2024.06.05,29.50,30.05,29.39,30.00,151927,29.60,29.54,29.58,29.93,29.70,30.04,False,False,False,False,0.08,30.585494


In [117]:
main_df = main_df.drop(main_df[main_df.PHF <= 0.00].index)
main_df

Unnamed: 0_level_0,open,high,low,close,tick_volume,max_zone_high,max_zone_low,POC,min_zone_high,min_zone_low,MIN,SETUP_1,SETUP_2,SETUP_3,SETUP_4,SETUP_4_range,PHF
key_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021.05.24,28.34,28.68,28.20,28.51,44052,28.50,28.43,28.46,28.42,28.26,28.34,True,True,True,True,0.12,27.867188
2021.05.25,28.52,28.83,28.20,28.72,63677,28.70,28.68,28.76,28.62,28.44,28.56,True,True,True,True,0.24,27.890444
2021.05.26,28.72,29.02,28.47,28.59,59534,28.60,28.59,28.63,28.87,28.73,28.56,True,True,True,False,0.09,28.168940
2021.05.27,28.59,28.60,28.23,28.50,54289,28.45,28.40,28.42,28.39,28.24,28.24,True,True,True,True,0.17,27.837756
2021.05.28,28.51,28.70,28.12,28.70,59933,28.61,28.56,28.57,28.49,28.14,28.69,True,True,True,True,0.06,27.889249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024.05.31,31.19,31.79,30.20,30.39,226013,30.44,30.39,31.22,31.05,30.49,30.81,True,False,False,False,0.03,31.948410
2024.06.03,30.41,30.82,29.81,30.74,176930,30.54,30.41,30.51,30.40,29.85,29.95,True,True,True,True,0.10,31.309603
2024.06.04,30.77,30.90,29.38,29.49,205818,29.56,29.51,29.53,30.89,29.98,30.85,True,True,True,True,1.24,30.904280
2024.06.05,29.50,30.05,29.39,30.00,151927,29.60,29.54,29.58,29.93,29.70,30.04,False,False,False,False,0.08,30.585494


In [118]:
def num5_PHF_test(df):
    results = []

    for i in range(len(df.index) - 1):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        phf = df.iloc[i]['PHF']

        if apertura >= phf:
            # Bajista
            if phf <= high and phf >= low:
                results.append(True)
            else:
                results.append(False)

        elif apertura <= phf:
            # Alcista
            if phf <= high and phf >= low:
                results.append(True)
            else:
                results.append(False)
        else: 
            results.append(False)

    # Agregar un 0 adicional al final para igualar el tamaño de la columna con el dataframe original
    results.append(False)

    # Crear una nueva columna en el dataframe con los resultados
    df['SETUP_5'] = results

    return df['SETUP_5'].value_counts(normalize=True).mul(100).astype(str)+'%'

print("Porcentaje de test POC: ", num5_PHF_test(main_df))

Porcentaje de test POC:  SETUP_5
False     75.25510204081633%
True     24.744897959183675%
Name: proportion, dtype: object


In [119]:
def cummulative_not_setup5_true(df):
    results = []
    tmp = 0

    for index, row in df.iterrows():
        if row['SETUP_4'] == False:
            tmp = tmp + 1
        else:
            results.append(tmp)
            tmp = 0

    nparr = np.array(results)

    return nparr.max()

print("Maximos fallos consecutivos de SETUP 5: ", cummulative_not_setup5_true(main_df))

Maximos fallos consecutivos de SETUP 5:  4


In [120]:
#Calculate crossed probability of SETUP 4 || 5
def cross_prob_setup4_OR_5(df):
    results = []
    tmp = 0

    for index, row in df.iterrows():
        if row['SETUP_4'] == True or row['SETUP_5'] == True:
            tmp = tmp + 1
    
    return (tmp/len(df))*100

#Calculate crossed probability of SETUP 4 && 5
def cross_prob_setup4_AND_5(df):
    results = []
    tmp = 0

    for index, row in df.iterrows():
        if row['SETUP_4'] == True and row['SETUP_5'] == True:
            tmp = tmp + 1
    
    return (tmp/len(df))*100

print("Probabilidad de SETUP 4 or SETUP 5: ", cross_prob_setup4_OR_5(main_df))
print("Probabilidad de SETUP 4 and SETUP 5: ", cross_prob_setup4_AND_5(main_df))

Probabilidad de SETUP 4 or SETUP 5:  81.25
Probabilidad de SETUP 4 and SETUP 5:  18.239795918367346


In [121]:
percentiles_phf_df = main_df.copy()

del percentiles_phf_df['tick_volume']
del percentiles_phf_df['max_zone_high']
del percentiles_phf_df['max_zone_low']
del percentiles_phf_df['min_zone_high']
del percentiles_phf_df['min_zone_low']
del percentiles_phf_df['MIN']
del percentiles_phf_df['SETUP_1']
del percentiles_phf_df['SETUP_2']
del percentiles_phf_df['SETUP_3']
del percentiles_phf_df['SETUP_4']
del percentiles_phf_df['SETUP_4_range']
del percentiles_phf_df['SETUP_5']

def PHF_percentiles(df):
    perc20 = []
    perc40 = []
    perc60 = []
    perc80 = []
    perc90 = []
    
    for i in range(len(df.index) - 1):
        apertura = df.iloc[i+1]['open']
        high = df.iloc[i+1]['high']
        low = df.iloc[i+1]['low']
        poc = df.iloc[i]['PHF']
        decime = abs(poc - apertura) / 10

        if apertura >= poc:
            # Bajista
            if apertura-(decime*2) <= high and apertura-(decime*2) >= low:
                perc20.append(True)
            else:
                perc20.append(False)

            if apertura-(decime*4) <= high and apertura-(decime*4) >= low:
                perc40.append(True)
            else:
                perc40.append(False)

            if apertura-(decime*6) <= high and apertura-(decime*6) >= low:
                perc60.append(True)
            else:
                perc60.append(False)

            if apertura-(decime*8) <= high and apertura-(decime*8) >= low:
                perc80.append(True)
            else:
                perc80.append(False)

            if apertura-(decime*9) <= high and apertura-(decime*9) >= low:
                perc90.append(True)
            else:
                perc90.append(False)

        elif apertura <= poc:
            # Alcista
            if apertura+(decime*2) <= high and apertura+(decime*2) >= low:
                perc20.append(True)
            else:
                perc20.append(False)

            if apertura+(decime*4) <= high and apertura+(decime*4) >= low:
                perc40.append(True)
            else:
                perc40.append(False)

            if apertura+(decime*6) <= high and apertura+(decime*6) >= low:
                perc60.append(True)
            else:
                perc60.append(False)

            if apertura+(decime*8) <= high and apertura+(decime*8) >= low:
                perc80.append(True)
            else:
                perc80.append(False)

            if apertura+(decime*9) <= high and apertura+(decime*9) >= low:
                perc90.append(True)
            else:
                perc90.append(False)
        else: 
            perc20.append(False)
            perc40.append(False)
            perc60.append(False)
            perc80.append(False)
            perc90.append(False)

    # Agregar un 0 adicional al final para igualar el tamaño de la columna con el dataframe original
    perc20.append(False)
    perc40.append(False)
    perc60.append(False)
    perc80.append(False)
    perc90.append(False)

    # Crear una nueva columna en el dataframe con los resultados
    df['PHF 20%'] = perc20
    df['PHF 40%'] = perc40
    df['PHF 60%'] = perc60
    df['PHF 80%'] = perc80
    df['PHF 90%'] = perc90

    p20 = df['PHF 20%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p40 = df['PHF 40%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p60 = df['PHF 60%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p80 = df['PHF 80%'].value_counts(normalize=True).mul(100).astype(str)+'%'
    p90 = df['PHF 90%'].value_counts(normalize=True).mul(100).astype(str)+'%'

    return p20, p40, p60, p80, p90

print("Porcentajes percentiles de PHF: ", PHF_percentiles(percentiles_phf_df))

Porcentajes percentiles de PHF:  (PHF 20%
True     72.5765306122449%
False    27.4234693877551%
Name: proportion, dtype: object, PHF 40%
True     53.69897959183674%
False    46.30102040816326%
Name: proportion, dtype: object, PHF 60%
False    59.183673469387756%
True     40.816326530612244%
Name: proportion, dtype: object, PHF 80%
False     69.00510204081633%
True     30.994897959183675%
Name: proportion, dtype: object, PHF 90%
False    71.42857142857143%
True     28.57142857142857%
Name: proportion, dtype: object)


In [122]:
export_csv_df = main_df.copy()

del export_csv_df['close']
del export_csv_df['open']
del export_csv_df['high']
del export_csv_df['tick_volume']
del export_csv_df['max_zone_high']
del export_csv_df['max_zone_low']
del export_csv_df['low']
del export_csv_df['min_zone_high']
del export_csv_df['min_zone_low']
del export_csv_df['MIN']
del export_csv_df['SETUP_1']
del export_csv_df['SETUP_2']
del export_csv_df['SETUP_3']
del export_csv_df['SETUP_4']
del export_csv_df['SETUP_4_range']
del export_csv_df['SETUP_5']

#export_csv_df.to_csv('df_output.csv')						

## Backtesting