In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy import stats

def xml_2_xlsx(path):
    """Load one spreadsheet-style XML report into a typed DataFrame.

    The document is parsed with ElementTree. Rows are the children of
    ``root[2][0]``; every cell's value is the text of the cell's first child,
    stripped of surrounding whitespace. The first row provides the column
    names. Rows whose ``Trades`` value is not positive are dropped and the
    remaining columns are cast to the dtypes listed in ``column_types``.

    Despite the name, nothing is written to disk: the DataFrame is returned.

    Parameters
    ----------
    path : str
        Path of the XML file; it is read as UTF-8 text.

    Returns
    -------
    pandas.DataFrame
        The rows with ``Trades > 0``. Row labels keep the position the row had
        in the sheet (the header row was label 0 and is removed).

    Raises
    ------
    IndexError
        If the document lacks the expected nesting or a row has fewer cells
        than the header row.
    KeyError, ValueError
        Propagated from ``astype`` when an expected column is missing or a
        value cannot be converted.
    """
    # `with` closes the handle even when reading or decoding fails.
    with open(path, encoding='utf-8') as xml_file:
        root = ET.fromstring(xml_file.read())

    table = root[2][0]
    # The header row fixes how many cells are read from every row.
    n_columns = len(table[0])

    rows = []
    for row in table:
        cells = []
        for n in range(n_columns):
            text = row[n][0].text
            # Empty elements have text None; keep that instead of failing on .strip().
            cells.append(text if text is None else text.strip())
        rows.append(cells)

    df = pd.DataFrame(rows)
    df = df.set_axis(df.iloc[0, :], axis=1)   # first row -> column names
    df = df.drop(0, axis=0)
    # Trades must be numeric before it can be used as a filter.
    df = df.astype({'Trades': int})
    df = df[df.Trades > 0]

    column_types = {
        'Pass': int, 'Result': float, 'Profit': float, 'Expected Payoff': float,
        'Profit Factor': float, 'Recovery Factor': float, 'Sharpe Ratio': float,
        'Custom': float, 'Equity DD %': float, 'Trades': int,
        'SlFactor': float, 'TpFactor': float, 'atrPeriod': int, 'delta': float,
        'option': int, 'fastEmaPeriod': int, 'slowEMAPeriod': int,
        'vwapZoneMultiplyer': int,
    }
    return df.astype(column_types)





"""Ratio list variable toma 2 pesos relativos, el primero es resultado > 0 respecto al total, la segunda es resultado > 1 respecto al total, """


def _most_frequent(values):
    """Return the most frequent value of a Series (the smallest one on ties)."""
    # Series.mode() returns the modes sorted ascending, so .iloc[0] is the
    # smallest mode. The previous `stats.mode(x)[0][0]` indexing raises on
    # SciPy versions where the mode of 1-D input is a scalar.
    return values.mode().iloc[0]


def file_XLM_to_df(directory,cleaned=True):
    """Load every ``.xml`` file of a folder into DataFrames.

    Each file is read with ``xml_2_xlsx``. A cleaned copy is then built by
    grouping rows that share the same ``Profit`` value: float columns are
    averaged, integer columns take their most frequent value, ``delta`` is
    rounded to two decimals, and the result is sorted by ``Result`` in
    descending order with ``Profit`` restored as a regular column.

    Progress (row counts before and after cleaning) is printed per file.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive). Only regular files whose path ends in
        ``.xml`` are loaded.
    cleaned : bool, default True
        If True return the cleaned frames, otherwise the frames exactly as
        returned by ``xml_2_xlsx``.

    Returns
    -------
    dict
        Maps ``os.path.join(directory, filename)`` to a DataFrame. Files are
        processed in sorted name order so the dictionary order is reproducible.
    """
    # Insertion order fixes the column order of the cleaned frames.
    # String aggregator names avoid the pandas FutureWarning raised when
    # NumPy callables such as np.mean are passed to agg().
    aggregations = {
        'Result': 'mean',
        'Expected Payoff': 'mean',
        'Profit Factor': 'mean',
        'Recovery Factor': 'mean',
        'Sharpe Ratio': 'mean',
        'Custom': 'mean',
        'Equity DD %': 'mean',
        'Trades': _most_frequent,
        'SlFactor': 'mean',
        'TpFactor': 'mean',
        'atrPeriod': _most_frequent,
        'delta': 'mean',
        'option': _most_frequent,
        'fastEmaPeriod': _most_frequent,
        'slowEMAPeriod': _most_frequent,
        'vwapZoneMultiplyer': _most_frequent,
    }

    DataFrameDict = {}
    CleanDataFrameDict = {}
    # os.listdir() gives no ordering guarantee; sort for reproducible results.
    for filename in sorted(os.listdir(directory)):
        F = os.path.join(directory, filename)
        if not (os.path.isfile(F) and F.endswith(".xml")):
            continue

        DF = xml_2_xlsx(F)
        DataFrameDict[F] = DF
        print(f"Original data length: {len(DF)}")

        DF = DF.groupby("Profit").agg(aggregations)
        DF['delta'] = DF['delta'].round(2)
        DF = DF.sort_values(by=["Result"], ascending=False)
        DF = DF.reset_index()
        print(f"Cleaned data lenght: {len(DF)}")
        print(f"******************************** file  {F} cleaned *********************************")
        CleanDataFrameDict[F] = DF

    return CleanDataFrameDict if cleaned else DataFrameDict
# Name of the first yearly folder; the calls below pass the folder names as
# literals rather than reading this variable.
Directory = "2015"

# Load one folder per year. `cleaned` is left at its default (True), so each
# variable holds the cleaned frames keyed by file path. The assignments are
# sequential, so years already loaded stay available if a later call is
# interrupted.
_2015data=file_XLM_to_df("2015")
_2016data=file_XLM_to_df("2016")
_2017data=file_XLM_to_df("2017")
_2018data=file_XLM_to_df("2018")
_2019data=file_XLM_to_df("2019")
_2020data=file_XLM_to_df("2020")



Original data length: 5256
Cleaned data lenght: 4095
******************************** file  2015\2015W40.xml cleaned *********************************
Original data length: 4090
Cleaned data lenght: 3281
******************************** file  2015\2015W41.xml cleaned *********************************
Original data length: 4683
Cleaned data lenght: 3796
******************************** file  2015\2015W42.xml cleaned *********************************
Original data length: 4348
Cleaned data lenght: 2828
******************************** file  2015\2015W43.xml cleaned *********************************
Original data length: 4690
Cleaned data lenght: 4017
******************************** file  2015\2015W44.xml cleaned *********************************
Original data length: 4292
Cleaned data lenght: 3151
******************************** file  2015\2015W45.xml cleaned *********************************
Original data length: 5066
Cleaned data lenght: 4169
******************************** file  20

KeyboardInterrupt: 

In [None]:
# Merge the per-year dictionaries into one mapping keyed by file path.
_AllData = {**_2015data,**_2016data,**_2017data,**_2018data,**_2019data,**_2020data}

total = 0               # rows over all files, before the Result filter
profitable_frames = []  # per-file rows with Result > 1
# The loop variable keeps the name `i`; it stays bound at notebook level
# after the loop finishes.
for i in _AllData:
    total += len(_AllData[i])
    profitable_frames.append(_AllData[i][_AllData[i].Result>1])

# Concatenate once: growing a frame inside the loop copies all previous rows on
# every iteration, and seeding the concat with an empty frame is deprecated.
if profitable_frames:
    allData_df = pd.concat(profitable_frames, ignore_index=True)
else:
    allData_df = pd.DataFrame()

print(total)
allData_df

In [None]:

# Columns to plot (the dtype values are not used here, only the keys).
data={ 'Result': float, 'Custom': float, 'Trades': int,
       'SlFactor': float, 'TpFactor': float, 'atrPeriod': int, 'delta': float, 'option': int, 'fastEmaPeriod': int,
       'slowEMAPeriod': int, 'vwapZoneMultiplyer': int}

# One histogram per column, stacked vertically.
fig,ax = plt.subplots(len(data),figsize=(15, 50))

for axis0, column in enumerate(data):
    if column=="delta":
        # Only rows with option > 0 are used for delta.
        # NOTE(review): presumably delta is only meaningful for those rows - confirm.
        values = allData_df[allData_df.option >0][column]
    else:
        values = allData_df[column]
    # The label is the column name only: allData_df pools every file, so the
    # former label built from a leftover loop variable named a single file.
    ax[axis0].hist(values,bins=100,label=column)
    ax[axis0].set_title(column)

In [None]:
# Keys are built with os.path.join when the files are loaded, so build the
# lookup key the same way instead of hard-coding a Windows separator.
_2015data[os.path.join("2015", "2015W41.xml")]


In [None]:
def _best_row_per_file(frames_by_path, min_trades=30):
    """Select, for every file, the first row with more than `min_trades` trades.

    Parameters
    ----------
    frames_by_path : dict
        Maps a file path to its DataFrame (must contain a ``Trades`` column).
        The cleaned frames are sorted by ``Result`` in descending order when
        they are loaded, so the first qualifying row is the one with the
        highest ``Result``.
    min_trades : int, default 30
        Rows need strictly more trades than this to qualify.

    Returns
    -------
    dict
        Maps the file name without folder and extension (e.g. ``2015W40``) to
        the selected row as a Series. Files without a qualifying row are
        reported and left out instead of raising IndexError.
    """
    best = {}
    for path, frame in frames_by_path.items():
        print(f"*****************************\nFile {path}")
        candidates = frame[frame.Trades > min_trades]
        if candidates.empty:
            print(f"No row with more than {min_trades} trades in {path}; skipped")
            continue
        # Same label as the former path[5:-4] slice, without assuming a
        # four-character folder name.
        label = os.path.splitext(os.path.basename(path))[0]
        best[label] = candidates.iloc[0, :]
    return best


_2015Best = _best_row_per_file(_2015data)
_2016Best = _best_row_per_file(_2016data)
_2017Best = _best_row_per_file(_2017data)
_2018Best = _best_row_per_file(_2018data)
_2019Best = _best_row_per_file(_2019data)
_2020Best = _best_row_per_file(_2020data)


In [None]:
_2015BestMean={}
_2016BestMean={}
_2017BestMean={}
_2018BestMean={}
_2019BestMean={}
_2020BestMean={}

# NOTE(review): every *BestMean dictionary is filled with the same selection as
# the *Best dictionaries (first row with more than 30 trades). The name suggests
# a mean over the top rows was intended - confirm before relying on it.
# The unused `top10` slice and the `data` rebinding that used to sit in the 2019
# loop are removed: they had no effect on the result, and `data` was replaced by
# a column list containing names (e.g. 'Pass') that the cleaned frames lack.
for yearly_frames, yearly_best in (
    (_2015data, _2015BestMean),
    (_2016data, _2016BestMean),
    (_2017data, _2017BestMean),
    (_2018data, _2018BestMean),
    (_2019data, _2019BestMean),
    (_2020data, _2020BestMean),
):
    for i in yearly_frames:
        print(f"*****************************\nFile {i}")
        qualifying = yearly_frames[i][yearly_frames[i].Trades>30]
        if qualifying.empty:
            # Avoid the IndexError that .iloc[0] would raise on an empty selection.
            print(f"No row with more than 30 trades in {i}; skipped")
            continue
        # File name without folder and extension, e.g. "2015W40".
        yearly_best[os.path.splitext(os.path.basename(i))[0]] = qualifying.iloc[0,:]
    


In [None]:
# All selected rows of every year, keyed by file label.
_AllBestData = {**_2015Best,**_2016Best,**_2017Best,**_2018Best,**_2019Best,**_2020Best}

# Label pieces: the first five characters and the remainder of every key.
indexList = list(_AllBestData)
index1List = [label[:5] for label in indexList]
index2List = [label[5:] for label in indexList]
# Not used further in this cell.
index=[np.array(index1List),np.array(index2List)]

# One column per file, named after its key. Building the frame in one step
# replaces the column-by-column concat (quadratic, and seeded with an empty
# frame) plus the manual rename; an empty dictionary yields an empty frame.
allBestData_df = pd.DataFrame(_AllBestData)
allBestData_df

In [None]:
"""Define a function that takes two lists, computes their element-wise difference as a NumPy array, and returns the norm of that difference.

"""
def normOfDiference(list1,list2):
    """Return the norm of ``list1 - list2``, rounded to three decimals.

    Both arguments are converted to NumPy arrays and subtracted element-wise;
    the norm is ``np.linalg.norm`` with its default arguments.
    """
    difference = np.asarray(list1) - np.asarray(list2)
    magnitude = np.linalg.norm(difference)
    return round(magnitude, 3)


    
    
    

In [None]:
# One frame per year: each column is the row selected for one file.
_2015df, _2016df, _2017df, _2018df, _2019df, _2020df = (
    pd.DataFrame(best_rows)
    for best_rows in (_2015Best, _2016Best, _2017Best, _2018Best, _2019Best, _2020Best)
)

# All years side by side.
allBestData_df = pd.concat(
    [_2015df, _2016df, _2017df, _2018df, _2019df, _2020df],
    axis=1,
)

# Last eight rows of the first two columns, compared through their norm.
list1, list2 = (
    allBestData_df.iloc[-8:, position].to_list()
    for position in (0, 1)
)

print(f"Norm: {normOfDiference(list1,list2)}")
allBestData_df

In [None]:
# Last eight rows, all columns.
allBestData_df.tail(8)

In [None]:
# Display the 2015 frame (one column per file).
_2015df

In [None]:
# Number of 2015 files loaded (the bare len() call produced no output because
# it was not the last expression of the cell).
print(len(_2015data))

# Keys are created with os.path.join, so build the lookup key the same way.
week_df = _2015data[os.path.join("2015", "2015W41.xml")]

# Iterate over the frame's own columns rather than over whichever `data`
# dictionary another cell bound last, which may name columns that this frame
# does not have.
for count, column in enumerate(week_df.columns):
    print(f"****** {count} ******")
    print(week_df[column])

In [None]:

# Columns to plot (only the keys are used).
data={ 'Result': float, 'Custom': float, 'Trades': int,
       'SlFactor': float, 'TpFactor': float, 'atrPeriod': int, 'delta': float, 'option': int, 'fastEmaPeriod': int,
       'slowEMAPeriod': int, 'vwapZoneMultiplyer': int}

# One row of axes per 2015 file, one column of axes per plotted column.
# squeeze=False keeps `ax` two-dimensional even when only one file is loaded.
fig,ax = plt.subplots(len(_2015data),len(data),figsize=(100, 100),squeeze=False)
print(len(_2015data),len(data))
for axis0, i in enumerate(_2015data):
    # File name without folder and extension, e.g. "2015W40". The former title
    # slice i[6:] dropped the first character of the file name.
    week = os.path.splitext(os.path.basename(i))[0]
    for axis1, column in enumerate(data):
        ax[axis0,axis1].hist(_2015data[i][column],bins=10,label=week+" "+column)
        ax[axis0,axis1].set_title(week+" "+column)
    

In [None]:
# Histogram of the "option" value of the selected rows, one series per year.
fig, ax = plt.subplots(figsize=(15, 8))
for year_label, year_df in (
    ("2015", _2015df), ("2016", _2016df), ("2017", _2017df),
    ("2018", _2018df), ("2019", _2019df), ("2020", _2020df),
):
    ax.hist(x=year_df.T["option"], bins=10, label=year_label)
ax.legend()

In [None]:
# Bar chart of "Result" per file, one series per year.
fig, ax = plt.subplots(figsize=(15, 8))
for year_label, year_df in (
    ("2015", _2015df), ("2016", _2016df), ("2017", _2017df),
    ("2018", _2018df), ("2019", _2019df), ("2020", _2020df),
):
    per_file = year_df.T
    ax.bar(x=per_file.index.tolist(), height=per_file["Result"], label=year_label)
ax.legend()

In [None]:
# Display the 2018 frame (one column per file).
_2018df

In [None]:
# Scatter of "Result" (x) against "Profit" (y), one series per year.
fig, ax = plt.subplots(figsize=(15, 8))
for year_label, year_df in (
    ("2015", _2015df), ("2016", _2016df), ("2017", _2017df),
    ("2018", _2018df), ("2019", _2019df), ("2020", _2020df),
):
    per_file = year_df.T
    ax.scatter(per_file["Result"], per_file["Profit"], label=year_label)
ax.legend()

In [None]:
# Scatter of "Equity DD %" (x) against "Profit" (y), one series per year.
fig, ax = plt.subplots(figsize=(12, 6))
for year_label, year_df in (
    ("2015", _2015df), ("2016", _2016df), ("2017", _2017df),
    ("2018", _2018df), ("2019", _2019df), ("2020", _2020df),
):
    per_file = year_df.T
    ax.scatter(per_file['Equity DD %'], per_file["Profit"], label=year_label)
ax.legend()

In [None]:
# Line plot of "Result" for 2015; the frame is transposed so each file is one point.
_2015df.T.plot(y="Result",figsize=(15,8))

In [None]:
# Line plot of "Result" for 2016; the frame is transposed so each file is one point.
_2016df.T.plot(y="Result",figsize=(15,8))

In [None]:
# Line plot of "Result" for 2017; the frame is transposed so each file is one point.
_2017df.T.plot(y="Result",figsize=(15,8))

In [None]:
# Line plot of "Result" for 2018; the frame is transposed so each file is one point.
_2018df.T.plot(y="Result",figsize=(15,8))

In [None]:
# Line plot of "Result" for 2019; the frame is transposed so each file is one point.
_2019df.T.plot(y="Result",figsize=(15,8))

In [None]:
# Line plot of "Result" for 2020; the frame is transposed so each file is one point.
_2020df.T.plot(y="Result",figsize=(15,8))