In [None]:
import pickle
import pandas as pd
import os
import yfinance as yf
from utils.plot_decision_trees import plot_tree


from utils.pre_process_data import get_data
from math import ceil
from utils.units import Volume, Dollar
from deap import gp, creator, base, tools
from deap.gp import Terminal
from operator import or_, and_, gt
from fitness_functions import *
from utils.plot_decision_trees import plot_tree
from utils.save_info import save_results
from genetic_functions.cx_functions import cxSubTree
from genetic_functions.mut_functions import mutation_half, mutBranch
from genetic_functions.genetic_program import GPAlgo
import matplotlib.pyplot as plt
import pendulum


In [None]:
# Load the ETH-USD series through the project's get_data helper; cells further
# down evaluate the evolved strategies and the MACD benchmark on it.
# NOTE(review): ma_lags / lag_lags / pers_col_num are forwarded unchanged --
# their meaning is defined in utils.pre_process_data.get_data, not visible here.
eth_ts = get_data(
        ticker = "ETH-USD", 
        start='2016-01-01', 
        end='2024-06-30',
        ma_lags = 50,
        lag_lags = 50,
        pers_col_num = 10
    )

In [None]:
# Load the main dataset with get_data's default arguments.
# NOTE(review): plot titles further down call this data "Bitcoin" -- presumably
# the default ticker; confirm in utils.pre_process_data.
df = get_data()
# Positional split: first 70% of the rows for training, the remainder for testing.
df_train= df.iloc[ : ceil(len(df)*0.7)]
df_test= df.iloc[ceil(len(df)*0.7) : ]

# Columns whose name contains "volume" (case-insensitive) are typed as Volume,
# every other column is typed as Dollar.
arg_names = list(df_train.columns)
vol_args = [arg for arg in arg_names if "volume" in arg.lower()]
dol_args = [arg for arg in arg_names if "volume" not in arg.lower()]


# Strongly typed primitive set: inputs are all Volume columns followed by all
# Dollar columns, and every tree evaluates to a bool.
pset = gp.PrimitiveSetTyped("main",[Volume]*len(vol_args) + [Dollar]*len(dol_args),bool)
# Rename the default ARG<n> inputs to the column names, in the same
# Volume-then-Dollar order that was used to declare the input types.
arg_vol_mapping = {f"ARG{ind}": val for ind,val in enumerate(vol_args)}
pset.renameArguments(**arg_vol_mapping)
arg_dol_mapping = {f"ARG{len(vol_args)+ind}": val for ind,val in enumerate(dol_args)}
pset.renameArguments(**arg_dol_mapping)
#Check that all arguments were renamed:
unnamed_args=[i for i in pset.arguments if "ARG" in i]
if  unnamed_args:
    print(f"Some arguments were not renamed: {unnamed_args}")
# Comparisons are only allowed between values of the same unit type.
pset.addPrimitive(gt, [Dollar,Dollar],bool)
# Identity primitive Dollar -> Dollar; generate() strips nodes with this name
# from freshly created individuals.
pset.addPrimitive(lambda x:x ,[Dollar],Dollar, name="dollar placeholder")

pset.addPrimitive(gt, [Volume,Volume],bool)
# Identity primitive Volume -> Volume, handled like the dollar placeholder.
pset.addPrimitive(lambda x:x ,[Volume],Volume, name="volume placeholder")


#Boolean operators:
pset.addPrimitive(and_, [bool,bool],bool)
pset.addPrimitive(or_,[bool,bool],bool)

# Additionally register every column name as a terminal of its unit type.
for v_arg in vol_args:
        pset.addTerminal(v_arg,Volume)
for d_arg in dol_args:
        pset.addTerminal(d_arg,Dollar)

# --- Remove all the ARG terminals ---
# NOTE(review): the filter is a substring test, so any terminal whose name
# merely contains "ARG" would be dropped as well.
pset.terminals[Volume] = [i for i in pset.terminals[Volume] if "ARG" not in i.name]
pset.terminals[Dollar] = [i for i in pset.terminals[Dollar] if "ARG" not in i.name]

def generate(pset):
    """Create one individual with the placeholder primitives stripped out.

    Individuals are drawn from ``toolbox.individual()`` until one is left with
    more than three nodes after removing every node named "dollar placeholder"
    or "volume placeholder". Draws that raise ``IndexError`` are retried.

    Args:
        pset: Accepted for the toolbox registration; not used in the body.

    Returns:
        A ``creator.Individual`` built from the filtered node list.
    """
    placeholder_names = ("dollar placeholder", "volume placeholder")
    while True:
        try:
            candidate = toolbox.individual()
        except IndexError:
            # Tree generation failed for this draw; try again.
            continue
        expr = [node for node in candidate if node.name not in placeholder_names]
        if len(expr) > 3:
            break
    return creator.Individual(expr)

# --- GP OPERATORS ----

# Single-objective fitness with a positive weight, and a tree-based individual
# that carries that fitness.
creator.create("fitness", base.Fitness, weights=(1,))
creator.create("Individual", gp.PrimitiveTree, fitness= creator.fitness)

toolbox = base.Toolbox()
# Raw expression generator: half-and-half initialisation with depth 1 to 5.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=5)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
# Populations are built from generate(), which wraps toolbox.individual and
# strips the placeholder primitives.
toolbox.register("custom_individual",generate, pset)
toolbox.register("population", tools.initRepeat, list, toolbox.custom_individual)
# Fitness is always evaluated on the training split.
toolbox.register("evaluate", fitness_function, df=df_train, pset=pset)

toolbox.register("mate",       cxSubTree)
# NOTE(review): selRanked is not one of the selection operators documented for
# upstream DEAP -- presumably a local/forked addition; confirm it exists.
toolbox.register("select",     tools.selRanked) 
toolbox.register("mutate",     mutation_half, pset=pset)

# Keeps up to 50 individuals. Later cells rebind the name `hof` to halls of
# fame unpickled from disk.
hof   = tools.HallOfFame(maxsize=50)

#STATS:
# NOTE(review): `np` is not imported explicitly in this notebook; presumably it
# arrives through `from fitness_functions import *` -- confirm.
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean, axis=0) 
stats.register("std", np.std, axis=0)
stats.register("min", np.min, axis=0)
stats.register("max", np.max, axis=0)

In [None]:
# Entry names inside the results directory, resolved relative to the current
# working directory.
run_list = os.listdir("results_run3_gen2")

In [None]:
# Collect the best individuals of every run from its pickled hall of fame.
#
# Paths are built with os.path.join so they work on every OS and, with an empty
# base_dir, resolve relative to the working directory -- the same location that
# os.listdir("results_run3_gen2") inspected to produce run_list.
best_sol_pop = []
base_dir = r""
results_dir = os.path.join(base_dir, "results_run3_gen2")
top_per_run = 10  # number of leading hall-of-fame entries taken from each run

for run_name in run_list:
    run_dir = os.path.join(results_dir, run_name)
    if not (os.path.isdir(run_dir) and "run" in run_name):
        continue

    hof_path = os.path.join(run_dir, "hof.pkl")
    try:
        # NOTE: unpickling can execute arbitrary code -- only load files that
        # were produced by trusted runs of this project.
        with open(hof_path, 'rb') as file:
            hof = pickle.load(file)
    except FileNotFoundError:
        print(run_name, " File not found.")
        continue
    except (pickle.UnpicklingError, EOFError, AttributeError, ImportError) as err:
        # e.g. truncated file, or the DEAP creator classes are not defined yet.
        print(run_name, f" hof.pkl could not be loaded: {err!r}")
        continue

    # Slicing tolerates halls of fame with fewer than top_per_run entries.
    run_best = list(hof[:top_per_run])
    if len(run_best) < top_per_run:
        print(run_name, f" only {len(run_best)} individuals in hall of fame.")
    best_sol_pop.extend(run_best)

In [None]:
# with open(os.path.join(REPO_DIRECTORY_PATH,"results_run2_gen3","start_pop.pkl"), 'wb') as file:
#     # Serialize the object and write it to the file
#     pickle.dump(best_sol_pop, file)
#     file.close()

In [None]:
# Number of individuals collected across all runs.
len(best_sol_pop)

In [None]:
# Rank the collected individuals in place by their stored fitness tuple, best first.
best_sol_pop.sort(key = lambda x : x.fitness.values, reverse=True)
print(len(best_sol_pop))
# Cell output: the five highest fitness tuples.
[i.fitness.values for i in best_sol_pop][:5]

In [None]:
# For every individual, split its string form on single quotes, keep the
# pieces that mention "percentage" and record the text after their last "_".
per_list = []
for solution in best_sol_pop:
    quoted_pieces = str(solution).split("'")
    suffixes = [piece.split("_")[-1] for piece in quoted_pieces if "percentage" in piece]
    if suffixes:
        per_list.append(suffixes)
# Distinct first suffix over all individuals that had at least one match.
print({entry[0] for entry in per_list})

In [None]:
# Evaluate the 20 top-ranked strategies on both splits, keeping only the first
# two values returned by trading_strat for each evaluation.
bs = best_sol_pop[:20]
lst_bsp_test, lst_bsp_train = [], []

for strategy in bs:
    test_outcome = trading_strat(individual=strategy, df=df_test, pset=pset)
    lst_bsp_test.append(test_outcome[:2])
    train_outcome = trading_strat(individual=strategy, df=df_train, pset=pset)
    lst_bsp_train.append(train_outcome[:2])
    

In [None]:
# Last and first training Open price, followed by their difference as the cell output.
train_open = df_train['Open']
print(train_open.iloc[-1])
print(train_open.iloc[0])

train_open.iloc[-1] - train_open.iloc[0]

In [None]:
# First fitness component of every collected individual, in the current list order.
[(i.fitness.values)[0] for i in best_sol_pop]

In [None]:
# Scatter of the stored fitness of every strategy, in the current order of best_sol_pop.
# NOTE(review): the plotted quantity is fitness/2 while the axis is labelled
# "Fitness Value" -- confirm that the halving is intended.
plt.plot(range(1,len(best_sol_pop)+1),[(i.fitness.values)[0]/2 for i in best_sol_pop],'r.')
plt.title("Fitness values of the strategies.")
plt.xlabel("Trading Strategy Number")
plt.ylabel("Fitness Value")

In [None]:
# Test-set results of the top strategies: the two values kept from
# trading_strat and their sum, one marker per strategy.
plt.figure(figsize=(15,7))
plt.title("Fitness Values of the top 20 trading strategies on the Test set.")
strategy_numbers = range(1, len(bs) + 1)
final_values = [outcome[0] for outcome in lst_bsp_test]
mdd_values = [outcome[1] for outcome in lst_bsp_test]
summed_values = [outcome[0] + outcome[1] for outcome in lst_bsp_test]
plt.plot(strategy_numbers, final_values, 'r.', label="Final Portfolio Value")
plt.plot(strategy_numbers, mdd_values, 'b.', label='MDD')
plt.plot(strategy_numbers, summed_values, 'k.', label="Fitness Value")
plt.legend()
plt.xlabel("Trading Strategy Number")
plt.ylabel("$")

In [None]:
# Train-set counterpart of the test-set figure: the two values kept from
# trading_strat and their sum, one marker per strategy.
# The x-range is derived from the plotted list itself, so the figure stays
# correct even if the name `bs` has been rebound by a later loop.
plt.figure(figsize=(15,7))
plt.title("Fitness Values of the top 20 trading strategies on the Train set.")
strategy_numbers = range(1, len(lst_bsp_train) + 1)
plt.plot(strategy_numbers,[i[0] for i in lst_bsp_train],'r.', label="Final Portfolio Value")
plt.plot(strategy_numbers,[i[1] for i in lst_bsp_train],'b.', label='MDD')
plt.plot(strategy_numbers,[i[0]+i[1]  for i in lst_bsp_train],'k.', label="Fitness Value")
plt.legend()
plt.xlabel("Trading Strategy Number")
plt.ylabel("$")

In [None]:
def get_macd_value(df, val = 1000 , tc = 0.01):
    """Back-test a long-only MACD crossover strategy on the ``Open`` prices of ``df``.

    The MACD line is EMA(12) - EMA(26) of ``Open`` and the signal line is the
    EMA(9) of the MACD line. The strategy buys with the whole portfolio when
    the MACD line moves from not-above to above the signal line, and sells
    everything on the opposite crossing. The proportional cost ``tc`` is
    charged on every buy and every sell.

    If a position is still open on the last row, it is valued at the last
    ``Open`` price (no selling cost is charged) so that the returned value
    reflects the open position instead of the stale pre-purchase value.

    The input frame is not modified; all indicator series are kept local.

    Args:
        df: DataFrame with an ``Open`` column, in trading order.
        val: Starting portfolio value.
        tc: Proportional transaction cost per trade (0.01 means 1%).

    Returns:
        ``(val, val_ts)``: the final portfolio value and the list of portfolio
        values, starting with the initial value and extended after every sale
        (and after the final valuation of an open position).
    """
    opens = df['Open']
    ema12 = opens.ewm(span=12, adjust=False).mean()
    ema26 = opens.ewm(span=26, adjust=False).mean()
    macd = ema12 - ema26
    signal_line = macd.ewm(span=9, adjust=False).mean()

    macd_above = ((macd - signal_line) > 0).to_numpy()
    prices = opens.to_numpy()

    val_ts = [val]
    long = False
    shares = 0.0

    # Row 0 has no previous state to compare with, so it can never trigger a trade.
    for cnt in range(1, len(prices)):
        above_now = macd_above[cnt]
        above_before = macd_above[cnt - 1]
        if above_now and not above_before and not long:
            # Bullish crossing: go all in, paying the cost on the purchase.
            shares = ((1 - tc) * val) / prices[cnt]
            long = True
        elif above_before and not above_now and long:
            # Bearish crossing: liquidate, paying the cost on the sale.
            val = (1 - tc) * shares * prices[cnt]
            val_ts.append(val)
            long = False

    if long:
        # Still invested at the end of the data: value the position at the last price.
        val = shares * prices[-1]
        val_ts.append(val)
    return val, val_ts

# MACD benchmark on both splits, with the default starting value and
# transaction cost; the final values are reused by the comparison cells below.
macd_test_val, macd_test_val_ts = get_macd_value(df = df_test)
macd_train_val, macd_train_val_ts = get_macd_value(df = df_train)

In [None]:
# Fitness tuples of the first 20 individuals in best_sol_pop.
[i.fitness.values for i in best_sol_pop][:20]

In [None]:
tc = 0.01
#Test data
# One row per evolved strategy. The two benchmark columns are constant, so they
# are computed once. The frame is built from a list of records in a single
# step, and the loop variable no longer rebinds `bs` (the top-20 list used by
# the plotting cells above).
bh_test_val = (1000/df_test.iloc[0]['Open'])*df_test.iloc[-1]['Open']*(1-tc)**2
test_df = pd.DataFrame(
    [
        {
            "Buy and Hold": bh_test_val,
            "Evolved Trading Strategy": trading_strat(individual=strategy, df=df_test, pset=pset)[0],
            "MACD Trading Strategy": macd_test_val,
        }
        for strategy in best_sol_pop
    ],
    columns=["Buy and Hold","Evolved Trading Strategy","MACD Trading Strategy"],
)

In [None]:
# Line plot of the three columns of test_df against the strategy index.
# NOTE(review): the x-axis label reads "Stratgy number" (misspelled).
test_df.plot(title = "Performance of the trading strategies on the test dataset.",xlabel = "Stratgy number", ylabel = "Final portfolio value")

In [None]:
tc = 0.01
#Train data
# One row per evolved strategy. The two benchmark columns are constant, so they
# are computed once. The frame is built from a list of records in a single
# step, and the loop variable no longer rebinds `bs` (the top-20 list used by
# the plotting cells above).
bh_train_val = (1000/df_train.iloc[0]['Open'])*df_train.iloc[-1]['Open']*(1-tc)**2
train_df = pd.DataFrame(
    [
        {
            "Buy and Hold": bh_train_val,
            "Evolved Trading Strategy": trading_strat(individual=strategy, df=df_train, pset=pset)[0],
            "MACD Trading Strategy": macd_train_val,
        }
        for strategy in best_sol_pop
    ],
    columns=["Buy and Hold","Evolved Trading Strategy","MACD Trading Strategy"],
)
# The title names the training data: this frame is computed on df_train.
train_df.plot(title = "Performance of the trading strategies on the training dataset.",xlabel = "Stratgy number", ylabel = "Final portfolio value")

In [None]:
# Full trading_strat output for the individual at index 3 on the training split.
trading_strat(individual = best_sol_pop[3], df=df_train,pset=pset)

In [None]:
# 5 x 3 grid: one row per top-5 strategy, one column per dataset
# (training split, test split, ETH series).
fig, ax = plt.subplots(nrows=5, ncols=3, figsize=(15, 15))
first_train_ax, first_test_ax, first_eth_ax = ax[0]
first_train_ax.title.set_text("Bitcoin Training Data")
first_train_ax.set_ylabel("Portfolio \nValue.")
first_train_ax.set_xlabel("Trade number.")
first_test_ax.title.set_text("Bitcoin Testing Data")
first_eth_ax.title.set_text("Ethereum Testing Data")

for row in range(5):
    strategy = best_sol_pop[row]
    # Element [2] of the trading_strat result is the series plotted per trade.
    train_ts = trading_strat(individual=strategy, df=df_train, pset=pset)[2]
    test_ts = trading_strat(individual=strategy, df=df_test, pset=pset)[2]
    eth_trade_ts = trading_strat(individual=strategy, df=eth_ts, pset=pset)[2]

    for axis, series, colour in zip(ax[row], (train_ts, test_ts, eth_trade_ts), ('b', 'r', 'r')):
        axis.plot(range(len(series)), series, color=colour)


In [None]:
tc = 0.01
macd_eth_val, macd_eth_val_ts = get_macd_value(df = eth_ts)

#ETH data
# One row per evolved strategy. The two benchmark columns are constant, so they
# are computed once. The frame is built from a list of records in a single
# step, and the loop variable no longer rebinds `bs` (the top-20 list used by
# the plotting cells above).
bh_eth_val = (1000/eth_ts.iloc[0]['Open'])*eth_ts.iloc[-1]['Open']*(1-tc)**2
eth_df = pd.DataFrame(
    [
        {
            "Buy and Hold": bh_eth_val,
            "Evolved Trading Strategy": trading_strat(individual=strategy, df=eth_ts, pset=pset)[0],
            "MACD Trading Strategy": macd_eth_val,
        }
        for strategy in best_sol_pop
    ],
    columns=["Buy and Hold","Evolved Trading Strategy","MACD Trading Strategy"],
)
# Labelled like the train/test comparison plots so the figure stands on its own.
eth_df.plot(title = "Performance of the trading strategies on the Ethereum dataset.",xlabel = "Stratgy number", ylabel = "Final portfolio value")

In [None]:
# Side-by-side comparison of the three result frames; only the middle panel
# keeps its legend.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 7))
plt.suptitle("The final portfolio values of the evolved trading strategy compared \nto the benchmark strategies for different datasets.")
line_colours = ["black", "red", "blue"]
train_df.plot(
    ax=ax[0],
    title="Bitcoin train dataset.",
    xlabel="Stratgy number",
    ylabel="Final portfolio value",
    color=line_colours,
    legend=False,
)
test_df.plot(ax=ax[1], title="Bitcoin test dataset.", color=line_colours)
eth_df.plot(ax=ax[2], title="Ethereum test dataset.", color=line_colours, legend=False)
# Leave room for the two-line suptitle.
fig.subplots_adjust(top=0.83)

In [None]:
# Summary on the test split for the first individual in best_sol_pop.
# best_solution is reused by the TRAIN and ETHEREUM summary cells below.
best_solution = best_sol_pop[0]
tc = 0.01
print("TEST")
print("MACD: ", macd_test_val)
# Buy and hold: 1000 invested at the first Open, valued at the last Open, with
# the cost factor (1-tc) applied twice.
print("Buy and hold:",(1000/df_test.iloc[0]['Open'])*df_test.iloc[-1]['Open']*(1-tc)**2)
print("mtv         :",maximum_theoretical_value(df_test))
print("strat value :",trading_strat(individual = best_solution, df=df_test,pset=pset)[0],'\n')

In [None]:
#PLOT DECISION TREES:
# Pass every collected solution to plot_tree with an index-numbered .png name.
# The loop index is called `idx` rather than `int`, because a loop variable at
# notebook level would rebind the builtin int() for every later cell.
for idx, best_sol in enumerate(best_sol_pop):
    plot_tree(best_sol, name=f"run3_gen2/run3_gen2_bs{idx}.png")

In [None]:
# Summary on the training split; best_solution is set in the TEST summary cell.
tc = 0.01
print("TRAIN")
print("MACD: ", macd_train_val)
# Buy and hold: 1000 invested at the first Open, valued at the last Open, with
# the cost factor (1-tc) applied twice.
print("Buy and hold:",(1000/df_train.iloc[0]['Open'])*df_train.iloc[-1]['Open']*(1-tc)**2)
print("mtv         :",maximum_theoretical_value(df_train))
print("strat value :",trading_strat(individual = best_solution, df=df_train,pset=pset)[0],'\n')

In [None]:
# Summary on the ETH series; best_solution and tc are set in earlier cells.
print("ETHEREUM ")
# Report the MACD benchmark computed on eth_ts, not the training-split value.
print("MACD: ", macd_eth_val)
# Buy and hold: 1000 invested at the first Open, valued at the last Open, with
# the cost factor (1-tc) applied twice.
print("Buy and hold:",(1000/eth_ts.iloc[0]['Open'])*eth_ts.iloc[-1]['Open']*(1-tc)**2)
print("mtv         :",maximum_theoretical_value(eth_ts))
print("strat value :",trading_strat(individual = best_solution, df=eth_ts,pset=pset)[0],'\n')

In [None]:
# plot_tree(best_sol_pop[8], name="bs_dt8")
from fitness_functions import trading_strat

In [None]:
# Run the individual at index 8 on the ETH series. With ret_trade_row=True the
# call returns four values; the last one is iterated below as trade rows.
# ts_val and trade_rows are reused by the following cells.
val, mdd, ts_val, trade_rows = trading_strat(individual = best_sol_pop[8], df=eth_ts,pset=pset, ret_trade_row=True)
# plt.plot(range(len(bs8)),bs8)
# plt.plot([0,3],df)

In [None]:
# Open price of the ETH series at the row label stored first in every trade row.
eth_val_ts = [eth_ts.loc[trade[0]]['Open'] for trade in trade_rows]
eth_val_ts

In [None]:
# buy_and_hold:
def buy_and_hold(df,strat):
    """Value a buy-and-hold position at the rows where ``strat`` traded.

    1000 is invested at the first ``Open`` price of ``df``. The position is
    then valued at every row label reported by ``trading_strat`` for ``strat``
    and once more at the last row, each time multiplied by ``(1-tc)**2``.
    ``tc`` and ``pset`` are read from the notebook's global scope.

    Args:
        df: DataFrame with an ``Open`` column.
        strat: Individual whose trade rows define the sampling points.

    Returns:
        List starting with 1000, followed by one value per trade row and the
        value at the last row.
    """
    _, _, _, trade_rows = trading_strat(individual=strat, df=df, pset=pset, ret_trade_row=True)
    shares = 1000 / df.iloc[0]['Open']
    cost_factor = (1 - tc) ** 2

    bh_val_ts = [1000]
    bh_val_ts.extend(shares * df.loc[trade[0]]['Open'] * cost_factor for trade in trade_rows)
    bh_val_ts.append(shares * df.iloc[-1]['Open'] * cost_factor)
    return bh_val_ts

# Buy-and-hold curve on ETH, sampled at the trades of the individual at index 8.
eth_bh_ts = buy_and_hold(df=eth_ts ,strat = best_sol_pop[8])

In [None]:
# Value series of the individual at index 8 on ETH (ts_val, from the
# trading_strat call above) against the buy-and-hold curve.
# NOTE(review): the figure has no title, axis labels or legend.
plt.figure(1)
plt.plot(range(len(ts_val)), ts_val)
plt.plot(range(len(eth_bh_ts)), eth_bh_ts)

In [None]:
# Buy-and-hold value on the training split (same expression as in the TRAIN summary cell).
print("Buy and hold:",(1000/df_train.iloc[0]['Open'])*df_train.iloc[-1]['Open']*(1-tc)**2)


In [None]:
# Test-split results of a loaded hall of fame: the two values kept from
# trading_strat and their sum.
# NOTE: lst_hof_test is only built by a cell further down, so that cell must
# have been run first. The x-range is taken from the plotted list itself; the
# previously used name `hof2` is not defined anywhere in this notebook.
plt.figure(figsize=(15,7))
member_numbers = range(1, len(lst_hof_test) + 1)
plt.plot(member_numbers,[i[0] for i in lst_hof_test],'r.')
plt.plot(member_numbers,[i[1] for i in lst_hof_test],'b.')
plt.plot(member_numbers,[i[0]+i[1]  for i in lst_hof_test],'k.')


plt.title("test data")

In [None]:
# Sum of the two values kept per hall-of-fame member on the test split.
# NOTE(review): lst_hof_test is defined in a later cell; on a fresh kernel this
# cell fails when the notebook is run top to bottom.
[i[0]+i[1]  for i in lst_hof_test]

In [None]:
# Training-split results of a loaded hall of fame.
# NOTE: lst_hof_train is only built by a cell further down, so that cell must
# have been run first. The x-range is taken from the plotted list itself; the
# previously used name `hof2` is not defined anywhere in this notebook.
# NOTE(review): the black series here is value MINUS the second element, while
# the other hall-of-fame plots use the sum -- confirm which sign is intended.
plt.figure(figsize=(15,7))
member_numbers = range(1, len(lst_hof_train) + 1)
plt.plot(member_numbers,[i[0] for i in lst_hof_train],'r.')
plt.plot(member_numbers,[i[1] for i in lst_hof_train],'b.')
plt.plot(member_numbers,[i[0]-i[1] for i in lst_hof_train],'k.')


plt.title("train data")
print([i[0]-i[1] for i in lst_hof_train])

In [None]:
# Load one specific run's pickled hall of fame; this rebinds the name `hof`.
# NOTE(review): hard-coded absolute path with backslash separators -- it only
# resolves on the original author's machine.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(rf"\home\khann\masters\results_run2\run_2024-08-10_02-20\hof.pkl", 'rb') as file:
        # Deserialize the hall-of-fame object stored in the file
            hof = pickle.load(file)

In [None]:
# Evaluate every hall-of-fame member on the test split and then on the training
# split, keeping the first two values returned by trading_strat.
lst_hof_test = [
    trading_strat(individual=member, df=df_test, pset=pset)[:2]
    for member in hof.items
]

lst_hof_train = [
    trading_strat(individual=member, df=df_train, pset=pset)[:2]
    for member in hof.items
]

In [None]:
# Display the test-split result pairs of the hall-of-fame members.
lst_hof_test

In [None]:
# Test-split results per hall-of-fame member: first value (red), second value
# (blue) and their sum (black).
plt.figure(figsize=(15,7))
member_numbers = range(1, len(hof) + 1)
for position, marker in ((0, 'r.'), (1, 'b.')):
    plt.plot(member_numbers, [outcome[position] for outcome in lst_hof_test], marker)
plt.plot(member_numbers, [outcome[0] + outcome[1] for outcome in lst_hof_test], 'k.')

In [None]:
# Training-split results per hall-of-fame member: first value (red), second
# value (blue) and their sum (black).
plt.figure(figsize=(15,7))
member_numbers = range(1, len(hof) + 1)
for position, marker in ((0, 'r.'), (1, 'b.')):
    plt.plot(member_numbers, [outcome[position] for outcome in lst_hof_train], marker)
plt.plot(member_numbers, [outcome[0] + outcome[1] for outcome in lst_hof_train], 'k.')

In [None]:
# Stored fitness tuples of every member of the loaded hall of fame.
[i.fitness.values for i in hof.items]

In [None]:
# import os
# run_list = os.listdir("\home\khann\masters\results_run2_gen2\start_pop")
# run_list

In [None]:
# cnt=0
# for dir in run_list:
#     if os.path.isdir(rf"\home\khann\masters\results_run2\{dir}"):
#         if cnt == 0:
#             df_tmp = pd.read_csv(rf"\home\khann\masters\results_run2\{dir}\run_info.csv")
#             df_tmp.drop('Unnamed: 0',axis=1, inplace=True)
#         else:
#             d = pd.read_csv(rf"\home\khann\masters\results_run2\{dir}\run_info.csv")
#             d.drop('Unnamed: 0',axis=1, inplace=True)
#             df_tmp = pd.concat([df_tmp, d], ignore_index=True, sort=False)
#         cnt+=1