In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd
from bp import data_gather_from_files,run_strategy_optimised,run_strategy_eval
import time
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [2]:
def error_check(params,n):
    if len(params) != n:
        raise IndexError('The number of parameters is not correct')

def generate_date_ranges_for_walk_forward(start_month_year, end_month_year, day=15, n_months = 1):
    # Initialize an empty list to store the date ranges
    date_ranges = []
    if day not in range(1, 29):
        raise ValueError('Day must be between 1 and 28')
    # Convert the input strings to datetime objects, using the given day
    start_date = datetime.strptime(f"{day} {start_month_year}", '%d %b %Y')
    end_date = datetime.strptime(f"{day} {end_month_year}", '%d %b %Y')
    
    # Generate the date ranges
    current_date = start_date
    while current_date <= end_date:
        next_date = current_date + relativedelta(months=n_months)
        date_range = [current_date.strftime('%d %b %Y'), (next_date - relativedelta(days=1)).strftime('%d %b %Y')]
        date_ranges.append(date_range)
        current_date = next_date
    
    return date_ranges[:-1]

def get_previous_n_months(end_date_str, n_months):
    # Convert the input string to a datetime object
    end_date = datetime.strptime(end_date_str, '%d %b %Y')
    
    # Calculate the start date
    start_date = end_date - relativedelta(months=n_months)
    
    # Create the date range
    date_range = [start_date.strftime('%d %b %Y'), (end_date - relativedelta(days=1)).strftime('%d %b %Y')]
    
    return date_range

In [None]:
def error_check(params,n):
    if len(params) != n:
        raise IndexError('The number of parameters is not correct')

def generate_date_ranges_for_walk_forward(start_month_year, end_month_year, day=15, n_months = 1):
    # Initialize an empty list to store the date ranges
    date_ranges = []
    if day not in range(1, 29):
        raise ValueError('Day must be between 1 and 28')
    # Convert the input strings to datetime objects, using the given day
    start_date = datetime.strptime(f"{day} {start_month_year}", '%d %b %Y')
    end_date = datetime.strptime(f"{day} {end_month_year}", '%d %b %Y')
    
    # Generate the date ranges
    current_date = start_date
    while current_date <= end_date:
        next_date = current_date + relativedelta(months=n_months)
        date_range = [current_date.strftime('%d %b %Y'), (next_date - relativedelta(days=1)).strftime('%d %b %Y')]
        date_ranges.append(date_range)
        current_date = next_date
    
    return date_ranges[:-1]

def get_previous_n_months(end_date_str, n_months):
    # Convert the input string to a datetime object
    end_date = datetime.strptime(end_date_str, '%d %b %Y')
    
    # Calculate the start date
    start_date = end_date - relativedelta(months=n_months)
    
    # Create the date range
    date_range = [start_date.strftime('%d %b %Y'), (end_date - relativedelta(days=1)).strftime('%d %b %Y')]
    
    return date_range

In [3]:
def walk_forward_analysis(evaluation_start, evaluation_end, evaluation_day,parameters,optimization_function = None, optimizer_params =[],  lookback_in_months = 6,evaluation_period = 3):
    generated_date_ranges = generate_date_ranges_for_walk_forward(evaluation_start, evaluation_end,evaluation_day,n_months = evaluation_period)
    df = {}
    for dates in generated_date_ranges:
        train_period = get_previous_n_months(dates[0], lookback_in_months)
        train_data = data_gather_from_files(train_period[0],train_period[1])['EURUSD.mid']
        print('Data gathered for training period: ',train_period[0],train_period[1])
        test_data = data_gather_from_files(dates[0],dates[1])['EURUSD.mid']
        print('Data gathered for testing period: ',dates[0],dates[1])
        max_loss, U_PNL, max_position, R_PNL,profit,optimal_params = optimization_function(train_data,test_data,parameters,optimizer_params)
        print('Optimal parameters are: ',optimal_params)
        #add optimization phrase evaluation G,n,d
        a, b, c, d, e = run_strategy_optimised(train_data, optimal_params[0],optimal_params[1],optimal_params[2])
        print('Optimization phrase; Max loss, U_PNL, max_position, R_PNL,profit are: ',a, b, c, d, e)
        ##
        print('Training phrase; Max loss, U_PNL, max_position, R_PNL,profit are: ',max_loss, U_PNL, max_position, R_PNL,profit)
        df[dates[0] +'-'+ dates[1]] = [max_loss, U_PNL, max_position, R_PNL,profit]
    df = pd.DataFrame(df).T
    df.columns = ['max_loss', 'min_U_PNL', 'max_position', 'R_PNL','profit']
    return df

In [4]:
import warnings
warnings.simplefilter("ignore", category=RuntimeWarning)

In [5]:
import random
from deap import base, creator, tools

def deap_optimiser_g_n_d(train_data, test_data, parameters, optimization_params):
    ngen = optimization_params[0]  # number of generations
    npop = optimization_params[1]  # number of population
    error_check(parameters,3)
    
    grid_params = parameters[0]
    position_params = parameters[1]
    depth_params = parameters[2]

    creator.create("FitnessMax", base.Fitness, weights=(1.0,)) #maximizing
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()

    # Define the genes for our individual
    toolbox.register("G_gene", random.randint, grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
    toolbox.register("n_gene", random.randint, position_params[0]//position_params[2], position_params[1]//position_params[2])
    toolbox.register("d_gene", random.randint, depth_params[0], depth_params[1])

    # Create an individual with the genes
    toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.G_gene, toolbox.n_gene, toolbox.d_gene), n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def objective(individual):
        G, n, d = individual[0]*grid_params[2], individual[1]*position_params[2], individual[2]
        max_loss, U_PNL, max_position, R_PNL, profit = run_strategy_optimised(train_data, G, n, d)
    
        constraints = [
            max_position > 10e6,
            U_PNL < -150e3,
            max_loss < -500e3
        ]

        if any(constraints):
            return float('-inf'),  # Return large negative value when constraints are not satisfied
        return profit,

    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutUniformInt, low=[grid_params[0]//grid_params[2], position_params[0]//position_params[2], depth_params[0]], 
                     up=[grid_params[1]//grid_params[2], position_params[1]//position_params[2], depth_params[1]], indpb=0.2)
    toolbox.register("select", tools.selTournament, tournsize=3)
    toolbox.register("evaluate", objective)

    population = toolbox.population(n=npop)
    CXPB, MUTPB = 0.5, 0.2

    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    #initiate early stopping
    stagnant_generations = 0  # Counter for generations without improvement
    MAX_STAGNANT_GEN = optimization_params[2]  # Early stopping criterion: stop if no improvement over x generations
    best_fitness_so_far = float('-inf')  # since we're maximizing
    ##
    for gen in range(ngen):
        offspring = toolbox.select(population, len(population))
        offspring = list(map(toolbox.clone, offspring))
        
        # Crossover
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        
        # Mutation
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        fitnesses = list(map(toolbox.evaluate, offspring))
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        #early stopping
        current_best_fitness = max(ind.fitness.values[0] for ind in population)

        if current_best_fitness > best_fitness_so_far:
            best_fitness_so_far = current_best_fitness
            stagnant_generations = 0  # Reset counter
        else:
            stagnant_generations += 1

        if stagnant_generations >= MAX_STAGNANT_GEN:
            print(f"Early stopping on generation {gen} due to no improvement.")
            break
        ##
        population[:] = offspring

    best_ind = tools.selBest(population, 1)[0]
    optimal_g = best_ind[0]*grid_params[2]
    optimal_n = best_ind[1]*position_params[2]
    optimal_d = best_ind[2]
    print("optimisation completed")
    max_loss, U_PNL, max_position, R_PNL,profit = run_strategy_optimised(test_data, optimal_g,optimal_n,optimal_d)
    return max_loss, U_PNL, max_position, R_PNL,profit,[optimal_g,optimal_n,optimal_d]


## GA with unrevised strategy

In [52]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
depth_params = [3,12,1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_depth_params = ((depth_params[1]-depth_params[0])/depth_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of depth params:-',(n_depth_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_depth_params))

# Adjust these parameter according to search space
n_trials = 30 #NGEN
npop = 100
early_stopping_gen = 30
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params,depth_params]
results = walk_forward_analysis('jan 2021','jan 2022',1,parameters,optimization_function=deap_optimiser_g_n_d,optimizer_params=optimizer_param,lookback_in_months=1,evaluation_period=1)
results

number of grid params:- 18.0
number of lot params:- 19.0
number of depth params:- 9.0
total_number_of_combinations:- 3078.0
Data gathered for training period:  01 Dec 2020 31 Dec 2020
Data gathered for testing period:  01 Jan 2021 31 Jan 2021
optimisation completed
Optimal parameters are:  [0.0015, 2000000, 3]
Optimization phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -105000.0 -59000.0 7380000.0 48601.9203 45600.0
Training phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -24000.0 -42000.0 7407000.0 68200.0003 69400.0
Data gathered for training period:  01 Jan 2021 31 Jan 2021
Data gathered for testing period:  01 Feb 2021 28 Feb 2021
optimisation completed
Optimal parameters are:  [0.001, 2000000, 4]
Optimization phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -26000.0 -39875.0 9872000.0 129356.7711 132400.0
Training phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -158000.0 -47500.0 9792000.0 -135650.0002 -155600.0
Data gathered for training pe

Unnamed: 0,max_loss,min_U_PNL,max_position,R_PNL,profit
01 Jan 2021-31 Jan 2021,-24000.0,-42000.0,7407000.0,68200.0003,69400.0
01 Feb 2021-28 Feb 2021,-158000.0,-47500.0,9792000.0,-135650.0002,-155600.0
01 Mar 2021-31 Mar 2021,-125600.0,-112800.0,9560000.0,-12800.0,-125600.0
01 Apr 2021-30 Apr 2021,-2750.0,-3750.0,485000.0,1000.0,-2750.0
01 May 2021-31 May 2021,-32000.0,-40777.7778,7356000.0,15533.3339,16400.0
01 Jun 2021-30 Jun 2021,-171000.0,-99562.5,9732000.0,-83000.0001,-83000.0
01 Jul 2021-31 Jul 2021,0.0,0.0,118800.0,1900.0,1900.0
01 Aug 2021-31 Aug 2021,-144000.0,-120000.0,18752000.0,-22560.0,-1200.0
01 Sep 2021-30 Sep 2021,-96000.0,-120000.0,13920000.0,4400.0,-93600.0
01 Oct 2021-31 Oct 2021,0.0,-400.0,4623200.0,17000.0,16600.0


Run time : 14 m, 1:1 ratio 3000 comb

In [54]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
depth_params = [3,12,1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_depth_params = ((depth_params[1]-depth_params[0])/depth_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of depth params:-',(n_depth_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_depth_params))

# Adjust these parameter according to search space
n_trials = 30 #NGEN
npop = 100
early_stopping_gen = 30
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params,depth_params]
results = walk_forward_analysis('jan 2021','jan 2022',1,parameters,optimization_function=deap_optimiser_g_n_d,optimizer_params=optimizer_param,lookback_in_months=6,evaluation_period=3)
results

number of grid params:- 18.0
number of lot params:- 19.0
number of depth params:- 9.0
total_number_of_combinations:- 3078.0
Data gathered for training period:  01 Jul 2020 31 Dec 2020
Data gathered for testing period:  01 Jan 2021 31 Mar 2021
optimisation completed
Optimal parameters are:  [0.0085, 2000000, 4]
Optimization phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -150000.0 -102000.0 9384000.0 224000.0 224000.0
Training phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -41625.0 -125375.0 9520000.0 -41625.0 -41625.0
Data gathered for training period:  01 Oct 2020 31 Mar 2021
Data gathered for testing period:  01 Apr 2021 30 Jun 2021
optimisation completed
Optimal parameters are:  [0.006500000000000001, 1000000, 12]
Optimization phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -78000.0 -122239.7959 9828000.0 247000.0 247000.0
Training phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -117000.0 -109416.6667 8554000.0 28697.2788 57500.0
Data gathere

Unnamed: 0,max_loss,min_U_PNL,max_position,R_PNL,profit
01 Jan 2021-31 Mar 2021,-41625.0,-125375.0,9520000.0,-41625.0,-41625.0
01 Apr 2021-30 Jun 2021,-117000.0,-109416.6667,8554000.0,28697.2788,57500.0
01 Jul 2021-30 Sep 2021,-10080.0,-68880.0,8121400.0,58800.0,-10080.0
01 Oct 2021-31 Dec 2021,-40000.0,-60000.0,6720000.0,24400.0,28800.0


Run time: 35 m 6:3 ratio

In [6]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
depth_params = [3,12,1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_depth_params = ((depth_params[1]-depth_params[0])/depth_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of depth params:-',(n_depth_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_depth_params))

# Adjust these parameter according to search space
n_trials = 30 #NGEN
npop = 100
early_stopping_gen = 30
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params,depth_params]
results = walk_forward_analysis('jan 2021','jan 2022',1,parameters,optimization_function=deap_optimiser_g_n_d,optimizer_params=optimizer_param,lookback_in_months=3,evaluation_period=1)
results

number of grid params:- 18.0
number of lot params:- 19.0
number of depth params:- 9.0
total_number_of_combinations:- 3078.0
Data gathered for training period:  01 Oct 2020 31 Dec 2020
Data gathered for testing period:  01 Jan 2021 31 Jan 2021
optimisation completed
Optimal parameters are:  [0.002, 2000000, 3]
Optimization phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -32000.0 -66666.6667 7380000.0 111442.798 104600.0
Training phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -16000.0 -50000.0 7404000.0 113200.0 113200.0
Data gathered for training period:  01 Nov 2020 31 Jan 2021
Data gathered for testing period:  01 Feb 2021 28 Feb 2021
optimisation completed
Optimal parameters are:  [0.0015, 2000000, 4]
Optimization phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -135000.0 -96333.3333 9876000.0 192200.0001 192200.0
Training phrase; Max loss, U_PNL, max_position, R_PNL,profit are:  -126000.0 -58619.3848 9792000.0 -94499.9998 -124600.0
Data gathered for tra

Unnamed: 0,max_loss,min_U_PNL,max_position,R_PNL,profit
01 Jan 2021-31 Jan 2021,-16000.0,-50000.0,7404000.0,113200.0,113200.0
01 Feb 2021-28 Feb 2021,-126000.0,-58619.3848,9792000.0,-94499.9998,-124600.0
01 Mar 2021-31 Mar 2021,-141600.0,-147266.6667,9452000.0,5666.6667,-141600.0
01 Apr 2021-30 Apr 2021,-7350.0,-6650.0,364350.0,-5107.4074,-6600.0
01 May 2021-31 May 2021,-34000.0,-42500.0,7344000.0,11333.3333,3400.0
01 Jun 2021-30 Jun 2021,-81600.0,-102000.0,10098000.0,-2720.0,-54400.0
01 Jul 2021-31 Jul 2021,-9750.0,-9750.0,3061500.0,22100.0,22100.0
01 Aug 2021-31 Aug 2021,0.0,-12000.0,4680000.0,31400.0,31400.0
01 Sep 2021-30 Sep 2021,-46500.0,-62994.5172,8925000.0,-11462.6315,-36300.0
01 Oct 2021-31 Oct 2021,-28000.0,-47500.0,9352000.0,71812.5004,57200.0


In [None]:
data = data_gather_from_files('1 mar 2021','31 mar 2021')['EURUSD.mid']
run_strategy_optimised(data, 0.0055, 1900000, 5)