In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd
from bp import data_gather_from_files,run_strategy_optimised,run_strategy_eval
import time
from datetime import datetime
from dateutil.relativedelta import relativedelta
from GA_optimiser import *

If import GA_optimiser error (line 53) then change Grid_Search to None

## Minimising loss by U_PNL vol

## Group 1 : Deap optimiser with basic strategy

In [2]:
def deap_optimiser_g_n_std(train_data, test_data, parameters, optimization_params):
    """
    Optimizes the parameters of a trading strategy using a genetic algorithm.
    
    Args:
    train_data (pandas.DataFrame): The training data used to optimize the strategy.
    test_data (pandas.DataFrame): The test data used to evaluate the optimized strategy.
    parameters (list): A list of two lists, where the first list contains the grid parameters and the second list contains the position parameters.
    optimization_params (list): A list of three parameters: the number of generations, the number of population, and the maximum number of stagnant generations before early stopping.
    
    Returns:
    tuple: A tuple containing the maximum loss, the return per unit of risk, the profit, and the optimal values of G and n.
    """
    ngen = optimization_params[0]  # number of generations
    npop = optimization_params[1]  # number of population

    error_check(parameters,2)
    
    grid_params = parameters[0]
    position_params = parameters[1]

    creator.create("FitnessMax", base.Fitness, weights=(1.0,)) #maximizing
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()

    # Define the genes for our individual
    toolbox.register("G_gene", random.randint, grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
    toolbox.register("n_gene", random.randint, position_params[0]//position_params[2], position_params[1]//position_params[2])

    # Create an individual with the genes
    toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.G_gene, toolbox.n_gene), n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def objective(individual):
        G, n = individual[0]*grid_params[2], individual[1]*position_params[2]
        max_loss, R_PNL, profit, std = run_strategy_optimised(train_data, G, n)
    
        constraints = [
            max_loss < -500e3
        ]

        if any(constraints):
            return float('-inf'),  # Return large negative value when constraints are not satisfied
        return profit/std,

    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", tools.mutUniformInt, low=[grid_params[0]//grid_params[2], position_params[0]//position_params[2]], 
                     up=[grid_params[1]//grid_params[2], position_params[1]//position_params[2]], indpb=0.2)
    toolbox.register("select", tools.selTournament, tournsize=3)   ##############
    toolbox.register("evaluate", objective)

    population = toolbox.population(n=npop)
    CXPB, MUTPB = 0.5, 0.2

    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    #initiate early stopping
    stagnant_generations = 0  # Counter for generations without improvement
    MAX_STAGNANT_GEN = optimization_params[2]  # Early stopping criterion: stop if no improvement over x generations
    best_fitness_so_far = float('-inf')  # since we're maximizing
    ##
    for gen in range(ngen):
        offspring = toolbox.select(population, len(population))
        offspring = list(map(toolbox.clone, offspring))
        
        # Crossover
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        
        # Mutation
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        fitnesses = list(map(toolbox.evaluate, offspring))
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        #early stopping
        current_best_fitness = max(ind.fitness.values[0] for ind in population)

        if current_best_fitness > best_fitness_so_far:
            best_fitness_so_far = current_best_fitness
            stagnant_generations = 0  # Reset counter
        else:
            stagnant_generations += 1

        if stagnant_generations >= MAX_STAGNANT_GEN:
            print(f"Early stopping on generation {gen} due to no improvement.")
            break
        ##
        population[:] = offspring

    best_ind = tools.selBest(population, 1)[0]
    optimal_g = np.round(best_ind[0]*grid_params[2],5)
    optimal_n = best_ind[1]*position_params[2]
    print("optimisation completed")
    max_loss, R_PNL,profit,std  = run_strategy_optimised(test_data, optimal_g,optimal_n)
    return max_loss, R_PNL,profit,[optimal_g,optimal_n]

In [3]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params))

# Adjust these parameter according to search space
n_trials = 10 #NGEN
npop = 100
early_stopping_gen = 10 # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params]

results0 = walk_forward_analysis('jan 2022','jan 2023',1,parameters,optimization_function=deap_optimiser_g_n_std,optimizer_params=optimizer_param,lookback_in_months=12,evaluation_period=1)
results0


number of grid params:- 18.0
number of lot params:- 19.0
total_number_of_combinations:- 342.0
Data gathered for training period:  01 Jan 2021 31 Dec 2021
Data gathered for testing period:  01 Jan 2022 31 Jan 2022
optimisation completed
Optimal parameters are:  [0.001, 1300000]
Max loss,R_PNL,profit are:  -123500.0 -97262.2428 -96460.0
Data gathered for training period:  01 Feb 2021 31 Jan 2022
Data gathered for testing period:  01 Feb 2022 28 Feb 2022
optimisation completed
Optimal parameters are:  [0.009, 2000000]
Max loss,R_PNL,profit are:  -54000.0 72000.0 72000.0
Data gathered for training period:  01 Mar 2021 28 Feb 2022
Data gathered for testing period:  01 Mar 2022 31 Mar 2022
optimisation completed
Optimal parameters are:  [0.0045, 1400000]
Max loss,R_PNL,profit are:  -198037.5 -74487.5 -135877.5
Data gathered for training period:  01 Apr 2021 31 Mar 2022
Data gathered for testing period:  01 Apr 2022 30 Apr 2022
optimisation completed
Optimal parameters are:  [0.0025, 400000]


Unnamed: 0,max_loss,R_PNL,profit
01 Jan 2022-31 Jan 2022,-123500.0,-97262.2428,-96460.0
01 Feb 2022-28 Feb 2022,-54000.0,72000.0,72000.0
01 Mar 2022-31 Mar 2022,-198037.5,-74487.5,-135877.5
01 Apr 2022-30 Apr 2022,-160143.4734,-150903.4734,-150903.4734
01 May 2022-31 May 2022,0.0,126854.0,77350.0
01 Jun 2022-30 Jun 2022,-124950.0,65450.0,53040.0
01 Jul 2022-31 Jul 2022,-176250.0,86250.0,85350.0
01 Aug 2022-31 Aug 2022,-78599.9999,-10599.9999,-18759.9999
01 Sep 2022-30 Sep 2022,-38700.0,49350.0001,13050.0
01 Oct 2022-31 Oct 2022,-54400.0,111009.9999,110500.0


In [4]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params))

# Adjust these parameter according to search space
n_trials = 10 #NGEN
npop = 100
early_stopping_gen = 10 # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params]

results0 = walk_forward_analysis('jan 2022','jan 2023',1,parameters,optimization_function=deap_optimiser_g_n,optimizer_params=optimizer_param,lookback_in_months=12,evaluation_period=1)
results0


number of grid params:- 18.0
number of lot params:- 19.0
total_number_of_combinations:- 342.0
Data gathered for training period:  01 Jan 2021 31 Dec 2021
Data gathered for testing period:  01 Jan 2022 31 Jan 2022
optimisation completed
Optimal parameters are:  [0.0015, 2000000]
Max loss,R_PNL,profit are:  -129000.0 -98105.6422 -103800.0
Data gathered for training period:  01 Feb 2021 31 Jan 2022
Data gathered for testing period:  01 Feb 2022 28 Feb 2022
optimisation completed
Optimal parameters are:  [0.009, 2000000]
Max loss,R_PNL,profit are:  -54000.0 72000.0 72000.0
Data gathered for training period:  01 Mar 2021 28 Feb 2022
Data gathered for testing period:  01 Mar 2022 31 Mar 2022
optimisation completed
Optimal parameters are:  [0.007, 2000000]
Max loss,R_PNL,profit are:  -192000.0 -115000.0 -188800.0
Data gathered for training period:  01 Apr 2021 31 Mar 2022
Data gathered for testing period:  01 Apr 2022 30 Apr 2022
optimisation completed
Optimal parameters are:  [0.009, 1600000

Unnamed: 0,max_loss,R_PNL,profit
01 Jan 2022-31 Jan 2022,-129000.0,-98105.6422,-103800.0
01 Feb 2022-28 Feb 2022,-54000.0,72000.0,72000.0
01 Mar 2022-31 Mar 2022,-192000.0,-115000.0,-188800.0
01 Apr 2022-30 Apr 2022,-157200.0,-154960.0,-154960.0
01 May 2022-31 May 2022,0.0,126854.0,77350.0
01 Jun 2022-30 Jun 2022,-124950.0,65450.0,53040.0
01 Jul 2022-31 Jul 2022,-176250.0,86250.0,85350.0
01 Aug 2022-31 Aug 2022,-57750.0,62265.0,69300.0
01 Sep 2022-30 Sep 2022,-25049.9999,66070.0,43630.0001
01 Oct 2022-31 Oct 2022,-54400.0,111009.9999,110500.0


In [18]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params))

# Adjust these parameter according to search space
n_trials = 10 #NGEN
npop = 100
early_stopping_gen = 10 # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params]

results0 = walk_forward_analysis('jan 2018','jan 2023',1,parameters,optimization_function=deap_optimiser_g_n_std,optimizer_params=optimizer_param,lookback_in_months=3,evaluation_period=1)
results0


number of grid params:- 18.0
number of lot params:- 19.0
total_number_of_combinations:- 342.0
Data gathered for training period:  01 Oct 2017 31 Dec 2017
Data gathered for testing period:  01 Jan 2018 31 Jan 2018
optimisation completed
Optimal parameters are:  [0.0075, 1300000]
Max loss,R_PNL,profit are:  -107250.0 9360.0 -2600.0
Data gathered for training period:  01 Nov 2017 31 Jan 2018
Data gathered for testing period:  01 Feb 2018 28 Feb 2018
optimisation completed
Optimal parameters are:  [0.0025, 1900000]
Max loss,R_PNL,profit are:  -47500.0 709.7165 -42180.0
Data gathered for training period:  01 Dec 2017 28 Feb 2018
Data gathered for testing period:  01 Mar 2018 31 Mar 2018
optimisation completed
Optimal parameters are:  [0.004, 1300000]
Max loss,R_PNL,profit are:  -93600.0 24960.0 40430.0
Data gathered for training period:  01 Jan 2018 31 Mar 2018
Data gathered for testing period:  01 Apr 2018 30 Apr 2018
optimisation completed
Optimal parameters are:  [0.001, 500000]
Max loss

Unnamed: 0,max_loss,R_PNL,profit
01 Jan 2018-31 Jan 2018,-107250.0,9360.0,-2600.0
01 Feb 2018-28 Feb 2018,-47500.0,709.7165,-42180.0
01 Mar 2018-31 Mar 2018,-93600.0,24960.0,40430.0
01 Apr 2018-30 Apr 2018,-52500.0,-4438.2784,-8000.0
01 May 2018-31 May 2018,-278000.0,-191498.1439,-182400.0
01 Jun 2018-30 Jun 2018,-9350.0,112200.0001,112200.0
01 Jul 2018-31 Jul 2018,-9000.0,117000.0,115600.0
01 Aug 2018-31 Aug 2018,-285600.0,-234517.1758,-236640.0
01 Sep 2018-30 Sep 2018,-51000.0,17000.0,33320.0
01 Oct 2018-31 Oct 2018,-54750.0,7500.0,-54750.0


In [17]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params))

# Adjust these parameter according to search space
n_trials = 10 #NGEN
npop = 100
early_stopping_gen = 10 # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params]

results1 = walk_forward_analysis('jan 2018','jan 2021',1,parameters,optimization_function=deap_optimiser_g_n_std,optimizer_params=optimizer_param,lookback_in_months=12,evaluation_period=1)
results1


number of grid params:- 18.0
number of lot params:- 19.0
total_number_of_combinations:- 342.0
Data gathered for training period:  01 Jan 2017 31 Dec 2017
Data gathered for testing period:  01 Jan 2018 31 Jan 2018
optimisation completed
Optimal parameters are:  [0.007, 2000000]
Max loss,R_PNL,profit are:  -164000.0 -132200.0 -132200.0
Data gathered for training period:  01 Feb 2017 31 Jan 2018
Data gathered for testing period:  01 Feb 2018 28 Feb 2018
optimisation completed
Optimal parameters are:  [0.0065, 1800000]
Max loss,R_PNL,profit are:  -46800.0 109200.0 61740.0
Data gathered for training period:  01 Mar 2017 28 Feb 2018
Data gathered for testing period:  01 Mar 2018 31 Mar 2018
optimisation completed
Optimal parameters are:  [0.007, 1900000]
Max loss,R_PNL,profit are:  0 66500.0 79230.0
Data gathered for training period:  01 Apr 2017 31 Mar 2018
Data gathered for testing period:  01 Apr 2018 30 Apr 2018
optimisation completed
Optimal parameters are:  [0.0065, 1900000]
Max loss,R

Unnamed: 0,max_loss,R_PNL,profit
01 Jan 2018-31 Jan 2018,-164000.0,-132200.0,-132200.0
01 Feb 2018-28 Feb 2018,-46800.0,109200.0,61740.0
01 Mar 2018-31 Mar 2018,0.0,66500.0,79230.0
01 Apr 2018-30 Apr 2018,0.0,30970.0,18810.0
01 May 2018-31 May 2018,-121737.5,-116037.5,-121737.5
01 Jun 2018-30 Jun 2018,-15200.0,76950.0,76950.0
01 Jul 2018-31 Jul 2018,0.0,56000.0,52600.0
01 Aug 2018-31 Aug 2018,-125400.0,-6966.6667,-2280.0
01 Sep 2018-30 Sep 2018,-7100.0,22460.0007,22520.0
01 Oct 2018-31 Oct 2018,0.0,45000.0,8000.0


In [37]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params))

# Adjust these parameter according to search space
n_trials = 10 #NGEN
npop = 100
early_stopping_gen = 10 # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params]

results1 = walk_forward_analysis('jan 2021','jan 2023',1,parameters,optimization_function=deap_optimiser_g_n_std,optimizer_params=optimizer_param,lookback_in_months=12,evaluation_period=1)
results1


number of grid params:- 18.0
number of lot params:- 19.0
total_number_of_combinations:- 342.0
Data gathered for training period:  01 Jan 2020 31 Dec 2020
Data gathered for testing period:  01 Jan 2021 31 Jan 2021
optimisation completed
Optimal parameters are:  [0.0085, 1800000]
Max loss,R_PNL,profit are:  0 34425.0 42120.0
Data gathered for training period:  01 Feb 2020 31 Jan 2021
Data gathered for testing period:  01 Feb 2021 28 Feb 2021
optimisation completed
Optimal parameters are:  [0.0045, 2000000]
Max loss,R_PNL,profit are:  -54000.0 72000.0 65600.0
Data gathered for training period:  01 Mar 2020 28 Feb 2021
Data gathered for testing period:  01 Mar 2021 31 Mar 2021
optimisation completed
Optimal parameters are:  [0.0015, 1800000]
Max loss,R_PNL,profit are:  -72900.0 -30157.1707 -56340.0
Data gathered for training period:  01 Apr 2020 31 Mar 2021
Data gathered for testing period:  01 Apr 2021 30 Apr 2021
optimisation completed
Optimal parameters are:  [0.004, 1700000]
Max loss,R

KeyboardInterrupt: 

In [34]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])

print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params))

# Adjust these parameter according to search space
n_trials = 10 #NGEN
npop = 100
early_stopping_gen = 10 # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters = [grid_params,lot_params]

results1 = walk_forward_analysis('jan 2021','jan 2023',1,parameters,optimization_function=deap_optimiser_g_n_std,optimizer_params=optimizer_param,lookback_in_months=3,evaluation_period=1)
results1


number of grid params:- 18.0
number of lot params:- 19.0
total_number_of_combinations:- 342.0
Data gathered for training period:  01 Oct 2020 31 Dec 2020
Data gathered for testing period:  01 Jan 2021 31 Jan 2021
optimisation completed
Optimal parameters are:  [0.0015, 1700000]
Max loss,R_PNL,profit are:  -33150.0 89419.9997 89420.0
Data gathered for training period:  01 Nov 2020 31 Jan 2021
Data gathered for testing period:  01 Feb 2021 28 Feb 2021
optimisation completed
Optimal parameters are:  [0.001, 1600000]
Max loss,R_PNL,profit are:  -137600.0 -112174.4726 -135200.0
Data gathered for training period:  01 Dec 2020 28 Feb 2021
Data gathered for testing period:  01 Mar 2021 31 Mar 2021
optimisation completed
Optimal parameters are:  [0.008, 100000]
Max loss,R_PNL,profit are:  -8250.0 100.0 -8250.0
Data gathered for training period:  01 Jan 2021 31 Mar 2021
Data gathered for testing period:  01 Apr 2021 30 Apr 2021
optimisation completed
Optimal parameters are:  [0.005, 2000000]
Max

Unnamed: 0,max_loss,R_PNL,profit
01 Jan 2021-31 Jan 2021,-33150.0,89419.9997,89420.0
01 Feb 2021-28 Feb 2021,-137600.0,-112174.4726,-135200.0
01 Mar 2021-31 Mar 2021,-8250.0,100.0,-8250.0
01 Apr 2021-30 Apr 2021,-150000.0,-130000.0,-135000.0
01 May 2021-31 May 2021,-32400.0,10800.0,2160.0
01 Jun 2021-30 Jun 2021,-102000.0,0.0,-102000.0
01 Jul 2021-31 Jul 2021,0.0,2600.0,2600.0
01 Aug 2021-31 Aug 2021,0.0,5400.0,5400.0
01 Sep 2021-30 Sep 2021,-58800.0,-13745.5998,-47600.0
01 Oct 2021-31 Oct 2021,-25200.0,64631.25,51480.0


# Group 3 optimization without std

In [5]:
def deap_optimiser_indicator(train_data, test_data, parameters, optimization_params):
    ngen = optimization_params[0]  # number of generations
    npop = optimization_params[1]  # number of population

    error_check(parameters,5)
    
    grid_params = parameters[0]
    position_params = parameters[1]
    indicator_type_params = parameters[2]
    lookback_params = parameters[3]
    scaling_factor_params = parameters[4]

    creator.create("FitnessMax", base.Fitness, weights=(1.0,)) #maximizing
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()

    # Define the genes for our individual
    toolbox.register("G_gene", random.randint, grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
    toolbox.register("n_gene", random.randint, position_params[0]//position_params[2], position_params[1]//position_params[2])
    toolbox.register("indicator_gene", random.choice, indicator_type_params)
    toolbox.register("lookback_gene", random.choice, lookback_params)
    toolbox.register("scaling_gene", random.randint, scaling_factor_params[0]//scaling_factor_params[2], scaling_factor_params[1]//scaling_factor_params[2])

    CXPB, MUTPB = 0.5, 0.2

    def custom_mutate(individual): 
        if random.random() < MUTPB:
            individual[0] = random.randint(grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
        if random.random() < MUTPB:
            individual[1] = random.randint(position_params[0]//position_params[2], position_params[1]//position_params[2])
        if random.random() < MUTPB:
           individual[2] = random.choice(indicator_type_params)
        if random.random() < MUTPB:
            individual[3] = random.choice(lookback_params)
        if random.random() < MUTPB:
            individual[4] = random.randint(scaling_factor_params[0]//scaling_factor_params[2], scaling_factor_params[1]//scaling_factor_params[2])

        return individual,

    toolbox.register("mutate", custom_mutate)

    # Create an individual with the genes
    toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.G_gene, toolbox.n_gene, toolbox.indicator_gene, toolbox.lookback_gene, toolbox.scaling_gene), n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def objective(individual):
        G, n, indicator_type, lookback, scaling_factor = individual[0]*grid_params[2], individual[1]*position_params[2], individual[2], individual[3], individual[4]*scaling_factor_params[2]

        max_loss, R_PNL, profit, _ = run_strategy_optimised(train_data, G, n, indicator_type = indicator_type, lookback = lookback, indicator_scale = scaling_factor)
    
        constraints = [
            max_loss < -500e3
        ]

        if any(constraints):
            return float('-inf'),  # Return large negative value when constraints are not satisfied
        return profit,

    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("select", tools.selTournament, tournsize=3)   
    toolbox.register("evaluate", objective)

    population = toolbox.population(n=npop)

    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    #initiate early stopping
    stagnant_generations = 0  # Counter for generations without improvement
    MAX_STAGNANT_GEN = optimization_params[2]  # Early stopping criterion: stop if no improvement over x generations
    best_fitness_so_far = float('-inf')  # since we're maximizing
    ##
    for gen in range(ngen):
        offspring = toolbox.select(population, len(population))
        offspring = list(map(toolbox.clone, offspring))
        
        # Crossover
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        
        # Mutation
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        fitnesses = list(map(toolbox.evaluate, offspring))
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        #early stopping
        current_best_fitness = max(ind.fitness.values[0] for ind in population)

        if current_best_fitness > best_fitness_so_far:
            best_fitness_so_far = current_best_fitness
            stagnant_generations = 0  # Reset counter
        else:
            stagnant_generations += 1

        if stagnant_generations >= MAX_STAGNANT_GEN:
            print(f"Early stopping on generation {gen} due to no improvement.")
            break
        ##
        population[:] = offspring

    best_ind = tools.selBest(population, 1)[0]
    optimal_g = np.round(best_ind[0]*grid_params[2],5)
    optimal_n = best_ind[1]*position_params[2]
    optimal_type = best_ind[2]
    optimal_l = best_ind[3]
    optimal_scaling = np.round(best_ind[4] *scaling_factor_params[2],5)

    print("optimisation completed")
    max_loss, R_PNL,profit,std  = run_strategy_optimised(test_data, optimal_g,optimal_n,indicator_type = optimal_type,lookback = optimal_l,indicator_scale=optimal_scaling)
    return max_loss, R_PNL,profit,[optimal_g,optimal_n,optimal_type,optimal_l,optimal_scaling]

In [None]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
I_type = ['v','a']
lookback_params = [9,20,30,50,100,150,200]
scaling_factor_params = [1,3,0.1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_I_type_params = len(I_type)
n_lookback_params = len(lookback_params)
n_scaling_factor_params = ((scaling_factor_params[1]-scaling_factor_params[0])/scaling_factor_params[2])
print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of I_type params:-',(n_I_type_params))
print('number of lookback params:-',(n_lookback_params))
print('number of scaling_factor params:-',(n_scaling_factor_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_I_type_params*n_lookback_params*n_scaling_factor_params))


# Adjust these parameter according to search space
n_trials = 50 #NGEN
npop = 100
early_stopping_gen = n_trials # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters2 = [grid_params,lot_params,I_type,lookback_params,scaling_factor_params]

results2 = walk_forward_analysis('jan 2018','jan 2023',1,parameters2,optimization_function=deap_optimiser_indicator,optimizer_params=optimizer_param,lookback_in_months=3,evaluation_period=1)
results2

In [6]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
I_type = ['v','a']
lookback_params = [9,20,30,50,100,150,200]
scaling_factor_params = [1,3,0.1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_I_type_params = len(I_type)
n_lookback_params = len(lookback_params)
n_scaling_factor_params = ((scaling_factor_params[1]-scaling_factor_params[0])/scaling_factor_params[2])
print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of I_type params:-',(n_I_type_params))
print('number of lookback params:-',(n_lookback_params))
print('number of scaling_factor params:-',(n_scaling_factor_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_I_type_params*n_lookback_params*n_scaling_factor_params))


# Adjust these parameter according to search space
n_trials = 50 #NGEN
npop = 100
early_stopping_gen = n_trials # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters2 = [grid_params,lot_params,I_type,lookback_params,scaling_factor_params]

results2 = walk_forward_analysis('jan 2018','jan 2023',1,parameters2,optimization_function=deap_optimiser_indicator,optimizer_params=optimizer_param,lookback_in_months=12,evaluation_period=1)
results2

number of grid params:- 18.0
number of lot params:- 19.0
number of I_type params:- 2
number of lookback params:- 7
number of scaling_factor params:- 20.0
total_number_of_combinations:- 95760.0
Data gathered for training period:  01 Jan 2017 31 Dec 2017
Data gathered for testing period:  01 Jan 2018 31 Jan 2018
optimisation completed
Optimal parameters are:  [0.007, 2000000, 'v', 20, 2.2]
Max loss,R_PNL,profit are:  -211292.6829 -179657.3888 -182852.6829
Data gathered for training period:  01 Feb 2017 31 Jan 2018
Data gathered for testing period:  01 Feb 2018 28 Feb 2018
optimisation completed
Optimal parameters are:  [0.0065, 1400000, 'v', 9, 2.8]
Max loss,R_PNL,profit are:  -63700.0 35685.0 9128.0
Data gathered for training period:  01 Mar 2017 28 Feb 2018
Data gathered for testing period:  01 Mar 2018 31 Mar 2018
optimisation completed
Optimal parameters are:  [0.0065, 1700000, 'v', 9, 2.7]
Max loss,R_PNL,profit are:  -20995.0 77452.0 77452.0
Data gathered for training period:  01 Ap

KeyboardInterrupt: 

## Group 3 Optimization with std

In [17]:
def deap_optimiser_indicator_std(train_data, test_data, parameters, optimization_params):
    ngen = optimization_params[0]  # number of generations
    npop = optimization_params[1]  # number of population

    error_check(parameters,5)
    
    grid_params = parameters[0]
    position_params = parameters[1]
    indicator_type_params = parameters[2]
    lookback_params = parameters[3]
    scaling_factor_params = parameters[4]

    creator.create("FitnessMax", base.Fitness, weights=(1.0,)) #maximizing
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()

    # Define the genes for our individual
    toolbox.register("G_gene", random.randint, grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
    toolbox.register("n_gene", random.randint, position_params[0]//position_params[2], position_params[1]//position_params[2])
    toolbox.register("indicator_gene", random.choice, indicator_type_params)
    toolbox.register("lookback_gene", random.choice, lookback_params)
    toolbox.register("scaling_gene", random.randint, scaling_factor_params[0]//scaling_factor_params[2], scaling_factor_params[1]//scaling_factor_params[2])

    CXPB, MUTPB = 0.5, 0.2

    def custom_mutate(individual): 
        if random.random() < MUTPB:
            individual[0] = random.randint(grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
        if random.random() < MUTPB:
            individual[1] = random.randint(position_params[0]//position_params[2], position_params[1]//position_params[2])
        if random.random() < MUTPB:
           individual[2] = random.choice(indicator_type_params)
        if random.random() < MUTPB:
            individual[3] = random.choice(lookback_params)
        if random.random() < MUTPB:
            individual[4] = random.randint(scaling_factor_params[0]//scaling_factor_params[2], scaling_factor_params[1]//scaling_factor_params[2])

        return individual,

    toolbox.register("mutate", custom_mutate)

    # Create an individual with the genes
    toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.G_gene, toolbox.n_gene, toolbox.indicator_gene, toolbox.lookback_gene, toolbox.scaling_gene), n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def objective(individual):
        G, n, indicator_type, lookback, scaling_factor = individual[0]*grid_params[2], individual[1]*position_params[2], individual[2], individual[3], individual[4]*scaling_factor_params[2]

        max_loss, R_PNL, profit, std = run_strategy_optimised(train_data, G, n, indicator_type = indicator_type, lookback = lookback, indicator_scale = scaling_factor)
    
        constraints = [
            max_loss < -500e3
        ]

        if any(constraints):
            return float('-inf'),  # Return large negative value when constraints are not satisfied
        return profit/std,

    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("select", tools.selTournament, tournsize=3)   
    toolbox.register("evaluate", objective)

    population = toolbox.population(n=npop)

    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    #initiate early stopping
    stagnant_generations = 0  # Counter for generations without improvement
    MAX_STAGNANT_GEN = optimization_params[2]  # Early stopping criterion: stop if no improvement over x generations
    best_fitness_so_far = float('-inf')  # since we're maximizing
    ##
    for gen in range(ngen):
        offspring = toolbox.select(population, len(population))
        offspring = list(map(toolbox.clone, offspring))
        
        # Crossover
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        
        # Mutation
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        fitnesses = list(map(toolbox.evaluate, offspring))
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        #early stopping
        current_best_fitness = max(ind.fitness.values[0] for ind in population)

        if current_best_fitness > best_fitness_so_far:
            best_fitness_so_far = current_best_fitness
            stagnant_generations = 0  # Reset counter
        else:
            stagnant_generations += 1

        if stagnant_generations >= MAX_STAGNANT_GEN:
            print(f"Early stopping on generation {gen} due to no improvement.")
            break
        ##
        population[:] = offspring

    best_ind = tools.selBest(population, 1)[0]
    optimal_g = np.round(best_ind[0]*grid_params[2],5)
    optimal_n = best_ind[1]*position_params[2]
    optimal_type = best_ind[2]
    optimal_l = best_ind[3]
    optimal_scaling = np.round(best_ind[4] *scaling_factor_params[2],5)

    print("optimisation completed")
    max_loss, R_PNL,profit,std  = run_strategy_optimised(test_data, optimal_g,optimal_n,indicator_type = optimal_type,lookback = optimal_l,indicator_scale=optimal_scaling)
    return max_loss, R_PNL,profit,[optimal_g,optimal_n,optimal_type,optimal_l,optimal_scaling]

In [None]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
I_type = ['v','a']
lookback_params = [9,20,30,50,100,150,200]
scaling_factor_params = [1,3,0.1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_I_type_params = len(I_type)
n_lookback_params = len(lookback_params)
n_scaling_factor_params = ((scaling_factor_params[1]-scaling_factor_params[0])/scaling_factor_params[2])
print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of I_type params:-',(n_I_type_params))
print('number of lookback params:-',(n_lookback_params))
print('number of scaling_factor params:-',(n_scaling_factor_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_I_type_params*n_lookback_params*n_scaling_factor_params))


# Adjust these parameter according to search space
n_trials = 50 #NGEN
npop = 100
early_stopping_gen = n_trials # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters2 = [grid_params,lot_params,I_type,lookback_params,scaling_factor_params]

results2 = walk_forward_analysis('jan 2018','jan 2023',1,parameters2,optimization_function=deap_optimiser_indicator_std,optimizer_params=optimizer_param,lookback_in_months=3,evaluation_period=1)
results2

In [None]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
I_type = ['v','a']
lookback_params = [9,20,30,50,100,150,200]
scaling_factor_params = [1,3,0.1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_I_type_params = len(I_type)
n_lookback_params = len(lookback_params)
n_scaling_factor_params = ((scaling_factor_params[1]-scaling_factor_params[0])/scaling_factor_params[2])
print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of I_type params:-',(n_I_type_params))
print('number of lookback params:-',(n_lookback_params))
print('number of scaling_factor params:-',(n_scaling_factor_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_I_type_params*n_lookback_params*n_scaling_factor_params))


# Adjust these parameter according to search space
n_trials = 50 #NGEN
npop = 100
early_stopping_gen = n_trials # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters2 = [grid_params,lot_params,I_type,lookback_params,scaling_factor_params]

results3 = walk_forward_analysis('jan 2018','jan 2023',1,parameters2,optimization_function=deap_optimiser_indicator_std,optimizer_params=optimizer_param,lookback_in_months=12,evaluation_period=1)
results3

# Rollover (Re-initialising for group 3)

In [29]:
def deap_optimiser_indicator_rollover(train_start,train_end, test_start,test_end, parameters, optimization_params, position_turnover=1):
    train_pairs = get_date_pairs_(train_start, train_end, interval = position_turnover)
    test_pairs = get_date_pairs_(test_start, test_end, interval = position_turnover)
    
    
    ngen = optimization_params[0]  # number of generations
    npop = optimization_params[1]  # number of population

    error_check(parameters,5)
    
    grid_params = parameters[0]
    position_params = parameters[1]
    indicator_type_params = parameters[2]
    lookback_params = parameters[3]
    scaling_factor_params = parameters[4]

    creator.create("FitnessMax", base.Fitness, weights=(1.0,)) #maximizing
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()

    # Define the genes for our individual
    toolbox.register("G_gene", random.randint, grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
    toolbox.register("n_gene", random.randint, position_params[0]//position_params[2], position_params[1]//position_params[2])
    toolbox.register("indicator_gene", random.choice, indicator_type_params)
    toolbox.register("lookback_gene", random.choice, lookback_params)
    toolbox.register("scaling_gene", random.randint, scaling_factor_params[0]//scaling_factor_params[2], scaling_factor_params[1]//scaling_factor_params[2])

    CXPB, MUTPB = 0.5, 0.2

    def custom_mutate(individual): 
        if random.random() < MUTPB:
            individual[0] = random.randint(grid_params[0]//grid_params[2], grid_params[1]//grid_params[2])
        if random.random() < MUTPB:
            individual[1] = random.randint(position_params[0]//position_params[2], position_params[1]//position_params[2])
        if random.random() < MUTPB:
           individual[2] = random.choice(indicator_type_params)
        if random.random() < MUTPB:
            individual[3] = random.choice(lookback_params)
        if random.random() < MUTPB:
            individual[4] = random.randint(scaling_factor_params[0]//scaling_factor_params[2], scaling_factor_params[1]//scaling_factor_params[2])

        return individual,

    toolbox.register("mutate", custom_mutate)

    # Create an individual with the genes
    toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.G_gene, toolbox.n_gene, toolbox.indicator_gene, toolbox.lookback_gene, toolbox.scaling_gene), n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def objective(individual):
        G, n, indicator_type, lookback, scaling_factor = individual[0]*grid_params[2], individual[1]*position_params[2], individual[2], individual[3], individual[4]*scaling_factor_params[2]
        profit = 0
        for pair in train_pairs:
            train_data = data_gather_from_files(pair[0],pair[1])['EURUSD.mid']
            max_loss, R_PNL, month_profit, _ = run_strategy_optimised(train_data, G, n, indicator_type = indicator_type, lookback = lookback, indicator_scale = scaling_factor)
            constraints = [
                max_loss < -500e3
            ]
            if any(constraints):
                profit += -np.inf # Return large negative value when constraints are not satisfied
            else:
                profit += month_profit
        return profit,    

    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("select", tools.selTournament, tournsize=3)   
    toolbox.register("evaluate", objective)

    population = toolbox.population(n=npop)

    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit

    #initiate early stopping
    stagnant_generations = 0  # Counter for generations without improvement
    MAX_STAGNANT_GEN = optimization_params[2]  # Early stopping criterion: stop if no improvement over x generations
    best_fitness_so_far = float('-inf')  # since we're maximizing
    ##
    for gen in range(ngen):
        offspring = toolbox.select(population, len(population))
        offspring = list(map(toolbox.clone, offspring))
        
        # Crossover
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        
        # Mutation
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        fitnesses = list(map(toolbox.evaluate, offspring))
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        #early stopping
        current_best_fitness = max(ind.fitness.values[0] for ind in population)

        if current_best_fitness > best_fitness_so_far:
            best_fitness_so_far = current_best_fitness
            stagnant_generations = 0  # Reset counter
        else:
            stagnant_generations += 1

        if stagnant_generations >= MAX_STAGNANT_GEN:
            print(f"Early stopping on generation {gen} due to no improvement.")
            break
        ##
        population[:] = offspring

    best_ind = tools.selBest(population, 1)[0]
    optimal_g = np.round(best_ind[0]*grid_params[2],5)
    optimal_n = best_ind[1]*position_params[2]
    optimal_type = best_ind[2]
    optimal_l = best_ind[3]
    optimal_scaling = np.round(best_ind[4] *scaling_factor_params[2],5)

    train_df = {}
    test_df = {}
    
    for pair in train_pairs:
        tick_data = data_gather_from_files(pair[0],pair[1])['EURUSD.mid']
        max_loss, R_PNL,profit, _ = run_strategy_optimised(tick_data, optimal_g, optimal_n, indicator_type = optimal_type, lookback = optimal_l, indicator_scale = optimal_scaling)
        train_df[pair[0] +'-'+ pair[1]] = [max_loss, R_PNL,profit,optimal_g,optimal_n,optimal_type,optimal_l,optimal_scaling]
    print("optimisation completed")

    for pair in test_pairs:
        tick_data = data_gather_from_files(pair[0],pair[1])['EURUSD.mid']
        max_loss, R_PNL,profit, _ = run_strategy_optimised(tick_data, optimal_g, optimal_n, indicator_type = optimal_type, lookback = optimal_l, indicator_scale = optimal_scaling)
        test_df[pair[0] +'-'+ pair[1]] = [max_loss, R_PNL,profit,optimal_g,optimal_n,optimal_type,optimal_l,optimal_scaling]
    
    train_df = pd.DataFrame(train_df).T
    train_df.columns = ['max_loss', 'R_PNL','profit','optimal_g','optimal_n','optimal_type','optimal_l','optimal_scaling']
    
    test_df = pd.DataFrame(test_df).T
    test_df.columns = ['max_loss', 'R_PNL','profit','optimal_g','optimal_n','optimal_type','optimal_l','optimal_scaling']
    
    return [optimal_g,optimal_n,optimal_type,optimal_l,optimal_scaling],train_df,test_df

In [38]:
grid_params = [0.001,0.01,0.0005]
lot_params = [100000,2000000,100000]
I_type = ['v','a']
lookback_params = [9,20,30,50,100,150,200]
scaling_factor_params = [1,3,0.1]

n_grid_params = ((grid_params[1]-grid_params[0])/grid_params[2])
n_lot_params = ((lot_params[1]-lot_params[0])/lot_params[2])
n_I_type_params = len(I_type)
n_lookback_params = len(lookback_params)
n_scaling_factor_params = ((scaling_factor_params[1]-scaling_factor_params[0])/scaling_factor_params[2])
print('number of grid params:-',(n_grid_params))
print('number of lot params:-',(n_lot_params))
print('number of I_type params:-',(n_I_type_params))
print('number of lookback params:-',(n_lookback_params))
print('number of scaling_factor params:-',(n_scaling_factor_params))
print('total_number_of_combinations:-',(n_grid_params*n_lot_params*n_I_type_params*n_lookback_params*n_scaling_factor_params))


# Adjust these parameter according to search space
n_trials = 50 #NGEN
npop = 100
early_stopping_gen = n_trials # no early stopping
optimizer_param = [n_trials, npop, early_stopping_gen]

parameters2 = [grid_params,lot_params,I_type,lookback_params,scaling_factor_params]

results5 = walk_forward_analysis_rollover('jan 2018','jan 2023',1,parameters2,optimization_function=deap_optimiser_indicator_rollover,optimizer_params=optimizer_param,lookback_in_months=12,evaluation_period=1)
results5

number of grid params:- 18.0
number of lot params:- 19.0
number of I_type params:- 2
number of lookback params:- 7
number of scaling_factor params:- 20.0
total_number_of_combinations:- 95760.0
Data gathered for testing period:  01 Jan 2018 31 Jan 2018
optimisation completed
Optimal parameters are:  [0.006, 2000000, 'v', 9, 2.4]
Max loss,profit are:  -109200.0 -35780.0
Data gathered for testing period:  01 Feb 2018 28 Feb 2018
