In [3]:
import numpy as np
import pandas as pd
import pygad.pygad
import time
from numpy import genfromtxt

# Portfolio selection problem

In [4]:
# Data
r = genfromtxt("data/annualized_average_log_returns.csv", delimiter=",")[:,1]
# TODO: move stock labels to indexes and column names
r = np.reshape(r, (1,len(r)))
V = genfromtxt("data/cov_matrix.csv", delimiter=',')
pigre = 0.049149

In [9]:
V.shape

(472, 472)

In [3]:
runs = 10
numberOfVariables = 3                       ## To insert runs = 10                                   ## To insert
results = np.zeros((runs, 9))

## True solution

V1 = np.linalg.inv(V)
e = np.ones((1, numberOfVariables))
alfa = np.dot(np.dot(r, V1), r.T)
beta = np.dot(np.dot(r, V1), e.T)
gamma = np.dot(np.dot(e, V1), e.T)

numerator = (gamma * np.dot(V1, r.T) - beta * np.dot(V1, e.T)) * pigre + (alfa * np.dot(V1, e.T) - beta * np.dot(V1, r.T))
denominator = alfa * gamma - beta**2

true_solution = numerator/denominator 

In [4]:
print(true_solution)

[[0.16142742]
 [0.72045951]
 [0.11811307]]


In [19]:
## Fitess minimization
def fitness_function(ga_instance, solution, solution_index):

    out = (0.000748044*(solution[0]**2) +
           0.000841507*(solution[1]**2) +
           0.000707590*(solution[2]**2) +
           2*(-0.000238393)*solution[0]*solution[1] +
        2*(-0.000234123)*solution[0]*solution[2] +
        2*0.000053341*solution[1]*solution[2] +
        (1/1)*(abs(0.046492*solution[0] + 0.050035*solution[1] + 0.047376*solution[2] - 0.049149) +
        abs(solution[0] + solution[1] + solution[2] - 1)) +
           max(-solution[0], 0) +
           max(-solution[1], 0) +
           max(-solution[2], 0)
           )

    fitness = 1.0 / (np.abs(out - pigre) + 0.0000001)

    return fitness


In [20]:
num_generations = 200
num_parents_mating = 100
sol_per_pop = 200
num_genes = 3
gene_type = float
parent_selection_type = "rws"
keep_elitism= 100
crossover_type = "single_point"
crossover_probability = 0.05
mutation_type = "random"
mutation_probability = 0.01
save_solutions = True
save_best_solutions = True
stop_criteria = "saturate_10"    # stop the run() if fitness not change for 10 consecutive generations
parallel_processing = 4
suppress_warnings = True

In [33]:

for i in range(runs):

    ga_instance = pygad.GA(fitness_func=fitness_function,
                           num_generations=num_generations,
                           num_parents_mating=num_parents_mating,
                           sol_per_pop=sol_per_pop,
                           num_genes=num_genes,
                           gene_type=gene_type,
                           parent_selection_type=parent_selection_type,
                           crossover_type=crossover_type,
                           crossover_probability=crossover_probability,
                           mutation_type=mutation_type,
                           mutation_probability=mutation_probability,
                           save_solutions=save_solutions,
                           save_best_solutions=save_best_solutions,
                           #stop_criteria=stop_criteria,
                           parallel_processing=parallel_processing,
                           suppress_warnings=suppress_warnings)
    
    # Execution time for algorithm run
    start = time.time()
    ga_instance.run()
    stop = time.time()
    
    solution, solution_fitness, solution_idx = ga_instance.best_solution()
    results[i, 0] = solution[0]
    results[i, 1] = solution[1]
    results[i, 2] = solution[2]
    results[i, 3] = solution_fitness
    results[i, 4] = np.dot(np.dot(solution, V), solution.T)
    results[i, 5] = np.dot(r, solution)[0] - pigre
    results[i, 6] = np.sum(solution) - 1
    results[i, 7] = ga_instance.best_solution_generation if ga_instance.best_solution_generation != -1 else None
    results[i, 8] = stop-start
    
    # Save algorithm

    filename = f'runs/genetic_{i}'
    ga_instance.save(filename=filename)


In [34]:
results = pd.DataFrame(results, columns=['Asset1',
                                         'Asset2',
                                         'Asset3',
                                         'Solution fitness',
                                         'x*V*x',
                                         'Return error',
                                         'Capital not invested',
                                         'Best solution generation',
                                         'Run time']
                       )

results#.sort_values('Solution fitness', ascending=False)

Unnamed: 0,Asset1,Asset2,Asset3,Solution fitness,x*V*x,Return error,Capital not invested,Best solution generation,Run time
0,0.412971,0.364318,0.176581,2005.447969,0.000162,-0.003355,-0.04613,108.0,25.155219
1,-0.010866,1.048684,-0.037246,660.195163,0.000928,0.001052,0.000572,88.0,17.509573
2,0.689357,0.137634,0.221131,2125.847516,0.000293,0.000263,0.048123,144.0,18.303636
3,0.472118,0.516574,-0.018145,1063.813874,0.000278,-0.002212,-0.029453,108.0,17.698849
4,0.055968,0.990528,-0.020954,5901.691513,0.0008,0.002021,0.025543,198.0,27.417838
5,0.178302,0.430453,0.440106,664.778062,0.000264,0.001529,0.048861,32.0,8.979712
6,0.000522,0.24618,0.799955,1142.143712,0.000525,0.001092,0.046657,67.0,14.926695
7,0.349797,-0.02879,0.663056,826.163215,0.000297,-0.002914,-0.015937,7.0,3.624997
8,0.001581,0.003123,0.951451,1219.4444,0.00064,-0.003843,-0.043846,110.0,30.262758
9,0.013009,-0.011619,0.96641,749.337313,0.000654,-0.003341,-0.0322,20.0,7.358442


## Load runs

In [35]:
import os
current_dir = os.getcwd()
run_dir = os.listdir(current_dir + "/runs")
run_dir.sort()

gas_trained = []

for file in run_dir:
    if file.startswith("run_"):
        continue
    file_name, file_extension = os.path.splitext(file)
    #file_path = os.path.join(current_dir, "runs", file)
    globals()["run_" + file_name] = pygad.load(f"runs/{file_name}")
    gas_trained.append(f"run_{file_name}")

## Plot fitness of runs

In [None]:
n = 0
for variable in gas_trained:
 
    eval(variable).plot_fitness(title=f"Run {n}",
                           save_dir=f"graphs/run_fitness_{n}")
    n += 1

## Plot genes behaviour

In [None]:
n = 0
for variable in gas_trained:
 
    eval(variable).plot_genes(title=f"Run {n}",
                           save_dir=f"graphs/run_genes_{n}")
    n += 1

## Plot solution rate

In [None]:
n = 0
for variable in gas_trained:
 
    eval(variable).plot_new_solution_rate(title=f"Run {n}",
                           save_dir=f"graphs/run_solution_rate_{n}")
    n += 1