In [12]:
import numpy as np
import pandas as pd
import EA_funcs as ea

## Portfolio generating mechanism:
See `random_search()` in the `algorithms.py` file for the source code.
Chromosomes of an adequate length are randomly generated and normalized so that all their elements (genes) sum to 1. Each gene represents the weight of the investment in its respective asset. Each chromosome is then checked against the constraints on maximum risk and maximum weight per asset. If these constraints are satisfied, the chromosome is added to the population; otherwise a new chromosome is generated. This process is repeated until the specified population size is reached.

After the chromosomes are generated, the expected return of each one can be obtained as its dot product with the vector of annual expected returns for the assets, i.e. the same assets whose covariance matrix was used to compute the risk during population creation.

The next sections explore the returns and risks obtained by this method for different sets of assets.

To simplify evaluation, we define the following functions:

In [143]:
def expected_returns(cromosome, returns):
    """Expected return of a portfolio.

    Parameters
    ----------
    cromosome : array-like
        Portfolio weights, one entry per asset.
    returns : array-like
        Expected return of each asset, in the same order as the weights.

    Returns
    -------
    scalar
        Sum of the element-wise products of weights and asset returns.
    """
    weighted_returns = cromosome * returns
    return np.sum(weighted_returns)

def risk(chromosome, cov_matrix, periods_per_year=252):
    """Annualised portfolio risk, expressed as a percentage.

    Parameters
    ----------
    chromosome : array-like
        Portfolio weights, one entry per asset.
    cov_matrix : array-like, shape (n_assets, n_assets)
        Covariance matrix of the per-period asset returns (the notebook
        passes the covariance of daily returns).
    periods_per_year : int or float, default 252
        Factor used to scale the per-period variance to a yearly one.
        The default reproduces the previously hard-coded value.

    Returns
    -------
    scalar
        ``100 * sqrt(periods_per_year * w.T @ C @ w)``.
    """
    # Keep the original left-to-right evaluation order so that results are
    # numerically identical to the previous hard-coded expression.
    annual_variance = periods_per_year * chromosome.T @ cov_matrix @ chromosome
    return 100 * np.sqrt(annual_variance)

## Totally random search

Here we generate portfolios out of the whole search space, where all 402 assets are available. 

In [145]:
# Draw a random population over the full asset universe.
population = ea.random_search(ea.covariance_matrix)

# Quick look at the expected return of the first five portfolios.
all_asset_returns = ea.anual_returns.values
for i in range(5):
    fit = expected_returns(population[i], all_asset_returns)
    print(f"Expected return from portfolio #{i+1}: {fit}")

Expected return from portfolio #1: 0.08785280081298498
Expected return from portfolio #2: 0.08205191450772095
Expected return from portfolio #3: 0.07693205793962873
Expected return from portfolio #4: 0.1005246121034525
Expected return from portfolio #5: 0.09556933491283136


In [147]:
# Expected return of every portfolio in the population.
fitness_vals = np.array(
    [expected_returns(chromosome, ea.anual_returns) for chromosome in population],
    dtype=float,
)

# Portfolio with the highest expected return, and the risk it carries.
best = population[fitness_vals.argmax()]
print(f'best fitness in population: {round(fitness_vals.max(), 6)}')
print(f'with a risk of {round(risk(best, ea.covariance_matrix), 3)}%')

best fitness in population: 0.154614
with a risk of 12.334%


## Semi-random search

In [71]:
# Build a mixed universe: random picks from the three large categories plus
# every ticker of the three small ones.
# Each step extends `random_ticks` itself; the previous version appended to an
# undefined `tickers` variable, which discarded the random draws and only ran
# when that name happened to be left over in the kernel.
# `replace=False` keeps the sampled tickers distinct, so no asset appears twice
# in the resulting return/covariance data.
random_ticks = np.random.choice(ea.tickers_NC, 15, replace=False)
random_ticks = np.append(random_ticks, np.random.choice(ea.tickers_IC, 16, replace=False))
random_ticks = np.append(random_ticks, np.random.choice(ea.tickers_F, 6, replace=False))
random_ticks = np.append(random_ticks, ea.tickers_BONDDIA)
random_ticks = np.append(random_ticks, ea.tickers_CETES)
random_ticks = np.append(random_ticks, ea.tickers_ENERFIN)

random_ticks.shape

(41,)

In [73]:
# Restrict every dataset to the sampled tickers.
random_closing_prices = ea.all_data[random_ticks]
random_daily_returns = ea.daily_returns[random_ticks]
random_anual_returns = ea.anual_returns[random_ticks]

# Covariance of the sampled assets' daily returns, used for the risk figures.
random_cov_matrix = random_daily_returns.cov()

population = ea.random_search(random_cov_matrix)

# Expected return of every portfolio, then pick the most profitable one.
fitness_vals = np.array(
    [expected_returns(chromosome, random_anual_returns) for chromosome in population],
    dtype=float,
)
best = population[fitness_vals.argmax()]

print(f'best fitness in population: {fitness_vals.max()}')
print(f'risk: {round(risk(best, random_cov_matrix), 3)}%')

best fitness in population: 0.235664195268253
risk: 12.169%


## Deterministic search

In [74]:
# One row per asset category:
# (header to print, candidate tickers, how many to keep, extra keyword args).
# Only the three large categories request the ranking table (display=True).
# NOTE(review): presumably `returns_to_risk_ratio` returns the top-N tickers
# ranked by return/risk -- confirm against EA_funcs.
category_specs = [
    ("National", ea.tickers_NC, 15, {"display": True}),
    ("\nInternational", ea.tickers_IC, 16, {"display": True}),
    ("\nFIBRAS", ea.tickers_F, 6, {"display": True}),
    ("\nBONDDIA", ea.tickers_BONDDIA, 1, {}),
    ("\nCETES", ea.tickers_CETES, 1, {}),
    ("\nENERFIN", ea.tickers_ENERFIN, 1, {}),
]

selections = []
for header, candidates, keep, options in category_specs:
    print(header)
    selections.append(ea.returns_to_risk_ratio(candidates, keep, **options))

national, international, fibras, bonddia, cetes, enerfin = selections

National
               return/risk  anual return
CHDRAUIB.MX      30.533351      0.502305
LAMOSA.MX        29.922172      0.418857
VISTAA.MX        27.913815      0.914456
BAFARB.MX        24.591972      0.387490
GPROFUT.MX       23.814817      0.294088
PSOTRAC15.MX     22.536592      0.166068
BBAJIOO.MX       22.288682      0.418751
FRAGUAB.MX       20.984662      0.356001
ALSEA.MX         20.428416      0.382220
AC.MX            19.126087      0.256323
LIVEPOL1.MX      18.766512      0.263872
GENTERA.MX       18.669513      0.428172
LIVEPOLC-1.MX    18.606501      0.282443
CIEB.MX          18.532940      0.549310
CMOCTEZ.MX       17.235585      0.150320

International
          return/risk  anual return
LLY.MX      24.500471      0.465206
NVDA.MX     20.369435      0.649226
DELLC.MX    18.615024      0.532571
STLD.MX     17.773125      0.425833
ORLY.MX     17.257585      0.247057
XLE.MX      16.726604      0.317184
COP.MX      16.105865      0.379874
XOM.MX      15.578893      0.324

In [75]:
# Merge the per-category selections into a single ticker array.
selected_groups = (national, international, fibras, bonddia, cetes, enerfin)
top_ticks = np.concatenate(selected_groups)

# Restrict every dataset to the selected tickers.
top_closing_prices = ea.all_data[top_ticks]
top_daily_returns = ea.daily_returns[top_ticks]
top_anual_returns = ea.anual_returns[top_ticks]

In [76]:
# Covariance of the selected assets' daily returns.
top_cov_matrix = top_daily_returns.cov()

population = ea.random_search(top_cov_matrix)

# Expected return of every portfolio, then pick the most profitable one.
fitness_vals = np.array(
    [expected_returns(chromosome, top_anual_returns) for chromosome in population],
    dtype=float,
)
best = population[fitness_vals.argmax()]

print(f'best fitness in population: {fitness_vals.max()}')
print(f'risk: {round(risk(best, top_cov_matrix), 3)}%')

best fitness in population: 0.38808675480894966
risk: 9.861%


## Benchmark: ETF (VMEX)

In [138]:
# Date range of the price history requested for the benchmark.
start = '2021-01-01'
end = '2024-04-05'

# Constituents of the benchmark ETF, read from the project's data folder.
benchmark = pd.read_csv("../data/benchmark.csv")
ticks = benchmark["Tickers"].to_list()

benchmark_closing_prices = ea.get_historical_data(ticks, start, end)

# NOTE(review): the download log reports a failed ticker (AGUILASCPO.MX).
# Confirm that `get_historical_data` drops it; an all-NaN column would make
# the row-wise dropna() below discard every row.
benchmark_daily_returns = benchmark_closing_prices.pct_change().dropna()

# Annualise the mean daily return using the benchmark's own number of
# observations. The previous version divided by len(daily_returns), a name
# that is not defined in this notebook (leftover kernel state).
benchmark_anual_returns = benchmark_daily_returns.sum() / len(benchmark_daily_returns) * 252

benchmark_cov_matrix = benchmark_daily_returns.cov()

  data = yf.download(tickers, start=start, end=end)['Close']
[*********************100%***********************]  43 of 43 completed

1 Failed download:
['AGUILASCPO.MX']: YFPricesMissingError('possibly delisted; no price data found  (1d 2021-01-01 -> 2024-04-05) (Yahoo error = "Data doesn\'t exist for startDate = 1609480800, endDate = 1712296800")')


In [141]:
# Larger population than in the other sections (one million portfolios).
population = ea.random_search(benchmark_cov_matrix, num_chromosomes=1_000_000)

# Expected return of every portfolio, then pick the most profitable one.
fitness_vals = np.array(
    [expected_returns(chromosome, benchmark_anual_returns) for chromosome in population],
    dtype=float,
)
best = population[fitness_vals.argmax()]

print(f'best fitness in population: {fitness_vals.max()}')
print(f'risk: {round(risk(best, benchmark_cov_matrix), 3)}%')

best fitness in population: 0.2297214336932775
risk: 13.989%
