In [2]:
import numpy as np
import random
import pandas as pd
import plotly_express as px
import plotly.graph_objects as go

In [3]:
class GenAlg:
    """Genetic algorithm searching for weight vectors that maximise return / volatility.

    Every individual is a vector with one weight per entry of ``rates``; weights
    are normalised to sum to one. The fitness of an individual is its weighted
    rate divided by the square root of ``w^T * sigma * w``.

    Parameters
    ----------
    population_size : int
        Number of individuals kept in every generation.
    k_crossover : int
        Number of segments used by the crossover (``k_crossover - 1`` cut points).
    mutation_prob : float
        Per-weight probability that a weight is perturbed during mutation.
    rates : array-like with ``copy()``
        One rate per asset (the notebook passes a pandas Series).
    sigma : array-like with ``copy()``
        Square matrix used in the volatility term (the notebook passes a
        covariance DataFrame).
    """

    # Parameters of the Gaussian perturbation added to a mutated weight.
    MUTATION_MEAN = 0.25
    MUTATION_STD = 0.1

    def __init__(self, population_size, k_crossover, mutation_prob, rates, sigma):
        self.population_size = population_size
        self.k = k_crossover
        self.mutation_prob = mutation_prob
        self.number_of_stocks = len(rates)

        # Work on copies so later changes to the caller's objects cannot leak in.
        self.rates = rates.copy()
        self.sigma = sigma.copy()

        self.random_pop()

    def random_pop(self):
        """Replace the population with uniformly random, row-normalised weight vectors."""
        self.population = np.random.rand(self.population_size, self.number_of_stocks)
        # Normalise all rows at once instead of looping element by element.
        self.population /= self.population.sum(axis=1, keepdims=True)

    def normalize(self, vector):
        """Scale ``vector`` in place so its entries sum to one and return it."""
        total = np.sum(vector)
        vector[:] = np.divide(vector, total)
        return vector

    def evaluate(self):
        """Return ``(best_fitness, best_vector)`` for the current population.

        The returned vector is a row of ``self.population`` (not a copy). For an
        empty population both values are ``None``.
        """
        best_fitness = None
        best = None
        for vector in self.population:
            f = self.fitness(vector)
            if best is None or f > best_fitness:
                best = vector
                best_fitness = f
        return best_fitness, best

    def fitness(self, vector):
        """Return the weighted rate of ``vector`` divided by its volatility."""
        rate = np.sum(vector * self.rates)
        volatility = np.sqrt(np.dot(vector.T, np.dot(self.sigma, vector)))

        return rate / volatility

    def get_lucky_one(self):
        """Return a uniformly chosen row of the population (a view, not a copy)."""
        random_num = random.randrange(len(self.population))
        return self.population[random_num, :]

    def select_parents(self, lucky_one):
        """Return the fitter half of the population, best first.

        When ``lucky_one`` is given, exactly one row equal to it is taken out of
        the pool before ranking, so duplicates of that vector still compete.

        Raises
        ------
        ValueError
            If ``lucky_one`` is given but no population row equals it.
        """
        if lucky_one is not None:
            matches = np.flatnonzero((self.population == lucky_one).all(axis=1))
            if matches.size == 0:
                raise ValueError("lucky_one is not a member of the current population")
            keep = np.ones(len(self.population), dtype=bool)
            keep[matches[0]] = False
            pop = self.population[keep]
        else:
            pop = self.population.copy()

        scores = np.array([self.fitness(vec) for vec in pop], dtype=float)
        # Stable sort on the negated scores: descending fitness, ties keep their order.
        order = np.argsort(-scores, kind="stable")
        return pop[order[:len(pop) // 2]]

    def create_children(self, parents):
        """Create one child per parent through crossover, mutation and normalisation.

        ``parents`` is shuffled in place. Row ``i`` is paired with row
        ``i + len(parents) // 2`` and each pair yields two children. With an odd
        number of parents the last row is paired with the first one and only a
        single child of that pair is kept, so the number of children always
        equals the number of parents.
        """
        n_parents = len(parents)
        children = np.zeros(shape=(n_parents, self.number_of_stocks))
        np.random.shuffle(parents)
        half = n_parents // 2

        for i in range(half):
            child1, child2 = self.k_crossover(parents[i], parents[i + half])

            child1 = self.mutation(child1)
            child2 = self.mutation(child2)

            children[i, :] = self.normalize(child1)
            children[i + half, :] = self.normalize(child2)

        if n_parents % 2 == 1:
            # Odd parent out: reuse the first (randomly shuffled) parent as mate.
            extra_child, _ = self.k_crossover(parents[-1], parents[0])
            children[-1, :] = self.normalize(self.mutation(extra_child))

        return children

    def k_crossover(self, x, y):
        """Return two children built by swapping alternating segments of ``x`` and ``y``.

        ``self.k - 1`` distinct cut points are drawn uniformly from the interior
        positions. The request is clamped to the number of available positions,
        so an oversized ``k`` cuts at every position instead of failing.
        """
        n = self.number_of_stocks
        n_cuts = max(0, min(self.k - 1, n - 1))
        cuts = sorted(random.sample(range(1, n), n_cuts))
        cuts.append(n)

        new_x = np.array(x, dtype=float)
        new_y = np.array(y, dtype=float)

        start = 0
        for segment, end in enumerate(cuts):
            # Even segments stay with their own parent, odd segments are swapped.
            if segment % 2 == 1:
                new_x[start:end] = y[start:end]
                new_y[start:end] = x[start:end]
            start = end

        return new_x, new_y

    def mutation(self, child):
        """Perturb each weight of ``child`` in place with probability ``mutation_prob``.

        A mutated weight is increased by a Gaussian draw with mean
        ``MUTATION_MEAN`` and standard deviation ``MUTATION_STD``. The draw can
        occasionally be negative, so a weight is not guaranteed to stay positive.
        """
        for i in range(len(child)):
            if random.random() < self.mutation_prob:
                child[i] += random.gauss(self.MUTATION_MEAN, self.MUTATION_STD)
        return child

    def solve(self, max_generations, goal):
        """Evolve the population and return ``(best_vector, best_fitness)``.

        Stops after ``max_generations`` generations, or earlier as soon as the
        best fitness exceeds ``goal``. Each generation keeps the fitter half as
        parents and fills the rest with their children. When the population size
        is odd, one randomly chosen individual is carried over unchanged.

        Note the order of the returned pair is the reverse of ``evaluate``.
        """
        best_fit, best = self.evaluate()

        for iteration in range(max_generations):
            if best_fit > goal:
                break

            lucky_one = None
            if len(self.population) % 2 == 1:
                lucky_one = self.get_lucky_one()

            parents = self.select_parents(lucky_one)

            children = self.create_children(parents)

            if lucky_one is not None:
                lucky_one = lucky_one.reshape((1, len(lucky_one)))
                self.population = np.concatenate((lucky_one, parents, children), axis=0)
            else:
                self.population = np.concatenate((parents, children), axis=0)

            best_fit, best = self.evaluate()
        return best, best_fit

In [4]:
# The 45 ticker columns selected from the data file.
wanted_stocks = [
    'GOOG', 'SPG', 'GOOGL', 'MSFT', 'GD',
    'ACN', 'COP', 'F', 'BAC', 'GS',
    'NVDA', 'AIG', 'MS', 'WFC', 'ORCL',
    'XOM', 'TGT', 'LOW', 'EXC', 'COST',
    'AXP', 'BK', 'JPM', 'COF', 'CSCO',
    'DHR', 'UNH', 'CVS', 'LLY', 'CVX',
    'MET', 'AMT', 'CRM', 'BLK', 'RTX',
    'MCD', 'TMO', 'LIN', 'ADBE', 'EMR',
    'USB', 'UPS', 'TSLA', 'PFE', 'PM',
]
stocks = pd.read_csv('data/sap100_data_08112021.csv', index_col=0)[wanted_stocks]

# Row-over-row relative change of every column.
returns = stocks.div(stocks.shift()).sub(1)

# Mean and covariance scaled by 252
# (presumably trading days per year, assuming daily rows - TODO confirm).
rates = 252 * returns.mean()
sigma = 252 * returns.cov()

# Single run: 100 individuals, 5-segment crossover, at most 10 generations, fitness goal 8.
g = GenAlg(
    population_size=100,
    k_crossover=5,
    mutation_prob=0.01,
    rates=rates,
    sigma=sigma,
)
best, best_fit = g.solve(max_generations=10, goal=8)
print(best, best_fit)

[0.01240208 0.01288231 0.02243559 0.01219072 0.00639835 0.00991161
 0.00015054 0.08133791 0.00565375 0.00683955 0.01348043 0.02974061
 0.008419   0.00428601 0.13255423 0.0083943  0.02853772 0.00361534
 0.02341435 0.00754242 0.01078742 0.00438022 0.00624153 0.00100068
 0.00369092 0.01760884 0.01448718 0.01172493 0.11648622 0.00786122
 0.00060447 0.1186733  0.01508795 0.00819112 0.00595958 0.00366833
 0.14993894 0.00057023 0.02283515 0.0038464  0.01140029 0.0105469
 0.00192438 0.01582426 0.00647273] 4.204527181966938


In [5]:
# Hard-coded vector of 45 weights; presumably a portfolio produced by an
# earlier optimisation run - TODO confirm where it came from.
a = [0.01114155, 0.02112081, 0.0026389 , 0.04158689 ,0.02911615 ,0.00810653,
 0.00953936, 0.01291289, 0.03460458, 0.0172124,  0.02812051, 0.00615008,
 0.03882499, 0.01603352 ,0.03222546, 0.02002055, 0.01096212, 0.00879385,
 0.02504149, 0.02297195, 0.04096784, 0.04078883, 0.00404298 ,0.00336519,
 0.02037814, 0.0395612 , 0.041175,   0.03722034, 0.01283837, 0.02809674,
 0.0232359 , 0.03603028 ,0.04134022, 0.00582411, 0.01888116, 0.03858949,
 0.00809903, 0.03437509, 0.03463708 ,0.00438613 ,0.02915961, 0.01188739,
 0.00890677, 0.02284441, 0.01624411]
# Display the largest single weight in the vector.
max(a)

0.04158689

In [6]:
# Second hard-coded vector of 45 weights; presumably another portfolio from an
# earlier optimisation run - TODO confirm where it came from.
b = [0.00022345, 0.04564275, 0.04497704, 0.04376311, 0.0033903 , 0.03373989,
 0.04167013,0.04474843, 0.03991494, 0.013077 ,  0.04262159 ,0.01209769,
 0.03196004, 0.01024975, 0.03971374 ,0.01291337, 0.00605673, 0.03494775,
 0.01216985, 0.00211064, 0.00081961, 0.03155706, 0.01703879, 0.02700199,
 0.00440683 ,0.01209849, 0.02187265, 0.03169752, 0.02678447 ,0.00618878,
 0.0080071 , 0.03951205 ,0.01762882, 0.01950138, 0.00031214, 0.02950566,
 0.04465236, 0.01634321 ,0.01039963, 0.00432588, 0.02807762, 0.00894588,
 0.03424738, 0.0192949,  0.02379159]
# Display the largest single weight in the vector.
max(b)

0.04564275

In [7]:
def print_stats(vec):
    """Print rate, volatility and their ratio for the weight vector ``vec``.

    Reads the notebook-level ``rates`` and ``sigma``. The rate is the weighted
    sum of ``rates``; the volatility is the square root of ``w^T * sigma * w``.
    """
    weights = np.array(vec)

    portfolio_rate = np.sum(weights * rates)
    print('rate', portfolio_rate)

    risk_vector = np.dot(sigma, weights)
    portfolio_volatility = np.sqrt(np.dot(weights.transpose(), risk_vector))
    print('volatility:', portfolio_volatility)

    print('shapre ratio', portfolio_rate / portfolio_volatility)

In [8]:
# Report both hard-coded portfolios, separated by a single blank line.
for position, portfolio in enumerate((a, b)):
    if position > 0:
        print()
    print_stats(portfolio)

rate 0.4701581146929166
volatility: 0.12546335975815912
shapre ratio 3.747373859580875

rate 0.542997231753761
volatility: 0.1344691863204556
shapre ratio 4.038079255270691


In [9]:
import statistics

In [10]:
# Sweep the generation budget: for every budget run the algorithm 100 times and
# record how many weights of the best portfolio fall below 3% and below 1%.
generation_budgets = list(range(10, 25, 3))

m_tri = []    # per budget: counts of weights below 3%, one entry per run
m_jeden = []  # per budget: counts of weights below 1%, one entry per run
for p in generation_budgets:
    tri, jeden = [], []
    for _ in range(100):
        g = GenAlg(population_size=100, k_crossover=5, mutation_prob=0.01, rates=rates, sigma=sigma)
        best, best_fit = g.solve(p, 8)
        menej_ako_jedno_percenta = [i for i in best if i < 0.01]
        menej_ako_tri_percenta = [i for i in best if i < 0.03]
        tri.append(len(menej_ako_tri_percenta))
        jeden.append(len(menej_ako_jedno_percenta))
    m_tri.append(tri)
    m_jeden.append(jeden)

# The cell ends on print calls so no stray expression value is echoed as output.
print(generation_budgets)
print(m_tri)
print(m_jeden)


[10, 13, 16, 19, 22]
[[39, 37, 37, 34, 33, 37, 33, 37, 34, 34, 37, 37, 36, 36, 39, 34, 34, 30, 36, 35, 36, 38, 39, 39, 36, 40, 34, 40, 36, 40, 35, 35, 31, 35, 30, 34, 33, 31, 40, 36, 35, 29, 37, 29, 34, 34, 34, 33, 37, 39, 41, 38, 36, 42, 32, 41, 33, 34, 36, 36, 41, 35, 32, 36, 33, 40, 30, 27, 36, 41, 34, 35, 39, 39, 33, 40, 32, 35, 31, 38, 31, 33, 34, 31, 37, 36, 39, 36, 33, 36, 32, 33, 39, 37, 36, 36, 28, 29, 38, 38], [32, 36, 37, 35, 35, 40, 32, 33, 40, 33, 33, 38, 29, 35, 35, 35, 40, 37, 37, 31, 27, 32, 39, 37, 38, 39, 32, 39, 36, 35, 37, 39, 37, 38, 33, 30, 36, 33, 39, 35, 31, 35, 39, 38, 36, 32, 37, 32, 38, 34, 38, 40, 38, 33, 37, 38, 34, 40, 31, 33, 32, 38, 37, 33, 38, 34, 31, 30, 30, 38, 39, 37, 35, 31, 37, 37, 36, 34, 36, 37, 39, 37, 31, 33, 35, 31, 39, 34, 36, 36, 38, 31, 33, 33, 31, 35, 35, 35, 41, 35], [38, 37, 36, 39, 39, 32, 36, 36, 37, 36, 38, 39, 37, 40, 34, 35, 36, 36, 32, 33, 38, 38, 36, 35, 36, 35, 39, 34, 38, 38, 34, 37, 38, 40, 35, 37, 32, 35, 38, 38, 32, 40, 36, 3

"\nprint(best,best_fit)\nprint('menej ako 1%', len(menej_ako_jedno_percenta))\nprint('menej ako 3%', len(menej_ako_tri_percenta))\n"

In [11]:
# One row per generation budget, one column per repeated run; the budget itself
# is placed in a leading 'index' column.
df_menej_jeden = pd.DataFrame(m_jeden)
df_menej_jeden.insert(0, 'index', list(range(10, 25, 3)))
df_menej_jeden

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,90,91,92,93,94,95,96,97,98,99
0,10,19,15,15,10,16,16,15,15,17,...,14,15,21,17,20,18,12,12,15,11
1,13,15,16,13,15,12,17,19,14,15,...,24,14,16,15,16,13,15,20,14,16
2,16,16,16,14,17,16,19,18,18,20,...,21,20,16,17,14,15,13,16,16,14
3,19,21,18,13,14,19,22,17,18,20,...,19,15,23,20,19,17,15,17,14,19
4,22,17,19,22,21,19,17,17,17,21,...,18,18,22,19,23,18,14,20,17,20


In [12]:
# Overlaid, semi-transparent histograms coloured by the 'index' column, with unit-wide bins.
fig = px.histogram(
    df_menej_jeden,
    color='index',
    barmode='overlay',
    opacity=0.5,
)
fig.update_traces(xbins=dict(size=1))
fig.show()

In [13]:
# Sweep the crossover parameter k: 50 runs per value, then average the best
# fitness and the number of weights below 1% in the best portfolio.
the_best, menej = [], []
for k in range(2, 25):
    fitness_per_run = []
    small_weight_counts = []
    for _ in range(50):
        g = GenAlg(
            population_size=100,
            k_crossover=k,
            mutation_prob=0.01,
            rates=rates,
            sigma=sigma,
        )
        best, best_fit = g.solve(10, 8)
        fitness_per_run.append(best_fit)
        small_weight_counts.append(sum(1 for weight in best if weight < 0.01))
    the_best.append(statistics.mean(fitness_per_run))
    menej.append(statistics.mean(small_weight_counts))

print(the_best)
print(menej)


[4.209042186208235, 4.218543159704983, 4.228237516796444, 4.239845727074331, 4.250783175571005, 4.236729576097018, 4.238159511407988, 4.2430046807408965, 4.248845964576885, 4.247011862677305, 4.241895714671041, 4.247503785182175, 4.249413815633759, 4.2525158287997975, 4.250006296827944, 4.247481709462245, 4.251683343061667, 4.26248332576863, 4.244903923373909, 4.2597445759933725, 4.254803884337232, 4.24916548346142, 4.24594277041521]
[15.34, 16.02, 14.8, 15.28, 15.84, 15.32, 14.76, 15.2, 15.4, 14.8, 14.96, 15.76, 14.64, 14.74, 14.78, 14.98, 15.3, 15.7, 15.54, 15.84, 15.56, 15.24, 14.68]


In [14]:
# Display the per-setting mean number of weights below 1% from the preceding sweep.
menej

[15.34,
 16.02,
 14.8,
 15.28,
 15.84,
 15.32,
 14.76,
 15.2,
 15.4,
 14.8,
 14.96,
 15.76,
 14.64,
 14.74,
 14.78,
 14.98,
 15.3,
 15.7,
 15.54,
 15.84,
 15.56,
 15.24,
 14.68]

In [15]:
# Complement of the "below 1%" means with respect to the 45 weights.
# NOTE: int() truncates toward zero, so e.g. 29.66 is reported as 29.
viac = [int(45 - mean_small) for mean_small in menej]

k_sweep_columns = {
    "k": list(range(2, 25)),
    "ratio": the_best,
    "> 1%": viac,
}
df_k = pd.DataFrame(k_sweep_columns)
df_k

Unnamed: 0,k,ratio,> 1%
0,2,4.209042,29
1,3,4.218543,28
2,4,4.228238,30
3,5,4.239846,29
4,6,4.250783,29
5,7,4.23673,29
6,8,4.23816,30
7,9,4.243005,29
8,10,4.248846,29
9,11,4.247012,30


In [16]:
# Two-axis line chart of the k sweep held in df_k
# (that sweep uses mutation_prob=0.01 and population_size=100).
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_k['k'], y=df_k['ratio'], name="ratio", yaxis="y1"))
fig.add_trace(go.Scatter(x=df_k['k'], y=df_k['> 1%'], name="> 1%", yaxis="y2"))
fig.update_layout(
    xaxis=dict(title=dict(text='k')),
    # The title colour is set through title.font; the deprecated `titlefont`
    # axis attribute is not accepted by newer plotly releases.
    yaxis=dict(
        title=dict(
            text="ratio",
            font=dict(color="#1f77b4"),
        ),
        tickfont=dict(
            color="#1f77b4"
        )
    ),
    # Second y axis drawn over the first one, on the right edge of the plot.
    yaxis2=dict(
        title=dict(
            text="> 1%",
            font=dict(color="red"),
        ),
        tickfont=dict(
            color="red"
        ),
        anchor="free",
        overlaying="y",
        side="right",
        position=1
    ))
fig.show()

In [17]:
# Sweep the mutation probability: 100 runs per value, then average the best
# fitness and the number of weights below 1% in the best portfolio.
the_best, menej = [], []
for p in [0.005, 0.010, 0.015, 0.020, 0.025, 0.030, 0.035, 0.040, 0.045, 0.050]:
    fitness_per_run = []
    small_weight_counts = []
    for _ in range(100):
        g = GenAlg(
            population_size=100,
            k_crossover=10,
            mutation_prob=p,
            rates=rates,
            sigma=sigma,
        )
        best, best_fit = g.solve(10, 8)
        fitness_per_run.append(best_fit)
        small_weight_counts.append(sum(1 for weight in best if weight < 0.01))
    the_best.append(statistics.mean(fitness_per_run))
    menej.append(statistics.mean(small_weight_counts))

print(the_best)
print(menej)

[4.250553404645456, 4.254655332606942, 4.238467282737964, 4.239993011370794, 4.23457878347763, 4.231451448967875, 4.216864092022188, 4.233714850229523, 4.223719674417431, 4.211603520760883]
[14.85, 15.18, 15.76, 17.68, 18.98, 19.99, 20.62, 24.42, 25.26, 25.62]


In [18]:
# Complement of the "below 1%" means with respect to the 45 weights.
# NOTE: int() truncates toward zero, so e.g. 29.82 is reported as 29.
viac = [int(45 - mean_small) for mean_small in menej]

p_sweep_columns = {
    "p": [0.005, 0.010, 0.015, 0.020, 0.025, 0.030, 0.035, 0.040, 0.045, 0.050],
    "ratio": the_best,
    "> 1%": viac,
}
df_p = pd.DataFrame(p_sweep_columns)
df_p

Unnamed: 0,p,ratio,> 1%
0,0.005,4.250553,30
1,0.01,4.254655,29
2,0.015,4.238467,29
3,0.02,4.239993,27
4,0.025,4.234579,26
5,0.03,4.231451,25
6,0.035,4.216864,24
7,0.04,4.233715,20
8,0.045,4.22372,19
9,0.05,4.211604,19


In [19]:
# Two-axis line chart of the mutation-probability sweep held in df_p
# (that sweep uses k_crossover=10 and population_size=100).
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_p['p'], y=df_p['ratio'], name="ratio", yaxis="y1"))
fig.add_trace(go.Scatter(x=df_p['p'], y=df_p['> 1%'], name="> 1%", yaxis="y2"))
fig.update_layout(
    xaxis=dict(title=dict(text='p')),
    # The title colour is set through title.font; the deprecated `titlefont`
    # axis attribute is not accepted by newer plotly releases.
    yaxis=dict(
        title=dict(
            text="ratio",
            font=dict(color="#1f77b4"),
        ),
        tickfont=dict(
            color="#1f77b4"
        )
    ),
    # Second y axis drawn over the first one, on the right edge of the plot.
    yaxis2=dict(
        title=dict(
            text="> 1%",
            font=dict(color="red"),
        ),
        tickfont=dict(
            color="red"
        ),
        anchor="free",
        overlaying="y",
        side="right",
        position=1
    ))
fig.show()

# vhodné parametre?

In [26]:
# Single run with 10-segment crossover and mutation probability 0.01:
# at most 10 generations, fitness goal 8.
g = GenAlg(
    population_size=100,
    k_crossover=10,
    mutation_prob=0.01,
    rates=rates,
    sigma=sigma,
)

best, best_fit = g.solve(max_generations=10, goal=8)
print(best, best_fit)

[0.01645208 0.0322002  0.01365159 0.08322032 0.03114179 0.02503631
 0.02852424 0.03133215 0.00896659 0.02004556 0.01606103 0.01750719
 0.00217907 0.01416346 0.12899603 0.01344529 0.00154887 0.02063509
 0.01917004 0.0287973  0.01228395 0.00902642 0.01862372 0.00800605
 0.01203194 0.03146646 0.02361481 0.00148632 0.10470478 0.01947316
 0.00611707 0.0183105  0.02571685 0.00999524 0.00030258 0.00796955
 0.02272337 0.00152548 0.01253436 0.00851215 0.00813952 0.00551268
 0.02363687 0.03742157 0.01779043] 4.288565185575762
