In [1]:
import numpy as np 
import pandas as pd 
from functools import reduce 
import time

In [11]:
np.set_printoptions(suppress=True)

In [2]:
files = ['HDFCBANK.csv','ITC.csv','LT.csv','M&M.csv','TCS.csv']
dfs = []

for file in files : 
    temp = pd.read_csv('./data/'+file)
    temp.columns = ['Date',file.replace('.csv','')]
    dfs.append(temp)

stocks = reduce(lambda left ,right : pd.merge(left,right,on='Date'),dfs)
print(stocks.shape)
stocks.head()

(411, 6)


Unnamed: 0,Date,HDFCBANK,ITC,LT,M&M,TCS
0,2022-01-03,1519.650024,219.100006,1922.849976,829.799988,3817.75
1,2022-01-04,1528.550049,220.25,1937.550049,831.849976,3884.75
2,2022-01-05,1564.849976,220.550003,1948.599976,839.5,3860.949951
3,2022-01-06,1539.75,218.699997,1924.5,839.849976,3807.449951
4,2022-01-07,1550.550049,218.399994,1904.900024,829.0,3853.5


In [3]:
def hist_return(months): 
    idx = []
    df = pd.DataFrame()
    for mon in months : 
        temp =(stocks.iloc[0,1:] - stocks.iloc[mon,1:])/(stocks.iloc[mon,1:])
        idx.append(str(mon)+"_mon_return")
        df = pd.concat([df,temp.to_frame().T] ,ignore_index=True)
    df.index = idx
    return df 

In [4]:
hist_stocks_returns = hist_return([3,6,12,24,36])
hist_stocks_returns

Unnamed: 0,HDFCBANK,ITC,LT,M&M,TCS
3_mon_return,-0.013054,0.001829,-0.000857,-0.011966,0.002705
6_mon_return,-0.029536,-0.012841,-0.019529,-0.013787,-0.025064
12_mon_return,0.00079,-0.005673,-0.048518,-0.072227,-0.024753
24_mon_return,0.035078,-0.048219,0.02035,0.000362,0.010254
36_mon_return,0.012493,0.014587,0.056337,-0.024109,0.071258


In [5]:
pd.read_csv('./data/HDFCBANK.csv')

Unnamed: 0,Date,Close
0,2022-01-03,1519.650024
1,2022-01-04,1528.550049
2,2022-01-05,1564.849976
3,2022-01-06,1539.750000
4,2022-01-07,1550.550049
...,...,...
406,2023-08-23,1586.599976
407,2023-08-24,1579.300049
408,2023-08-25,1561.500000
409,2023-08-28,1577.750000


In [6]:
gene = np.random.rand()
gene

0.6221247435621776

In [7]:
def gene_mc_grid(rows,cols,n,N): 
    np.random.seed(seed=int(time.time()))
    layouts = np.zeros((n,rows*cols),dtype=np.int32)
    positionX = np.random.randint(0 ,cols , size=(N * n * 2))
    positionY = np.random.randint(0 ,rows, size=(N *n * 2))
    ind_rows = 0 
    ind_pos = 0

    while ind_rows < n : 
        layouts[ind_rows , positionX[ind_pos] + positionY[ind_pos] * cols] = 1 
        if np.sum(layouts[ind_rows , :]) == N : 
            ind_rows += 1 
        ind_pos += 1 
        if ind_pos >= N * n * 2 : 
            print("Not enough postiton")
            break 

    return layouts


def gene_mc_grid_with_NA_loc(rows,cols,n,N,NA_loc): 
    np.random.seed(seed=int(time.time()))
    layouts = np.zeros((n,rows*cols),dtype=np.int32)
    layouts_NA = np.zeros((n , rows*cols), dtype=np.int32)
    for i in NA_loc : 
        layouts_NA[: ,i-1]=2 

    positionX = np.random.randint(0 ,cols , size=(N * n * 2))
    positionY = np.random.randint(0 ,rows, size=(N *n * 2))
    ind_rows = 0 
    ind_pos = 0

    N_count = 0 
    while ind_rows < n : 
        cur_state = layouts_NA[ind_rows,positionX[ind_pos] +    positionY[ind_pos]*cols]
        if cur_state != 1 and cur_state !=2 : 
            layouts[ind_rows,positionX[ind_pos] + positionY[ind_pos]*cols]=1 
            layouts_NA[ind_rows,positionX[ind_pos] + positionY[ind_pos]*cols]=1
            N_count += 1 
        
            if np.sum(layouts[ind_rows , :]) == N : 
                ind_rows += 1 
                N_count = 0 
        ind_pos += 1 
        if ind_pos >= N * n * 2 : 
            print("Not enough positions")
            break 
    
    return layouts , layouts_NA 
        

In [8]:
gene_mc_grid(5, 5, 100, 50)
gene_mc_grid_with_NA_loc(5, 5, 100, 50,range(10))

Not enough postiton
Not enough positions


(array([[0, 0, 0, ..., 1, 1, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]),
 array([[2, 2, 2, ..., 1, 1, 2],
        [2, 2, 2, ..., 0, 0, 2],
        [2, 2, 2, ..., 0, 0, 2],
        ...,
        [2, 2, 2, ..., 0, 0, 2],
        [2, 2, 2, ..., 0, 0, 2],
        [2, 2, 2, ..., 0, 0, 2]]))

In [9]:
def chromosome(n): 
    ch = np.random.rand(n)
    return ch / sum(ch)

child = chromosome(5)
print(child , sum(child))

[0.32713656 0.07515453 0.42117466 0.14908363 0.02745062] 1.0000000000000002


In [12]:
n = 5 
pop_size = 100 

population = np.array([chromosome(n) for _ in range(pop_size)])
print(population.shape)
print(population)

(100, 5)
[[0.18914656 0.28014177 0.14096556 0.2586688  0.13107732]
 [0.30462847 0.21712353 0.08678813 0.11364621 0.27781366]
 [0.07367109 0.11442348 0.31722689 0.32054026 0.17413829]
 [0.13425689 0.06023929 0.28399842 0.28328331 0.23822209]
 [0.16715726 0.08919294 0.34634353 0.38327013 0.01403614]
 [0.14185942 0.15583763 0.28186024 0.26676931 0.1536734 ]
 [0.16567227 0.1685526  0.08774697 0.45880569 0.11922248]
 [0.33269183 0.18482355 0.2655254  0.17947407 0.03748515]
 [0.2163332  0.37137729 0.10061238 0.26050503 0.0511721 ]
 [0.49600024 0.12362447 0.02164036 0.1150241  0.24371082]
 [0.07439695 0.00875107 0.19006532 0.3185676  0.40821907]
 [0.01650248 0.2036822  0.34149268 0.14019781 0.29812483]
 [0.41418968 0.1267644  0.36210694 0.02875027 0.06818871]
 [0.28825345 0.24248542 0.21595091 0.20412615 0.04918407]
 [0.22885986 0.18143443 0.18610249 0.3425308  0.06107242]
 [0.26225322 0.33450884 0.1255042  0.17697376 0.10075998]
 [0.37023635 0.09943225 0.27717779 0.03367936 0.21947426]
 [0.3

In [13]:
cols = hist_stocks_returns.columns 
hist_stocks_returns[cols] = hist_stocks_returns[cols].apply(pd.to_numeric , errors="coerce")
print(hist_stocks_returns.info())

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 3_mon_return to 36_mon_return
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   HDFCBANK  5 non-null      float64
 1   ITC       5 non-null      float64
 2   LT        5 non-null      float64
 3   M&M       5 non-null      float64
 4   TCS       5 non-null      float64
dtypes: float64(5)
memory usage: 240.0+ bytes
None


In [15]:
cov_hist_return = hist_stocks_returns.cov()
print(cov_hist_return)

for i in range(5):
    cov_hist_return.iloc[i][i] = 0 


          HDFCBANK       ITC        LT       M&M       TCS
HDFCBANK  0.000606 -0.000275  0.000490  0.000090  0.000474
ITC      -0.000275  0.000558  0.000111 -0.000257  0.000340
LT        0.000490  0.000111  0.001578  0.000656  0.001464
M&M       0.000090 -0.000257  0.000656  0.000792  0.000306
TCS       0.000474  0.000340  0.001464  0.000306  0.001549


In [16]:
mean_hist_return = hist_stocks_returns.mean()
mean_hist_return

HDFCBANK    0.001154
ITC        -0.010063
LT          0.001556
M&M        -0.024346
TCS         0.006880
dtype: float64

In [17]:
sd_hist_return = hist_stocks_returns.std()
sd_hist_return

HDFCBANK    0.024613
ITC         0.023621
LT          0.039724
M&M         0.028142
TCS         0.039352
dtype: float64

In [22]:
def mean_portfolio_return(child): 
    return np.sum(np.multiply(child,mean_hist_return))

mean_portfolio_return(population[96])

-0.0059568953164233366

In [24]:
def var_portfolio_return(child): 
    part_1 = np.sum(np.multiply(child , sd_hist_return)**2)
    temp_lst = [] 
    for i in range(5): 
        for j in range(5): 
            temp = cov_hist_return.iloc[i][j] * child[i] * child[j]
            temp_lst.append(temp)
    part_2 = np.sum(temp_lst) 
    return part_1 + part_2 

var_portfolio_return(population[19])  

0.0007852135759296703

In [26]:
rf = 0.0697

def fitness_function(child):
    return(mean_portfolio_return(child) - rf)/np.sqrt(var_portfolio_return(child))

fitness_function(population[17])

-4.316752115629207

In [27]:
def Select_elite_population(population , frac = 0.3):
    population = sorted(population , key = lambda x : fitness_function(x) , reverse= True)
    percentage_elite_idx = int(np.floor(len(population)*frac))
    return population[:percentage_elite_idx]

Select_elite_population(population , frac=0.3)

[array([0.02762246, 0.0870635 , 0.32298355, 0.00818005, 0.55415044]),
 array([0.05746173, 0.02540199, 0.78658386, 0.02898056, 0.10157186]),
 array([0.00285317, 0.22414779, 0.34902586, 0.00018709, 0.42378609]),
 array([0.05698273, 0.08564942, 0.19765132, 0.13336993, 0.5263466 ]),
 array([0.05024806, 0.16267278, 0.43582166, 0.05580543, 0.29545207]),
 array([0.21537277, 0.05658122, 0.20197164, 0.06209136, 0.46398302]),
 array([0.25659573, 0.01119368, 0.30580973, 0.08170607, 0.34469479]),
 array([0.05609893, 0.14429031, 0.16093853, 0.11457123, 0.52410099]),
 array([0.29898263, 0.10440157, 0.31296567, 0.00133671, 0.28231343]),
 array([0.23294276, 0.11660294, 0.17416004, 0.05656487, 0.41972939]),
 array([0.01650248, 0.2036822 , 0.34149268, 0.14019781, 0.29812483]),
 array([0.07439695, 0.00875107, 0.19006532, 0.3185676 , 0.40821907]),
 array([0.1305344 , 0.2532253 , 0.37132338, 0.00565788, 0.23925904]),
 array([0.02897868, 0.25508718, 0.32735432, 0.09443764, 0.29414218]),
 array([0.20786165, 

In [28]:
[fitness_function(x) for x in population][:3]

[-4.2994714215119885, -3.4627071992612164, -2.9826612049840135]

In [31]:
def mutation(parent):
    child = parent.copy()
    n = np.random.choice(range(5),2)
    while(n[0] == n[1]): 
        n = np.random.choice(range(5),2)
    child[n[0]] , child[n[1]] = child[n[1]],child[n[0]]
    return child 

print(population[1])
mutation(population[1])



[0.30462847 0.21712353 0.08678813 0.11364621 0.27781366]


array([0.30462847, 0.21712353, 0.11364621, 0.08678813, 0.27781366])

In [32]:
def heuristic_crossover(parent1 , parent2): 
    ff1 = fitness_function(parent1) 
    ff2 = fitness_function(parent2)
    diff = parent1 - parent2 
    beta = np.random.rand()

    if ff1 > ff2 : 
        child1 = parent1 + beta * diff 
        child2 = parent2 - beta * diff 

    else : 
        child1 = parent1 - beta * diff 
        child2 = parent1 + beta * diff 

    return child1 , child2 

for i in population[:30]: 
    for j in population[:30]: 
        print(heuristic_crossover(i,j))

(array([0.18914656, 0.28014177, 0.14096556, 0.2586688 , 0.13107732]), array([0.18914656, 0.28014177, 0.14096556, 0.2586688 , 0.13107732]))
(array([0.30431787, 0.21729303, 0.08693385, 0.11403627, 0.27741899]), array([ 0.07397525,  0.34299051,  0.19499727,  0.40330132, -0.01526436]))
(array([0.11729703, 0.17703085, 0.25063642, 0.29716559, 0.15787011]), array([0.26099609, 0.38325269, 0.0312947 , 0.220172  , 0.10428452]))
(array([0.17637601, 0.22897958, 0.17424338, 0.26439557, 0.15600546]), array([0.20191711, 0.33130396, 0.10768774, 0.25294202, 0.10614917]))
(array([0.18010912, 0.20166325, 0.22537435, 0.309879  , 0.08297428]), array([0.198184  , 0.35862029, 0.05655677, 0.20745859, 0.17918035]))
(array([0.17223349, 0.23568224, 0.19135898, 0.26156609, 0.1391592 ]), array([0.20605963, 0.3246013 , 0.09057213, 0.25577151, 0.12299543]))
(array([0.16674602, 0.17365686, 0.09018127, 0.44965112, 0.11976474]), array([0.2115471 , 0.38662668, 0.19174985, 0.06768647, 0.14238989]))
(array([0.21330562, 0.

In [33]:
def Arithmetic_crossover(parent1 , parent2): 
    alpha = np.random.rand()
    child1 = alpha * parent1 + (1 - alpha)*parent2 
    child2 = (1 - alpha)*parent1 + alpha*parent2 
    return child1 , child2 

Arithmetic_crossover(population[2] , population[3])


(array([0.10755453, 0.08412023, 0.29864341, 0.2997038 , 0.20997804]),
 array([0.10037345, 0.09054254, 0.3025819 , 0.30411977, 0.20238234]))

In [35]:
def next_generation(pop_size , elite,crossover =heuristic_crossover): 
    new_population = []
    elite_range = range(len(elite))

    while len(new_population) < pop_size : 
        if len(new_population) > 2*pop_size : 
            mutate_or_crossover = np.random.choice([0,1] , p=[0.9 , 0.1])
        else : 
            mutate_or_crossover = np.random.choice([0,1] , p=[0.4 , 0.6])

        if mutate_or_crossover : 
            indx  = np.random.choice(elite_range)
            new_population.append(mutation(elite[indx]))
        else : 
            p1_idx , p2_idx = np.random.choice(elite_range ,2)
            c1 , c2 = crossover(elite[p1_idx] , elite[p2_idx])
            chk = 0 
            for gene in range(5): 
                if c1[gene] < 0 : 
                    chk += 1 
                
                else : 
                    chk +=0 

            if chk > 0 : 
                p1_idx , p2_idx = np.random.choice(elite_range,2)
                c1 , c2 = crossover(elite[p1_idx] , elite[p2_idx])
            new_population.extend([c1,c2])

    return new_population 

elite = Select_elite_population(population)
next_generation(100,elite)[:3]

[array([0.06996666, 0.00199969, 0.17467798, 0.31290432, 0.44045136]),
 array([0.09877439, 0.04590025, 0.27473344, 0.34972956, 0.23086237]),
 array([0.34379572, 0.09967973, 0.07705418, 0.01267801, 0.46679235])]

In [36]:
next_generation(100,elite,Arithmetic_crossover)[:3]


[array([0.09125265, 0.03443775, 0.24860882, 0.34011444, 0.28558633]),
 array([0.0774884 , 0.01346218, 0.20080259, 0.32251943, 0.38572739]),
 array([0.46679235, 0.09967973, 0.01267801, 0.07705418, 0.34379572])]

In [44]:
n = 5 
pop_size = 100 
population = np.array([chromosome(n) for _ in range(pop_size)])
elite = Select_elite_population(population)

iteration = 0 
Expected_returns = 0 
Expected_risk = 1 

while (Expected_returns < 0.30 and Expected_risk > 0.0005) or iteration <= 40 : 
    print("Iteratioon : ",iteration)
    population = next_generation(100,elite)
    elite = Select_elite_population(population)
    Expected_returns = mean_portfolio_return(elite[0])
    Expected_risk = var_portfolio_return(elite[0])
    print('Expected returns of {} with risk of {}\n' , Expected_returns , Expected_risk)
    iteration += 1 

print("portfolio of stocks after all iterations:\n")
[print(hist_stocks_returns.columns[i], ':' ,elite[0][i]) for i in list(range(5))]

Iteratioon :  0
Expected returns of {} with risk of {}
 0.0013889388878649614 0.0013360807431414962
Iteratioon :  1
Expected returns of {} with risk of {}
 0.0066509267823005105 0.001268158079695258
Iteratioon :  2
Expected returns of {} with risk of {}
 0.01606557264872737 0.001792293949816671
Iteratioon :  3
Expected returns of {} with risk of {}
 -0.0023018545809586673 0.0024892892324867074
Iteratioon :  4
Expected returns of {} with risk of {}
 0.011758198800846078 0.0036249010340772218
Iteratioon :  5
Expected returns of {} with risk of {}
 0.02177435004643169 0.009227373304676647
Iteratioon :  6
Expected returns of {} with risk of {}
 0.026110225960374014 0.013176890770210984
Iteratioon :  7
Expected returns of {} with risk of {}
 0.04813429821439411 0.026035424369565522
Iteratioon :  8
Expected returns of {} with risk of {}
 0.12392075626214001 0.03284580811775081
Iteratioon :  9
Expected returns of {} with risk of {}
 0.18223297944212632 0.05847339477289545
Iteratioon :  10
Exp

[None, None, None, None, None]

In [45]:
n=5 # Number of stocks = 6
pop_size=100 # initial population = 100

# Initial population
population = np.array([chromosome(n) for _ in range(pop_size)])

# Get initial elite population
elite = Select_elite_population(population)

iteration=0 
Expected_returns=0
Expected_risk=1
#(Expected_returns < 0.30 and Expected_risk > 0.0005) or
while ( iteration <= 40):
    print('Iteration:',iteration)
    population = next_generation(100,elite,Arithmetic_crossover)
    elite = Select_elite_population(population)
    Expected_returns=mean_portfolio_return(elite[0])
    Expected_risk=var_portfolio_return(elite[0])
    print('Expected returns of {} with risk of {}\n'.format(Expected_returns,Expected_risk))
    iteration+=1


print('Portfolio of stocks after all the iterations:\n')
[print(hist_stocks_returns.columns[i],':',elite[0][i]) for i in list(range(5))]


Iteration: 0
Expected returns of 0.0017837305400018504 with risk of 0.0010827286575944275

Iteration: 1
Expected returns of 0.00220721686163962 with risk of 0.001081885585233441

Iteration: 2
Expected returns of 0.0015000598993541727 with risk of 0.0011045780288435002

Iteration: 3
Expected returns of 0.0022701690064371592 with risk of 0.0010724356132446801

Iteration: 4
Expected returns of 0.0015000598993541727 with risk of 0.0011045780288435002

Iteration: 5
Expected returns of 0.0021404632578616673 with risk of 0.001072595000516351

Iteration: 6
Expected returns of 0.0015455207345260557 with risk of 0.0010900436200101237

Iteration: 7
Expected returns of 0.0015463056649668088 with risk of 0.001090005726953055

Iteration: 8
Expected returns of 0.0015977586644517058 with risk of 0.001089619250984281

Iteration: 9
Expected returns of 0.0016091893848011918 with risk of 0.001089221081871824

Iteration: 10
Expected returns of 0.0016766571140104884 with risk of 0.0010862102727006036

Itera

[None, None, None, None, None]

In [46]:
print('Portfolio of stocks after all the iterations:\n')
[print(hist_stocks_returns.columns[i],':',elite[0][i]) for i in list(range(5))]

print('\nExpected returns of {} with risk of {}\n'.format(Expected_returns,Expected_risk))

Portfolio of stocks after all the iterations:

HDFCBANK : 0.10164936106988647
ITC : 0.051643428885802894
LT : 0.38485933307896014
M&M : 0.0577630861431983
TCS : 0.4040847908221521

Expected returns of 0.001570457644909414 with risk of 0.0010883489492085942

