In [1]:
import os
import pandas as pd
import numpy as np

### Calculate stock returns from closing value of each stock for every month

In [7]:
# Set the working directory to the folder that contains the per-stock .csv files.
# NOTE(review): the path is relative to the current working directory, so re-running this cell
# in the same kernel session would try to enter ./stocks/ a second time (i.e. stocks/stocks).
os.chdir('./stocks/')

##### Define function to obtain .csv filenames

In [8]:
def obtain_filenames(path):
    """Return the names of all '.csv' entries in `path`, without the '.csv' suffix.

    Only the trailing '.csv' is removed, so a file such as 'abc.v2.csv' yields
    'abc.v2' and `name + '.csv'` always reconstructs the original file name.

    Parameters
    ----------
    path : str
        Directory to scan (passed to os.listdir).

    Returns
    -------
    list of str
        Suffix-stripped names, in the order os.listdir reports them.
    """
    suffix = '.csv'
#     Slice off exactly the suffix instead of splitting on the first '.', which would truncate dotted names
    return [entry[:-len(suffix)] for entry in os.listdir(path) if entry.endswith(suffix)]

##### Define function to calculate returns for each stock

In [9]:
def calculate_returns(scrip,l_o_m):
    """Compute look-back returns for every stock from its '<name>.csv' file.

    For each name in `scrip` the file name + '.csv' is read (relative to the current
    working directory unless the name carries a path), and for each offset in `l_o_m`
    the return (Close[0] - Close[offset]) / Close[offset] is computed.

    NOTE(review): this assumes row 0 is the latest close and that consecutive rows
    are one month apart -- confirm against the data files.

    Parameters
    ----------
    scrip : iterable of str
        Stock names, i.e. csv file names without the '.csv' extension.
    l_o_m : iterable of int
        Row offsets from row 0 at which the earlier close is read.

    Returns
    -------
    dict
        Maps each stock name to its list of returns, ordered like `l_o_m`.
    """
    def _returns_for(frame):
#         Row 0 supplies the reference close; row `lag` supplies the close it is compared with
        return [(frame.loc[0,'Close'] - frame.loc[0 + lag,'Close'])/frame.loc[0 + lag,'Close']
                for lag in l_o_m]

    return {stck: _returns_for(pd.read_csv(stck + '.csv',header=0,sep=','))
            for stck in scrip}

In [10]:
# Collect the stock names (csv file names without the extension) found in the current working directory
scrip_names = obtain_filenames(os.getcwd())
scrip_names

['l&t', 'sunpha', 'tcs', 'hdfc', 'itc', 'm&m']

In [11]:
# Look-back horizons used by calculate_returns as row offsets from the first row of each csv
# (assumes one row per month -- TODO confirm against the data files)
list_of_months = [3,6,12,18,24,36]

In [12]:
# One column per stock, one row per look-back horizon. The row labels are derived from
# list_of_months so that labels and computed returns can never get out of step.
stck_returns = pd.DataFrame(calculate_returns(scrip_names,list_of_months),
                            index=['{}month'.format(m) for m in list_of_months])
stck_returns

Unnamed: 0,l&t,sunpha,tcs,hdfc,itc,m&m
3month,-0.030947,0.13151,0.296599,0.114515,0.039664,0.211564
6month,0.011417,-0.017957,0.368094,0.125163,0.011212,0.194062
12month,0.129783,0.010911,0.562537,0.275866,-0.178478,0.330899
18month,0.413121,-0.109885,0.564125,0.750581,0.104171,0.514277
24month,0.274461,-0.265911,0.44833,0.792712,0.084237,0.255179
36month,0.069614,-0.358785,0.447535,0.974847,0.266844,0.399938


### Genetic Algorithm for portfolio allocation

In [13]:
# Gene for this problem would be the percentage of amount to be allocated for a particular stock
# Chromosome would be the percentage of amount to be allocated for each stock
# The constraint is total amount to be invested is Rs.1,00,000/-
# Weighted returns for each stock is calculated according to weights list, with more weight given to most recent returns

##### Declare global variables

In [14]:
max_allocation = 100000 # total budget (Rs.) that is split across the stocks
weights = [0.30,0.20,0.15,0.15,0.10,0.10] # weight of each n-month return, in list_of_months order (shortest horizon first); sums to 1
mutation_probability = 0.5 # mutation probability handed to evolution_func at the start of every generation
number_of_iterations = 500 # number of select-and-evolve generations main_func runs after the first one
number_of_chrom_in_population = 500 # size of the initial population; also bounds the breeding loop in evolution_func

##### Generate initial population
In this context, a gene is the weightage of a particular scrip in the portfolio, and a chromosome is the sequence of weightages of all the scrips in the portfolio. The initial population is a collection of randomly generated portfolios.

In [15]:
def gen_init_population(pop,num_stocks,columns=None):
    """Generate `pop` random portfolios (chromosomes) over `num_stocks` stocks.

    Every row holds non-negative allocation fractions that sum to one: a vector of
    uniform random numbers in [0, 1) divided by its own sum.

    Parameters
    ----------
    pop : int
        Number of chromosomes (rows) to generate.
    num_stocks : int
        Number of genes (stocks) per chromosome.
    columns : sequence of str, optional
        Column labels for the result. Defaults to the notebook-level `scrip_names`.

    Returns
    -------
    pandas.DataFrame
        Float frame of shape (pop, num_stocks) with a default 0..pop-1 index.

    Raises
    ------
    ValueError
        If the number of column labels differs from `num_stocks`.
    """
    if columns is None:
        columns = scrip_names
#     Draw the whole population in one call (same draw order as one call per row) and build the frame once,
#     instead of enlarging a DataFrame row by row
    draws = np.random.random((pop,num_stocks))
    chromosomes = draws/draws.sum(axis=1,keepdims=True)
    return pd.DataFrame(chromosomes,columns=list(columns))

In [16]:
# gen_init_population(number_of_chrom_in_population,len(scrip_names)).head()
# gen_init_population(number_of_chrom_in_population,len(scrip_names)).shape

##### Define fitness function
The fitness function calculates weighted return for the portfolio 

In [17]:
def fitness_func(max_alloc,population,stock_ret,wts):
    """Return the weighted return of every portfolio in `population`.

    Parameters
    ----------
    max_alloc : number
        Total budget; each allocation fraction is scaled by it and the result divided by it again.
    population : pandas.DataFrame
        One portfolio per row, one allocation fraction per stock column.
    stock_ret : pandas.DataFrame
        One column per stock, one row per look-back horizon.
    wts : sequence of float
        Weight applied to each horizon row of `stock_ret`.

    Returns
    -------
    pandas.Series
        Portfolio return as a fraction of `max_alloc`, indexed like `population`.
    """
    def _horizon_weighted(column):
#         Collapse one stock's n-month returns into a single weighted figure
        return (column*wts).sum()

    def _portfolio_return(allocation):
#         Money put into each stock times that stock's weighted return, expressed relative to the budget.
#         E.g. a stock with a 5% weighted return that forms 20% of the portfolio contributes (20000*0.05)/100000
        invested = allocation*max_alloc
        return sum(invested*per_stock_return)/max_alloc

    per_stock_return = stock_ret.apply(_horizon_weighted)
    return population.apply(_portfolio_return, axis = 1)

##### Define selection function

In [18]:
def selection_func(fit_func,max_alloc,population,stock_ret,wts):
    """Select the top 20% of portfolios by fitness (the elite population).

    Parameters
    ----------
    fit_func : callable
        Called as fit_func(max_alloc, population, stock_ret, wts); must return one
        fitness value per row of `population`, in row order.
    max_alloc, stock_ret, wts
        Forwarded unchanged to `fit_func`.
    population : pandas.DataFrame
        One portfolio per row.

    Returns
    -------
    pandas.DataFrame
        The int(0.2 * len(population)) fittest rows, best first, keeping their
        original index labels.
    """
#     For each portfolio calculate portfolio returns using fit_func
    portfolio_return = fit_func(max_alloc,population,stock_ret,wts)
    elite_size = int(0.2*population.shape[0])
#     Rank by row position rather than by index label, so that the positional .iloc lookup below picks the
#     right rows even when the population does not carry a default 0..n-1 index
    ranked = portfolio_return.reset_index(drop=True).sort_values(ascending=False)
    elite_positions = ranked.index[:elite_size]
    return population.iloc[elite_positions,:]

In [19]:
# Try the selection step on a freshly generated random population and preview the fittest portfolios
tmp_pop = selection_func(fitness_func,max_allocation,gen_init_population(number_of_chrom_in_population,len(scrip_names)),
                         stck_returns,weights)
tmp_pop.head()

Unnamed: 0,l&t,sunpha,tcs,hdfc,itc,m&m
197,0.032174,0.004221,0.145178,0.436164,0.003586,0.378677
376,0.142316,0.017257,0.38291,0.392225,0.035982,0.02931
97,0.005173,0.094744,0.319579,0.244669,0.04613,0.289705
229,0.157119,0.050228,0.283169,0.315664,0.011424,0.182396
7,0.135487,0.023044,0.398192,0.202902,0.091227,0.149149


##### Define evolution function
Arithmetic crossover and scramble mutation have been used here. You are free to use other crossover methods, such as
one-point, multi-point or uniform crossover, and other mutation methods, such as swap mutation or inversion
mutation.

In [20]:
def evolution_func(elite_pop,mut_prob,scrip,num_of_chrom_in_pop):
    """Breed a new population from the elite portfolios.

    The breeding loop runs num_of_chrom_in_pop + 1 times. Each pass draws a random
    number and either

    * mutates: copies one randomly chosen elite portfolio and randomly permutes its
      allocation values across the stocks (scramble mutation), contributing one row; or
    * crosses over: blends two randomly chosen elite portfolios with a random
      factor alpha (arithmetic crossover), contributing two rows.

    Both operators keep the allocations of a row summing to the same total as the
    parents. The elite portfolios themselves are appended unchanged at the end, as
    the best solution may already be among them.

    Parameters
    ----------
    elite_pop : pandas.DataFrame
        Elite portfolios, one per row; must contain at least one row.
    mut_prob : float
        Initial mutation probability. After every mutation it is divided by
        (pass number + 1), so exploration fades quickly within a single call.
    scrip : sequence of str
        Column labels of the offspring frame.
    num_of_chrom_in_pop : int
        Upper bound (inclusive) of the breeding-loop counter.

    Returns
    -------
    pandas.DataFrame
        Offspring followed by the elite rows, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If `elite_pop` has no rows (raised by numpy when drawing a candidate).
    """
#     Work on a plain float array and gather the offspring in a list: DataFrame.append no longer exists in
#     pandas 2.x, and enlarging a frame one row at a time copies it on every pass
    elite_values = elite_pop.to_numpy(dtype=float)
    num_elite = elite_values.shape[0]
    offspring = []
    new_pop_iter = 0
    while new_pop_iter <= num_of_chrom_in_pop:
#         Draw a random number: below the mutation probability we mutate, otherwise we cross over. The
#         probability starts high to explore the search space and avoid getting stuck at a local optimum
        if np.random.random() < mut_prob:
#             The candidate (one of the elite portfolios) is chosen randomly for mutation
            candidate = np.random.randint(low=0,high=num_elite)
#             Mutation step - permute the allocation values across the scrips. Shuffling a one-row DataFrame
#             in place would only reorder its single row and leave the portfolio unchanged
            offspring.append(np.random.permutation(elite_values[candidate]))
#             Reduce the mutation probability as the loop progresses so that the population converges
            mut_prob = mut_prob/(new_pop_iter+1)
        else:
#             The parents (elite portfolios) for crossover are chosen randomly
            first,second = np.random.randint(low=0,high=num_elite,size=2)
#             alpha decides how much of each parent is captured in each child
            alpha = np.random.random()
            offspring.append(alpha*elite_values[first] + (1-alpha)*elite_values[second])
            offspring.append((1-alpha)*elite_values[first] + alpha*elite_values[second])
        new_pop_iter+=1
#     Offspring rows follow the elite column order; reindex so the result is labelled by `scrip`
    new_pop = pd.DataFrame(offspring,columns=elite_pop.columns).reindex(columns=scrip)
#     Keep the elite alongside the offspring
    return pd.concat([new_pop,elite_pop],ignore_index=True)

In [21]:
# Try the evolution step on the elite sample held in tmp_pop and preview the first rows of the new population
evolution_func(tmp_pop,mutation_probability,scrip_names,number_of_chrom_in_population).head()

Unnamed: 0,l&t,sunpha,tcs,hdfc,itc,m&m
0,0.074545,0.211845,0.2786,0.021639,0.019967,0.393403
1,0.040846,0.177727,0.108516,0.294578,0.102508,0.275825
2,0.037432,0.170927,0.119249,0.287886,0.119766,0.26474
3,0.084715,0.150582,0.315359,0.332541,0.051706,0.065098
4,0.051082,0.017065,0.319465,0.28697,0.209818,0.115599


##### Define main function

In [22]:
def main_func():
    """Run the genetic algorithm and return the final population.

    A random initial population is generated, then select-and-evolve is applied once
    to it and number_of_iterations further times to each successive population.
    Every generation calls evolution_func with the same starting
    mutation_probability. All settings are read from the notebook-level variables
    (number_of_chrom_in_population, scrip_names, max_allocation, stck_returns,
    weights, mutation_probability, number_of_iterations).

    Returns
    -------
    pandas.DataFrame
        The population produced by the last evolution step.
    """
    population = gen_init_population(number_of_chrom_in_population,len(scrip_names))
#     One pass for the initial population plus number_of_iterations passes of the genetic algorithm
    total_passes = max(number_of_iterations,0) + 1
    for generation in range(total_passes):
        elite = selection_func(fitness_func,max_allocation,population,stck_returns,weights)
        if generation == 0:
#             The first elite keeps the labels of the initial population; renumber it from zero
            elite = elite.reset_index(drop=True)
        population = evolution_func(elite,mutation_probability,scrip_names,number_of_chrom_in_population)
    return population

##### Call main function

In [23]:
# Run the full genetic algorithm; with the settings above this is by far the slowest cell of the notebook
range_of_portfolios = main_func() #Final population

##### Select the topmost portfolio

In [24]:
# selection_func returns the elite sorted by descending fitness, so the first row is the best portfolio found
top_portfolios = selection_func(fitness_func,max_allocation,range_of_portfolios,stck_returns,weights)
final_portfolio = top_portfolios.iloc[0]
final_portfolio

l&t       0.047013
sunpha    0.013315
tcs       0.358694
hdfc      0.102762
itc       0.071586
m&m       0.406630
Name: 1247, dtype: float64

##### Calculate portfolio returns

In [25]:
# Collapse each stock's n-month returns into one figure using the horizon weights (same formula as in fitness_func)
weighted_stck_returns = stck_returns.apply(lambda x:(x*weights).sum())
weighted_stck_returns

l&t       0.108842
sunpha   -0.041454
tcs       0.421184
hdfc      0.390110
itc       0.038104
m&m       0.294570
dtype: float64

In [26]:
# Return of the selected portfolio as a fraction of the budget (same formula as in fitness_func)
portfolio_return = sum((final_portfolio*max_allocation)*weighted_stck_returns)/max_allocation
# In the past three years, BSE Sensex has given a return of 26.12%. Using only one stock from each sector
# (all of them are top players by market capitalization, except m&m), we got a portfolio return of 33.7%.
# That is before any expense ratio that you would have incurred for a Sensex-indexed fund (roughly 0.5%).
# If you do some research, you are beating some mutual funds in this category as well (one of the best funds
# gave a return of 35%)!!
# NOTE(review): no random seed is set in the cells shown, so the figure printed below changes from run to run;
# the 33.7% quoted above presumably came from one particular run -- confirm.
print('The portfolio return is: {0:.3f}'.format(portfolio_return))

The portfolio return is: 0.318
