### References
- "Genetic Algorithm from Scratch in Python (tutorial with code)" - Kie Codes. https://www.youtube.com/watch?v=nhT56blfRpE.

In [71]:
import pandas as pd 
import numpy as np 
from numba import jit

from scipy.stats import skew

### Data Preparation

In [72]:
def compute_log_returns(df):
    """Return the period-over-period log returns of ``df``.

    The natural log is taken element-wise and then differenced along the
    rows with ``.diff()``, i.e. ``log(p_t) - log(p_{t-1})``. The first row
    has no predecessor, so ``.diff()`` leaves it as NaN.
    """
    return np.log(df).diff()

In [73]:
# Load the three index datasets. Every CSV is read with the same options:
# float columns, first column used as the (date-parsed) index.
ftse, hanseng, sp = (
    pd.read_csv(csv_path, dtype = float, parse_dates= True, index_col= 0)
    for csv_path in (
        "C:\\Users\\Saverio\\Documents\\GitHub\\python_defi_project\\data\\ftse100.csv",
        "C:\\Users\\Saverio\\Documents\\GitHub\\python_defi_project\\data\\hanseng.csv",
        "C:\\Users\\Saverio\\Documents\\GitHub\\python_defi_project\\data\\sp100.csv",
    )
)

# Log returns of each dataset
ftse_rets, hanseng_rets, sp_rets = map(compute_log_returns, (ftse, hanseng, sp))

# Asset lists: the column labels of each dataset
ftse_assets, hanseng_assets, sp_assets = ftse.columns, hanseng.columns, sp.columns

### Genetic Algorithm Set-Up

In [None]:
class GA_PortfolioOptimization:
    """Genetic-algorithm set-up for cardinality-constrained portfolio selection.

    A candidate portfolio (``Chromosome``) holds ``K`` assets, each with an
    ``s`` value in [0, 1]. The ``s`` values are mapped to weights that respect
    the minimum/maximum holding constraints (``compute_weights``), and the
    weights are scored by one of several fitness functions in which the risk
    term enters with a positive sign and the expected return with a negative
    sign (so, presumably, lower fitness is better).

    Assets are always identified by their column label in
    ``assets_returns_df``.
    """

    class Chromosome:
        """One candidate portfolio: a mapping ``asset label -> s value``."""

        def __init__(self, assets, s_list):
            self.assets_dictionary = dict(zip(assets, s_list))
            # Filled in by ``evaluate_fitness``.
            self.fitness = None
            self.weights = None

        @property
        def s_list(self):
            """The s values, in the same order as the ``assets_dictionary`` keys."""
            return list(self.assets_dictionary.values())

    def __init__(self, assets_returns_df, K, min_holding_constraint, max_holding_constraint):
        """Store the return data and the portfolio constraints.

        Args:
            assets_returns_df: DataFrame of asset returns, one column per asset.
            K: number of assets held in every portfolio (cardinality).
            min_holding_constraint: minimum weight of each held asset.
            max_holding_constraint: maximum weight of each held asset.
        """
        self.asset_returns = assets_returns_df
        self.assets_list = assets_returns_df.columns
        self.assets_indices = list(range(assets_returns_df.shape[1]))

        self.expected_returns = assets_returns_df.mean()
        self.var_cov_matrix = assets_returns_df.cov()
        self.K = K

        # We assume a minimum and maximum holding constraint
        self.min_holding_constraint = min_holding_constraint
        self.max_holding_constraint = max_holding_constraint

        # Both are overwritten by ``genetic_algorithm_loop``.
        self.trade_off_param = 0
        self.weight_skewness = 0

    def create_chromosome(self):
        """Return a random chromosome with ``K`` distinct assets and uniform s values."""
        positions = np.random.choice(self.assets_indices, size=self.K, replace=False)
        # Store column labels, not positions: the fitness functions and the
        # crossover step all look assets up by label.
        assets = [self.assets_list[i] for i in positions]
        s_list = np.random.uniform(0, 1, size=self.K)

        return self.Chromosome(assets=assets, s_list=s_list)

    def repair_weights(self, weights):
        """Return a non-negative copy of ``weights`` summing to 1 with no entry above the cap.

        Negative weights are set to zero and the vector is normalised. Any
        weight above ``max_holding_constraint`` is then cut back to the cap,
        and the removed mass is handed to the weights that have not been
        capped, in proportion to their inverse size (smaller weights receive
        more). A weight that has been capped stays capped.

        Raises:
            ValueError: if the cap is too small for the weights to sum to 1.
        """
        weights = np.array(weights, dtype=float)
        n_weights = weights.size
        if n_weights == 0:
            return weights

        # solve the problem of negative weights
        weights = np.where(weights < 0, 0.0, weights)

        # solve the problem of weights not summing to 1
        total = weights.sum()
        if total > 0:
            weights = weights / total
        else:
            # Nothing to normalise: fall back to an equal split.
            weights = np.full(n_weights, 1.0 / n_weights)

        cap = self.max_holding_constraint
        if cap is None:
            return weights
        if cap * n_weights < 1 and not np.isclose(cap * n_weights, 1.0):
            raise ValueError(
                "max_holding_constraint is too small: the weights cannot sum to 1."
            )

        free = np.ones(n_weights, dtype=bool)
        # Every pass caps at least one more weight, so n_weights passes suffice.
        for _ in range(n_weights):
            over_cap = free & (weights > cap)
            if not over_cap.any():
                break

            total_excess = np.sum(weights[over_cap] - cap)
            weights[over_cap] = cap
            free &= ~over_cap
            if not free.any():
                break

            # Inverse-size shares; the floor avoids dividing by a zero weight.
            inverse_weights = 1.0 / np.maximum(weights[free], 1e-6)
            weights[free] += total_excess * inverse_weights / inverse_weights.sum()

        return weights

    def fitness_function_mean_variance(self, assets, weights):
        """Mean-variance fitness: ``lambda * w' Sigma w - (1 - lambda) * w' mu``.

        ``lambda`` is ``self.trade_off_param``; ``mu`` and ``Sigma`` are the
        sample means and covariances computed in ``__init__``.
        """
        assets = list(assets)
        weights = np.array(weights, dtype=float)

        means = self.expected_returns.loc[assets].to_numpy()
        covariances = self.var_cov_matrix.loc[assets, assets].to_numpy()

        var_term = weights @ covariances @ weights
        mean_term = weights @ means

        return self.trade_off_param * var_term - (1 - self.trade_off_param) * mean_term

    def fitness_function(self, assets, weights, risk_term):
        """Return the fitness of a portfolio for the chosen risk measure.

        Args:
            assets: labels of the held assets.
            weights: portfolio weights, aligned with ``assets``.
            risk_term: one of ``"MAD"`` (mean absolute deviation),
                ``"semivariance"``, ``"skewness"`` (variance minus a skewness
                reward weighted by ``self.weight_skewness``) or ``"MV"``
                (mean-variance).

        Raises:
            ValueError: if ``risk_term`` is not one of the values above.
        """
        if risk_term not in ("MAD", "semivariance", "skewness", "MV"):
            raise ValueError("Enter a valid argument for 'risk_term'.")

        assets = list(assets)
        weights = np.array(weights, dtype=float)

        if risk_term == "MV":
            return self.fitness_function_mean_variance(assets=assets, weights=weights)

        # Rows with a missing return for any held asset are dropped.
        time_series = self.asset_returns.loc[:, assets].dropna()
        portfolio_returns = time_series.to_numpy() @ weights
        portfolio_expected_return = portfolio_returns.mean()

        return_term = (1 - self.trade_off_param) * portfolio_expected_return

        if risk_term == "MAD":
            risk_metric = np.mean(np.abs(portfolio_returns - portfolio_expected_return))
            return self.trade_off_param * risk_metric - return_term

        if risk_term == "semivariance":
            deviations = portfolio_returns - portfolio_expected_return
            downside_deviations = deviations[deviations < 0]
            # Divided by the full sample size, not by the number of downside points.
            risk_metric = np.sum(downside_deviations ** 2) / len(portfolio_returns)
            return self.trade_off_param * risk_metric - return_term

        # risk_term == "skewness"
        skewness = skew(portfolio_returns)
        variance = np.var(portfolio_returns)
        return self.trade_off_param * variance - return_term - self.weight_skewness * skewness

    def compute_weights(self, s_list):
        """Map ``K`` s values to portfolio weights.

        Every asset first receives ``min_holding_constraint``; the remaining
        share ``1 - K * min_holding_constraint`` is split in proportion to the
        s values. The result is passed through ``repair_weights``.
        """
        s_vector = np.asarray(list(s_list), dtype=float)

        weights_vector = np.full(self.K, self.min_holding_constraint, dtype=float)

        # We assume a uniform holding constraint
        left_over_asset_share = 1 - (self.K * self.min_holding_constraint)

        s_total = s_vector.sum()
        if s_total > 0:
            normalized_s_vector = s_vector / s_total
        else:
            # All s values are zero: split the remaining share equally.
            normalized_s_vector = np.full(s_vector.shape, 1.0 / max(s_vector.size, 1))

        final_weights = weights_vector + left_over_asset_share * normalized_s_vector

        return self.repair_weights(final_weights)

    def evaluate_fitness(self, chromosome, fitness_type):
        """Compute, store on the chromosome, and return its fitness.

        Sets ``chromosome.weights`` and ``chromosome.fitness``.
        """
        assets = list(chromosome.assets_dictionary.keys())
        s_list = list(chromosome.assets_dictionary.values())

        weights = self.compute_weights(s_list=s_list)
        fitness = self.fitness_function(assets=assets, weights=weights, risk_term=fitness_type)

        chromosome.weights = weights
        chromosome.fitness = fitness

        return fitness

    def perform_crossover_and_mutation(self, chromosome_1, chromosome_2):
        """Create a child chromosome from two parents and mutate one s value.

        * Assets held by both parents are kept; each s value is taken from
          parent 1 or parent 2 with probability 0.5.
        * Assets held by only one parent are kept with probability 0.5.
        * If the child holds more than ``K`` assets, random single-parent
          assets are removed first, then random common assets if needed. If
          it holds fewer, random unused assets are added with uniform s values.
        * One random s value is multiplied by 1.1 or 0.9 and clipped to [0, 1].
        """
        chrom_1_dict = chromosome_1.assets_dictionary
        chrom_2_dict = chromosome_2.assets_dictionary

        # Ordered lists instead of sets, so results are reproducible under a fixed seed.
        common_assets_list = [a for a in chrom_1_dict if a in chrom_2_dict]
        leftover_assets_list = [a for a in chrom_1_dict if a not in chrom_2_dict]
        leftover_assets_list += [a for a in chrom_2_dict if a not in chrom_1_dict]

        crossover_dict = {}

        parent_choice = np.random.binomial(n=1, p=0.5, size=len(common_assets_list))
        for asset, choice in zip(common_assets_list, parent_choice):
            source = chrom_1_dict if choice == 0 else chrom_2_dict
            crossover_dict[asset] = source[asset]

        leftover_assets_choice = np.random.binomial(n=1, p=0.5, size=len(leftover_assets_list))
        kept_leftovers = []
        for asset, keep in zip(leftover_assets_list, leftover_assets_choice):
            if keep == 1:
                # A leftover asset belongs to exactly one parent.
                source = chrom_1_dict if asset in chrom_1_dict else chrom_2_dict
                crossover_dict[asset] = source[asset]
                kept_leftovers.append(asset)

        # handle the cardinality constraint
        n_assets_in_chromosome = len(crossover_dict)

        if n_assets_in_chromosome > self.K:
            assets_to_remove = n_assets_in_chromosome - self.K

            for pool in (kept_leftovers, common_assets_list):
                n_drop = min(assets_to_remove, len(pool))
                if n_drop == 0:
                    continue
                for i in np.random.choice(len(pool), size=n_drop, replace=False):
                    del crossover_dict[pool[i]]
                assets_to_remove -= n_drop

        elif n_assets_in_chromosome < self.K:
            n_assets_to_add = self.K - n_assets_in_chromosome

            possible_choices_list = [a for a in self.assets_list if a not in crossover_dict]

            # Sample without replacement so the child really ends up with K assets.
            chosen = np.random.choice(len(possible_choices_list), size=n_assets_to_add, replace=False)
            s_list_new_assets = np.random.uniform(low=0, high=1, size=n_assets_to_add)

            for i, s_value in zip(chosen, s_list_new_assets):
                crossover_dict[possible_choices_list[i]] = s_value

        ### We implement the mutation
        if crossover_dict:
            child_assets = list(crossover_dict)
            asset_to_mutate = child_assets[np.random.choice(len(child_assets))]

            increase_or_decrease_s_flip = np.random.binomial(n=1, p=0.5)
            factor = 1.1 if increase_or_decrease_s_flip == 0 else 0.9

            # clip the mutated s value to ensure it fulfills the constraints
            crossover_dict[asset_to_mutate] = float(
                np.clip(crossover_dict[asset_to_mutate] * factor, 0, 1)
            )

        return self.Chromosome(assets=crossover_dict.keys(), s_list=crossover_dict.values())

    def genetic_algorithm_loop(self, fitness_type, max_steps, n_population, trade_off_param = 0.5, weight_skewness = 0.01):
        """Initialise a random population and evaluate its fitness.

        Args:
            fitness_type: risk measure passed on to ``fitness_function``.
            max_steps: currently unused (see TODO below).
            n_population: number of chromosomes in the starting population.
            trade_off_param: stored in ``self.trade_off_param``.
            weight_skewness: stored in ``self.weight_skewness``, only when
                ``fitness_type == "skewness"``.
        """
        self.trade_off_param = trade_off_param

        if fitness_type == "skewness":
            self.weight_skewness = weight_skewness

        starting_population = [self.create_chromosome() for _ in range(n_population)]
        for chromosome in starting_population:
            self.evaluate_fitness(chromosome, fitness_type)

        # TODO: the evolution loop (selection, crossover/mutation over
        # ``max_steps`` generations) is not implemented yet; nothing is returned.
