In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

#library to implement structures in python- pip install ypstruct
#ref-https://pypi.org/project/ypstruct/#description
from ypstruct import structure

#python genetic algorithm library- pip install pygad
import pygad #https://pygad.readthedocs.io/en/latest/

import warnings
warnings.filterwarnings("ignore")

Implementation Rules

1) only people of India have been considered

2) Condition of CDP1->context has been introduced

3) Condition of Experience1-> IBL is based upon experience only

positive frame-

1) gain0 condition is used

2) user id- 152 to 213 (from original dataset)- 40 users


Information about program (Column G and ahead)

Note: payoff values are scaled by a factor of 1/100 in the model (e.g. the payoff 200 is represented as 2 and 600 as 6)

Program 1/ SAFE OPTION i.e., 2 people were saved

Program 2/ RISKY OPTION  -(probability = 1/3), i.e., 6 people were saved

                       -(probability = 2/3), i.e., 0 people were saved
                       
IN FINAL CHOICE (column C in sheet 1)

1== PROGRAM1/DECISION1 IN SAMPLES- Safe Option

0==PROGRAM2/DECISION2 IN SAMPLES- Risky Option 

<h1> Data Reading- Gain Frame </h1>

In [2]:
#reading the data in Sheet1- each row describes one user (column numbers below are 1-based)
#  column 2  - number of samples drawn by the user
#  column 3  - final choice
#  column 4  - high payoff (600)- Decision 2- RISKY
#  column 5  - low payoff (0)- Decision 2- RISKY
#  column 6  - medium payoff (200)- Decision 1- SAFE
#  column 7+ - for each row, the decisions/programs sampled from experience
data = pd.read_excel(
    "GainData.xlsx",
    sheet_name="Sheet1",
    header=None,
)

In [3]:
#preview the first rows of the Sheet1 data
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
0,1,2,1,600,0,200,1,2,,,...,,,,,,,,,,
1,1,5,1,600,0,200,1,2,1.0,2.0,...,,,,,,,,,,
2,1,2,1,600,0,200,1,2,,,...,,,,,,,,,,
3,1,3,0,600,0,200,1,1,2.0,,...,,,,,,,,,,
4,1,3,1,600,0,200,1,2,2.0,,...,,,,,,,,,,


In [4]:
#reading the data in Sheet2- one row per user
#each row lists, sample by sample, the payoff that user obtained
payoff = pd.read_excel(
    "GainData.xlsx",
    sheet_name="Sheet2",
    header=None,
)

In [5]:
#preview the first rows of the Sheet2 payoffs
payoff.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,200,0,,,,,,,,,...,,,,,,,,,,
1,200,0,200.0,0.0,200.0,,,,,,...,,,,,,,,,,
2,200,0,,,,,,,,,...,,,,,,,,,,
3,200,200,600.0,,,,,,,,...,,,,,,,,,,
4,200,600,0.0,,,,,,,,...,,,,,,,,,,


In [6]:
#data[1]- number of samples drawn by each user
#data[2]- final choice of each user
samples_user, final_choice = data[1], data[2]

<h1> Defining The Fitness Function </h1>

In [7]:
def calc_activation(current_time_step,an_instance,d,s): #function to calc activation value of an instance
    """Return the noisy activation of one instance at `current_time_step`.

    Parameters
    ----------
    current_time_step : number
        Time step at which the activation is evaluated.
    an_instance : object with a `time_step` sequence
        `time_step` lists the time steps at which the instance was observed.
    d : number
        Decay parameter; each observation contributes (elapsed time) ** -d.
    s : number
        Noise parameter; scales the random noise term added to the activation.

    Returns
    -------
    number
        log(sum of decayed observations) + noise, or -100 when the instance
        has no recorded time step.
    """
    #an instance that has never been observed gets a fixed, very low activation
    if len(an_instance.time_step)==0:
        return -100

    #float array: np.power rejects negative integer exponents on integer arrays,
    #so an integer-valued d would otherwise raise
    elapsed=current_time_step-np.asarray(an_instance.time_step,dtype=float)
    activation_sum=np.sum(np.power(elapsed,-d))

    #0.001+U[0,1) can exceed 1, which makes (1-p)/p negative and the log NaN;
    #cap p so that it always stays strictly inside (0,1)
    p=min(0.001+np.random.uniform(),0.999)
    epsilon=s*np.log((1-p)/p)

    return np.log(activation_sum)+epsilon

In [8]:
def calc_ret_prob(instances,s): #function to calculate retrieval prob of all instances that match a retrieval constraint
    """Return the retrieval probability of every instance in `instances`.

    Each probability is exp(activation/(s*sqrt(2))) normalised over the list.

    Parameters
    ----------
    instances : sequence of objects with an `activation` attribute
        All instances that match one retrieval constraint.
    s : number
        Noise parameter used to scale the activations.

    Returns
    -------
    numpy.ndarray
        One probability per instance, in the same order as `instances`
        (an empty array for an empty list).
    """
    if len(instances)==0:
        return np.zeros(0)

    scaled=np.array([inst.activation for inst in instances],dtype=float)/(s*np.sqrt(2))

    #subtracting the maximum leaves the normalised ratios unchanged but keeps exp()
    #from overflowing to inf or underflowing to 0 for every instance, which would
    #otherwise turn the probabilities into NaN
    weights=np.exp(scaled-np.max(scaled))

    return weights/np.sum(weights)

In [9]:
def calc_blend_val(instances):
    """Return the blended value of a decision.

    The blended value is the sum, over all instances that match a retrieval
    constraint, of each instance's outcome weighted by its retrieval probability.

    instances- list of all instances that match a retrieval constraint
    """
    total=0
    for inst in instances:
        weighted_outcome=inst.outcome*inst.retrieval_prob
        total+=weighted_outcome
    return total

In [10]:
def ibl_gain(solution,solution_idx):
    """Fitness function for the GA: accuracy of the IBL model on the users' final choices.

    For every user the function builds five instances (three outcome instances
    plus two pre-populated ones), records the time steps at which the user
    observed each payoff, computes activations, retrieval probabilities and
    blended values, and predicts the option with the higher blended value
    (ties go to the safe option).

    Reads the notebook-level variables `final_choice`, `samples_user` and
    `payoff`. The result is stochastic because `calc_activation` adds random noise.

    Parameters
    ----------
    solution : sequence of two numbers
        solution[0] and solution[1] are each multiplied by 10 to obtain the
        decay parameter d and the noise parameter s.
    solution_idx : unused
        Present only because the GA passes it to the fitness function.

    Returns
    -------
    float
        Fraction of users whose predicted choice matches the recorded final
        choice (1 - error ratio); the GA maximises this value.
    """
    d=solution[0]*10 #first parameter- decay parameter- gene value scaled up by 10
    s=solution[1]*10 #second parameter- noise parameter- gene value scaled up by 10

    #defining payoffs (the values stored in `payoff` are these times 100)
    #decision1/program1/safe option
    med=2
    #decision2/program2/risky option
    low=0
    high=6

    #for this experiment, we define 5 instances
    num_instance = 5

    users=len(final_choice) #number of users in the experiment
    final_choice_ibl=[] #final choice as calculated by ibl

    for i in range(users): #this loop calculates final choice of each user

        #defining an instance
        #decision- program chosen
        #outcome- payoff received
        #time_step- time steps at which the decision of an instance was taken
        #activation- activation of an instance
        #retrieval_prob- retrieval probability of an instance
        an_instance=structure(decision=None, outcome=None, time_step=[], activation=None, retrieval_prob=None)

        #DEFINING THE INSTANCES
        instance = an_instance.repeat(num_instance) #instance becomes a list of 5 instances

        #INITIALISING THE INSTANCES

        #instance 0=Decision/Program 1- Safe option- Payoff=2
        instance[0].decision=1
        instance[0].outcome=med
        #time_step- left as empty list, activation and ret prob not defined yet

        #instance 1=Decision/Program 2- Risky option- Payoff=6
        instance[1].decision=2
        instance[1].outcome=high
        #time_step- left as empty list, activation and ret prob not defined yet

        #instance 2=Decision/Program 2- Risky option- Payoff=0
        instance[2].decision=2
        instance[2].outcome=low
        #time_step- left as empty list, activation and ret prob not defined yet

        #prepopulating instances- decision1 and 2 with default utility=10
        #instance 3=Decision/Program 1- Safe option
        instance[3].decision=1
        instance[3].outcome=10
        instance[3].time_step.append(0) #instance called at 0th time step- pre-populate

        #instance 4=Decision/Program 2- Risky Option
        instance[4].decision=2
        instance[4].outcome=10
        instance[4].time_step.append(0) #instance called at 0th time step- pre-populate

        #STORING THE INSTANCES FROM EXPERIENCE
        n_samples=samples_user[i]
        user_payoffs=payoff.iloc[i] #row is fetched once per user instead of once per sample
        #NOTE(review): samples are stamped 0..n_samples-1, so the first sample shares
        #time step 0 with the pre-populated instances- confirm this is intended
        for j in range(n_samples): #i-user,j-time step
            observed=user_payoffs[j]
            if observed==med*100:
                instance[0].time_step.append(j) #adding the current time step
            elif observed==high*100:
                instance[1].time_step.append(j)
            elif observed==low*100:
                instance[2].time_step.append(j)

        #CALCULATING ACTIVATION
        for instance_index in range(num_instance):
            #takes the current time step- which is the total samples made by user plus 1
            #instance, d and s
            instance[instance_index].activation=calc_activation(n_samples+1,instance[instance_index],d,s)

        #CALCULATING RETRIEVAL PROBABILITY OF DECISION 1
        #decision 1-safe option- instance 0 and instance 3
        instance[0].retrieval_prob,instance[3].retrieval_prob=calc_ret_prob([instance[0],instance[3]],s)

        #CALCULATING RETRIEVAL PROBABILITY OF DECISION 2
        #decision 2-Risky option- instance 1, instance 2, instance 4
        instance[1].retrieval_prob,instance[2].retrieval_prob,instance[4].retrieval_prob=calc_ret_prob([instance[1],instance[2],instance[4]],s)

        #CALCULATING BLENDED VALUE OF DECISION 1
        #decision 1-safe option- instance 0 and instance 3
        blended_val_1=calc_blend_val([instance[0],instance[3]])

        #CALCULATING BLENDED VALUE OF DECISION 2
        #decision 2-Risky option- instance 1, instance 2, instance 4
        blended_val_2=calc_blend_val([instance[1],instance[2],instance[4]])

        #FINAL CHOICE
        if blended_val_1>=blended_val_2: #decision1- safe option
            final_choice_ibl.append(1) #decision1 is 1 in final choice
        else:
            final_choice_ibl.append(0) #decision 2 is 0 in final choice

    #now final choices according to IBL have been determined
    #The XNOR gate (negated XOR) gives an output of 1 if both inputs are same and 0 if both are different.
    result=np.array(np.logical_not(np.logical_xor(final_choice_ibl,final_choice)).astype('uint8'))

    error_ratio=(len(result)-np.count_nonzero(result))/len(result)
    fitness=1-error_ratio #VIMP- pyGAD optimizes considering the fitness function output needs to be MAXIMISED
    #therefore we define fitness as accuracy which is just 1-error_ratio

    return fitness

<h1> Defining Genetic Algorithm Parameters </h1>

In [12]:
#objective that the GA maximises
fitness_function = ibl_gain

#search space- one gene per parameter to optimise (d and s), each ranging from 0 to 1
num_genes = 2
gene_space = [{'low': 0, 'high': 1} for _ in range(num_genes)]

#population size and stopping condition
num_generations = 200 #100*number of variables in matlab- stopping condition
sol_per_pop = 8
num_parents_mating = 4

#selection, crossover and mutation operators
parent_selection_type = "rws" #roulette wheel selection
keep_parents = 1 #one parent is kept
crossover_type = "uniform" #uniform crossover
mutation_type = "random"
mutation_percent_genes = 10

In [13]:
#gather the settings defined above and hand them to PyGAD in one call
ga_settings = dict(
    num_generations=num_generations,
    num_parents_mating=num_parents_mating,
    fitness_func=fitness_function,
    sol_per_pop=sol_per_pop,
    num_genes=num_genes,
    gene_space=gene_space,
    parent_selection_type=parent_selection_type,
    keep_parents=keep_parents,
    crossover_type=crossover_type,
    mutation_type=mutation_type,
    mutation_percent_genes=mutation_percent_genes,
)
ga_instance = pygad.GA(**ga_settings)

<h1> Running the Genetic Algorithm </h1>

In [14]:
#running the genetic algorithm 20 times and taking the average results
#a new pygad.GA object is built for every repetition: calling run() again on the
#same object does not re-create the initial population, so the search would simply
#continue from where the previous run stopped and the 20 results would not be independent
solution_all=[] #list of the 20 solutions
solution_fitness_all=[] #list of the 20 accuracies
for i in tqdm(range(20)):
    ga_instance = pygad.GA(num_generations=num_generations,
                           num_parents_mating=num_parents_mating,
                           fitness_func=fitness_function,
                           sol_per_pop=sol_per_pop,
                           num_genes=num_genes,
                           gene_space=gene_space,
                           parent_selection_type=parent_selection_type,
                           keep_parents=keep_parents,
                           crossover_type=crossover_type,
                           mutation_type=mutation_type,
                           mutation_percent_genes=mutation_percent_genes)
    ga_instance.run()
    solution,solution_fitness,solution_idx= ga_instance.best_solution()
    solution_all.append(solution)
    solution_fitness_all.append(solution_fitness)

100%|█████████████████████████████████████████████████████████████████████████████████| 20/20 [52:13<00:00, 156.70s/it]


In [15]:
solution_all #best [d, s] gene pair of each run, on the gene scale (ibl_gain multiplies both by 10)

[array([0.88610326, 0.02944382]),
 array([0.46969347, 0.02010455]),
 array([0.3257209 , 0.01306673]),
 array([0.91896218, 0.01700925]),
 array([0.92895446, 0.03336672]),
 array([0.10423454, 0.00917565]),
 array([0.32419811, 0.00244639]),
 array([0.74403237, 0.03305058]),
 array([0.31991924, 0.04549779]),
 array([0.88964083, 0.03537371]),
 array([0.67668055, 0.00180045]),
 array([0.98015489, 0.02209078]),
 array([0.27892974, 0.03692508]),
 array([0.92252261, 0.02293661]),
 array([0.80408171, 0.00723039]),
 array([0.62986321, 0.00999051]),
 array([0.58348575, 0.03623584]),
 array([0.78668851, 0.02893431]),
 array([0.95505929, 0.02716471]),
 array([0.82705358, 0.02265572])]

In [16]:
solution_fitness_all #fitness of the best solution of each run- accuracy= 1-error_ratio

[0.95,
 0.925,
 0.9,
 0.95,
 0.9,
 0.95,
 0.925,
 0.95,
 0.95,
 0.925,
 0.9,
 0.925,
 0.85,
 0.925,
 0.925,
 0.925,
 0.9,
 0.95,
 0.925,
 0.925]

In [17]:
#average the best gene values (d first, s second) over all runs
d=0
s=0
for best_genes in solution_all:
    d+=best_genes[0]
    s+=best_genes[1]
d/=len(solution_all)
s/=len(solution_all)
print("decay parameter optimised: ",d)
print("noise parameter optimised: ",s)

decay parameter optimised:  0.6677989609234339
noise parameter optimised:  0.022724980342033567


In [18]:
#average the best fitness (accuracy) over all runs
accuracy=0
for run_fitness in solution_fitness_all:
    accuracy+=run_fitness
accuracy/=len(solution_fitness_all)
print("accuracy average: ",accuracy)
print("error ratio average: ",1-accuracy)

accuracy average:  0.9237500000000003
error ratio average:  0.0762499999999997
