## Population divergence

In [1]:
import numpy as np
import random
import pandas as pd

In [2]:
def limitToRange(val, min_limit, max_limit):
    """Clamp ``val`` to the interval [min_limit, max_limit].

    Values below ``min_limit`` become ``min_limit``, values above
    ``max_limit`` become ``max_limit``; anything else is returned unchanged.
    The lower bound is tested first.
    """
    if val < min_limit:
        clamped = min_limit
    elif val > max_limit:
        clamped = max_limit
    else:
        clamped = val
    return clamped

def limitArrayValsToRange(vals, min_limit, max_limit):
    """Return a new list with every element of ``vals`` clamped via ``limitToRange``."""
    clamped_vals = []
    for item in vals:
        clamped_vals.append(limitToRange(item, min_limit, max_limit))
    return clamped_vals

In [3]:
def normalize_probability(arr):
    """Scale the entries of ``arr`` so that they sum to 1.

    Each element is divided by the sum of all elements and the quotients are
    returned as a new list, in the original order.
    """
    total = sum(arr)
    scaled = []
    for entry in arr:
        scaled.append(entry / total)
    return scaled

In [4]:
def _log_gene_profile(population):
    """Return the log of the normalized column-wise gene product of ``population``."""
    # Summing logs equals the log of the column-wise product, but does not
    # underflow to 0 when many values below 1 are multiplied together.
    log_products = np.sum(np.log(np.asarray(population, dtype=float)), axis=0)
    # Subtracting log(sum(products)) normalizes the profile so it sums to 1.
    return log_products - np.logaddexp.reduce(log_products)


def populationDivergence(population_one, population_two):
    """Return the mean absolute log-ratio between two populations' gene profiles.

    Each population is a 2-D sequence (individuals x genes) of non-negative
    gene values. For every gene the values of all individuals are multiplied
    together and the resulting vector is normalized to sum to 1. The result is
    ``mean(|log(p1[i] / (p2[i] + 1e-64))|)`` over the genes.

    Note: this is not the Kullback-Leibler divergence, which would weight
    every log-ratio by ``p1[i]``. It is an unweighted mean of absolute
    log-ratios.

    The computation is carried out in log space. A direct product of many
    values below 1 underflows to 0, which makes the normalization divide
    0 by 0 and yields nan.

    Args:
        population_one: first population (rows are individuals).
        population_two: second population, with the same number of genes.

    Returns:
        The divergence as a NumPy float; 0 when both profiles are identical.
    """
    log_p1 = _log_gene_profile(population_one)
    log_p2 = _log_gene_profile(population_two)

    # log(p2 + 1e-64): the same tiny offset that keeps the ratio finite when a
    # normalized entry of the second population is 0.
    log_p2_guarded = np.logaddexp(log_p2, np.log(1e-64))

    return np.mean(np.abs(log_p1 - log_p2_guarded))

### Experiment 1
All genes are biased in one direction.

In [5]:
def createIndividualExp1(size=10, bias=0.0, min_limit=1e-4, max_limit=1.0):
    """Create one individual whose genes all share the same bias.

    Each of the ``size`` genes is a random multiple of 0.01 in [0, 1], shifted
    by ``bias`` and then clamped to [min_limit, max_limit].
    """
    genes = []
    for _ in range(size):
        raw_gene = random.randint(0,100) / 100
        genes.append(limitToRange(raw_gene + bias, min_limit, max_limit))
    return genes

In [6]:
def createPopulationExp1(size=10, bias=0.0, n_genes=10, min_limit=1e-4, max_limit=1.0):
    """Create a population of uniformly biased individuals.

    Args:
        size: number of individuals in the population.
        bias: offset added to every gene of every individual.
        n_genes: number of genes per individual (previously fixed at 10).
        min_limit: lower clamp applied to each gene (previously fixed at 1e-4).
        max_limit: upper clamp applied to each gene (previously fixed at 1.0).

    Returns:
        A list of ``size`` individuals, each a list of ``n_genes`` gene values.
    """
    return [
        createIndividualExp1(n_genes, bias=bias, min_limit=min_limit, max_limit=max_limit)
        for _ in range(size)
    ]

In [7]:
def createPopulationsExp1(n_pops, n_individuals, bias_range=[-0.4,0.4]):
    """Create ``n_pops`` populations, each with its own randomly drawn bias.

    For every population a bias is drawn uniformly between ``bias_range[0]``
    and ``bias_range[1]`` and a population of ``n_individuals`` individuals is
    generated with it.

    Returns:
        A tuple ``(populations, biases)`` of two lists in matching order.
    """
    populations, drawn_biases = [], []
    for _ in range(n_pops):
        # Draw the bias first, then generate the population that uses it.
        population_bias = random.uniform(bias_range[0], bias_range[1])
        drawn_biases.append(population_bias)
        populations.append(createPopulationExp1(n_individuals, bias=population_bias))
    return populations, drawn_biases

In [8]:
# createPopulationsExp1(2, 2)

In [9]:
def testDivergenceMeasureExp1(n_pops, n_individuals, bias_range=[-0.4,0.4]):
    """Measure the divergence between every pair of randomly biased populations.

    Generates ``n_pops`` populations via ``createPopulationsExp1`` and computes
    ``populationDivergence`` for each unordered pair ``(i, j)`` with
    ``i <= j``, so self-pairs are included.

    Args:
        n_pops: number of populations to generate.
        n_individuals: number of individuals per population.
        bias_range: two-element sequence ``[low, high]`` the biases are drawn from.

    Returns:
        A DataFrame with one row per compared pair and the columns
        ``population_1``, ``population_2`` (integer population indices),
        ``bias_1``, ``bias_2`` and ``divergence``.
    """
    populations, biases = createPopulationsExp1(n_pops, n_individuals, bias_range)

    columns = ["population_1", "population_2", "bias_1", "bias_2", "divergence"]

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for i in range(n_pops):
        # Starting j at i visits each unordered pair once, self-pairs included.
        for j in range(i, n_pops):
            divergence = populationDivergence(populations[i], populations[j])
            rows.append({"population_1": i, "population_2": j,
                         "bias_1": biases[i], "bias_2": biases[j],
                         "divergence": divergence})

    # Passing the columns explicitly keeps the header when there are no rows.
    return pd.DataFrame(rows, columns=columns)

In [23]:
# Run experiment 1: 1000 populations of 100 individuals each, compared
# pairwise (self-pairs included).
divergence_dataframe_exp1 = testDivergenceMeasureExp1(1000, 100)

In [24]:
# Save the experiment 1 results as a comma-separated file.
divergence_dataframe_exp1.to_csv("population_divergence_sample_experiment_1.csv",sep=",")

In [25]:
# Preview the first 10 rows of the experiment 1 results.
divergence_dataframe_exp1.head(10)

Unnamed: 0,population_1,population_2,bias_1,bias_2,divergence
0,0.0,0.0,-0.149219,-0.149219,0.0
1,0.0,1.0,-0.149219,-0.140639,33.074348
2,0.0,2.0,-0.149219,0.20833,38.23752
3,0.0,3.0,-0.149219,0.029362,36.150928
4,0.0,4.0,-0.149219,0.029232,36.925622
5,0.0,5.0,-0.149219,0.116329,39.741507
6,0.0,6.0,-0.149219,-0.267961,59.930844
7,0.0,7.0,-0.149219,-0.018698,37.33178
8,0.0,8.0,-0.149219,0.045254,37.594041
9,0.0,9.0,-0.149219,0.073597,36.96207


In [26]:
def createOneGenomeProduct():
    """Return the per-gene product over one freshly generated population.

    A single population of 100 individuals is created with a bias drawn from
    [-0.4, 0.4]; the gene values are multiplied together across individuals,
    giving one product per gene.
    """
    populations, _ = createPopulationsExp1(1, 100, bias_range=[-0.4,0.4])
    genome_matrix = np.asarray(populations[0])
    return genome_matrix.prod(axis=0)
    

In [27]:
# Per-gene products of one sample population; the bare `prod` expression
# displays the array as the cell output.
prod = createOneGenomeProduct()
prod

array([1.20691532e-190, 2.42579274e-189, 5.75341962e-189, 4.72568240e-170,
       1.51377434e-177, 6.17398185e-170, 2.20074347e-186, 2.21470178e-199,
       4.28885168e-157, 8.88003815e-150])

In [28]:
# Rescale the raw per-gene products so that they sum to 1.
normalize_probability(prod)

[1.3591329727494196e-41,
 2.7317367146118915e-40,
 6.479048000004541e-40,
 5.321691298767205e-21,
 1.704693421591281e-28,
 6.952652059695123e-21,
 2.478303956226087e-37,
 2.494022704159955e-50,
 4.829766942663812e-08,
 0.9999999517023306]

### Experiment 2
Half of the genes in one direction, half in the other.  
Compare pairs of populations whose bias directions are inverted.   

e.g.   
bias for pop 1 = [-0.3, -0.3, 0.3, 0.3]   
bias for pop 2 = [0.3, 0.3, -0.3, -0.3]

In [16]:
def createIndividualExp2(size=10, biases=[0.0,0.0], min_limit=1e-4, max_limit=1.0):
    """Create one individual whose two genome halves carry different biases.

    Genes at positions below ``size // 2`` are shifted by ``biases[0]``, the
    remaining genes by ``biases[1]``. Each gene starts as a random multiple of
    0.01 in [0, 1] and is clamped to [min_limit, max_limit] after the shift.
    """
    genes = []
    for position in range(size):
        raw_gene = random.randint(0,100) / 100
        if position < size//2:
            gene_bias = biases[0]
        else:
            gene_bias = biases[1]
        genes.append(limitToRange(raw_gene + gene_bias, min_limit, max_limit))
    return genes

In [17]:
def createPopulationExp2(size=10, biases=[0.0,0.0], n_genes=10, min_limit=1e-4, max_limit=1.0):
    """Create a population of individuals with split-genome biases.

    Args:
        size: number of individuals in the population.
        biases: two-element sequence; the first entry biases the first half of
            each genome, the second entry the remaining genes. It is only read,
            never modified.
        n_genes: number of genes per individual (previously fixed at 10).
        min_limit: lower clamp applied to each gene (previously fixed at 1e-4).
        max_limit: upper clamp applied to each gene (previously fixed at 1.0).

    Returns:
        A list of ``size`` individuals, each a list of ``n_genes`` gene values.
    """
    return [
        createIndividualExp2(n_genes, biases=biases, min_limit=min_limit, max_limit=max_limit)
        for _ in range(size)
    ]

In [18]:
def createPopulationsExp2(n_pairs, n_individuals, bias_range=[-0.4,0.4]):
    """Create ``n_pairs`` pairs of populations with mirrored bias directions.

    For each pair one bias is drawn uniformly between ``bias_range[0]`` and
    ``bias_range[1]``. The first population of the pair uses
    ``[bias, -bias]`` for its genome halves, the second ``[-bias, bias]``.

    Returns:
        A tuple ``(populations, biases, pairs)``: the flat population list
        (pair ``k`` sits at indices ``2k`` and ``2k + 1``), the drawn bias per
        pair, and the pair indices ``0 .. n_pairs - 1``.
    """
    populations = []
    drawn_biases = []
    for _ in range(n_pairs):
        pair_bias = random.uniform(bias_range[0], bias_range[1])

        # Generate the pair in a fixed order: original direction, then inverted.
        forward = createPopulationExp2(n_individuals, biases=[pair_bias, -pair_bias])
        inverted = createPopulationExp2(n_individuals, biases=[-pair_bias, pair_bias])
        populations.extend([forward, inverted])

        drawn_biases.append(pair_bias)
    return populations, drawn_biases, list(range(n_pairs))

In [19]:
def testDivergenceMeasureExp2(n_pairs, n_individuals, bias_range=[-0.4,0.4]):
    """Measure the divergence within each pair of mirrored-bias populations.

    Generates ``n_pairs`` population pairs via ``createPopulationsExp2`` and
    computes ``populationDivergence`` between the two members of every pair.

    Args:
        n_pairs: number of population pairs to generate.
        n_individuals: number of individuals per population.
        bias_range: two-element sequence ``[low, high]`` the biases are drawn from.

    Returns:
        A DataFrame with one row per pair and the columns ``population_1``,
        ``population_2`` (integer indices into the flat population list),
        ``bias_1_first``, ``bias_1_second``, ``bias_2_first``,
        ``bias_2_second``, ``pair`` and ``divergence``.
    """
    populations, biases, pairs = createPopulationsExp2(n_pairs, n_individuals, bias_range)

    columns = ["population_1", "population_2",
               "bias_1_first", "bias_1_second",
               "bias_2_first", "bias_2_second",
               "pair", "divergence"]

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for i in range(n_pairs):
        # The two members of pair i are stored next to each other.
        ind_pop_1 = i * 2
        ind_pop_2 = i * 2 + 1

        divergence = populationDivergence(populations[ind_pop_1], populations[ind_pop_2])

        rows.append({"population_1": ind_pop_1, "population_2": ind_pop_2,
                     "bias_1_first": biases[i], "bias_1_second": -biases[i],
                     "bias_2_first": -biases[i], "bias_2_second": biases[i],
                     "pair": pairs[i], "divergence": divergence})

    # Passing the columns explicitly keeps the header when there are no rows.
    return pd.DataFrame(rows, columns=columns)

In [29]:
# Run experiment 2: 1000 mirrored-bias pairs of populations with 100
# individuals each; one divergence value per pair.
divergence_dataframe_exp2 = testDivergenceMeasureExp2(1000, 100)

In [30]:
# Save the experiment 2 results as a comma-separated file.
divergence_dataframe_exp2.to_csv("population_divergence_sample_experiment_2.csv",sep=",")

In [31]:
# Preview the first 10 rows of the experiment 2 results.
divergence_dataframe_exp2.head(10)

Unnamed: 0,population_1,population_2,bias_1_first,bias_1_second,bias_2_first,bias_2_second,pair,divergence
0,0.0,1.0,0.096907,-0.096907,-0.096907,0.096907,0.0,120.822729
1,2.0,3.0,0.044151,-0.044151,-0.044151,0.044151,1.0,62.061272
2,4.0,5.0,0.249605,-0.249605,-0.249605,0.249605,2.0,205.794398
3,6.0,7.0,0.309716,-0.309716,-0.309716,0.309716,3.0,249.922373
4,8.0,9.0,0.101541,-0.101541,-0.101541,0.101541,4.0,136.665261
5,10.0,11.0,-0.346192,0.346192,0.346192,-0.346192,5.0,249.774789
6,12.0,13.0,0.284342,-0.284342,-0.284342,0.284342,6.0,229.105776
7,14.0,15.0,-0.021662,0.021662,0.021662,-0.021662,7.0,32.314774
8,16.0,17.0,-0.367431,0.367431,0.367431,-0.367431,8.0,276.108656
9,18.0,19.0,-0.01157,0.01157,0.01157,-0.01157,9.0,19.68725
