<div style="color:#8b1538; font-size:32px;">Graphic representation using matplotlib and seaborn</div>

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

<br>

# SNPs distribution

> Load data

In [92]:
def load_data(model):
    # Path data
    path_data = "./Data/Msprime/snp_distribution/sfs_{}/".format(model)
    
    data = pd.read_json(path_or_buf="{}SFS_{}-all.json".format(path_data, model), typ='frame')
    
    return data

In [93]:
sfs_decline = load_data("decline")

In [94]:
sfs_decline

Unnamed: 0,Parameters,SNPs,SFS observed,Time
0,"{'Tau': 0.039810717100000004, 'Kappa': 0.00316...",[6176],"[[5534, 397, 72, 20, 11, 18, 12, 14, 16, 14, 1...",0.3660
1,"{'Tau': 1.9952623150000002, 'Kappa': 0.0012589...",[80609],"[[29599, 13921, 8598, 6087, 4504, 3404, 2663, ...",13.8219
2,"{'Tau': 2.5118864315, 'Kappa': 1.5848931925, '...",[129305],"[[33047, 16431, 11604, 9062, 7368, 6263, 5380,...",40.2946
3,"{'Tau': 15.8489319246, 'Kappa': 0.0031622777, ...",[114130],"[[31689, 16415, 11072, 8242, 6143, 5285, 4696,...",37.6654
4,"{'Tau': 0.0063095734, 'Kappa': 0.0019952623, '...",[1200],"[[1039, 44, 22, 20, 19, 9, 5, 9, 3, 7, 2, 4, 2...",0.0640
...,...,...,...,...
3514,"{'Tau': 1.9952623150000002, 'Kappa': 0.1258925...",[85203],"[[30150, 14082, 8967, 6500, 4942, 3670, 2811, ...",14.5513
3515,"{'Tau': 3.1622776602, 'Kappa': 0.0006309573, '...",[95610],"[[31606, 14703, 9580, 6876, 5138, 4403, 3580, ...",14.2997
3516,"{'Tau': 0.0001, 'Kappa': 0.0039810717, 'sample...",[519],"[[159, 80, 50, 26, 39, 26, 17, 12, 14, 14, 10,...",0.0730
3517,"{'Tau': 0.0005011872, 'Kappa': 0.0007943282, '...",[141],"[[98, 10, 10, 6, 5, 1, 0, 1, 0, 0, 0, 2, 3, 0,...",0.0109


<br>

In [154]:
def percentile(centile):
    
    def percentile_(data):
        return np.percentile(data, centile)
    percentile_.__name__ = "centile {}%".format(centile)
    
    return percentile_

In [194]:
def data_preprocessing(sfs):
    """
    Pre processing for the plot.
    """
    # New pandas DataFrame
    data = pd.DataFrame()
    
    # Compute log10 of parameters - either (tau, kappa) or (m12, kappa)
    keys = sfs['Parameters'][0].keys()
    for key in keys:
        if key in ['Tau', 'Kappa', 'm12']:
            name = "Log10({})".format(key)
            data[name] = sfs['Parameters'].apply(lambda ele: np.log10(ele[key]))
    
    # Compute mean(SNPs)
    data['SNPs'] = sfs['SNPs'].apply(lambda ele: np.log10(np.mean(ele)))
    
    #
    data = data.groupby(['Log10(Tau)', 'Log10(Kappa)']).agg([percentile(50)]).unstack('Log10(Kappa)')
    #data = data.pivot("Log10(Tau)", "Log10(Kappa)", "SNPs")
    
    return data
    
data = data_preprocessing(sfs_decline)

In [195]:
data

Unnamed: 0_level_0,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs,SNPs
Unnamed: 0_level_1,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%,centile 50%
Log10(Kappa),-3.5,-3.4,-3.3,-3.2,-3.1,-3.0,-2.9,-2.8,-2.7,-2.6,...,2.0,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9
Log10(Tau),Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
-4.0,1.653213,1.732394,1.826075,1.991226,2.068186,2.086360,2.227887,2.292256,2.350248,2.431364,...,,,,,,,,,,
-3.9,1.819544,1.812913,1.880814,1.929419,1.995635,2.133539,2.152288,2.283301,2.380211,2.437751,...,,,,,,,,,,
-3.8,1.740363,1.792392,1.929419,1.806180,2.029384,2.198657,2.303196,2.276462,2.448706,2.546543,...,,,,,,,,,,
-3.7,1.897627,1.886491,1.982271,1.963788,2.064458,2.170262,2.225309,2.363612,2.359835,2.506505,...,,,,,,,,,,
-3.6,1.880814,1.944483,1.949390,2.060698,2.146128,2.206826,2.220108,2.318063,2.387390,2.572872,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2.0,5.059488,5.050925,5.053536,5.056077,5.059272,5.048038,5.054425,5.047329,5.054107,5.051141,...,5.054900,5.054295,5.052913,5.059101,5.058278,5.053040,5.060339,5.055046,5.054092,5.052097
2.1,5.049609,5.053470,5.060335,5.060411,5.050163,5.053681,5.059272,5.054215,5.052979,5.057141,...,5.047586,5.061358,5.058130,5.057404,5.053747,5.053559,5.059862,5.054130,5.055023,5.056005
2.2,5.055925,5.058388,5.056192,5.056455,5.049544,5.058498,5.053854,5.055776,5.057316,5.059862,...,5.061177,5.050380,5.062191,5.056912,5.050759,5.059279,5.055172,5.055260,5.058365,5.049171
2.3,5.052936,5.060921,5.054544,5.056985,5.054590,5.053505,5.052929,5.056486,5.054364,5.060883,...,5.057670,5.058244,5.052486,5.061577,5.057457,5.051619,5.058555,5.051083,5.057651,5.053190


> Plot data

In [203]:
def plot_snp_distribution(data):
    # Plot
    sns.set_theme(style='whitegrid')

    # Plot
    ax = sns.heatmap(data)
    print(ax.get_xticks())
    # xticks
    #ax.xaxis.tick_top()
    xticks_labels = []
    plt.xticks(np.arange(64, step=3) + 0.5, labels=xticks_labels)
    
    # axis labels
    plt.xlabel('')
    plt.ylabel('')
        
    plt.show()

In [212]:
len(np.arange(64, step=3) + 0.5)

22

In [213]:
np.arange(-4, 2.5, 0.1)

array([-4.00000000e+00, -3.90000000e+00, -3.80000000e+00, -3.70000000e+00,
       -3.60000000e+00, -3.50000000e+00, -3.40000000e+00, -3.30000000e+00,
       -3.20000000e+00, -3.10000000e+00, -3.00000000e+00, -2.90000000e+00,
       -2.80000000e+00, -2.70000000e+00, -2.60000000e+00, -2.50000000e+00,
       -2.40000000e+00, -2.30000000e+00, -2.20000000e+00, -2.10000000e+00,
       -2.00000000e+00, -1.90000000e+00, -1.80000000e+00, -1.70000000e+00,
       -1.60000000e+00, -1.50000000e+00, -1.40000000e+00, -1.30000000e+00,
       -1.20000000e+00, -1.10000000e+00, -1.00000000e+00, -9.00000000e-01,
       -8.00000000e-01, -7.00000000e-01, -6.00000000e-01, -5.00000000e-01,
       -4.00000000e-01, -3.00000000e-01, -2.00000000e-01, -1.00000000e-01,
        3.55271368e-15,  1.00000000e-01,  2.00000000e-01,  3.00000000e-01,
        4.00000000e-01,  5.00000000e-01,  6.00000000e-01,  7.00000000e-01,
        8.00000000e-01,  9.00000000e-01,  1.00000000e+00,  1.10000000e+00,
        1.20000000e+00,  

In [204]:
%matplotlib widget
plot_snp_distribution(data)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[ 0.5  3.5  6.5  9.5 12.5 15.5 18.5 21.5 24.5 27.5 30.5 33.5 36.5 39.5
 42.5 45.5 48.5 51.5 54.5 57.5 60.5 63.5]
