In [651]:
import numpy as np
from clock_project.simulation.magnitude_quantification import calculate_non_stationarity, calculate_ENS, random_nucleotide_distribution, calculate_information, entropy_calculation
from clock_project.maths.evolutionary_rate import calculate_stationary_distribution

from cogent3.maths.measure import jsd
from clock_project.simulation.wts import generate_rate_matrix
import json
from clock_project.genome_analysis.yapeng_check_BV import get_bounds_violation, load_param_values
import os
import glob
from cogent3 import get_app
import pandas as pd
import plotly.graph_objects as go

from cogent3.util.deserialise import deserialise_object

# cogent3 app used below to load each serialised model-fitting result from JSON.
load_json_app = get_app("load_json")
# Checker object; its .main(result) is called per fitted model and the returned
# object's .vio attribute is compared with [] to decide whether a triad is valid.
# NOTE(review): the name is misspelled ("bounary"); kept because later cells use it.
bounary_violation_function = get_bounds_violation()

In [652]:
# Map each gene (the alignment file name without its extension) to an alignment length.
alignment_length_dict = {}
alignment_dir = '/Users/gulugulu/repos/PuningAnalysis/data/ensembl_ortholog_sequences/homologies_alignment_common_name_350_threshold'
gene_paths = glob.glob(os.path.join(alignment_dir, '*.json'))
for gene_path in gene_paths:
    file_name = os.path.basename(gene_path).rsplit('.', 1)[0]
    # Use a context manager so the file handle is closed as soon as the JSON is
    # parsed instead of leaking one open handle per alignment.
    with open(gene_path, 'r') as alignment_file:
        alignment = deserialise_object(json.load(alignment_file))
    # NOTE(review): takes element [0] of get_lengths(); presumably every sequence
    # in an alignment has the same length -- confirm against the cogent3 version used.
    alignment_length = alignment.get_lengths()[0]
    alignment_length_dict[file_name] = alignment_length



In [653]:
# For every gene directory, load each model-fitting result and keep the identifiers
# of the triads whose fit reports no bound violations.
base_dir = '/Users/gulugulu/Desktop/honours/data_local/whole_genome_mammal87/triads_model_fitting_350_threshold'
gene_paths = glob.glob(os.path.join(base_dir, '*/'))
valid_triads_identifier_dict = {}
# Parameters that are skipped when measuring distance to the bounds (loop-invariant).
exclude_params = ("length", "mprobs")
for gene_dir in gene_paths:
    file_name = os.path.basename(gene_dir.rstrip('/'))
    model_fitting_result_dir = os.path.join(gene_dir, 'model_fitting_result')
    model_fitting_results_paths = glob.glob(os.path.join(model_fitting_result_dir, '*.json'))
    # NOTE(review): this dict is reset for every gene and is not read anywhere in
    # the visible notebook; after the loop it only holds the last gene's values.
    parameter_proximities = {'proximity_lower': [], 'proximity_upper': [], 'ens': []}
    valid_triads_identifier = []
    # Distinct loop names: the original reused `path` for the inner loop and
    # `param` for both the loaded object and each record, shadowing the outer values.
    for result_path in model_fitting_results_paths:
        identifier = os.path.basename(result_path).rsplit('.', 1)[0]
        model_fitting_result = load_json_app(result_path)
        list_of_params = load_param_values(model_fitting_result).params
        ens_list = model_fitting_result.get_lengths_as_ens()
        for param_record in list_of_params:
            if param_record["par_name"] in exclude_params:
                continue
            # Absolute distance of the parameter's "init" value from each bound.
            proximity_lower = abs(param_record["init"] - param_record["lower"])
            proximity_upper = abs(param_record["init"] - param_record["upper"])
            ens = ens_list[param_record['edge']]
            parameter_proximities['ens'].append(ens)
            parameter_proximities['proximity_lower'].append(proximity_lower)
            parameter_proximities['proximity_upper'].append(proximity_upper)
        bounary_violation_check = bounary_violation_function.main(model_fitting_result)
        # An empty violation list marks the triad as valid.
        if bounary_violation_check.vio == []:
            valid_triads_identifier.append(identifier)
    valid_triads_identifier_dict[file_name] = valid_triads_identifier


In [654]:
# Number of valid triads per gene.
valid_triads_number_dict = {
    gene: len(identifiers)
    for gene, identifiers in valid_triads_identifier_dict.items()
}


In [655]:
# Pair each gene's alignment length with its count of valid triads, in the
# iteration order of alignment_length_dict.
keys = alignment_length_dict.keys()
lengths = list(alignment_length_dict.values())
triad_counts = [valid_triads_number_dict[gene] for gene in alignment_length_dict]

In [656]:
import plotly.express as px
# Scatter of alignment length (x) against the number of valid triads (y), one point per gene.
aln_len_valid_matrix_fig = px.scatter(
    x=lengths,
    y=triad_counts,
    labels={'x': 'Alignment length', 'y': 'Number of valid dataset'},
    title=None,
)
# White template, tight margins, larger axis-title fonts; width left to autosize.
aln_len_valid_matrix_fig.update_layout(
    template='plotly_white',
    margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
    autosize=True,
    yaxis_title_font={'size': 20},
    xaxis_title_font={'size': 20},
    width=None,
)
aln_len_valid_matrix_fig.show()

Independent variables - evolution time

In [670]:
# Ten evenly spaced evolution-time points from 0 to 2 (both endpoints included).
t_range = np.linspace(0,2,10)

Independent variable - information of the initial nucleotide distribution

In [657]:
import os
import json

def extract_internal_root_distribution(base_path):
    """Collect the internal-root nucleotide distribution of every binned triad.

    Each sub-directory of ``base_path`` is treated as one gene. Inside it, the
    files ``ens_diff_bins.json``, ``jsd_bins.json`` and ``shorest_ens_bins.json``
    are read when present; for every non-empty entry under their ``"bins"`` key
    the value at
    ``["triads_info_big_tree"]["nuc_freqs_dict"]["internal_root_distribution"]``
    is stored under ``str(entry["identifier"])``.

    Args:
        base_path: Directory whose sub-directories are named by gene ID.

    Returns:
        dict mapping gene ID -> {identifier string -> internal root distribution}.
        A gene with none of the files (or only empty bins) maps to ``{}``. When
        the same identifier occurs in several files, the last file read wins.
    """
    bin_file_names = ('ens_diff_bins.json', 'jsd_bins.json', 'shorest_ens_bins.json')
    all_data = {}

    for gene_id in os.listdir(base_path):
        gene_path = os.path.join(base_path, gene_id)
        if not os.path.isdir(gene_path):
            # Plain files sitting next to the gene folders are ignored.
            continue

        distributions = {}
        for bin_file_name in bin_file_names:
            file_path = os.path.join(gene_path, bin_file_name)
            if not os.path.exists(file_path):
                continue
            with open(file_path, 'r') as handle:
                bins = json.load(handle).get('bins', {})
            for content in bins.values():
                if not content:
                    # Empty bins carry no triad.
                    continue
                identifier = str(content['identifier'])
                nuc_freqs = content['triads_info_big_tree']['nuc_freqs_dict']
                distributions[identifier] = nuc_freqs["internal_root_distribution"]

        all_data[gene_id] = distributions

    return all_data

# Base directory containing all gene ID folders
base_path = '/Users/gulugulu/Desktop/honours/data_local/whole_genome_mammal87/triads_350_threshold'
# gene ID -> {triad identifier -> internal root distribution}; read by later cells as `result`.
result = extract_internal_root_distribution(base_path)

# Save the collected distributions as JSON inside base_path itself. The file is
# not a directory, so extract_internal_root_distribution skips it on later runs.
output_file = os.path.join(base_path, 'internal_root_distributions.json')
with open(output_file, 'w') as f:
    json.dump(result, f, indent=4)

In [474]:
# gene ID -> {identifier -> calculate_information(internal root distribution)}.
information_dict = {
    gene_id: {
        identifier: calculate_information(distribution)
        for identifier, distribution in distributions.items()
    }
    for gene_id, distributions in result.items()
}




In [475]:
# Flatten information_dict into a single list of information values (all genes, all identifiers).
information_list = [value for gene_id in information_dict for value in information_dict[gene_id].values()]


In [659]:
# Width of one information bin; the next cell computes a bin index as
# int(information // bin_size).
bin_size = 0.0065
# Upper end of the binned range. NOTE(review): presumably the largest information
# value expected in the data -- confirm; larger values raise KeyError when binned.
max_information = 0.65
# round() rather than int(): the float quotient can land just below 100, and
# truncation would then silently drop the last bin.
n_information_bins = round(max_information / bin_size)
# bin index -> None until the next cell stores the first triad that falls in the bin.
information_bin = {i: None for i in range(n_information_bins)}

In [660]:
# Keep the first (gene, identifier, information) encountered for each information bin.
for gene_id, all_information in information_dict.items():
    for identifier, information in all_information.items():
        bin_index = int(information // bin_size)
        if information_bin[bin_index] is None:
            information_bin[bin_index] = {'index': (gene_id, identifier, information)}

# Attach the matching initial distribution to every bin that received a triad.
for bin_index, info in information_bin.items():
    if info is None:
        continue
    gene_id, identifier, _ = info['index']
    information_bin[bin_index]['distribution'] = result[gene_id][identifier]

    

Confounding (controlled) variable - substitution rate matrix

In [667]:
# Build, for every valid triad, the pair of ingroup rate matrices scaled by the
# corresponding 'ens' value from the small tree.
#   matrices_dict: gene name -> list of {species name: scaled matrix} pairs
#   matrices_list: the same pairs flattened across genes, in matrices_dict order
matrices_dict = {}
matrices_list = []
for gene_name, valid_triads_identifier in valid_triads_identifier_dict.items():
    # NOTE: despite its name this is the path of a JSON file, not a directory.
    triads_info_dir = os.path.join(base_dir, gene_name, 'triads_info_dict.json')
    # Context manager so the handle is closed after parsing (the original leaked it).
    with open(triads_info_dir, 'r') as triads_info_file:
        triads_info_dict = json.load(triads_info_file)
    matrix_pairs = []
    for identifier in valid_triads_identifier:
        triad_info = triads_info_dict[identifier]
        species_names = triad_info['triads_species_names']
        small_tree_info = triad_info['triads_info_small_tree']
        ens_dict = small_tree_info['ens']
        matrices = small_tree_info['matrices']
        # One entry per ingroup, keyed by species name: matrix * that species' ens.
        matrix_pairs.append({
            species: np.array(matrices[species]) * ens_dict[species]
            for species in (species_names['ingroup1'], species_names['ingroup2'])
        })
    matrices_list.extend(matrix_pairs)
    matrices_dict[gene_name] = matrix_pairs

len(matrices_list)

2671

In [678]:
# Rebuild the flat list from matrices_dict before dropping the pair, so that
# re-running this cell always removes the same pair instead of popping a
# different element each time it is executed.
matrices_list = [pair for gene_pairs in matrices_dict.values() for pair in gene_pairs]
# Position 1738 is the pair shown in this cell's output, whose first matrix has
# rates orders of magnitude larger than the second.
# NOTE(review): the index is tied to the current data set and its ordering --
# re-check it whenever the input files change.
matrices_list.pop(1738)

{'Megabat': array([[-59.81650541,  50.66196725,   3.07873059,   6.07580757],
        [ 36.65484988, -47.20613229,   8.5359179 ,   2.01536451],
        [  8.75116483,   5.03487332, -74.10248108,  60.31644294],
        [  2.85756663,   7.44404106,  30.41730721, -40.71891491]]),
 'Goat': array([[-4.78519511e-03,  3.39058132e-03,  1.32680216e-03,
          6.78116305e-05],
        [ 3.17629842e-03, -5.61815618e-03,  6.39378945e-05,
          2.37791986e-03],
        [ 7.42983000e-11,  9.87449499e-10, -2.49657169e-03,
          2.49657063e-03],
        [ 8.22296332e-11,  1.12726153e-03,  1.81232000e-03,
         -2.93958161e-03]])}

Dependent variable - ENS difference

In [662]:
def ens_diff_log_ratio(pi, Q1, Q2, t):
    """Return log(ENS under Q1 / ENS under Q2) for the same start and time.

    Args:
        pi: Initial nucleotide distribution passed to ``calculate_ENS``.
        Q1: First rate matrix.
        Q2: Second rate matrix.
        t: Evolution time.

    Returns:
        Natural log of ``calculate_ENS(pi, Q1, t) / calculate_ENS(pi, Q2, t)``.
    """
    ens_first = calculate_ENS(pi, Q1, t)
    ens_second = calculate_ENS(pi, Q2, t)
    return np.log(ens_first / ens_second)

Dependent variable - Nabla difference

In [663]:
def nabla_diff_log_ratio(pi, Q1, Q2, t):
    """Return log(non-stationarity under Q1 / non-stationarity under Q2).

    Args:
        pi: Initial nucleotide distribution passed to ``calculate_non_stationarity``.
        Q1: First rate matrix.
        Q2: Second rate matrix.
        t: Evolution time.

    Returns:
        Natural log of the ratio of the two ``calculate_non_stationarity`` values.
    """
    nabla_first = calculate_non_stationarity(pi, Q1, t)
    nabla_second = calculate_non_stationarity(pi, Q2, t)
    return np.log(nabla_first / nabla_second)


Dependent variable - in-group JSD

In [664]:
import scipy.linalg
import scipy.stats


def get_jsd(p0, Q1, Q2, t):
    """Jensen-Shannon divergence between the distributions reached under Q1 and Q2.

    The initial distribution ``p0`` is evolved for time ``t`` under each rate
    matrix (``p0 @ expm(Q * t)``) and the two resulting distributions are
    compared with the ``jsd`` function imported from ``cogent3.maths.measure``.

    Args:
        p0: Initial nucleotide distribution (sequence of frequencies).
        Q1: First rate matrix (array-like, square).
        Q2: Second rate matrix (array-like, square).
        t: Evolution time.

    Returns:
        The value returned by ``jsd`` for the two evolved distributions.
    """
    p1 = scipy.linalg.expm(np.asarray(Q1) * t)
    p2 = scipy.linalg.expm(np.asarray(Q2) * t)
    # Vector-matrix product, as in the later analysis cells; the original `p0*p1`
    # multiplied elementwise and produced a matrix instead of a distribution.
    evolved1 = np.dot(p0, p1)
    evolved2 = np.dot(p0, p2)
    # Return directly: assigning to a local named `jsd` made that name local to
    # the function and raised UnboundLocalError on this call.
    return jsd(evolved1, evolved2)


3D Plotting - density against Q1, Q2

In [665]:
# Candidate initial nucleotide distributions (four frequencies each, summing to 1).
# The number in the comment above each one appears to be its information value
# as measured by calculate_information -- TODO confirm.

# 0
pi0 = [0.25, 0.25, 0.25, 0.25]

# 0.02
pi1 = [
    0.22195243534512787,
    0.3100037065350664,
    0.19400175596722694,
    0.27404210215257885,
]

# 0.14
pi2 = [
    0.18737214745678638,
    0.3475654097096265,
    0.10013048487375038,
    0.36493195795983835,
]

# 0.47
pi3 = [
    0.07780196872922861,
    0.48601378500013337,
    0.045839682353676546,
    0.39034456391696143,
]

# 0.62
pi4 = [
    0.06414557741723811,
    0.5836667594641209,
    0.010753487466864692,
    0.3414341756517766,
]

# 0.27 (this label was a bare `0.27` expression statement; the `#` was missing)
pi5 = [0.1, 0.4, 0.1, 0.4]


3D Density Plot

In [668]:
def bar_data(position3d, size=(1,1,1)):
    """Return the 8 corner vertices of an axis-aligned box.

    Args:
        position3d: (x, y, z) of the box's minimum corner.
        size: Edge lengths along x, y and z.

    Returns:
        Float array of shape (8, 3): the four bottom corners (z = position z)
        followed by the four top corners, each going round the face in the same order.
    """
    unit_cube = np.array(
        [
            [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],  # bottom face
            [0, 0, 1], [1, 0, 1], [1, 1, 1], [0, 1, 1],  # top face
        ],
        dtype=float,
    )
    # Scale the unit cube to the requested size, then move it to the position.
    return unit_cube * np.array(size) + np.array(position3d)

def triangulate_bar_faces(positions, sizes):
    """Triangulate a set of boxes into vertex and index lists for a plotly Mesh3d.

    Args:
        positions: Iterable of (x, y, z) minimum corners, one per box.
        sizes: Iterable of (dx, dy, dz) edge lengths, paired with ``positions``.

    Returns:
        (vertices, I, J, K): ``vertices`` holds the unique corner coordinates of
        all boxes; ``I``, ``J`` and ``K`` are equal-length lists of indices into
        ``vertices``, twelve triangles (two per face) for every box.
    """
    # Corner slots (0-7, in bar_data order) of the 12 triangles of one box.
    first_corner = [0, 2, 0, 5, 0, 7, 5, 2, 3, 6, 7, 5]
    second_corner = [1, 3, 4, 1, 3, 4, 1, 6, 7, 2, 4, 6]
    third_corner = [2, 0, 5, 0, 7, 0, 2, 5, 6, 3, 5, 7]

    boxes = [bar_data(position, size) for position, size in zip(positions, sizes)]
    # De-duplicate corners; ixr maps every stacked corner to its row in `vertices`.
    vertices, ixr = np.unique(np.vstack(boxes), return_inverse=True, axis=0)

    I, J, K = [], [], []
    for start in range(0, 8 * len(boxes), 8):
        corner_ids = ixr[start:start + 8]
        I.extend(corner_ids[first_corner])
        J.extend(corner_ids[second_corner])
        K.extend(corner_ids[third_corner])
    return vertices, I, J, K

def get_plotly_mesh3d(x, y, bins=[10, 10], bargap=0.1):
    """Build Mesh3d coordinates for a 3D bar histogram of ``x`` against ``y``.

    A 2D histogram is computed with ``np.histogram2d`` and every bin becomes one
    box whose height is the bin count.

    Args:
        x: Sample values for the first axis.
        y: Sample values for the second axis.
        bins: Forwarded to ``np.histogram2d``.
        bargap: Gap between neighbouring bars, in data units; half of it is
            left free on each side of a bar. NOTE: if it exceeds a bin width the
            bar size becomes negative, as in the original implementation.

    Returns:
        (X, Y, Z, I, J, K): vertex coordinate arrays and triangle index lists,
        ready for ``go.Mesh3d(x=X, y=Y, z=Z, i=I, j=J, k=K)``.
    """
    hist, xedges, yedges = np.histogram2d(x, y, bins=bins)

    # bar_data treats a position as the box's minimum corner, so each bar is
    # anchored at its bin's lower edge plus half the gap, which centres it on
    # the bin. The original anchored bars at the bin centre, shifting every bar
    # half a bin along both axes.
    xpos, ypos = np.meshgrid(xedges[:-1] + bargap / 2,
                             yedges[:-1] + bargap / 2, indexing="ij")
    # Per-bin widths, so explicit non-uniform edges passed via `bins` also work.
    xsize, ysize = np.meshgrid(np.diff(xedges) - bargap,
                               np.diff(yedges) - bargap, indexing="ij")

    # "ij" indexing matches hist's layout: hist[i, j] counts x-bin i and y-bin j.
    positions = np.column_stack([xpos.ravel(), ypos.ravel(), np.zeros(xpos.size)])
    sizes = np.column_stack([xsize.ravel(), ysize.ravel(), hist.ravel()])

    vertices, I, J, K = triangulate_bar_faces(positions, sizes)
    return vertices[:, 0], vertices[:, 1], vertices[:, 2], I, J, K



In [700]:
RESULT_COLUMNS = ['Matrix_ID', 'Time', 'ENS_difference', 'Nabla_difference', 'Ingroup_JSD']


def compute_log_ratio_records(matrix_pairs, pi, times):
    """Compare the two matrices of every pair at every time point.

    Args:
        matrix_pairs: Sequence of dicts holding two rate matrices each; the
            first two values of each dict are used as Q1 and Q2.
        pi: Initial nucleotide distribution.
        times: Iterable of evolution times.

    Returns:
        List of tuples ``(matrix index, t, log(ENS1/ENS2), log(nabla1/nabla2),
        JSD of the two evolved distributions)``, ordered by matrix then time.
        A 0/0 ratio (e.g. when both quantities are zero) yields NaN; the next
        cell removes such rows with ``dropna``.
    """
    records = []
    for matrix_id, pair in enumerate(matrix_pairs):
        q_pair = list(pair.values())
        Q1, Q2 = q_pair[0], q_pair[1]
        for t in times:
            ens1 = calculate_ENS(pi, Q1, t)
            ens2 = calculate_ENS(pi, Q2, t)
            nabla1 = calculate_non_stationarity(pi, np.array(Q1), t)
            nabla2 = calculate_non_stationarity(pi, np.array(Q2), t)
            # Distribution reached from pi after time t under each matrix.
            pi_1 = np.dot(pi, scipy.linalg.expm(Q1 * t))
            pi_2 = np.dot(pi, scipy.linalg.expm(Q2 * t))
            records.append((
                matrix_id,
                t,
                np.log(ens1 / ens2),
                np.log(nabla1 / nabla2),
                jsd(pi_1, pi_2),
            ))
    return records


# One helper call per starting distribution replaces two copy-pasted loops whose
# per-matrix accumulator lists were never read.
# pi0 is plotted later as "Low Information", pi3 as "High Information".
result_list1 = compute_log_ratio_records(matrices_list, pi0, t_range)
df1 = pd.DataFrame(result_list1, columns=RESULT_COLUMNS)

result_list2 = compute_log_ratio_records(matrices_list, pi3, t_range)
df2 = pd.DataFrame(result_list2, columns=RESULT_COLUMNS)


In [701]:
def _round_and_count(frame):
    """Round both log-ratio columns to 1 decimal and count rows per rounded pair.

    Returns the rounded copy of ``frame`` and a table with columns
    'Nabla_difference', 'ENS_difference' and 'Density' (rows per pair).
    """
    rounded = frame.assign(
        Nabla_difference=frame['Nabla_difference'].round(1),
        ENS_difference=frame['ENS_difference'].round(1),
    )
    counts = (
        rounded.groupby(['Nabla_difference', 'ENS_difference'])
        .size()
        .reset_index(name='Density')
    )
    return rounded, counts


# Drop rows with NaN values.
# NOTE: df1/df2 are overwritten here and rounded below, so this cell is not
# idempotent -- re-run the previous cell before re-running this one.
df1 = df1.dropna()
df2 = df2.dropna()

# Unrounded values feed the 3D histogram meshes.
x1, y1 = df1['Nabla_difference'].to_numpy(), df1['ENS_difference'].to_numpy()
x2, y2 = df2['Nabla_difference'].to_numpy(), df2['ENS_difference'].to_numpy()

# First distribution: rounded frame, per-pair counts, and the count table as lists.
df1, density_data1 = _round_and_count(df1)
density1 = density_data1['Density'].to_list()
ens_difference1 = density_data1['ENS_difference'].to_list()
nabla_difference1 = density_data1['Nabla_difference'].to_list()

X1, Y1, Z1, I1, J1, K1 = get_plotly_mesh3d(x1, y1, bins=[20, 20], bargap=0.05)

# Second distribution: same processing.
df2, density_data2 = _round_and_count(df2)
density2 = density_data2['Density'].to_list()
ens_difference2 = density_data2['ENS_difference'].to_list()
nabla_difference2 = density_data2['Nabla_difference'].to_list()

X2, Y2, Z2, I2, J2, K2 = get_plotly_mesh3d(x2, y2, bins=[20, 20], bargap=0.05)

In [805]:
import plotly.express as px 
  
def _matrix_scatter3d(frame):
    """Return a 3D scatter of Nabla difference (x), Matrix ID (y) and ENS difference (z).

    Points are coloured by the numeric Matrix_ID; the y coordinate is Matrix_ID
    converted to strings.
    """
    points = go.Scatter3d(
        x=frame['Nabla_difference'],
        y=frame['Matrix_ID'].astype(str),  # matrix IDs as strings
        z=frame['ENS_difference'],
        mode='markers',
        marker=dict(
            size=5,
            color=frame['Matrix_ID'],  # colour by matrix ID
            colorscale='Viridis',
            opacity=0.8,
        ),
    )
    figure = go.Figure(data=[points])
    figure.update_layout(
        title='3D Scatter Plot of ENS difference vs Nabla difference',
        width=800,
        height=600,
        scene=dict(
            xaxis_title='Nabla Difference',
            yaxis_title='Matrix ID',
            zaxis_title='ENS Difference',
            xaxis=dict(showgrid=True, zeroline=False),
            yaxis=dict(showgrid=True, zeroline=False),
            zaxis=dict(showgrid=True, zeroline=False),
        ),
        margin=dict(l=0, r=0, b=0, t=30),  # use the full drawing area
    )
    return figure


# df1 comes from pi0 and df2 from pi3.
fig_matrix_low = _matrix_scatter3d(df1)
fig_matrix_high = _matrix_scatter3d(df2)



In [702]:
# Render the 3D scatter built from df2.
fig_matrix_high.show()

In [703]:
# Render the 3D scatter built from df1.
fig_matrix_low.show()

In [806]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two side-by-side 3D scenes, one per starting distribution.
fig_density_3d = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'mesh3d'}, {'type': 'mesh3d'}]],
    horizontal_spacing=0.01,
    subplot_titles=('Low Information', 'High Information'),
)

# Histogram bars as meshes, coloured by vertex height (z) on the shared colour axis.
mesh_low = go.Mesh3d(
    x=X1, y=Y1, z=Z1,
    i=I1, j=J1, k=K1,
    intensity=Z1,
    colorscale='Viridis',
    showscale=False,
    opacity=1,
    coloraxis="coloraxis",
)
mesh_high = go.Mesh3d(
    x=X2, y=Y2, z=Z2,
    i=I2, j=J2, k=K2,
    intensity=Z2,
    colorscale='Viridis',
    coloraxis="coloraxis",
    opacity=1,
)
fig_density_3d.add_trace(mesh_low, row=1, col=1)
fig_density_3d.add_trace(mesh_high, row=1, col=2)

# Figure-wide layout: title, size, margins and the single shared colour bar.
fig_density_3d.update_layout(
    title="3D Density Histogram of Nabla Difference and ENS Differences across Matrices",
    width=1400,
    height=800,
    margin=dict(l=0, r=0, b=0, t=100),
    coloraxis=dict(
        colorscale='Viridis',
        colorbar=dict(
            title='Density',
            x=1,  # colour bar to the right of the plots
            len=0.75,
        ),
    ),
)

# Same camera, axis titles and grid settings for both scenes.
for scene_col in (1, 2):
    fig_density_3d.update_scenes(
        dict(
            camera=dict(
                eye=dict(x=-1, y=-1, z=3),     # viewing position
                center=dict(x=0, y=0, z=0),    # look at the origin
                up=dict(x=0, y=0, z=1),        # z axis points up
            ),
            xaxis_title='Nabla Difference (log ratio)',
            yaxis_title='ENS Difference (log ratio)',
            zaxis_title='Density',
            xaxis=dict(showgrid=True, zeroline=False),
            yaxis=dict(showgrid=True, zeroline=False),
            zaxis=dict(showgrid=True, zeroline=False),
        ),
        row=1,
        col=scene_col,
    )

# Set the vertical position of the two subplot titles.
for title_index in (0, 1):
    fig_density_3d.layout.annotations[title_index].update(y=0.95)

# Show the figure and export it as a PDF.
fig_density_3d.show()
fig_density_3d.write_image('3D Density Histogram of Nabla Difference and ENS Differences across Matrices.pdf')


In [705]:


# fig_density_low3d = go.Figure(data=[go.Mesh3d(
#     x=X1, y=Y1, z=Z1,
#     i=I1, j=J1, k=K1,
#     intensity=Z1,  # Typically uses the Z values or another metric for color intensity
#     colorscale='Viridis',  # Reversed Viridis color scale; remove '_r' for normal progression
#     colorbar=dict(title='Density'),  # Color scale bar
#     opacity=1  # Set opacity to make overlaps more discernible
# )])

# fig_density_low3d.update_layout(
#     scene=dict(camera=dict(
#             eye=dict(x=-1, y=-1, z=2),  # Adjust x, y, z to change the camera angle
#             center=dict(x=0, y=0, z=0),  # Keeps the center of the plot at the origin
#             up=dict(x=0, y=0, z=1)  # Ensures that z is up
#         ),
#         xaxis_title='Nabla Difference',
#         yaxis_title='ENS Difference',
#         zaxis_title='Density',
#         xaxis=dict(showgrid=True, zeroline=False),
#         yaxis=dict(showgrid=True, zeroline=False),
#         zaxis=dict(showgrid=True, zeroline=False),),  # Ensure the aspect ratio reflects the data
#     title="Density Histogram of Nabla Difference and ENS Differences - Low Information",
#     width=800,  # Adjust width
#     height=600,  # Adjust height
#     margin=dict(l=0, r=0, b=0, t=30)  # Adjust margins to make full use of space
# )


In [708]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Ascending sort so that the densest points are drawn last, i.e. on top.
density_data_sorted1 = density_data1.sort_values(by='Density', ascending=True)
density_data_sorted2 = density_data2.sort_values(by='Density', ascending=True)

# One panel per starting distribution.
fig_density_2d = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Low Information', 'High Information'),
    horizontal_spacing=0.05,
)

# Each panel: rounded (Nabla, ENS) pairs coloured by how many rows share the pair.
for panel_col, density_table in enumerate((density_data_sorted1, density_data_sorted2), start=1):
    fig_density_2d.add_trace(
        go.Scatter(
            x=density_table['Nabla_difference'],
            y=density_table['ENS_difference'],
            mode='markers',
            marker=dict(
                size=10,
                color=density_table['Density'],
                colorscale='Viridis',
                coloraxis="coloraxis",
                opacity=1,
                showscale=True,
            ),
        ),
        row=1,
        col=panel_col,
    )

# Figure-wide layout with a single shared colour bar.
fig_density_2d.update_layout(
    title='Density Plot of Nabla Difference and ENS Differences Across Matrix Pairs',
    template='plotly_white',
    showlegend=False,
    width=1100,
    height=500,
    coloraxis=dict(
        colorscale='Viridis',
        colorbar=dict(
            title='Density',
            x=1,  # colour bar to the right of the plots
            len=0.75,
        ),
    ),
)

# Both panels get an x-axis title; only the left panel gets a y-axis title.
for panel_col in (1, 2):
    fig_density_2d.update_xaxes(title_text='Nabla Difference (log ratio)', row=1, col=panel_col)
fig_density_2d.update_yaxes(title_text='ENS Difference (log ratio)', row=1, col=1)

# Show the figure and export it as a PDF.
fig_density_2d.show()
fig_density_2d.write_image('2D Density Plot of Nabla Difference and ENS Differences across Matrix Pairs.pdf')



In [610]:

# # Create 3D scatter plot
# fig_matrix_high = go.Figure(data=[go.Scatter3d(
#     x=df2['Matrix_ID'].astype(str),
#     y=df2['Nabla_difference'],
#     z=df2['ENS_difference'],  # Convert to string for better color handling
#     mode='markers',
#     marker=dict(
#         size=5,
#         color=df2['Matrix_ID'],  # Color by matrix ID
#         colorscale='Viridis',  # Adjust color scale
#         opacity=0.8
#     )
# )])

# # Update layout
# fig_matrix_high.update_layout(
#     title='3D Scatter Plot of ENS difference vs Nabla difference',
#     width=800,  # Adjust width
#     height=600,  # Adjust height
#     scene=dict(
#         xaxis_title='Matrix ID',
#         yaxis_title='Nabla Difference',
#         zaxis_title='ENS Difference',
#         xaxis=dict(showgrid=True, zeroline=False),
#         yaxis=dict(showgrid=True, zeroline=False),
#         zaxis=dict(showgrid=True, zeroline=False),
#     ),
#     margin=dict(l=0, r=0, b=0, t=30)  # Adjust margins to make full use of space
# )

# fig_matrix_high.show()


In [709]:

# Compare every matrix pair at every time point, starting from pi0.
# Rows are ordered by time first, then by matrix index.
result_columns = ['Matrix_ID', 'Time', 'ENS_difference', 'Nabla_difference', 'Ingroup_JSD']
result_list = []
for t in t_range:
    pi = pi0
    # Per-time accumulators; after the loop they hold the values of the last t.
    nabla_difference_list, ens_difference_list, jsd_list = [], [], []
    nabla1_list, nabla2_list = [], []
    ens2_list = []
    for i, matrix_pair in enumerate(matrices_list):
        q_pair = list(matrix_pair.values())
        Q1, Q2 = q_pair[0], q_pair[1]
        ens1 = calculate_ENS(pi, Q1, t)
        nabla1 = calculate_non_stationarity(pi, np.array(Q1), t)
        nabla2 = calculate_non_stationarity(pi, np.array(Q2), t)
        ens2 = calculate_ENS(pi, Q2, t)
        # Distributions reached from pi after time t under each matrix.
        pi_1 = np.dot(pi, scipy.linalg.expm(Q1 * t))
        pi_2 = np.dot(pi, scipy.linalg.expm(Q2 * t))
        jsd_value = jsd(pi_1, pi_2)
        nabla_diff = np.log(nabla1 / nabla2)
        ens_diff = np.log(ens1 / ens2)
        jsd_list.append(jsd_value)
        nabla_difference_list.append(nabla_diff)
        ens_difference_list.append(ens_diff)
        result_list.append((i, t, ens_diff, nabla_diff, jsd_value))
        ens2_list.append(ens2)

df_low = pd.DataFrame(result_list, columns=result_columns)

# # Create 3D scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=df['Nabla_difference'],
#     y=df['Time'],
#     z=df['ENS_difference'],  # Convert to string for better color handling
#     mode='markers',
#     marker=dict(
#         size=5,
#         color=df['Time'],  
#         colorscale='Viridis',  
#         opacity=1
#     )
# )])

# # Update layout
# fig.update_layout(
#     title='3D Scatter Plot of ENS difference vs Nabla difference',
#     width=800,  # Adjust width
#     height=600,  # Adjust height
#     scene=dict(camera=dict(
#             eye=dict(x=1, y=-2, z=1.5),  # Adjust x, y, z to change the camera angle
#             center=dict(x=0, y=0, z=0),  # Keeps the center of the plot at the origin
#             up=dict(x=0, y=0, z=1)  # Ensures that z is up
#         ),
#         xaxis_title='Nabla Difference (log ratio)',
#         yaxis_title='Time',
#         zaxis_title='ENS Difference (log ratio)',
#         xaxis=dict(showgrid=True, zeroline=False),
#         yaxis=dict(showgrid=True, zeroline=False),
#         zaxis=dict(showgrid=True, zeroline=False),
#     ),
#     margin=dict(l=0, r=0, b=0, t=30)  # Adjust margins to make full use of space
# )

# fig.show()

In [613]:
# # Defining bins and bin labels
# bins = np.linspace(-2, 2, 20)
# bin_labels = np.round((bins[:-1] + bins[1:])/2, 2)  # Calculate midpoints

# # Binning Nabla_difference
# df['Nabla_difference'] = pd.cut(df['Nabla_difference'], bins=bins, labels=bin_labels)
# pivot_table = df.pivot_table(index='Time', columns='Nabla_difference', values='ENS_difference', aggfunc='mean')

# # Inverting the order of Time for plotting

# # Extracting the X, Y, Z coordinates for Plotly
# X, Y = np.meshgrid(pivot_table.columns.categories, pivot_table.index)
# Z = pivot_table.values

# # Create the surface plot using Plotly
# fig = go.Figure(data=[go.Surface(
#     z=Z,
#     x=X[0],  # X coordinates
#     y=Y[:, 0],  # Y coordinates
#     colorscale='Viridis',
#     showscale=False
# )])

# # Update layout
# fig.update_layout(
#     title='3D Surface Plot of ENS difference vs Nabla difference over Time  - Low Information',
#     scene=dict(camera=dict(
#             eye=dict(x=1, y=-2, z=1.5),  # Adjust x, y, z to change the camera angle
#             center=dict(x=0, y=0, z=0),  # Keeps the center of the plot at the origin
#             up=dict(x=0, y=0, z=1)  # Ensures that z is up
#         ),
#         xaxis_title='Nabla Difference',
#         yaxis_title='Time',
#         zaxis_title='ENS Difference',
#         aspectmode='cube'
#     ),
#     autosize=False,
#     width=800,
#     height=800
# )

# fig.show()
# fig.write_image('3D Surface Plot of Nabla Difference and ENS Differences with respect to time - low information.pdf')

In [710]:

# Compare every matrix pair at every time point, starting from pi5.
# Rows are ordered by time first, then by matrix index.
result_columns = ['Matrix_ID', 'Time', 'ENS_difference', 'Nabla_difference', 'Ingroup_JSD']
result_list = []
for t in t_range:
    pi = pi5
    # Per-time accumulators; after the loop they hold the values of the last t.
    nabla_difference_list, ens_difference_list, jsd_list = [], [], []
    nabla1_list, nabla2_list = [], []
    for i, matrix_pair in enumerate(matrices_list):
        q_pair = list(matrix_pair.values())
        Q1, Q2 = q_pair[0], q_pair[1]
        ens1 = calculate_ENS(pi, Q1, t)
        nabla1 = calculate_non_stationarity(pi, np.array(Q1), t)
        nabla2 = calculate_non_stationarity(pi, np.array(Q2), t)
        ens2 = calculate_ENS(pi, Q2, t)
        # Distributions reached from pi after time t under each matrix.
        pi_1 = np.dot(pi, scipy.linalg.expm(Q1 * t))
        pi_2 = np.dot(pi, scipy.linalg.expm(Q2 * t))
        jsd_value = jsd(pi_1, pi_2)
        nabla_diff = np.log(nabla1 / nabla2)
        ens_diff = np.log(ens1 / ens2)
        jsd_list.append(jsd_value)
        nabla_difference_list.append(nabla_diff)
        ens_difference_list.append(ens_diff)
        result_list.append((i, t, ens_diff, nabla_diff, jsd_value))

df_high = pd.DataFrame(result_list, columns=result_columns)

# fig = go.Figure(data=[go.Scatter3d(
#     x=df['Nabla_difference'],
#     y=df['Time'],
#     z=df['ENS_difference'],  # Convert to string for better color handling
#     mode='markers',
#     marker=dict(
#         size=5,
#         color=df['Time'],  
#         colorscale='Viridis',  
#         opacity=1
#     )
# )])

# # Update layout
# fig.update_layout(
#     title='3D Scatter Plot of ENS difference vs Nabla difference',
#     width=800,  # Adjust width
#     height=600,  # Adjust height
#     scene=dict(camera=dict(
#             eye=dict(x=1, y=-2, z=1.5),  # Adjust x, y, z to change the camera angle
#             center=dict(x=0, y=0, z=0),  # Keeps the center of the plot at the origin
#             up=dict(x=0, y=0, z=1)  # Ensures that z is up
#         ),
#         xaxis_title='Nabla Difference (log ratio)',
#         yaxis_title='Time',
#         zaxis_title='ENS Difference (log ratio)',
#         xaxis=dict(showgrid=True, zeroline=False),
#         yaxis=dict(showgrid=True, zeroline=False),
#         zaxis=dict(showgrid=True, zeroline=False),
#     ),
#     margin=dict(l=0, r=0, b=0, t=30)  # Adjust margins to make full use of space
# )

# fig.show()

In [615]:
# # Defining bins and bin labels
# bins = np.linspace(-2, 2, 20)
# bin_labels = np.round((bins[:-1] + bins[1:])/2, 2)  # Calculate midpoints

# # Binning Nabla_difference
# df['Nabla_difference'] = pd.cut(df['Nabla_difference'], bins=bins, labels=bin_labels)
# pivot_table = df.pivot_table(index='Time', columns='Nabla_difference', values='ENS_difference', aggfunc='mean')

# # Inverting the order of Time for plotting

# # Extracting the X, Y, Z coordinates for Plotly
# X, Y = np.meshgrid(pivot_table.columns.categories, pivot_table.index)
# Z = pivot_table.values

# # Create the surface plot using Plotly
# fig = go.Figure(data=[go.Surface(
#     z=Z,
#     x=X[0],  # X coordinates
#     y=Y[:, 0],  # Y coordinates
#     colorscale='Viridis',
#     showscale=False
# )])

# # Update layout
# fig.update_layout(
#     title='3D Surface Plot of ENS difference vs Nabla difference over Time  - High Information',
#     scene=dict(camera=dict(
#             eye=dict(x=1, y=-2, z=1.5),  # Adjust x, y, z to change the camera angle
#             center=dict(x=0, y=0, z=0),  # Keeps the center of the plot at the origin
#             up=dict(x=0, y=0, z=1)  # Ensures that z is up
#         ),
#         xaxis_title='Nabla Difference',
#         yaxis_title='Time',
#         zaxis_title='ENS Difference',
#         aspectmode='cube'
#     ),
#     autosize=False,
#     width=800,
#     height=800
# )

# fig.show()
# fig.write_image('3D Surface Plot of Nabla Difference and ENS Differences with respect to time - high information.pdf')

In [807]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Side-by-side 3D surfaces of the mean ENS difference over
# (binned nabla difference, time).
# df_low and df_high are assumed to be the low- and high-information dataframes
# built earlier in the notebook; both must provide the columns
# 'Time', 'Nabla_difference' and 'ENS_difference'.

# Bin edges for the nabla difference; each bin is labelled with its midpoint.
bins = np.linspace(-2, 2, 20)
bin_labels = np.round((bins[:-1] + bins[1:]) / 2, 2)

# Bin into copies (DataFrame.assign) instead of overwriting the source column:
# the raw Nabla_difference values are preserved and the cell can be re-run.
binned_low = df_low.assign(
    Nabla_difference=pd.cut(df_low['Nabla_difference'], bins=bins, labels=bin_labels)
)
pivot_table_low = binned_low.pivot_table(index='Time', columns='Nabla_difference', values='ENS_difference', aggfunc='mean')

binned_high = df_high.assign(
    Nabla_difference=pd.cut(df_high['Nabla_difference'], bins=bins, labels=bin_labels)
)
pivot_table_high = binned_high.pivot_table(index='Time', columns='Nabla_difference', values='ENS_difference', aggfunc='mean')

# Build the grids from the bins that are actually present as pivot columns.
# pivot_table drops all-NaN columns by default, so `columns.categories` (every
# bin label) can be longer than the Z grid when a bin is empty.
X_low, Y_low = np.meshgrid(pivot_table_low.columns.astype(float), pivot_table_low.index)
Z_low = pivot_table_low.values

X_high, Y_high = np.meshgrid(pivot_table_high.columns.astype(float), pivot_table_high.index)
Z_high = pivot_table_high.values

# Create subplots: 1 row, 2 columns
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'surface'}, {'type': 'surface'}]],
    horizontal_spacing=0,
    subplot_titles=(
        'Low Information',
        'High Information'
    )
)

# Low-information surface, coloured by its own time grid.
fig.add_trace(go.Surface(
    z=Z_low,
    x=X_low[0],
    y=Y_low[:, 0],
    surfacecolor=Y_low,
    colorscale='Viridis',
    showscale=False), row=1, col=1)

# High-information surface, coloured by its own time grid (previously Y_low
# was used here, which belongs to the other panel).
fig.add_trace(go.Surface(
    z=Z_high,
    x=X_high[0],
    y=Y_high[:, 0],
    surfacecolor=Y_high,
    colorscale='Viridis',
    showscale=True,
    colorbar=dict(title='Time', x=1)
), row=1, col=2)

# Both 3D scenes share the same camera, axis titles and aspect mode.
scene_layout = dict(
    camera=dict(
        eye=dict(x=1, y=-2.5, z=1.5),  # camera position
        center=dict(x=0, y=0, z=0),  # look at the origin
        up=dict(x=0, y=0, z=1)  # z axis points up
    ),
    xaxis_title='Nabla Difference (log ratio)',
    yaxis_title='Time',
    zaxis_title='ENS Difference (log ratio)',
    aspectmode='cube'
)

fig.update_layout(
    title='3D Surface Plots of ENS difference vs Nabla difference over Time',
    autosize=False,
    width=1200,
    height=800,
    margin=dict(l=0, r=0, b=0, t=100),
    scene=scene_layout,
    scene2=scene_layout
)

# Show the figure
fig.show()

# Save the figure as PDF
fig.write_image('3D Surface Plots of Nabla Difference and ENS Differences with respect to time.pdf')








In [799]:
# Per-pair divergence statistics at a single time point.
# For every pair of rate matrices in matrices_list, both matrices are applied
# to the same starting distribution `pi` for time `t`, and the resulting
# distributions, ENS values and non-stationarity (nabla) values are compared.
# The lists filled here are read by the scatter/regression cells below.
t = 1
pi = pi2
nabla_difference_list = []  # log(nabla1 / nabla2) per pair
ens_difference_list = []  # |ens1 - ens2| per pair
jsd_list = []  # jsd between the two evolved distributions
ens_list = []  # (ens1, ens2) per pair
jsd_difference_list = []  # |jsd(pi_1, pi) - jsd(pi_2, pi)| per pair
dist_list = []  # Euclidean distance between the two evolved distributions
dist_diff_list = []  # |dist(pi, pi_1) - dist(pi, pi_2)| per pair
nabla_list = []  # (nabla1, nabla2) per pair
for matrix_pair in matrices_list:
    # Only the first two matrices of each entry are compared.
    q_pair = list(matrix_pair.values())
    Q1 = q_pair[0]
    Q2 = q_pair[1]

    ens1 = calculate_ENS(pi, Q1, t)
    ens2 = calculate_ENS(pi, Q2, t)
    nabla1 = calculate_non_stationarity(pi, np.array(Q1), t)
    nabla2 = calculate_non_stationarity(pi, np.array(Q2), t)

    # Distributions reached from pi after time t: pi . expm(Q t).
    p1 = scipy.linalg.expm(Q1*t)
    p2 = scipy.linalg.expm(Q2*t)
    pi_1 = np.dot(pi, p1)
    pi_2 = np.dot(pi, p2)

    # Divergence between the two lineages, and of each lineage from the start.
    jsd_value = jsd(pi_1, pi_2)
    jsd_1 = jsd(pi_1, pi)
    jsd_2 = jsd(pi_2, pi)
    jsd_diff = abs(jsd_1-jsd_2)

    nabla_diff = np.log(nabla1/nabla2)
    ens_diff = abs(ens1-ens2)

    # Same comparisons using the Euclidean norm instead of jsd.
    dist1 = np.linalg.norm(pi-pi_1)
    dist2 = np.linalg.norm(pi-pi_2)
    dist = np.linalg.norm(pi_1-pi_2)
    dist_diff = abs(dist1 - dist2)

    jsd_list.append(jsd_value)
    jsd_difference_list.append(jsd_diff)
    dist_list.append(dist)
    dist_diff_list.append(dist_diff)
    nabla_difference_list.append(nabla_diff)
    ens_list.append((ens1, ens2))
    nabla_list.append((nabla1, nabla2))
    ens_difference_list.append(ens_diff)


In [800]:
# Scatter of sqrt(JSD difference) against sqrt(ENS difference) with an OLS
# trendline. The axis labels describe the plotted (square-root transformed)
# data; previously the x axis was labelled 'nabla_diff' although it shows the
# JSD difference.
# NOTE(review): the variable name still says "nabla ... log ratio"; it is kept
# because the trendline-summary cell below reads `nabla_ens_log_ratio_fig`.
nabla_ens_log_ratio_fig = px.scatter(
    x=np.sqrt(jsd_difference_list),
    y=np.sqrt(ens_difference_list),
    labels={'x': 'sqrt(jsd_diff)', 'y': 'sqrt(ens_diff)'},
    trendline="ols",
    title=None,
)
# Update layout with labels and title
nabla_ens_log_ratio_fig.update_layout(
    template='plotly_white',
    margin=dict(l=20, r=20, t=50, b=20),
    autosize=True,
    yaxis_title_font={'size': 20},
    xaxis_title_font={'size': 20},
    width=None
)
nabla_ens_log_ratio_fig.show()

In [None]:
# Scatter of the nabla log ratio against the absolute ENS difference with an
# OLS trendline.
# NOTE(review): the figure name says "jsd" although the x data is
# nabla_difference_list; the name is kept unchanged.
jsd_ens_abs_diff_fig = px.scatter(
    x=nabla_difference_list,
    y=ens_difference_list,
    labels={'x': 'nabla_diff', 'y': 'ens_diff'},
    trendline="ols",
    title=None,
)
# Style and display the figure created above. Previously this cell updated and
# showed `nabla_ens_log_ratio_fig`, so the new figure was never displayed.
jsd_ens_abs_diff_fig.update_layout(
    template='plotly_white',
    margin=dict(l=20, r=20, t=50, b=20),
    autosize=True,
    yaxis_title_font={'size': 20},
    xaxis_title_font={'size': 20},
    width=None
)
jsd_ens_abs_diff_fig.show()

In [795]:
# Fetch the OLS fit behind the scatter's trendline and display its summary.
# The result is not called `re`, to avoid shadowing the standard-library
# regular-expression module of the same name.
trendline_results = px.get_trendline_results(nabla_ens_log_ratio_fig)
trendline_results.px_fit_results.iloc[0].summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.061
Model:,OLS,Adj. R-squared:,0.061
Method:,Least Squares,F-statistic:,173.3
Date:,"Wed, 07 Aug 2024",Prob (F-statistic):,2.13e-38
Time:,11:14:26,Log-Likelihood:,-1920.6
No. Observations:,2670,AIC:,3845.0
Df Residuals:,2668,BIC:,3857.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0104,0.010,1.084,0.278,-0.008,0.029
x1,0.1561,0.012,13.165,0.000,0.133,0.179

0,1,2,3
Omnibus:,22.103,Durbin-Watson:,2.008
Prob(Omnibus):,0.0,Jarque-Bera (JB):,15.777
Skew:,-0.065,Prob(JB):,0.000375
Kurtosis:,2.647,Cond. No.,1.23


In [790]:
import statsmodels.api as sm

# OLS regression (with intercept) of the absolute ENS difference on the nabla
# log ratio. The regressor column is named after the data it holds: it was
# previously called 'jsd_diff' although it contains nabla_difference_list.
data = {
    'nabla_diff': nabla_difference_list,  # log(nabla1 / nabla2) per matrix pair
    'ens_diff': ens_difference_list  # |ens1 - ens2| per matrix pair
}
df = pd.DataFrame(data)
X = sm.add_constant(df['nabla_diff'])  # adds the intercept column
y = df['ens_diff']
model = sm.OLS(y, X).fit()
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:               ens_diff   R-squared:                       0.136
Model:                            OLS   Adj. R-squared:                  0.135
Method:                 Least Squares   F-statistic:                     419.3
Date:                Wed, 07 Aug 2024   Prob (F-statistic):           1.13e-86
Time:                        11:14:16   Log-Likelihood:                -1865.4
No. Observations:                2670   AIC:                             3735.
Df Residuals:                    2668   BIC:                             3747.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0097      0.009      1.034      0.3

In [791]:

# Histogram of the per-pair ENS differences collected in ens_difference_list.
px.histogram(ens_difference_list)

In [727]:

# Time sweep with starting distribution pi0.
# For every time point in t_range and every pair of rate matrices in
# matrices_list, both matrices are applied to `pi` and the two outcomes are
# compared. One row per (matrix pair, time) is collected into df_jsd_low.
#
# Only result_list is accumulated: the per-time-point lists the loop used to
# rebuild on every iteration were never read and overwrote the same-named
# lists produced by the single-time-point cell.
pi = pi0  # loop-invariant starting distribution
result_list = []
for t in t_range:
    for i, matrix_pair in enumerate(matrices_list):
        # Only the first two matrices of each entry are compared.
        q_pair = list(matrix_pair.values())
        Q1 = q_pair[0]
        Q2 = q_pair[1]

        ens1 = calculate_ENS(pi, Q1, t)
        ens2 = calculate_ENS(pi, Q2, t)
        nabla1 = calculate_non_stationarity(pi, np.array(Q1), t)
        nabla2 = calculate_non_stationarity(pi, np.array(Q2), t)

        # Distributions reached from pi after time t: pi . expm(Q t).
        p1 = scipy.linalg.expm(Q1*t)
        p2 = scipy.linalg.expm(Q2*t)
        pi_1 = np.dot(pi, p1)
        pi_2 = np.dot(pi, p2)

        # Divergence between the two lineages, and of each from the start.
        jsd_value = jsd(pi_1, pi_2)
        jsd_1 = jsd(pi_1, pi)
        jsd_2 = jsd(pi_2, pi)
        jsd_diff = abs(jsd_1-jsd_2)

        nabla_diff = np.log(nabla1/nabla2)
        ens_diff = abs(ens1-ens2)

        # Same comparisons using the Euclidean norm instead of jsd.
        dist1 = np.linalg.norm(pi-pi_1)
        dist2 = np.linalg.norm(pi-pi_2)
        dist = np.linalg.norm(pi_1-pi_2)
        dist_diff = abs(dist1 - dist2)

        # Every statistic except the nabla log ratio is stored square-root
        # transformed.
        result_list.append((i, t, np.sqrt(ens_diff), nabla_diff, np.sqrt(jsd_value), np.sqrt(jsd_diff), np.sqrt(dist), np.sqrt(dist_diff)))

df_jsd_low = pd.DataFrame(result_list, columns=['Matrix_ID', 'Time', 'ENS_difference', 'Nabla_difference', 'Ingroup_JSD', 'JSD_difference', 'Distance', 'Distance_difference'])

# # Create 3D scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=df_jsd_low['Ingroup_JSD'],
#     y=df_jsd_low['Time'],
#     z=df_jsd_low['ENS_difference'],  # Convert to string for better color handling
#     mode='markers',
#     marker=dict(
#         size=5,
#         color=df_jsd_low['Time'],  
#         colorscale='Viridis',  
#         opacity=1
#     )
# )])

# # Update layout
# fig.update_layout(
#     title='3D Scatter Plot of ENS difference vs Nabla difference',
#     width=800,  # Adjust width
#     height=600,  # Adjust height
#     scene=dict(
#         xaxis_title='JSD Difference (square root)',
#         yaxis_title='Time',
#         zaxis_title='ENS Difference (square root)',
#         xaxis=dict(showgrid=True, zeroline=False),
#         yaxis=dict(showgrid=True, zeroline=False),
#         zaxis=dict(showgrid=True, zeroline=False),
#     ),
#     margin=dict(l=0, r=0, b=0, t=30)  # Adjust margins to make full use of space
# )

# fig.show()

In [728]:

# Time sweep with starting distribution pi5.
# For every time point in t_range and every pair of rate matrices in
# matrices_list, both matrices are applied to `pi` and the two outcomes are
# compared. One row per (matrix pair, time) is collected into df_jsd_high.
#
# Only result_list is accumulated: the per-time-point lists the loop used to
# rebuild on every iteration were never read and overwrote the same-named
# lists produced by the single-time-point cell.
pi = pi5  # loop-invariant starting distribution
result_list = []
for t in t_range:
    for i, matrix_pair in enumerate(matrices_list):
        # Only the first two matrices of each entry are compared.
        q_pair = list(matrix_pair.values())
        Q1 = q_pair[0]
        Q2 = q_pair[1]

        ens1 = calculate_ENS(pi, Q1, t)
        ens2 = calculate_ENS(pi, Q2, t)
        nabla1 = calculate_non_stationarity(pi, np.array(Q1), t)
        nabla2 = calculate_non_stationarity(pi, np.array(Q2), t)

        # Distributions reached from pi after time t: pi . expm(Q t).
        p1 = scipy.linalg.expm(Q1*t)
        p2 = scipy.linalg.expm(Q2*t)
        pi_1 = np.dot(pi, p1)
        pi_2 = np.dot(pi, p2)

        # Divergence between the two lineages, and of each from the start.
        jsd_value = jsd(pi_1, pi_2)
        jsd_1 = jsd(pi_1, pi)
        jsd_2 = jsd(pi_2, pi)
        jsd_diff = abs(jsd_1-jsd_2)

        nabla_diff = np.log(nabla1/nabla2)
        ens_diff = abs(ens1-ens2)

        # Same comparisons using the Euclidean norm instead of jsd.
        dist1 = np.linalg.norm(pi-pi_1)
        dist2 = np.linalg.norm(pi-pi_2)
        dist = np.linalg.norm(pi_1-pi_2)
        dist_diff = abs(dist1 - dist2)

        # Every statistic except the nabla log ratio is stored square-root
        # transformed.
        result_list.append((i, t, np.sqrt(ens_diff), nabla_diff, np.sqrt(jsd_value), np.sqrt(jsd_diff), np.sqrt(dist), np.sqrt(dist_diff)))

df_jsd_high = pd.DataFrame(result_list, columns=['Matrix_ID', 'Time', 'ENS_difference', 'Nabla_difference', 'Ingroup_JSD', 'JSD_difference', 'Distance', 'Distance_difference'])

# # Create 3D scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=df_jsd_high['JSD_difference'],
#     y=df_jsd_high['Time'],
#     z=df_jsd_high['ENS_difference'],  # Convert to string for better color handling
#     mode='markers',
#     marker=dict(
#         size=5,
#         color=df_jsd_high['Time'],  
#         colorscale='Viridis',  
#         opacity=0.8
#     )
# )])

# # Update layout
# fig.update_layout(
#     title='3D Scatter Plot of ENS difference vs Nabla difference',
#     width=800,  # Adjust width
#     height=600,  # Adjust height
#     scene=dict(
#         xaxis_title='JSD Difference (square root)',
#         yaxis_title='Time',
#         zaxis_title='ENS Difference (square root)',
#         xaxis=dict(showgrid=True, zeroline=False),
#         yaxis=dict(showgrid=True, zeroline=False),
#         zaxis=dict(showgrid=True, zeroline=False),
#     ),
#     margin=dict(l=0, r=0, b=0, t=30)  # Adjust margins to make full use of space
# )

# fig.show()

In [809]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Side-by-side 3D surfaces of the mean ENS difference over
# (binned ingroup JSD, time).
# df_jsd_low and df_jsd_high are the low- and high-information dataframes; both
# must provide the columns 'Time', 'Ingroup_JSD' and 'ENS_difference'.

# Bin edges for low information; each bin is labelled with its midpoint.
bins_low = np.linspace(0, 0.3, 20)
bin_labels_low = np.round((bins_low[:-1] + bins_low[1:]) / 2, 2)

# Bin Ingroup_JSD for low information. The binned column lives on a copy
# (DataFrame.assign) so the raw values in df_jsd_low are preserved and the cell
# can be re-run.
binned_low = df_jsd_low.assign(
    Ingroup_JSD=pd.cut(df_jsd_low['Ingroup_JSD'], bins=bins_low, labels=bin_labels_low)
)
pivot_table_low = binned_low.pivot_table(index='Time', columns='Ingroup_JSD', values='ENS_difference', aggfunc='mean')

# Bin edges for high information; each bin is labelled with its midpoint.
bins_high = np.linspace(0, 0.4, 20)
bin_labels_high = np.round((bins_high[:-1] + bins_high[1:]) / 2, 2)

# Bin Ingroup_JSD for high information (again on a copy).
binned_high = df_jsd_high.assign(
    Ingroup_JSD=pd.cut(df_jsd_high['Ingroup_JSD'], bins=bins_high, labels=bin_labels_high)
)
pivot_table_high = binned_high.pivot_table(index='Time', columns='Ingroup_JSD', values='ENS_difference', aggfunc='mean')

# Build the grids from the bins that are actually present as pivot columns.
# pivot_table drops all-NaN columns by default, so `columns.categories` (every
# bin label) can be longer than the Z grid when a bin is empty.
X_low, Y_low = np.meshgrid(pivot_table_low.columns.astype(float), pivot_table_low.index)
Z_low = pivot_table_low.values

X_high, Y_high = np.meshgrid(pivot_table_high.columns.astype(float), pivot_table_high.index)
Z_high = pivot_table_high.values

# Create subplots: 1 row, 2 columns
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'surface'}, {'type': 'surface'}]],
    horizontal_spacing=0,
    subplot_titles=(
        'Low Information',
        'High Information'
    )
)

# Low-information surface. It is coloured by time like the right-hand panel,
# so the single 'Time' colourbar describes both surfaces.
fig.add_trace(go.Surface(
    z=Z_low,
    x=X_low[0],
    y=Y_low[:, 0],
    colorscale='Viridis',
    showscale=False,
    surfacecolor=Y_low), row=1, col=1)

# High-information surface, coloured by time; carries the colourbar.
fig.add_trace(go.Surface(
    z=Z_high,
    x=X_high[0],
    y=Y_high[:, 0],
    colorscale='Viridis',
    showscale=True,
    surfacecolor=Y_high,
    colorbar=dict(title='Time', x=1.05)
), row=1, col=2)

# Both 3D scenes share the same camera, axis titles and aspect mode.
scene_layout = dict(
    camera=dict(
        eye=dict(x=1, y=-2, z=2),  # camera position
        center=dict(x=0, y=0, z=0),  # look at the origin
        up=dict(x=0, y=0, z=1)  # z axis points up
    ),
    xaxis_title='Ingroup JSD (Sqrt transformed)',
    yaxis_title='Time',
    zaxis_title='Absolute ENS Difference (Sqrt transformed)',
    aspectmode='cube'
)

fig.update_layout(
    title='3D Surface Plots of ENS difference and Ingroup JSD over Time',
    autosize=False,
    width=1600,
    height=800,
    margin=dict(l=0, r=0, b=0, t=100),
    scene=scene_layout,
    scene2=scene_layout
)

# Show the figure
fig.show()

# Save the figure as PDF
fig.write_image('3D Surface Plots of ENS difference and Ingroup JSD with respect to Time.pdf')








In [808]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Side-by-side 3D surfaces of the mean ENS difference over
# (binned JSD difference, time).
# df_jsd_low and df_jsd_high are the low- and high-information dataframes; both
# must provide the columns 'Time', 'JSD_difference' and 'ENS_difference'.

# Bin edges for low information; each bin is labelled with its midpoint.
bins_low = np.linspace(0, 0.3, 20)
bin_labels_low = np.round((bins_low[:-1] + bins_low[1:]) / 2, 2)

# Bin JSD_difference for low information. The binned column lives on a copy
# (DataFrame.assign) so the raw values in df_jsd_low are preserved and the cell
# can be re-run.
binned_low = df_jsd_low.assign(
    JSD_difference=pd.cut(df_jsd_low['JSD_difference'], bins=bins_low, labels=bin_labels_low)
)
pivot_table_low = binned_low.pivot_table(index='Time', columns='JSD_difference', values='ENS_difference', aggfunc='mean')

# Bin edges for high information; each bin is labelled with its midpoint.
bins_high = np.linspace(0, 0.4, 20)
bin_labels_high = np.round((bins_high[:-1] + bins_high[1:]) / 2, 2)

# Bin JSD_difference for high information (again on a copy).
binned_high = df_jsd_high.assign(
    JSD_difference=pd.cut(df_jsd_high['JSD_difference'], bins=bins_high, labels=bin_labels_high)
)
pivot_table_high = binned_high.pivot_table(index='Time', columns='JSD_difference', values='ENS_difference', aggfunc='mean')

# Build the grids from the bins that are actually present as pivot columns.
# pivot_table drops all-NaN columns by default, so `columns.categories` (every
# bin label) can be longer than the Z grid when a bin is empty.
X_low, Y_low = np.meshgrid(pivot_table_low.columns.astype(float), pivot_table_low.index)
Z_low = pivot_table_low.values

X_high, Y_high = np.meshgrid(pivot_table_high.columns.astype(float), pivot_table_high.index)
Z_high = pivot_table_high.values

# Create subplots: 1 row, 2 columns
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'surface'}, {'type': 'surface'}]],
    horizontal_spacing=0,
    subplot_titles=(
        'Low Information',
        'High Information'
    )
)

# Low-information surface. It is coloured by time like the right-hand panel,
# so the single 'Time' colourbar describes both surfaces.
fig.add_trace(go.Surface(
    z=Z_low,
    x=X_low[0],
    y=Y_low[:, 0],
    colorscale='Viridis',
    showscale=False,
    surfacecolor=Y_low), row=1, col=1)

# High-information surface, coloured by time; carries the colourbar.
fig.add_trace(go.Surface(
    z=Z_high,
    x=X_high[0],
    y=Y_high[:, 0],
    colorscale='Viridis',
    showscale=True,
    surfacecolor=Y_high,
    colorbar=dict(title='Time', x=1.05)
), row=1, col=2)

# Both 3D scenes share the same camera, axis titles and aspect mode.
scene_layout = dict(
    camera=dict(
        eye=dict(x=1, y=-2, z=2),  # camera position
        center=dict(x=0, y=0, z=0),  # look at the origin
        up=dict(x=0, y=0, z=1)  # z axis points up
    ),
    xaxis_title='JSD Difference (Sqrt transformed)',
    yaxis_title='Time',
    zaxis_title='Absolute ENS Difference (Sqrt transformed)',
    aspectmode='cube'
)

fig.update_layout(
    title='3D Surface Plots of ENS difference and JSD Difference over Time',
    autosize=False,
    width=1600,
    height=800,
    margin=dict(l=0, r=0, b=0, t=100),
    scene=scene_layout,
    scene2=scene_layout
)

# Show the figure
fig.show()

# Save the figure as PDF
fig.write_image('3D Surface Plots of ENS difference and JSD Difference with respect to Time.pdf')








In [802]:
# One dict per entry of matrices_list, mapping each key of that entry to the
# stationary distribution computed from its matrix.
stationary_distirbution_list = [
    {
        species: calculate_stationary_distribution(matrix)
        for species, matrix in matrix_pair.items()
    }
    for matrix_pair in matrices_list
]

In [803]:
# Flatten all stationary distributions (in list order, then dict order) into a
# single list of their calculate_information values.
information_list2 = [
    calculate_information(distribution)
    for stationary_distribution_dict in stationary_distirbution_list
    for distribution in stationary_distribution_dict.values()
]

In [804]:
# Histogram of the calculate_information values of all stationary distributions.
px.histogram(information_list2)

In [630]:
# Median calculate_information value across all stationary distributions.
np.median(information_list2)

0.0959617110416785

In [631]:
# calculate_information value of pi5, the starting distribution used to build
# df_jsd_high.
calculate_information(pi5)

0.2780719051126379