### Non-uniform temporal scaling of developmental processes in mammalian cortex
#### Paolino et al. 2023

This code corresponds to the compositional statistical tests used in Paolino et al. 2023, submitted for review to Nature Communications. Other statistical tests are not described here, as they used external Python and R scripts detailed in the manuscript (see Methods) and reported in Supplementary Statistics Table 1.

In [10]:
# Import relevant packages
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math 
import pandas as pd
import pingouin as pg
import scipy.stats as stats
from scipy.stats import zscore
from sklearn.utils import resample
from skbio.stats.composition import clr, ilr
from statsmodels.multivariate.manova import MANOVA
from statsmodels.formula.api import ols

In [12]:
# Display the installed pandas version (the saved output below shows '1.5.2').
pd.__version__

'1.5.2'

In [None]:
# Import data from the master Excel workbook.
# `sheet` is a placeholder: set it to the name of the worksheet (tab) to load.
sheet = 'name_of_relevant_excel_tab'
comp_df = pd.read_excel('master_file.xlsx', sheet_name=sheet)
# Preview the first rows of the loaded table.
comp_df.head()

In [None]:
# To avoid errors working with 0 percentage/proportion values, a small value is added to all zeros and evenly subtracted from the non-zero values
def adjust_values(series, small_value=0.0001):
    """Replace zeros with a small positive value while preserving the total.

    Every zero entry is set to ``small_value``; the total amount added is then
    subtracted in equal shares from the non-zero entries, so the sum of the
    series is unchanged whenever at least one non-zero entry exists.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to adjust. The input is left unmodified.
    small_value : float, optional
        Amount assigned to each zero entry (default 0.0001).

    Returns
    -------
    pandas.Series
        A new float Series with the same index as ``series``.

    Notes
    -----
    * If every entry is zero there is nothing to subtract from, so the sum
      grows by ``small_value * len(series)``.
    * A non-zero entry smaller than its share of the deduction ends up zero
      or negative; this is not guarded against here.
    """
    # astype() returns a copy, so the caller's data is never mutated, and
    # integer input is promoted to float before a fractional offset is applied.
    adjusted = series.astype(float)

    is_zero = adjusted == 0
    n_zero = int(is_zero.sum())
    if n_zero == 0:
        # Nothing to replace, hence nothing to redistribute.
        return adjusted

    adjusted[is_zero] = small_value

    n_nonzero = len(adjusted) - n_zero
    if n_nonzero > 0:
        # Spread the total added amount evenly over the non-zero entries.
        adjusted[~is_zero] -= small_value * n_zero / n_nonzero
    return adjusted

# Zero-adjust the 'Prop_Ctx' values separately within each (Species, Cortex)
# group and store the result in a new 'prop_cells_reg' column.
props_by_group = comp_df.groupby(['Species', 'Cortex'])['Prop_Ctx']
comp_df['prop_cells_reg'] = props_by_group.transform(adjust_values)
comp_df.head()

In [None]:
# Isometric log-ratio (ILR) transform of the adjusted proportions, computed
# separately for every (Species, Cortex) group.
# A composition with m parts yields m - 1 ILR coordinates. The ILR_p* columns
# are created from the length of each result, so compositions of any size are
# handled without editing this cell.
ilr_rows = []

for name, data in comp_df.groupby(['Species','Cortex']):
    prop_by_ctxandlayer = data['prop_cells_reg'].values
    # Print each group's total as a quick check on the input composition.
    print(name, np.sum(prop_by_ctxandlayer))
    # atleast_1d keeps the result iterable even if only one coordinate is returned.
    ilr_prop_by_ctxandlayer = np.atleast_1d(ilr(prop_by_ctxandlayer))
    row = {'Species': name[0], 'Cortex': name[1]}
    for part_no, coordinate in enumerate(ilr_prop_by_ctxandlayer, start=1):
        row[f'ILR_p{part_no}'] = coordinate
    ilr_rows.append(row)

# Convert to a pandas dataframe (one row per group; columns Species, Cortex, ILR_p1, ...)
ilr_dfs = pd.DataFrame(ilr_rows)
ilr_dfs