In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import os
import seaborn as sns
import napari
from skimage.io import imread
from scipy import stats
from copy import deepcopy

In [None]:
# Show the current working directory: the relative './output_...' CSV paths
# used when loading the data are resolved against it.
os.getcwd()

In [None]:
# Load the data
# The per-cell statistics of each tissue sample are read from separate CSV files.
# All active paths are relative to the current working directory.
# index_col=0 makes the first CSV column the row index of each table.

# # BLADDER SAMPLES (per-sample tables, currently disabled)
# path_to_data = ""
# # path_to_data = r"N:\Users\Federico_Carrara\Cell_Meshes\for_cell_statistics\bladder_samples"

# bladder_MBC19_bottom_df = pd.read_csv(
#     'output_MBC19_S5_St1_Crop_GFP_clean_bottom_s_10_e_6_d_8/cell_stats/stats_dataset_bladder.csv',
#     index_col=False
# )

# bladder_MBC19_top_df = pd.read_csv(
#     'output_MBC19_S5_St1_Crop_GFP_clean_top_s_10_e_6_d_8/cell_stats/stats_dataset_bladder.csv',
#     index_col=False
# )

# bladder_MBC20_bottom_df = pd.read_csv(
#     'output_MBC20_S1e_0a_St2_GFP_clean_bottom_s_10_e_6_d_8/cell_stats/stats_dataset_bladder.csv',
#     index_col=False
# )

# bladder_MBC20_top_df = pd.read_csv(
#     'output_MBC20_S1e_0a_St2_GFP_clean_top_s_10_e_6_d_8/cell_stats/stats_dataset_bladder.csv',
#     index_col=False
# )

# BLADDER SAMPLE (the table that is actually used)
bladder_df = pd.read_csv(
    './output_bladder_control_curated_segmentation_s_10_e_6_d_8/cell_stats/bladder_control_curated_segmentation.csv',
    index_col=0
)


# LUNG SAMPLE
# path_to_data = "/nas/groups/iber/Users/Federico_Carrara/Cell_Meshes/for_cell_statistics/lung_samples_s_10_e_2_d_4/"
# path_to_data = r"N:\Users\Federico_Carrara\Cell_Meshes\for_cell_statistics\lung_samples_s_10_e_2_d_4"

lung_df = pd.read_csv(
    './output_lung_new_sample_b_curated_segmentation_central_crop_relabel_seq_s_10_e_6_d_8/cell_stats/stats_dataset_lung_bronchiole.csv',
    index_col=0
)


# INTESTINE SAMPLE
# path_to_data = "/nas/groups/iber/Users/Federico_Carrara/Cell_Meshes/for_cell_statistics/intestine_samples_s_10_e_2_d_4/"
# path_to_data = r"N:\Users\Federico_Carrara\Cell_Meshes\for_cell_statistics\intestine_samples_s_10_e_2_d_4"

intestine_df = pd.read_csv(
    './output_intestine_sample2_b_curated_segmentation_relabel_seq_s_10_e_6_d_8/cell_stats/stats_dataset_intestine_villus.csv',
    index_col=0
)

In [None]:
# Stack the per-tissue tables into one long table with a fresh 0..n-1 index.
# An earlier version of this cell also stacked four per-sample bladder tables
# (bladder_MBC19/MBC20, top and bottom) and added "tissue_id"/"sample_id"
# columns built from the table lengths; both are disabled, and the cells that
# follow read the tissue label from the "tissue" column of the loaded tables.
frames_to_stack = [bladder_df, lung_df, intestine_df]
cell_stats_df = pd.concat(frames_to_stack, axis=0, ignore_index=True)
cell_stats_df

In [None]:
# Rename the 'area' column to 'surface_area'.
# `rename` returns a new frame and ignores a missing 'area' label, so this cell
# can be re-run safely (assigning a new column and then dropping 'area' in
# place raised a KeyError on the second run). The renamed column keeps its
# original position in the table.
cell_stats_df = cell_stats_df.rename(columns={"area": "surface_area"})
cell_stats_df

In [None]:
# # Delete cells with 0 neighbors
# cell_stats_df = cell_stats_df.drop(index=cell_stats_df[cell_stats_df.neighbors == 0].index)
# cell_stats_df.shape

In [None]:
# Delete cells whose volume is lower than the 5% quantile of their own tissue.
# Every mask below is built on the index of `cell_stats_df` itself, so no
# implicit re-alignment between differently indexed Series is involved.
tissues = cell_stats_df['tissue'].unique()
is_too_small = pd.Series(False, index=cell_stats_df.index)
for tissue in tissues:
    in_tissue = cell_stats_df['tissue'] == tissue
    # NaN volumes are left out of the quantile; they are never flagged for
    # removal either, because a comparison with NaN is always False.
    tissue_volumes = cell_stats_df.loc[in_tissue, 'volume'].dropna()
    lower_threshold = np.quantile(tissue_volumes, 0.05)
    is_too_small |= in_tissue & (cell_stats_df['volume'] < lower_threshold)
cell_stats_df = cell_stats_df.loc[~is_too_small]
cell_stats_df.shape

In [None]:
# Number of samples and number of samples for each tissue
print(np.unique(cell_stats_df["tissue"], return_counts=True))
print(len(cell_stats_df))

In [None]:
# Make a new df without the columns that are not used in the analysis below.
# 'cell_ID' and 'tissue' are kept on purpose: later cells need them.
# `drop` already returns a new DataFrame, so no copy of `cell_stats_df` is
# needed beforehand.
numerical_cell_stats_df = cell_stats_df.drop(columns=[
    'file_name', 'mesh_dir', 'exclude_cell',
    'neighbors', 'principal_axes', 'contact_area_fraction', 'contact_area',
    'contact_area_distribution', 'mean_contact_area', 'total_contact_area',
])
print(numerical_cell_stats_df.columns, numerical_cell_stats_df.shape)

# remove the rows that contain a NaN/None in any of the remaining columns
numerical_cell_stats_df = numerical_cell_stats_df.dropna()
print(numerical_cell_stats_df.shape)

# features that enter the correlation analysis and the PCA
numeric_features = [
    'volume', 'num_neighbors', 'elongation',
    'isoperimetric_ratio', 'surface_area'
]

# standardize the values (zero mean, unit variance per feature); the
# non-feature columns are carried over unchanged
scaler = StandardScaler()
scaled_values = scaler.fit_transform(numerical_cell_stats_df[numeric_features].values)
scaled_cell_stats_df = numerical_cell_stats_df.copy()
scaled_cell_stats_df[numeric_features] = scaled_values
scaled_cell_stats_df

In [None]:
# Data exploration: pairwise correlation between the standardized features
corr = scaled_cell_stats_df.loc[:, numeric_features].corr()

heatmap_options = dict(
    vmin=-1, vmax=1, center=0,
    cmap='viridis',
    square=True,
    cbar_kws={"shrink": .5},
)
ax = sns.heatmap(corr, **heatmap_options)

# Human-readable tick labels, e.g. "isoperimetric_ratio" -> "Isoperimetric Ratio"
features = []
for col in corr.columns:
    features.append(col.replace("cell_", "").replace("_", " ").title())

ax.set_xticklabels(features, rotation=45, horizontalalignment='right', fontsize=12)
ax.set_title("Correlation Matrix", fontsize=24)
ax.set_yticklabels(features, fontsize=12)

In [None]:
# Compute principal components
# Fit a 2-component PCA on the standardized features. `fit` returns the
# estimator itself, so `pca_out` and `pca` refer to the same fitted object.
pca = PCA(n_components=2)
pca_out = pca.fit(scaled_cell_stats_df[numeric_features])

In [None]:
# Analyze the PCA components to see their composition (one bar per feature).
# The figure and its two stacked axes are created explicitly with their own
# size: this avoids the detached, unused `plt.Figure()` object and does not
# change the global rcParams figure size for the rest of the notebook.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(5, 9))

features = [col.replace("cell_", "").replace("_", " ").title() for col in numeric_features]

# First principal component: tick labels hidden, they are shown on the lower panel
sns.barplot(
    x=features,
    y=pca_out.components_[0],
    ax=ax1
)
ax1.set_title("PC1 loadings", fontsize=22)
ax1.tick_params(axis='x', which='both', bottom=False, labelbottom=False)

# Second principal component
sns.barplot(
    x=features,
    y=pca_out.components_[1],
    ax=ax2
)
ax2.set_title("PC2 loadings", fontsize=22)
ax2.tick_params(axis='x', labelrotation=90, labelsize=20)

In [None]:
# Fraction of the total variance explained by each of the two fitted components
pca_out.explained_variance_ratio_

In [None]:
# Scatter plot of the cells projected onto the two principal components
pc_data = pca_out.transform(scaled_cell_stats_df[numeric_features])

# Each tissue gets a fixed position on the colormap
tissue_to_float = {'bladder': 0, 'lung_bronchiole': 0.5, 'intestine_villus': 1}
tissue_ids = [tissue_to_float[name] for name in scaled_cell_stats_df['tissue']]

fig, ax = plt.subplots(figsize=(20, 8))
scatter = ax.scatter(x=pc_data[:, 0], y=pc_data[:, 1], c=tissue_ids, cmap='viridis')

ax.set_title("Principal Components Scatterplot", fontsize=28)
ax.set_xlabel(f"PC1, explained variance = {round(pca_out.explained_variance_ratio_[0], 3)}", fontsize=18)
ax.set_ylabel(f"PC2, explained variance = {round(pca_out.explained_variance_ratio_[1], 3)}", fontsize=18)

# One legend entry per colour value; the labels follow the insertion order of
# `tissue_to_float`, whose values are ascending.
legend_handles, _ = scatter.legend_elements()
legend_labels = [f"Tissue: {name}" for name in tissue_to_float]
ax.legend(handles=legend_handles, labels=legend_labels, loc="upper right", fontsize=22)

plt.show()

### Plots of the single features

In [None]:
# uom = '\u00B5m' + '\u00B2'
# plot_cell_stats_distrib("cell_surface_area", uom, "../Cell_Meshes/for_cell_statistics/data_analysis/plots/")

# uom = '\u00B5m' + '\u00B3'
# plot_cell_stats_distrib("cell_volume", uom, "../Cell_Meshes/for_cell_statistics/data_analysis/plots/")

# uom = None
# plot_cell_stats_distrib("cell_isoperimetric_ratio", uom, "../Cell_Meshes/for_cell_statistics/data_analysis/plots/")

# uom = None
# plot_cell_stats_distrib("cell_nb_of_neighbors", uom, "../Cell_Meshes/for_cell_statistics/data_analysis/plots/")

# uom = None
# plot_cell_stats_distrib("cell_elongation", uom, "../Cell_Meshes/for_cell_statistics/data_analysis/plots/")

In [None]:
def grid_plot_cell_stats_distrib(df, unit_of_measures=None, save_dir=None, show=False):
    """Draw a grid of distribution plots (KDE + rug): one row per tissue, one column per feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'cell_ID' and 'tissue'. Every other column is
        plotted as one grid column, in the order of the frame.
    unit_of_measures : sequence of (str or None), optional
        Unit appended to the x-axis label of each plotted column, matched by
        position. ``None`` entries, missing entries, or ``None`` for the whole
        argument mean "no unit".
    save_dir : str, optional
        If given, the figure is saved there as 'grid_kdeplot.jpg'; the
        directory is created when missing.
    show : bool
        If True the figure is shown, otherwise it is closed.
    """
    # Cell type displayed next to each known tissue. The lookup is by name, so
    # it does not depend on the order in which the tissues are iterated
    # (a positional zip over alphabetically sorted names swapped the lung and
    # intestine labels).
    tissue_to_type = {
        'bladder': "Transitional",
        'lung_bronchiole': "Simple Cuboidal",
        'intestine_villus': "Simple Columnar",
    }

    # Store the tissue labels, then keep only the columns to plot
    tissue_ids = df['tissue'].values
    numeric_df = df.drop(columns=['cell_ID', 'tissue'])
    columns = list(numeric_df.columns)
    units = [] if unit_of_measures is None else list(unit_of_measures)

    # One row per tissue, in order of first appearance in `df`
    tissues = df['tissue'].unique()

    # At least 3 colors so that up to three tissues keep the usual palette
    colors = sns.color_palette('viridis', max(3, len(tissues)))

    # Upper x-limit per column (max over all tissues plus 10%), so that the
    # plots of one grid column share the same x-axis
    x_upper = {}
    for column in columns:
        max_x = numeric_df[column].max()
        x_upper[column] = max_x + 0.1*max_x

    fig = plt.figure(figsize=(24, 12))

    for i, tissue in enumerate(tissues):
        # Subset the data for the current tissue
        data = numeric_df[tissue_ids == tissue]
        is_last_row = i == len(tissues) - 1

        for j, column in enumerate(columns):
            max_x = x_upper[column]
            unit_of_measure = units[j] if j < len(units) else None

            # Subplots are numbered row by row, starting from 1
            ax = fig.add_subplot(len(tissues), len(columns), i*len(columns) + j + 1)

            # Kernel density estimate (clipped to the shared x-range) plus a rug of the raw values
            sns.kdeplot(data=data, x=column, fill=True, color=colors[i], ax=ax, clip=(0.0, max_x))
            sns.rugplot(data=data, x=column, height=0.125, color=colors[i], ax=ax)

            # The row title is set once, on the first plot of the row
            if j == 0:
                title = tissue.title()
                if tissue in tissue_to_type:
                    title = f'{title}: {tissue_to_type[tissue]}'
                ax.set_title(title, fontsize=20)

            xlab = column.replace("_", " ").title()
            if unit_of_measure:
                xlab += f" ({unit_of_measure})"
            ax.set_xlabel(xlab, fontsize=20)
            ax.set_ylabel('Density', fontsize=16)

            # Remove y-axis ticks and apply the shared x-range
            ax.set_yticks([])
            ax.set_xlim([0, max_x])

            # Remove the top/right spines of THIS axis only. Without `ax`,
            # despine acts on every axis of the current figure and makes the
            # bottom spines hidden below visible again.
            sns.despine(ax=ax, left=False, bottom=False, top=True, right=True)

            # Only the last row keeps its x-axis
            if not is_last_row:
                ax.set_xticks([])
                ax.set_xlabel("")
                ax.spines['bottom'].set_visible(False)

    fig.suptitle("Morphological cell statistics comparison", fontsize=24)

    # Save the figure
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(os.path.join(save_dir, "grid_kdeplot.jpg"), bbox_inches='tight', dpi=150)

    # Show the plot
    if show:
        plt.show()
    else:
        plt.close(fig)


In [None]:
# The two identifier columns come first, followed by the features in the
# order in which they should appear as grid columns
ordered_columns = ['cell_ID', 'tissue', 'surface_area', 'volume', 'isoperimetric_ratio', 'elongation']
grid_plot_df = numerical_cell_stats_df.loc[:, ordered_columns].copy()

# Units are matched by position to the four feature columns (None = no unit)
grid_plot_cell_stats_distrib(
    grid_plot_df,
    unit_of_measures=['\u00B5m' + '\u00B2', '\u00B5m' + '\u00B3', None, None],
    save_dir="../images",
    show=True,
)

In [None]:
# Discrete plot of number of neighbors
def barplot_num_neighbors(df, save_dir=None, show=True):
    """Draw one bar plot per tissue with the counts of each 'num_neighbors' value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'tissue' and 'num_neighbors'.
    save_dir : str, optional
        If given, the figure is saved there as 'num_neighbors_barplot.jpg';
        the directory is created when missing.
    show : bool
        If True the figure is shown, otherwise it is closed.
    """
    column = "num_neighbors"

    # Cell type displayed next to each known tissue; looked up by name so that
    # the labels stay correct whatever tissues (and order) `df` contains.
    tissue_to_type = {
        'bladder': "Transitional",
        'lung_bronchiole': "Simple Cuboidal",
        'intestine_villus': "Simple Columnar",
    }

    # One panel per tissue, in order of first appearance in `df`
    tissues = df['tissue'].unique()

    # At least 3 colors so that up to three tissues keep the usual palette
    colors = sns.color_palette('viridis', max(3, len(tissues)))

    fig = plt.figure(figsize=(20, 5))
    for i, tissue in enumerate(tissues):
        ax = fig.add_subplot(1, len(tissues), i + 1)

        # Subset the data for the current tissue
        data = df.loc[df['tissue'] == tissue, column]

        # Count the frequency of each unique value
        unique_values, counts = np.unique(data, return_counts=True)

        # Draw explicitly on this panel instead of relying on the current axes
        sns.barplot(x=unique_values, y=counts, color=colors[i], ax=ax)

        # Set title and axes labels
        title = tissue.title()
        if tissue in tissue_to_type:
            title = f'{title}: {tissue_to_type[tissue]}'
        ax.set_title(title, fontsize=20)
        ax.set_xlabel(column.replace("_", " ").title(), fontsize=20)
        ax.set_ylabel('Counts', fontsize=16)

        # Remove the top and right spines of this panel
        sns.despine(ax=ax, left=False, bottom=False, top=True, right=True)

    # Save the figure
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(os.path.join(save_dir, "num_neighbors_barplot.jpg"), bbox_inches='tight', dpi=150)

    # Show the plot
    if show:
        plt.show()
    else:
        plt.close(fig)
    

In [None]:
# Neighbor-count bar plots for the NaN-free table; the figure is also saved under save_dir.
# NOTE(review): the other plotting cells save to './images' or '../images' - confirm this path is intended.
barplot_num_neighbors(numerical_cell_stats_df, save_dir="../master_thesis_docs/IMGS/for_group_meeting/", show=True)

## Lewis' Law plots

Lewis' law states that the average apical area $\bar{A}_n$ of cells with $n$ neighbors is linearly related to $n$. $\newline$
In particular we have the following relation:
$$ \frac{\bar{A}_n}{\bar{A}} = \frac{n - 2}{4} $$
In 3D we could have a similar relation for volumes, namely:
$$ \frac{\bar{V}_n}{\bar{V}} \sim n $$


In [None]:
def lewis_law_plots(
        df,
        save_dir,
        show=True,
        feature='volume',
        fit_degrees=(1, 2)
) -> None:
    """Plot, per tissue, the normalized mean of `feature` against the number of neighbors.

    For every tissue the cells are grouped by 'num_neighbors'; the group mean
    divided by the mean over the whole tissue is drawn with error bars equal
    to the standard error of the normalized values. One polynomial per entry
    of `fit_degrees` is fitted to the group means and drawn on top.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'tissue', 'num_neighbors' and `feature`.
    save_dir : str or None
        If truthy, the figure is saved there as 'lewis_law_{feature}_plots.jpg';
        the directory is created when missing.
    show : bool
        If True the figure is shown, otherwise it is closed.
    feature : str
        Column whose normalized group means are plotted.
    fit_degrees : sequence of int
        Degrees of the polynomials to fit; any number of entries is accepted.
        A degree is skipped for a tissue that has too few distinct neighbor
        counts to determine that polynomial.
    """
    # Cell type displayed next to each known tissue; looked up by name so that
    # the labels stay correct whatever tissues (and order) `df` contains.
    tissue_to_type = {
        'bladder': "Transitional",
        'lung_bronchiole': "Simple Cuboidal",
        'intestine_villus': "Simple Columnar",
    }
    degree_names = {1: 'Linear', 2: 'Quadratic'}
    # (color, linestyle) of the fitted curves, matched by position in `fit_degrees`
    fit_styles = [('red', '--'), ('green', '-.'), ('blue', ':'), ('purple', '-')]
    ylabels = {
        'volume': r'$\bar{V}_n / \bar{V}$',
        'surface_area': r'$\bar{A}_n / \bar{A}$',
    }

    # One panel per tissue, in order of first appearance in `df`
    tissues = df['tissue'].unique()
    colors = sns.color_palette('viridis', len(tissues))

    fig = plt.figure(figsize=(18, 6))
    fig.suptitle(f"Lewis' Law for {feature.replace('_', ' ')}", fontsize=30)
    for i, tissue in enumerate(tissues):
        ax = fig.add_subplot(1, len(tissues), i + 1)

        # Subset the data for the current tissue
        tissue_df = df[df['tissue'] == tissue]

        # Mean of the feature over the whole tissue
        global_avg = tissue_df[feature].mean()

        # Normalized mean and standard error for each value of n.
        # groupby returns the groups sorted by n; a group with a single cell
        # has an undefined (NaN) standard deviation.
        by_n = tissue_df.groupby('num_neighbors')[feature]
        local_avgs = by_n.mean() / global_avg
        std_devs = ((by_n.std() / global_avg) / np.sqrt(by_n.size())).to_numpy()

        x = local_avgs.index.to_numpy().astype(np.int64)
        y = local_avgs.to_numpy()
        # Every integer n between the smallest and the largest observed value
        x_fit = np.arange(x.min(), x.max() + 1)

        # Plot the values and the fitted curves
        ax.errorbar(x, y, yerr=std_devs, fmt='o', color=colors[i], ecolor='grey', capsize=4)
        fit_handles = []
        for k, degree in enumerate(fit_degrees):
            if len(x) <= degree:
                # not enough points to determine a polynomial of this degree
                continue
            polyline = np.poly1d(np.polyfit(x, y, degree))
            color, linestyle = fit_styles[k % len(fit_styles)]
            name = degree_names.get(degree, f'Degree-{degree}')
            handle = ax.plot(
                x_fit, polyline(x_fit),
                color=color, linestyle=linestyle, label=f'{name} fit'
            )[0]
            fit_handles.append(handle)

        # Set title and axes labels
        title = tissue.title()
        if tissue in tissue_to_type:
            title = f'{title}: {tissue_to_type[tissue]}'
        ax.set_title(title, fontsize=20)
        ax.set_xlabel(r'Number of neighbors $(n)$', fontsize=20)
        ax.set_ylabel(
            ylabels.get(feature, f"mean {feature.replace('_', ' ')} / global mean"),
            fontsize=20
        )
        ax.set_xticks(x_fit)
        if fit_handles:
            ax.legend(handles=fit_handles, loc='lower right')

        # Remove the top and right spines of this panel
        sns.despine(ax=ax, left=False, bottom=False, top=True, right=True)

    plt.subplots_adjust(top=0.8)

    # Save the figure
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(os.path.join(save_dir, f"lewis_law_{feature}_plots.jpg"), bbox_inches='tight', dpi=150)

    # Show the plot
    if show:
        plt.show()
    else:
        plt.close(fig)


In [None]:
# Lewis' law plot for the cell volume, computed on the NaN-free table;
# the figure is also saved under './images'
lewis_law_plots(
    numerical_cell_stats_df,
    './images',
    feature='volume'
)

## Plot of number of neighbors vs normalized volume

In [None]:
def volume_neighbors_scatter_plot(
    df,
    save_dir,
    show=True,
    show_legend=False,
) -> None:
    """Scatter plot of the normalized cell volume against the number of neighbors.

    The volume of every cell is divided by the mean volume of ALL cells in
    `df` (tissues pooled together); the points are colored by tissue.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'tissue', 'num_neighbors' and 'volume'.
    save_dir : str or None
        If truthy, the figure is saved there as 'volume_vs_neighbors_plot.jpg';
        the directory is created when missing.
    show : bool
        If True the figure is shown, otherwise it is closed.
    show_legend : bool
        If True a legend with one entry per tissue is added. Off by default,
        which reproduces the figure without a legend.
    """
    # Cell type displayed next to each known tissue in the legend
    tissue_to_type = {
        'bladder': "Transitional",
        'lung_bronchiole': "Simple Cuboidal",
        'intestine_villus': "Simple Columnar",
    }

    # Tissues in order of first appearance, spread evenly over the colormap
    tissues = df['tissue'].unique()
    tissue_to_float = dict(zip(tissues, np.linspace(0, 1, len(tissues))))
    tissue_ids = [tissue_to_float[tissue] for tissue in df['tissue']]

    normalized_volume = df['volume'].values / df['volume'].mean()

    fig, ax = plt.subplots(figsize=(20, 8))

    scatter = ax.scatter(
        x=df['num_neighbors'],
        y=normalized_volume,
        c=tissue_ids,
        cmap='viridis'
    )

    ax.set_xlabel(r'Number of neighbors $(n)$', fontsize=20)
    ax.set_ylabel(r'$V / \bar{V}$', fontsize=20)
    ax.set_title("Volume vs. Number of Neighbors Scatterplot", fontsize=28)

    if show_legend:
        # The legend handles are ordered by color value, i.e. in the same
        # order as `tissues`, because the color values are ascending.
        labels = []
        for name in tissues:
            label = name.title()
            if name in tissue_to_type:
                label = f"{label}: {tissue_to_type[name]}"
            labels.append(label)
        ax.legend(
            handles=scatter.legend_elements()[0],
            labels=labels,
            loc="upper right",
            fontsize=22
        )

    # Save the figure
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(os.path.join(save_dir, "volume_vs_neighbors_plot.jpg"), bbox_inches='tight', dpi=150)

    # Show the plot
    if show:
        plt.show()
    else:
        plt.close(fig)

In [None]:
# Volume vs. number of neighbors for the NaN-free table;
# the figure is also saved under './images/'
volume_neighbors_scatter_plot(
    numerical_cell_stats_df,
    './images/'
)

In [None]:
def volume_neighbors_heatmaps(
    df,
    save_dir,
    show=True,
) -> None:
    """Draw, per tissue, a heatmap of the normalized cell volumes grouped by number of neighbors.

    Each heatmap column corresponds to one value of 'num_neighbors' and holds
    the volumes of the cells with that many neighbors, divided by the mean
    volume of the tissue and sorted in ascending order. Shorter columns are
    padded with zeros in front so that all columns have the same length.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'tissue', 'num_neighbors' and 'volume'.
    save_dir : str or None
        If truthy, the figure is saved there as 'volume_neighbors_heatmap.jpg';
        the directory is created when missing.
    show : bool
        If True the figure is shown, otherwise it is closed.
    """
    # Cell type displayed next to each known tissue; looked up by name so that
    # the labels stay correct whatever tissues (and order) `df` contains.
    tissue_to_type = {
        'bladder': "Transitional",
        'lung_bronchiole': "Simple Cuboidal",
        'intestine_villus': "Simple Columnar",
    }

    # One panel per tissue, in order of first appearance in `df`
    tissues = df['tissue'].unique()

    fig = plt.figure(figsize=(18, 6))
    fig.suptitle("Volume vs. Number of Neighbors Heatmaps", fontsize=30)
    for subplot_id, tissue in enumerate(tissues, start=1):
        ax = fig.add_subplot(1, len(tissues), subplot_id)

        # Subset the data for the current tissue
        tissue_df = df[df['tissue'] == tissue]

        # Volumes normalized by the mean volume of the tissue
        normalized_volume = tissue_df['volume'] / tissue_df['volume'].mean()

        # Sorted normalized volumes for each value of n (groupby yields the
        # groups in ascending order of n)
        volume_dict = {
            n: np.sort(volumes.to_numpy())
            for n, volumes in normalized_volume.groupby(tissue_df['num_neighbors'])
        }
        sorted_num_neighbors = list(volume_dict.keys())

        # One matrix row per n; shorter rows keep zeros in front
        max_len = max(len(volumes) for volumes in volume_dict.values())
        volume_matrix = np.zeros((len(volume_dict), max_len))
        for row, volumes in enumerate(volume_dict.values()):
            volume_matrix[row, max_len - len(volumes):] = volumes

        # Plot the heatmap on this panel. The x tick labels are handed to
        # seaborn directly so that there is exactly one label per column;
        # y ticks and labels are switched off.
        sns.heatmap(
            volume_matrix.T,
            cmap='viridis',
            cbar_kws={"shrink": .5},
            xticklabels=sorted_num_neighbors,
            yticklabels=False,
            ax=ax
        )

        title = tissue.title()
        if tissue in tissue_to_type:
            title = f'{title}: {tissue_to_type[tissue]}'
        ax.set_xlabel(r'Number of neighbors $(n)$', fontsize=18)
        ax.set_title(title, fontsize=18)

    plt.subplots_adjust(top=0.85)

    # Save the figure
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(os.path.join(save_dir, "volume_neighbors_heatmap.jpg"), bbox_inches='tight', dpi=150)

    # Show the plot
    if show:
        plt.show()
    else:
        plt.close(fig)

In [None]:
# Per-tissue heatmaps for the NaN-free table; the figure is also saved under './images/'
volume_neighbors_heatmaps(numerical_cell_stats_df, './images/')

## ANOVA/MANOVA tests between the 3 groups

In [None]:
def one_way_anova(df, column, group_col=None):
    """Run a one-way ANOVA of `column` across the groups of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding `column` and the grouping column.
    column : str
        Name of the column whose group means are compared.
    group_col : str, optional
        Column that defines the groups. By default 'tissue' is used when the
        table has it, otherwise the legacy 'tissue_id' column.

    Returns
    -------
    (f_val, p_val)
        F statistic and p-value returned by `scipy.stats.f_oneway`.

    Raises
    ------
    ValueError
        If fewer than two groups are found.

    The mean and standard deviation of every group are printed. Missing
    values of `column` are dropped per group before testing.
    """
    if group_col is None:
        group_col = "tissue" if "tissue" in df.columns else "tissue_id"

    # One sample per group, in order of first appearance; groups are taken
    # from the data instead of a hard-coded list of tissue names
    samples = []
    for name, values in df.groupby(group_col, sort=False)[column]:
        values = values.dropna()
        print(f"{str(name).capitalize()}: mean={values.mean()}, Std={values.std()}")
        samples.append(values)

    if len(samples) < 2:
        raise ValueError(
            f"one-way ANOVA needs at least two groups, found {len(samples)} in '{group_col}'"
        )

    f_val, p_val = stats.f_oneway(*samples)
    return f_val, p_val

In [None]:
# One-way ANOVA across tissues for every morphological feature.
# The column names are the ones present in `cell_stats_df` ('surface_area',
# 'volume', ...); the legacy 'cell_*' names do not exist in this table.
anova_targets = [
    ("Surface Area", "surface_area"),
    ("Volume", "volume"),
    ("Isoperimetric Ratio", "isoperimetric_ratio"),
    ("Elongation", "elongation"),
    ("# neighbors", "num_neighbors"),
]

for label, column in anova_targets:
    fv, pv = one_way_anova(cell_stats_df, column)
    print(f"{label} -> F-value: {fv}, p-value: {pv}")
    print("-------------------------------------------------------------")

## Inspect cells with abnormal number of neighbors

In [None]:
# Open a napari viewer, then load the processed label image of the lung sample
# and add it to the viewer as a labels layer named 'lung_labels'
viewer = napari.Viewer()
# Open the image in napari and investigate
path_to_img_dir = 'output_lung_new_sample_b_curated_segmentation_central_crop_relabel_seq_s_10_e_6_d_8'
img = imread(os.path.join(path_to_img_dir, 'processed_labels.tif'))
viewer.add_labels(img, name='lung_labels')

In [None]:
# Lung: inspect the cells that have no neighbors.
# The cell IDs must come from the same sample as the label image `img`, which
# is the lung sample; IDs taken from another tissue's table would select
# unrelated labels in this image.
# NOTE(review): to inspect another tissue, load its 'processed_labels.tif'
# into `img` and use the matching table here.
filtered_df = lung_df[lung_df['num_neighbors'] == 0]
cell_ids = filtered_df['cell_ID'].values
print(cell_ids)

# Only the first 10 cells are shown, each as its own labels layer
for cell_id in cell_ids[:10]:
    single_cell = img.copy()
    single_cell[single_cell != cell_id] = 0
    viewer.add_labels(single_cell, name=f'cell_{cell_id}')

In [None]:
# Show every lung cell flagged with exclude_cell == True as its own labels layer
is_excluded = lung_df['exclude_cell'] == True
cell_ids = lung_df.loc[is_excluded, 'cell_ID'].values

for cell_id in cell_ids:
    # Copy of the label image in which only the current cell is kept
    single_cell = img.copy()
    single_cell[img != cell_id] = 0
    viewer.add_labels(single_cell, name=f'cell_{cell_id}')