In [None]:
import os 
import sys
import pandas as pd
import numpy as np

In [None]:
# Make the project's `statistics_collection` sources importable, then load the
# analysis helpers used throughout this notebook under the alias `sa`.
# NOTE(review): this is an absolute, machine-specific path; the notebook cannot run
# elsewhere without editing it — consider a path relative to the repository.
sys.path.append('/nas/groups/iber/Users/Federico_Carrara/Statistics_Collection/EpiStats/src/statistics_collection/')
import StatsAnalytics as sa

In [None]:
# CSV files of per-cell statistics, one per sample (bladder, esophagus,
# intestine villus, lung bronchiole, lung), relative to the notebook directory.
paths = [
    '../outputs/outputs_v2/output_bladder_control_curated_segmentation_s_10_e_6_d_8/cell_stats/stats_dataset_bladder.csv',
    '../outputs/outputs_v2/output_esophagus_Z2_curated_crop_s_10_e_6_d_8/cell_stats/stats_dataset_esophagus.csv',
    '../outputs/outputs_v2/output_intestine_sample2_b_curated_segmentation_relabel_seq_s_10_e_6_d_8/cell_stats/stats_dataset_intestine_villus.csv',
    '../outputs/outputs_v2/output_lung_new_sample_b_curated_segmentation_central_crop_relabel_seq_s_10_e_6_d_8/cell_stats/stats_dataset_lung_bronchiole.csv',
    '../outputs/outputs_v2/output_lung_pseudostratified_from_harold_s_10_e_6_d_8/cell_stats/stats_dataset_lung.csv'
]
# Build the statistics table from the files listed above.
# NOTE(review): presumably a single DataFrame combining all files (later cells
# index it by a 'tissue' column) — confirm against `sa.prepare_df`.
cell_stats_df = sa.prepare_df(paths)
# Displays the full table as the cell output.
cell_stats_df

In [None]:
# Run outlier detection on `cell_stats_df`, modifying it in place, with a
# quantile level of 0.01.
# NOTE(review): later cells read an 'is_outlier' column, presumably added by
# this call — confirm against `sa.detect_outliers`.
sa.detect_outliers(cell_stats_df, quantile_level=0.01, inplace=True)

In [None]:
# Show only the columns relevant for checking the outlier flags.
cell_stats_df.loc[:, ['tissue', 'volume', 'is_outlier']]

In [None]:
# Keep the numerical part of the statistics, as selected by `sa.extract_numerical`,
# and display it.
numerical_cell_stats_df = sa.extract_numerical(cell_stats_df)
numerical_cell_stats_df

In [None]:
# Standardize the numerical statistics with `sa.standardize` and display the result.
# NOTE(review): the exact scaling applied is not visible here — confirm in `sa.standardize`.
std_cell_stats_df = sa.standardize(numerical_cell_stats_df)
std_cell_stats_df

In [None]:
# Features handed to the PCA.
numerical_features = [
    'area', 'volume',
    'isoperimetric_ratio', 'num_neighbors', 
    'elongation'
]
# `apply_PCA` returns three values; only the second one is kept (as `aaa`).
# NOTE(review): the positional arguments `4` and `False` are presumably the number
# of components and a flag — confirm against `sa.apply_PCA`'s signature.
_ , aaa, _ = sa.apply_PCA(std_cell_stats_df, numerical_features, 4, False)
# Inspect the first element of the kept result.
aaa[0]

## Test Aboav-Weaire law

In [None]:
# Toy dataset: three bladder cells followed by two esophagus cells.
tissues = ['bladder', 'bladder', 'bladder', 'esophagus', 'esophagus']
ids = [1, 2, 3, 1, 2]
# Only the second bladder cell is marked for exclusion.
exclude_cells = [False, True, False, False, False]
# For every cell: the slices it appears in and, per slice, its 2D neighbours.
slices = [
    [0, 1, 2],
    [0, 1, 2],
    [1, 2],
    [0, 1, 2],
    [0, 1],
]
neighs = [
    [[2], [2, 3], [3]],
    [[1], [1], [3]],
    [[1], [1, 2]],
    [[2], [2], []],
    [[1], [1]],
]
# Neighbour counts per slice, mirroring the nesting of `neighs`.
num_neighs = [list(map(len, per_slice_neighs)) for per_slice_neighs in neighs]

test_df = pd.DataFrame({
    'cell_ID': ids,
    'tissue': tissues,
    'exclude_cell': exclude_cells,
    'slices': slices,
    'num_neighbors_2D': num_neighs,
    'neighbors_2D': neighs,
})
print(test_df)
# Run the private 2D Aboav-law helper on the toy data with logging enabled.
sa._get_aboav_law_2D_stats(test_df, 1, show_logs=True)

## Test exclude neighbors

In [None]:
# Toy dataset: three bladder cells followed by two esophagus cells.
tissues = ['bladder', 'bladder', 'bladder', 'esophagus', 'esophagus']
ids = [1, 2, 3, 1, 2]
exclude_cells = [False] * 5
# The third bladder cell is the only one flagged as an outlier.
outliers = [False, False, True, False, False]
# For every cell: the slices it appears in and, per slice, its 2D neighbours.
slices = [
    [0, 1, 2],
    [0, 1, 2],
    [1, 2],
    [0, 1, 2],
    [0, 1],
]
neighs = [
    [[2], [2, 3], [3]],
    [[1], [1], [3]],
    [[1], [1, 2]],
    [[2], [2], []],
    [[1], [1]],
]
num_neighs = [list(map(len, per_slice_neighs)) for per_slice_neighs in neighs]
# 3D neighbourhoods and their sizes.
neighbors_3D = [[2, 3], [1, 3], [1, 2], [2], [1]]
num_neighbors_3D = [len(cell_neighbors) for cell_neighbors in neighbors_3D]

test_df = pd.DataFrame({
    'cell_ID': ids,
    'tissue': tissues,
    'exclude_cell': exclude_cells,
    'slices': slices,
    'num_neighbors_2D': num_neighs,
    'neighbors_2D': neighs,
    'num_neighbors': num_neighbors_3D,
    'neighbors': neighbors_3D,
    'is_outlier': outliers
})
print(test_df)

# Apply the private outlier-exclusion helper and display what it returns.
out_test_df = sa._exclude_outliers(test_df)
out_test_df

## Test confidence intervals on fitted parameters

In [None]:
from scipy import stats

# Reproducible toy data: ten random integer pairs in [1, 9].
np.random.seed(1234)
x = np.random.randint(1, 10, 10)
y = np.random.randint(1, 10, 10)
n = len(x)

# Linear and quadratic least-squares fits, each returned with its covariance matrix.
fits = [np.polyfit(x, y, deg, cov=True) for deg in (1, 2)]
coeff_sets = [coeffs for coeffs, _cov in fits]
# Standard errors are the square roots of the covariance diagonal.
std_err_sets = [np.sqrt(cov.diagonal()) for _coeffs, cov in fits]
print(coeff_sets)
print(std_err_sets)
# Two-sided 95% Student-t quantiles for n - 2 degrees of freedom (the linear fit).
print(stats.t.ppf((0.025, 0.975), df=n-2))

# Half-width of the 95% confidence interval of every coefficient; the degrees
# of freedom are the sample size minus the number of fitted coefficients.
confint_width_sets = [
    list(stats.t.ppf(0.975, df=n-len(coeffs)) * errs)
    for coeffs, errs in zip(coeff_sets, std_err_sets)
]
# (lower bound, estimate, upper bound) for every coefficient of every fit.
conf_ints = [
    [(c - half_width, c, c + half_width) for c, half_width in zip(coeffs, widths)]
    for coeffs, widths in zip(coeff_sets, confint_width_sets)
]
# Rounded half-widths for the quadratic fit.
[round(half_width, 2) for half_width in confint_width_sets[-1]]

## Check Outliers

In [None]:
# Restrict the statistics to the bladder sample.
bladder_df = cell_stats_df.loc[cell_stats_df['tissue'] == 'bladder']
# Row positions (within `bladder_df`) of the cells flagged as outliers.
out_idxs = np.flatnonzero(bladder_df['is_outlier'].values)

In [None]:
import napari
from skimage.io import imread

# Load the processed bladder label image and show it in a new napari viewer.
img = imread('../outputs/outputs_v2/output_bladder_control_curated_segmentation_s_10_e_6_d_8/processed_labels.tif')

viewer = napari.Viewer()
viewer.add_labels(img)

# Add one labels layer per outlier so each can be toggled individually.
# NOTE(review): `out_idxs` holds row positions within `bladder_df` and is used
# here as label values — confirm that positions coincide with label IDs,
# otherwise map them through the table's cell-ID column first.
for idx in out_idxs:
    # Keep the full image shape and zero out everything that is not this label.
    # Boolean indexing (`img[img == idx]`) would instead return a flat 1-D array
    # of repeated label values, which cannot be overlaid on the image.
    cell = np.where(img == idx, img, 0)
    viewer.add_labels(cell, name=f'cell_{idx}')

## Exclude cells touching background in lung sample

In [None]:
import napari
from skimage.io import imread
from scipy import ndimage
from LabelPreprocessing import get_labels_touching_background
import json
from tqdm import tqdm

In [None]:
# Load the processed label image of the pseudostratified lung sample.
lung_img = imread('../outputs/outputs_v2/output_lung_pseudostratified_from_harold_s_10_e_6_d_8/processed_labels.tif')

# Open a new napari viewer and show the labels.
viewer = napari.Viewer()
viewer.add_labels(lung_img)

In [None]:
# Run the background-touch detection on the full lung image; the two returned
# values are bound to `labels` and `counts`.
# NOTE(review): the meaning of the positional arguments `1` and './notebooks/' is
# not visible here — confirm against `LabelPreprocessing.get_labels_touching_background`.
labels, counts = get_labels_touching_background(lung_img, 1, './notebooks/')

In [None]:
# Distinct values present in the label image.
all_labels = np.unique(lung_img)
# Number of labels returned by the background-touch check, then the number of
# distinct values in the image, one per line.
print(len(labels), len(all_labels), sep='\n')

In [None]:
# Crop the image to indices 150-359 along the second axis; the first and third
# axes are kept whole. The result is shown in a new napari viewer.
crop_lung_img = lung_img[:, 150:360, :]
viewer = napari.Viewer()
viewer.add_labels(crop_lung_img)

In [None]:
# Same detection as on the full image, now on the cropped image.
# NOTE(review): `0.1` is presumably the touch threshold and the `None`s disable
# optional outputs — confirm against the function's signature.
labels_crop, counts_crop = get_labels_touching_background(crop_lung_img, 1, None, 0.1, None)

In [None]:
# Display the labels returned for the cropped image.
labels_crop

In [None]:
# Remove labels touching background.
# The labels were detected on the cropped image but are blanked in a copy of the
# full image; `lung_img` itself is left untouched.
clean_lung_img = lung_img.copy()
# One vectorised membership test replaces a full-image comparison per label.
clean_lung_img[np.isin(lung_img, list(labels_crop))] = 0

In [None]:
# Open a new viewer with the original and the cleaned label images as separate
# layers for visual comparison.
viewer = napari.Viewer()
viewer.add_labels(lung_img)
viewer.add_labels(clean_lung_img)

In [None]:
# Take the 2D slice at index 1 of the last axis of the cropped image and add it
# to the viewer opened in the previous cell.
slice_lung_img = crop_lung_img[:, :, 1]
viewer.add_labels(slice_lung_img)

# (Leftover stub: the loop over every index of the last axis lives in a later cell.)


In [None]:
# Inline 2D check: a label "touches the background" when more than `threshold`
# of the pixels on its outer border are background (label 0).
threshold = 0.05

# Find the unique labels in the labeled image
unique_labels = np.unique(slice_lung_img)

# Pad the input labeled image with a layer of background pixels (label 0) so
# that labels lying on the image edge also get a complete outer border
padded_labeled_img = np.pad(slice_lung_img, pad_width=10, mode='constant', constant_values=0)

# Labels exceeding the threshold, and the background fraction measured for every label
labels_touching_background = []
background_touch_counts = {} 

# Iterate through the unique labels, excluding the background label (0).
# Filter by value instead of dropping the first entry: if the slice contains no
# background pixel, the first entry is a real label and must not be skipped.
for label in tqdm(unique_labels[unique_labels != 0], desc="Checking labels touching background: "):
    # Create a binary image for the current label
    binary_img = padded_labeled_img == label

    # Dilate the binary image by one pixel to reach just outside the label
    dilated_binary_img = ndimage.binary_dilation(binary_img)

    # The outer border is what the dilation added (XOR of dilated and original)
    border_binary_img = dilated_binary_img ^ binary_img

    # Fraction of the border pixels that are background (label 0); despite the
    # variable name this is a ratio in [0, 1], not an absolute count
    border_labeled_img = padded_labeled_img[border_binary_img]
    background_touch_count = np.sum(border_labeled_img == 0) / len(border_labeled_img)
    background_touch_counts[label] = background_touch_count

    # Check if the background fraction is greater than the threshold
    if background_touch_count > threshold:
        # Add label to the list of labels touching background
        labels_touching_background.append(label)

# Convert labels list into numpy array
# NOTE(review): uint16 cannot represent label values above 65535 — confirm the
# label range of the images this is run on.
labels_touching_background = np.asarray(labels_touching_background, dtype=np.uint16)

In [None]:
# Collect, over every index of the last axis of the cropped image, the labels
# returned by the background-touch check for that 2D slice. A set is used so
# that a label found in several slices is kept once.
# NOTE(review): the call below passes (image, None, 0.05), whereas the other calls
# in this notebook pass (image, int, path-or-None, threshold) — confirm that 0.05
# lands on the intended parameter.
# NOTE: the loop rebinds the global `slice_lung_img` used by earlier cells.
labels_touching = set()
for z in tqdm(range(crop_lung_img.shape[2])):
    slice_lung_img = crop_lung_img[:, :, z]
    slice_labels_touching, _ = get_labels_touching_background(slice_lung_img, None, 0.05)
    labels_touching.update(list(slice_labels_touching))

In [None]:
# Run the detection once on the whole cropped volume.
# NOTE: this rebinds `labels_touching` (a set built slice by slice in the preceding
# cell) and `counts` (first assigned by the full-image call further up).
# NOTE(review): the meaning of the positional arguments `2`, `None` and `0.05` is
# not visible here — confirm against the function's signature.
labels_touching, counts = get_labels_touching_background(crop_lung_img, 2, None, 0.05)

In [None]:
# Remove labels touching background.
# Works on a copy, so `lung_img` itself is left untouched.
clean_lung_img = lung_img.copy()
# One vectorised membership test replaces a full-image comparison per label.
# `list(...)` makes this work whether `labels_touching` is the array returned by
# the 3D call or the set built slice by slice (np.isin does not accept a set).
clean_lung_img[np.isin(lung_img, list(labels_touching))] = 0

In [None]:
# Open a new viewer with the original and the cleaned label images as separate
# layers for visual comparison.
viewer = napari.Viewer()
viewer.add_labels(lung_img)
viewer.add_labels(clean_lung_img)

In [None]:
# Blank out, in a copy of the full image, every label that the full-image check
# reported as touching the background.
clean_lung_img = lung_img.copy()
for label in labels:
    # Assignment (`=`), not comparison: `== 0` only built a throwaway boolean
    # array and left the image unchanged.
    clean_lung_img[lung_img == label] = 0

# Add the cleaned image as a new layer of the most recently opened viewer.
viewer.add_labels(clean_lung_img)