## Stats for OS number

In [1]:
import numpy as np
import pandas as pd

import sys
sys.path.append("../../../../../../01_python_definitions")
from fluorescent_microscopy_analysis import asterics

In [2]:
path = "../../measurements/cpfl/"
measurements = pd.read_csv(path + "08-features-processed.csv")
measurements

Unnamed: 0,image_id,label,age,genotype,maximum,mean,median,minimum,sigma,sum,...,equivalent_spherical_radius_rescaled,feret_diameter_rescaled,perimeter_2d_rescaled,major_axis_length_2d_rescaled,minor_axis_length_2d_rescaled,surface_area_rescaled,bbox_volume_rescaled,convex_volume_rescaled,volume_rescaled,summed_volume
0,0,99,8,cpfl,435.323232,231.815654,215.544626,136.808081,67.478992,50966.181818,...,0.969158,2.798536,6.42257,2.481628,1.372441,19.03757,17.155822,8.109028,5.67084,563.030645
1,1,93,8,cpfl,460.709677,224.380055,206.129242,126.634409,70.985109,56809.064516,...,1.051165,3.16222,7.325543,2.841319,1.473626,24.355526,21.933224,10.160933,6.959598,648.556847
2,2,62,8,cpfl,510.370968,252.925858,232.679688,141.645161,81.942129,46226.919355,...,0.922842,2.613556,5.670415,2.152691,1.295106,16.577307,13.719543,6.730957,4.8232,300.083068
3,3,290,14,cpfl,608.186207,293.062064,266.616379,157.948276,99.985899,94890.186207,...,1.104997,3.527489,8.747193,3.531725,1.585332,27.055333,25.996121,12.82161,8.65755,3659.193719
4,4,216,14,cpfl,648.180556,298.264152,268.251157,154.018519,107.243126,104264.393519,...,1.138501,3.563166,9.041931,3.52464,1.744031,27.524135,29.736536,13.43422,9.092604,3710.954257
5,5,135,14,cpfl,546.748148,272.073887,250.41428,153.17037,86.006519,84179.6,...,1.076958,3.493253,8.9495,3.56804,1.559667,27.394193,29.199424,13.371972,8.43555,2371.852523
6,6,145,20,cpfl,487.565517,243.963166,224.548761,130.993103,77.417826,106942.248276,...,1.243204,4.012502,10.161918,3.974969,1.837445,36.029479,39.587794,18.379267,11.898974,2115.03803
7,7,164,20,cpfl,442.237805,228.048594,211.738281,126.243902,69.436005,77319.286585,...,1.174419,3.703005,8.76723,3.694451,1.541159,28.744134,31.292334,13.436362,9.488528,1804.643293
8,8,155,20,cpfl,359.148387,191.621903,179.647253,108.348387,52.7696,78788.929032,...,1.185435,3.809545,9.842299,3.811786,1.790636,33.458424,35.488406,17.097196,10.85019,2869.171547
9,9,33,245,cpfl,865.575758,368.635598,323.865057,173.484848,155.070912,157599.121212,...,1.143449,3.313591,9.257627,3.302101,1.937421,27.561774,26.845265,14.834386,10.540409,953.964241


### Test for significance

- it is assumed that the variance of the samples are not equal
- genotype is compared withing each age
- independent t test is used

In [5]:
ages = [8, 14, 20, 30, 70, 245]
test_results = []

for i, postnatal_age in enumerate(ages):
    
    age_filtered = measurements[measurements["age"] == postnatal_age]
    
    number_of_labels_cpfl = age_filtered[age_filtered["genotype"] == "cpfl"]["label"]
    number_of_labels_wt = age_filtered[age_filtered["genotype"] == "wt"]["label"]
    
    t_statistic, p_value = ttest_ind(number_of_labels_wt,
                                     number_of_labels_cpfl,
                                     equal_var=False,
                                     alternative="greater")
    
    test_results.append({"age": postnatal_age, "p_value": p_value})
    
test_results = pd.DataFrame(test_results)
test_results

NameError: name 'ttest_ind' is not defined

In [10]:
significant_results = test_results[test_results["p_value"] < 0.05]
significant_results

Unnamed: 0,age,p_value
4,70,0.047701
5,245,0.014898


In [11]:
for postnatal_age in ages:
    
    print(postnatal_age)

8
14
20
30
70
245


In [12]:
def significant_t_tests(dataframe, ages, y, x="age", wt="wt", test="cpfl", hue="genotype"):
    """
    Perform significant t-tests between two groups at different ages.

    Parameters:
    -----------
    dataframe : pandas DataFrame
        The DataFrame containing the data.
    ages : list-like
        A list or array-like object containing the different ages at which tests are conducted.
    y : str
        The column name in the DataFrame representing the measurement of interest.
    x : str, optional
        The column name in the DataFrame representing the age variable (default is 'age').
    wt : str, optional
        The label representing the wild-type group in the 'hue' column (default is 'wt').
    test : str, optional
        The label representing the test sample group in the 'hue' column (default is 'cpfl').
    hue : str, optional
        The column name in the DataFrame representing the categorical variable distinguishing groups (default is 'genotype').

    Returns:
    --------
    pandas DataFrame
        A DataFrame containing the significant t-test results, with columns 'age' and 'p_value'.

    Notes:
    ------
    - This function performs independent t-tests between the 'wt' and 'test' groups for each specified age.
    - It filters the results based on the significance level of the t-tests (p < 0.05).
    """
    
    # Import necessary packages
    import pandas as pd
    from scipy.stats import ttest_ind
    
    # Define empty array to store test results
    test_results = []
    
    # Test different ages with independent t test
    for postnatal_age in ages:
        
        # Filter a specific age
        age_filtered = measurements[measurements[x] == postnatal_age]
    
        # Filter the measurement of interest in wt and test sample
        measurement_test_y = age_filtered[age_filtered[hue] == test][y]
        measurement_wt_y = age_filtered[age_filtered[hue] == wt][y]
        
        # Perform the t test of the 2 groups
        t_statistic, p_value = ttest_ind(measurement_wt_y,
                                         measurement_test_y,
                                         equal_var=False,
                                         alternative="greater")
        
        # Append the test result into the empty dataframe, two columns age and p_value
        test_results.append({"age": postnatal_age, "p_value": p_value})
    
    # Turn into pandas dataframe and filter significant result
    test_results = pd.DataFrame(test_results)
    test_results = test_results[test_results["p_value"] < 0.05]
    
    return test_results

In [13]:
t_test_results = significant_t_tests(dataframe=measurements, x="age", y="label", hue="genotype", wt="wt", test="cpfl", ages=ages)
t_test_results

Unnamed: 0,age,p_value
4,70,0.047701
5,245,0.014898


In [16]:
for index, row in t_test_results.iterrows():
    
    for i, age in enumerate(ages):
        
        if age == row["age"]:
            x_coordinate = i
            
            print(x_coordinate)
    

4
5


In [17]:
def x_coordinate_barplot(row, ages):
    """
    Determine the x-coordinate for drawing an asterisk on a barplot based on the provided row and ages array.

    Parameters:
    - row (dict): A dictionary representing a row from a t_test_table containing at least the "age" key.
    - ages (list): A list of ages used for comparison.

    Returns:
    - int: The x-coordinate where the asterisk should be drawn on the barplot.
    """
    
    # Iterate through the ages to find the matching age in the row
    for x_coordinate, age in enumerate(ages):
        
        # If the age in the row matches the current age in the iteration, return the x-coordinate
        if age == row["age"]:
            return x_coordinate

In [18]:
for index, row in t_test_results.iterrows():
    
    x_coordinate = x_coordinate_barplot(row=row, ages=ages)
    
    print(x_coordinate)

4
5


In [7]:
for index, row in t_test_results.iterrows():
    
    significance = asterics(row, column_name="p_value")
    print(significance)

*
*


Some code I do not wish to lose... :)

In [None]:
# Draw mean value, based on boxplot
# boxplot = sns.boxplot(data=measurements,
#                      x="age",
#                      y="label",
#                      showmeans=True,
#                      meanline=True,
#                      meanprops={"color": "#6d6d6d", "ls": "-", "lw": 2.5},
#                      medianprops={'visible': False},
#                      whiskerprops={'visible': False},
#                      showfliers=False,
#                      showbox=False,
#                      showcaps=False,
#                      hue="genotype",
#                      hue_order=["wt", "cpfl"])