# Introduction

This document covers the dataset generation, plotting and analysis of Irene's optobot experiments.

# Package imports

In [None]:
import sys
import importlib
from pathlib import Path
import numpy as np
sys.path.insert(0, "../Utilities")

from IPython.display import display, HTML
from scipy import stats
from pingouin import mwu

import pickle
import pandas as pd

# sys.path.append("/home/durrieu/Tracking_Analysis/Utilities")
# sys.path.append("/Users/ulric/git/Tracking_Analysis/Utilities")
import Utils
import Optobot_utils
import Processing

import holoviews as hv
hv.extension('bokeh')

# Import holoviews template
import HoloviewsTemplates

import warnings
warnings.filterwarnings('ignore')

%load_ext rpy2.ipython

# Accessing the experiments

We first use a helper function to find the experiments in the main directory. We then use a custom class to load the experiments, including metadata and DLC data.

In [None]:
# Directory that is searched for experiments
optobot_root = Utils.get_labserver() / "Experimental_data" / "Irene_Optobot"
Exps = Optobot_utils.find_experiments(optobot_root)

# Uncomment below to display the list of detected experiments
# Exps

# Generating the dataset

In [None]:
# Per-experiment tables; experiments that fail to load are reported and skipped
data_list = []

for i, exp in enumerate(Exps):
    try:
        data = Optobot_utils.Fly(exp).data
        # The position of the experiment in `Exps` serves as the fly identifier
        data["id"] = i
    except Exception as e:
        print(f"error occurred while loading {exp}")
        print(str(e))
    else:
        data_list.append(data)

# Stack all loaded tables into a single frame with a fresh row index
dataset = pd.concat(data_list, ignore_index=True)

dataset

Using pandas methods, we can check how many flies we have for each age group and genotype.

In [None]:
# Number of distinct flies ("id") in each age x genotype group
group_keys = ["age", "genotype"]
unique_fly_counts = dataset.groupby(group_keys)["id"].nunique()

unique_fly_counts

> Here, some genotypes are the same but written with slightly different syntax. There are also w1118 flies, which we won't include in this study.


## Data cleaning

In [None]:
# Show the raw genotype labels before harmonising them. display() is needed
# because a bare expression only renders when it is the last line of a cell.
genotypes = dataset["genotype"].unique()
display(genotypes)

# Several labels are spelling variants of the same genotype; map every variant
# onto a single canonical spelling.
genotype_aliases = {
    "IF_Atg18": "IF-Atg18",  # IF_Atg18 and IF-Atg18 are the same genotype
    "IF_THGal4": "IF-THGal4",  # IF-THGal4 and IF_THGal4 are the same genotype
    "SYnjRQ": "SynjRQ",  # SYnjRQ and SynjRQ are the same genotype
    "SynjRQ-THGal4": "SynjRQ",  # SynjRQ and SynjRQ-THGal4 are the same genotype
}
dataset["genotype"] = dataset["genotype"].replace(genotype_aliases)

# Labels left after harmonisation. This list is taken before the exclusion
# below, so it still contains w1118; the statistics loops skip it explicitly.
genotypes = dataset["genotype"].unique()
display(genotypes)

# Exclude the genotypes that are not of interest, in this case w1118.
# The filter must be a row mask on the genotype column: comparing the whole
# frame (dataset != "w1118") keeps every row and only blanks matching cells.
# .copy() makes the result an independent frame so columns can be added later.
dataset = dataset[dataset["genotype"] != "w1118"].copy()

# Count the number of unique flies ("id") grouped by "age" and "genotype"
unique_fly_counts = dataset.groupby(["age", "genotype"])["id"].nunique()

unique_fly_counts

## Detecting moving bouts and resting

Tracking data is noisy. Looking at it, it appears that any velocity **below 0.1 mm/s corresponds to a fly not moving**, which we'll use as the threshold for resting. We also see that some small movements like grooming increase velocity a bit, so we'll add a second threshold to detect **moving bouts when the fly's velocity rises above 0.5 mm/s**.

In [None]:
# Row-wise activity flags derived from the velocity column
velocity = dataset["velocity"]

# Resting: velocity strictly below 0.1
dataset["resting"] = velocity.lt(0.1)

# Moving: velocity strictly above 0.5
dataset["moving"] = velocity.gt(0.5)

# Statistic analysis and plotting

In the following part we'll plot data as jitter boxplots and run inferential statistics. Because the samples are fairly small and contain some outliers, parametric tests are not the best option. Two different strategies can be used:

1) Including age and genotype together, we can run a **PERMANOVA**, which doesn't make any assumption other than observations being independent. This will show whether there is an effect of genotype, an effect of age, and whether there is an interaction between age and genotype. If something shows up as significant (p-value < 0.05), we'll run a **pairwise comparison for permutation tests** with a Bonferroni correction for multiple comparisons. (In the code cells below, the two-way test is run in R with `ARTool`, i.e. an ANOVA on aligned ranks, and the pairwise permutation tests with `pairwiseAdonis`.)

2) Subsetting by age, we can run a **Kruskal-Wallis** test for each age category and, if the test returns a p-value < 0.05, run a post-hoc pairwise comparison. In this case, we'll use a planned-comparisons approach where we apply **Mann-Whitney tests** with a Bonferroni correction for multiple comparisons.

## Average velocity

### Generating the dataset

In [None]:
# Keep only the rows flagged as moving
moving = dataset.loc[dataset["moving"]]

# Mean velocity of each fly, returned as a flat table with one row per
# (genotype, age, id)
Avg_vel = (
    moving.groupby(["genotype", "age", "id"])["velocity"]
    .mean()
    .reset_index()
)

Avg_vel

In [None]:
# Distinct ages, in order of first appearance, as a NumPy array
unique_ages = Avg_vel["age"].drop_duplicates().to_numpy()

unique_ages

In [None]:
# Number of distinct flies with at least one moving row, per age and genotype
group_keys = ["age", "genotype"]
sample_size = moving.groupby(group_keys)["id"].nunique()

sample_size

### Plotting

In [None]:
# Reload the template module; presumably so that edits made to
# HoloviewsTemplates are picked up without restarting the kernel.
importlib.reload(HoloviewsTemplates)

In [None]:
# One box-plot + scatter overlay of average velocity per age, keyed by age
plots = {}
for age in unique_ages:
    age_rows = Avg_vel[Avg_vel["age"] == age]

    box = hv.BoxWhisker(age_rows, kdims=["genotype"], vdims="velocity").opts(
        **HoloviewsTemplates.hv_irene["boxwhisker"]
    )
    points = (
        hv.Scatter(age_rows, kdims=["genotype"], vdims="velocity")
        .opts(**HoloviewsTemplates.hv_irene["scatter"])
        .opts(color="genotype")
    )

    # Shared plot template first, then the per-age title
    overlay = (box * points).opts(
        **HoloviewsTemplates.hv_irene["plot"],
        ylabel="Average velocity (mm/s)",
    )
    plots[age] = overlay.opts(title=f"Age: {age}")

In [None]:

# Arrange the per-age overlays in a two-column layout and render it
age_panels = plots.values()
layout = hv.Layout(age_panels).cols(2)

layout

In [None]:
# Write the layout to an HTML file on the lab server
avg_velocity_html = "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/avg_velocity_plots.html"
hv.save(layout, avg_velocity_html)

# PNG export is currently disabled:
# hv.save(layout, "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/avg_velocity_plots.png")

### Statistics

#### Kruskal-Wallis and Mann-Whitney

In [None]:
# Planned pairwise comparisons: every IF line against every SynjRQ line,
# followed by the two SynjRQ lines against each other.
if_lines = ("IF-Atg18", "IF-THGal4")
synj_lines = ("SynjRQ-Atg18", "SynjRQ")

pairs = [(if_line, synj_line) for if_line in if_lines for synj_line in synj_lines]
pairs.append(synj_lines)

In [None]:
# Per-age omnibus test (Kruskal-Wallis across genotypes) on average velocity.
# When it is significant, the planned pairwise Mann-Whitney U tests follow,
# Bonferroni-corrected for the number of planned pairs.
kruskal_results = []
posthoc_results = []

# w1118 is left out of every comparison
tested_genotypes = [genotype for genotype in genotypes if genotype != "w1118"]

for age in unique_ages:
    # Rows of the current age, without w1118
    data_age = Avg_vel[(Avg_vel["age"] == age) & (Avg_vel["genotype"] != "w1118")]

    # One array of values per genotype
    group_values = [
        data_age.loc[data_age["genotype"] == genotype, "velocity"].values
        for genotype in tested_genotypes
    ]

    H, pval = stats.kruskal(*group_values)
    kruskal_results.append({"Age": age, "H-statistic": H, "P-Value": pval})

    # Post-hoc tests only run after a significant omnibus result
    if not pval < 0.05:
        continue

    for pair in pairs:
        group1 = data_age.loc[data_age["genotype"] == pair[0], "velocity"]
        group2 = data_age.loc[data_age["genotype"] == pair[1], "velocity"]
        posthoc = mwu(group1, group2, alternative="two-sided")

        # Bonferroni: scale by the number of planned comparisons, cap at 1
        p_val = posthoc["p-val"].item() * len(pairs)
        if p_val > 1:
            p_val = 1

        posthoc_results.append(
            {
                "Age": age,
                "Pair": f"{pair[0]} vs {pair[1]}",
                "Post-hoc P-Value": p_val,
            }
        )

# Tabulate both sets of results
kruskal_df = pd.DataFrame(kruskal_results)
posthoc_df = pd.DataFrame(posthoc_results)


def color_red_if_less_than_005(val):
    """Return the CSS colour rule for one p-value: red below 0.05, else black."""
    if val < 0.05:
        return "color: red"
    return "color: black"


def highlight_p_values(data):
    """Return one CSS rule per cell of the column `data`.

    Only the "P-Value" and "Post-hoc P-Value" columns are coloured; every
    other column gets empty rules.
    """
    if data.name in ("P-Value", "Post-hoc P-Value"):
        return [color_red_if_less_than_005(val) for val in data]
    return ["" for _ in data]


# Replace each table by its styled version (p-value columns coloured), then
# render both.
kruskal_df, posthoc_df = [
    table.style.apply(highlight_p_values) for table in (kruskal_df, posthoc_df)
]

for styled_table in (kruskal_df, posthoc_df):
    display(styled_table)

In [None]:
# Store genotype and age as categorical columns
for column in ("genotype", "age"):
    Avg_vel[column] = pd.Categorical(Avg_vel[column])

In [None]:
%%R -i Avg_vel
# Two-way ANOVA on aligned ranks (ARTool): average velocity explained by
# genotype, age and their interaction.

# Local R library from which ARTool is loaded
local_lib <- "/home/durrieu/R/x86_64-pc-linux-gnu-library/4.3"
library(ARTool, lib.loc = local_lib)

# Treat genotype and age as factors
for (column in c("genotype", "age")) {
  Avg_vel[[column]] <- as.factor(Avg_vel[[column]])
}

# Aligned Rank Transform, then the ANOVA on the aligned ranks
art_model <- art(velocity ~ genotype * age, data = Avg_vel)
anova_results <- anova(art_model)

print(anova_results)

### Summary

Average velocity doesn't seem to be very different from one group to the other. Running a dataset-wide analysis shows no difference; running an age-by-age analysis reveals a genotype effect at age 9, specifically between IF-Atg18 and SynjRQ-Atg18, as well as between SynjRQ-Atg18 and SynjRQ.



## Max Velocity

### Generating the dataset

In [None]:
# Highest velocity reached by each fly on its moving rows, as a flat table
# with one row per (genotype, age, id)
velocity_by_fly = moving.groupby(["genotype", "age", "id"])["velocity"]
Max_vel = velocity_by_fly.max().reset_index()

### Plotting

In [None]:
# One box-plot + scatter overlay of maximum velocity per age, keyed by age
plots = {}
for age in unique_ages:
    age_rows = Max_vel[Max_vel["age"] == age]

    box = hv.BoxWhisker(age_rows, kdims=["genotype"], vdims="velocity").opts(
        **HoloviewsTemplates.hv_irene["boxwhisker"]
    )
    points = (
        hv.Scatter(age_rows, kdims=["genotype"], vdims="velocity")
        .opts(**HoloviewsTemplates.hv_irene["scatter"])
        .opts(color="genotype")
    )

    # Shared plot template first, then the per-age title
    overlay = (box * points).opts(
        **HoloviewsTemplates.hv_irene["plot"],
        ylabel="Maximum velocity (mm/s)",
    )
    plots[age] = overlay.opts(title=f"Age: {age}")

In [None]:
# Arrange the per-age overlays in a two-column layout and render it
age_panels = plots.values()
layout = hv.Layout(age_panels).cols(2)

layout

In [None]:
# Write the layout to an HTML file on the lab server
max_velocity_html = "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/max_velocity_plots.html"
hv.save(layout, max_velocity_html)

# PNG export is currently disabled:
# hv.save(
#     layout,
#     "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/max_velocity_plots.png",
# )

### Statistics

#### Kruskal-Wallis and Mann-Whitney tests

In [None]:
# Per-age omnibus test (Kruskal-Wallis across genotypes) on maximum velocity.
# When it is significant, the planned pairwise Mann-Whitney U tests follow,
# Bonferroni-corrected for the number of planned pairs.
kruskal_results = []
posthoc_results = []

# w1118 is left out of every comparison
tested_genotypes = [genotype for genotype in genotypes if genotype != "w1118"]

for age in unique_ages:
    # Rows of the current age, without w1118
    data_age = Max_vel[(Max_vel["age"] == age) & (Max_vel["genotype"] != "w1118")]

    # One array of values per genotype
    group_values = [
        data_age.loc[data_age["genotype"] == genotype, "velocity"].values
        for genotype in tested_genotypes
    ]

    H, pval = stats.kruskal(*group_values)
    kruskal_results.append({"Age": age, "H-statistic": H, "P-Value": pval})

    # Post-hoc tests only run after a significant omnibus result
    if not pval < 0.05:
        continue

    for pair in pairs:
        group1 = data_age.loc[data_age["genotype"] == pair[0], "velocity"]
        group2 = data_age.loc[data_age["genotype"] == pair[1], "velocity"]
        posthoc = mwu(group1, group2, alternative="two-sided")

        # Bonferroni: scale by the number of planned comparisons, cap at 1
        p_val = posthoc["p-val"].item() * len(pairs)
        if p_val > 1:
            p_val = 1

        posthoc_results.append(
            {
                "Age": age,
                "Pair": f"{pair[0]} vs {pair[1]}",
                "Post-hoc P-Value": p_val,
            }
        )

# Tabulate both sets of results
kruskal_df = pd.DataFrame(kruskal_results)
posthoc_df = pd.DataFrame(posthoc_results)


def color_red_if_less_than_005(val):
    """Return the CSS colour rule for one p-value: red below 0.05, else black."""
    if val < 0.05:
        return "color: red"
    return "color: black"


def highlight_p_values(data):
    """Return one CSS rule per cell of the column `data`.

    Only the "P-Value" and "Post-hoc P-Value" columns are coloured; every
    other column gets empty rules.
    """
    if data.name in ("P-Value", "Post-hoc P-Value"):
        return [color_red_if_less_than_005(val) for val in data]
    return ["" for _ in data]


# Replace each table by its styled version (p-value columns coloured), then
# render both.
kruskal_df, posthoc_df = [
    table.style.apply(highlight_p_values) for table in (kruskal_df, posthoc_df)
]

for styled_table in (kruskal_df, posthoc_df):
    display(styled_table)

#### PERMANOVA and Adonis pairwise comparisons

In [None]:
%%R -i Max_vel
# Two-way ANOVA on aligned ranks (ARTool): maximum velocity explained by
# genotype, age and their interaction.

# Local R library from which ARTool is loaded
local_lib <- "/home/durrieu/R/x86_64-pc-linux-gnu-library/4.3"
library(ARTool, lib.loc = local_lib)

# Treat genotype and age as factors
for (column in c("genotype", "age")) {
  Max_vel[[column]] <- as.factor(Max_vel[[column]])
}

# Aligned Rank Transform, then the ANOVA on the aligned ranks
art_model <- art(velocity ~ genotype * age, data = Max_vel)
anova_results <- anova(art_model)

print(anova_results)

In [None]:
%%R -i Max_vel -o results
# Planned pairwise permutation tests (pairwise.adonis) on maximum velocity.
# Each pair is tested on the rows of its two genotypes, all ages together;
# the resulting table is returned to Python as `results`.

# Local R library from which pairwiseAdonis is loaded
local_lib <- "/home/durrieu/R/x86_64-pc-linux-gnu-library/4.3"

library(pairwiseAdonis, lib.loc=local_lib)

# Planned comparisons (same pairs as the Mann-Whitney post-hoc tests)
pairs <- list(
  c("IF-Atg18", "SynjRQ-Atg18"),
  c("IF-Atg18", "SynjRQ"),
  c("IF-THGal4", "SynjRQ-Atg18"),
  c("IF-THGal4", "SynjRQ"),
  c("SynjRQ-Atg18", "SynjRQ")
)

# Number of comparisons used for the Bonferroni correction
num_comparisons <- length(pairs)

# Accumulates one row per pair
results <- data.frame()

for (pair in pairs) {
  # Keep only the two genotypes of the current pair
  subset_data <- Max_vel[Max_vel$genotype %in% pair, ]
  pairwise_results <- pairwise.adonis(as.matrix(subset_data$velocity), subset_data$genotype)

  # Bonferroni correction, capped at 1 so the adjusted value stays a valid
  # p-value (consistent with the Python post-hoc code)
  pairwise_results$p.adjusted <- pmin(pairwise_results$p.value * num_comparisons, 1)

  # Add the pair and the results to the data frame
  results <- rbind(results, data.frame(pair = paste(pair, collapse = " vs "), p.value = pairwise_results$p.value, p.adjusted = pairwise_results$p.adjusted))
}

In [None]:
def highlight_p_values(data):
    """Return one CSS rule per cell of the column `data`.

    Only the "p.adjusted" column is coloured (via color_red_if_less_than_005);
    every other column gets empty rules. This redefinition replaces the
    earlier helper of the same name.
    """
    if data.name != "p.adjusted":
        return ['' for _ in data]
    return [color_red_if_less_than_005(val) for val in data]


# Style the pairwise table returned by the R cell and render it
styled_results = results.style.apply(highlight_p_values)

styled_results

### Summary:

Maximum velocity doesn't seem to be changed by genotype. At age 9 I find one difference: IF-Atg18 vs SynjRQ-Atg18 with the first method and nothing with the second method.

## Time spent moving / Resting

### Dataset generation

In [None]:
# Proportion of each fly's rows that are flagged as moving:
# (rows with moving = True) / (all rows), per (genotype, age, id)
fly_keys = ["genotype", "age", "id"]
moving_rows = moving.groupby(fly_keys).size()
total_rows = dataset.groupby(fly_keys).size()

# NOTE(review): a fly without any moving row is absent from `moving_rows`,
# so its ratio comes out as NaN rather than 0.
prop_moving = (moving_rows / total_rows).reset_index(name="prop_time_moving")
prop_moving

### Plotting

In [None]:
# One box-plot + scatter overlay of the moving proportion per age, keyed by age
plots = {}
for age in unique_ages:
    age_rows = prop_moving[prop_moving["age"] == age]

    box = hv.BoxWhisker(
        age_rows, kdims=["genotype"], vdims="prop_time_moving"
    ).opts(**HoloviewsTemplates.hv_irene["boxwhisker"])
    points = (
        hv.Scatter(age_rows, kdims=["genotype"], vdims="prop_time_moving")
        .opts(**HoloviewsTemplates.hv_irene["scatter"])
        .opts(color="genotype")
    )

    # Shared plot template first, then the per-age title
    overlay = (box * points).opts(
        **HoloviewsTemplates.hv_irene["plot"],
        ylabel="Proportion of time spent moving",
    )
    plots[age] = overlay.opts(title=f"Age: {age}")

In [None]:
# Arrange the per-age overlays in a two-column layout and render it
age_panels = plots.values()
layout = hv.Layout(age_panels).cols(2)

layout

In [None]:
# Write the layout to an HTML file on the lab server
proportion_moving_html = "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/proportion_moving_plots.html"
hv.save(layout, proportion_moving_html)

# PNG export is currently disabled:
# hv.save(
#     layout,
#     "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/proportion_moving_plots.png",
# )

### Statistics

#### Kruskal-Wallis and Mann-Whitney

In [None]:
# Per-age omnibus test (Kruskal-Wallis across genotypes) on the proportion of
# time spent moving. When it is significant, the planned pairwise
# Mann-Whitney U tests follow, Bonferroni-corrected for the number of pairs.
kruskal_results = []
posthoc_results = []

# w1118 is left out of every comparison
tested_genotypes = [genotype for genotype in genotypes if genotype != "w1118"]

for age in unique_ages:
    # Rows of the current age, without w1118
    data_age = prop_moving[
        (prop_moving["age"] == age) & (prop_moving["genotype"] != "w1118")
    ]

    # One array of values per genotype
    group_values = [
        data_age.loc[data_age["genotype"] == genotype, "prop_time_moving"].values
        for genotype in tested_genotypes
    ]

    H, pval = stats.kruskal(*group_values)
    kruskal_results.append({"Age": age, "H-statistic": H, "P-Value": pval})

    # Post-hoc tests only run after a significant omnibus result
    if not pval < 0.05:
        continue

    for pair in pairs:
        group1 = data_age.loc[data_age["genotype"] == pair[0], "prop_time_moving"]
        group2 = data_age.loc[data_age["genotype"] == pair[1], "prop_time_moving"]
        posthoc = mwu(group1, group2, alternative="two-sided")

        # Bonferroni: scale by the number of planned comparisons, cap at 1
        p_val = posthoc["p-val"].item() * len(pairs)
        if p_val > 1:
            p_val = 1

        posthoc_results.append(
            {
                "Age": age,
                "Pair": f"{pair[0]} vs {pair[1]}",
                "Post-hoc P-Value": p_val,
            }
        )

# Tabulate both sets of results
kruskal_df = pd.DataFrame(kruskal_results)
posthoc_df = pd.DataFrame(posthoc_results)


def color_red_if_less_than_005(val):
    """Return the CSS colour rule for one p-value: red below 0.05, else black."""
    if val < 0.05:
        return "color: red"
    return "color: black"


def highlight_p_values(data):
    """Return one CSS rule per cell of the column `data`.

    Only the "P-Value" and "Post-hoc P-Value" columns are coloured; every
    other column gets empty rules.
    """
    if data.name in ("P-Value", "Post-hoc P-Value"):
        return [color_red_if_less_than_005(val) for val in data]
    return ["" for _ in data]


# Replace each table by its styled version (p-value columns coloured), then
# render both.
kruskal_df, posthoc_df = [
    table.style.apply(highlight_p_values) for table in (kruskal_df, posthoc_df)
]

for styled_table in (kruskal_df, posthoc_df):
    display(styled_table)

#### PERMANOVA and Adonis pairwise comparisons

In [None]:
%%R -i prop_moving
# Two-way ANOVA on aligned ranks (ARTool): proportion of time spent moving
# explained by genotype, age and their interaction.

# Local R library from which ARTool is loaded
local_lib <- "/home/durrieu/R/x86_64-pc-linux-gnu-library/4.3"
library(ARTool, lib.loc = local_lib)

# Treat genotype and age as factors
for (column in c("genotype", "age")) {
  prop_moving[[column]] <- as.factor(prop_moving[[column]])
}

# Aligned Rank Transform, then the ANOVA on the aligned ranks
art_model <- art(prop_time_moving ~ genotype * age, data = prop_moving)
anova_results <- anova(art_model)

print(anova_results)

### Summary

Nothing to see here

## Proportion of time resting

### Dataset generation

In [None]:
# Rows flagged as resting
resting = dataset.loc[dataset["resting"]]

# Proportion of each fly's rows that are flagged as resting:
# (rows with resting = True) / (all rows), per (genotype, age, id)
fly_keys = ["genotype", "age", "id"]
resting_rows = resting.groupby(fly_keys).size()
total_rows = dataset.groupby(fly_keys).size()

# NOTE(review): a fly without any resting row is absent from `resting_rows`,
# so its ratio comes out as NaN rather than 0.
prop_resting = (resting_rows / total_rows).reset_index(name="prop_time_resting")
prop_resting

### Plotting

In [None]:
# One box-plot + scatter overlay of the resting proportion per age, keyed by age.
# The age mask is built from prop_resting itself: a mask taken from another
# table (prop_moving) is only valid if both tables happen to list the same
# flies in the same order.
plots = {
    age: (
        hv.BoxWhisker(
            prop_resting[prop_resting["age"] == age],
            kdims=["genotype"],
            vdims="prop_time_resting",
        ).opts(**HoloviewsTemplates.hv_irene["boxwhisker"])
        * hv.Scatter(
            prop_resting[prop_resting["age"] == age],
            kdims=["genotype"],
            vdims="prop_time_resting",
        )
        .opts(**HoloviewsTemplates.hv_irene["scatter"])
        .opts(color="genotype")
    )
    .opts(
        **HoloviewsTemplates.hv_irene["plot"],
        ylabel="Proportion of time spent resting",
    )
    .opts(title=f"Age: {age}")
    for age in unique_ages
}

# Arrange the per-age overlays in a two-column layout and render it
layout = hv.Layout(plots.values()).cols(2)

layout

In [None]:

# Write the layout to an HTML file on the lab server
proportion_resting_html = "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/proportion_resting_plots.html"
hv.save(layout, proportion_resting_html)

# PNG export is currently disabled:
# hv.save(
#     layout,
#     "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/proportion_resting_plots.png",
# )

### Statistics

#### Kruskal-Wallis test and Mann-Whitney pairwise tests

In [None]:
# Per-age omnibus test (Kruskal-Wallis across genotypes) on the proportion of
# time spent resting. When it is significant, the planned pairwise
# Mann-Whitney U tests follow, Bonferroni-corrected for the number of pairs.
kruskal_results = []
posthoc_results = []

# w1118 is left out of every comparison
tested_genotypes = [genotype for genotype in genotypes if genotype != "w1118"]

for age in unique_ages:
    # Rows of the current age, without w1118
    data_age = prop_resting[
        (prop_resting["age"] == age) & (prop_resting["genotype"] != "w1118")
    ]

    # One array of values per genotype
    group_values = [
        data_age.loc[data_age["genotype"] == genotype, "prop_time_resting"].values
        for genotype in tested_genotypes
    ]

    H, pval = stats.kruskal(*group_values)
    kruskal_results.append({"Age": age, "H-statistic": H, "P-Value": pval})

    # Post-hoc tests only run after a significant omnibus result
    if not pval < 0.05:
        continue

    for pair in pairs:
        group1 = data_age.loc[data_age["genotype"] == pair[0], "prop_time_resting"]
        group2 = data_age.loc[data_age["genotype"] == pair[1], "prop_time_resting"]
        posthoc = mwu(group1, group2, alternative="two-sided")

        # Bonferroni: scale by the number of planned comparisons, cap at 1
        p_val = posthoc["p-val"].item() * len(pairs)
        if p_val > 1:
            p_val = 1

        posthoc_results.append(
            {
                "Age": age,
                "Pair": f"{pair[0]} vs {pair[1]}",
                "Post-hoc P-Value": p_val,
            }
        )

# Tabulate both sets of results
kruskal_df = pd.DataFrame(kruskal_results)
posthoc_df = pd.DataFrame(posthoc_results)


def color_red_if_less_than_005(val):
    """Return the CSS colour rule for one p-value: red below 0.05, else black."""
    if val < 0.05:
        return "color: red"
    return "color: black"


def highlight_p_values(data):
    """Return one CSS rule per cell of the column `data`.

    Only the "P-Value" and "Post-hoc P-Value" columns are coloured; every
    other column gets empty rules.
    """
    if data.name in ("P-Value", "Post-hoc P-Value"):
        return [color_red_if_less_than_005(val) for val in data]
    return ["" for _ in data]


# Replace each table by its styled version (p-value columns coloured), then
# render both.
kruskal_df, posthoc_df = [
    table.style.apply(highlight_p_values) for table in (kruskal_df, posthoc_df)
]

for styled_table in (kruskal_df, posthoc_df):
    display(styled_table)

#### PERMANOVA and Adonis pairwise comparisons

In [None]:
%%R -i prop_resting
# Two-way ANOVA on aligned ranks (ARTool): proportion of time spent resting
# explained by genotype, age and their interaction.

# Local R library from which ARTool is loaded
local_lib <- "/home/durrieu/R/x86_64-pc-linux-gnu-library/4.3"
library(ARTool, lib.loc = local_lib)

# Treat genotype and age as factors
for (column in c("genotype", "age")) {
  prop_resting[[column]] <- as.factor(prop_resting[[column]])
}

# Aligned Rank Transform, then the ANOVA on the aligned ranks
art_model <- art(prop_time_resting ~ genotype * age, data = prop_resting)
anova_results <- anova(art_model)

print(anova_results)

### Summary

Nothing to see here either

## Distance travelled

### Dataset generation

In [None]:
# Largest cumulated distance reached by each fly on its moving rows, as a
# flat table with one row per (genotype, age, id)
distance_by_fly = moving.groupby(["genotype", "age", "id"])["cumulated_distance"]
Max_dist = distance_by_fly.max().reset_index()

In [None]:
# Display the per-fly maximum cumulated distance table
Max_dist

In [None]:
# One box-plot + scatter overlay of the distance travelled per age, keyed by age.
# The age mask is built from Max_dist itself: a mask taken from another table
# (prop_moving) is only valid if both tables happen to list the same flies in
# the same order.
plots = {
    age: (
        hv.BoxWhisker(
            Max_dist[Max_dist["age"] == age],
            kdims=["genotype"],
            vdims="cumulated_distance",
        ).opts(**HoloviewsTemplates.hv_irene["boxwhisker"])
        * hv.Scatter(
            Max_dist[Max_dist["age"] == age],
            kdims=["genotype"],
            vdims="cumulated_distance",
        )
        .opts(**HoloviewsTemplates.hv_irene["scatter"])
        .opts(color="genotype")
    )
    .opts(
        **HoloviewsTemplates.hv_irene["plot"],
        ylabel="Distance traveled (mm)",
    )
    .opts(title=f"Age: {age}")
    for age in unique_ages
}

# Arrange the per-age overlays in a two-column layout and render it
layout = hv.Layout(plots.values()).cols(2)

layout

In [None]:


# Write the layout to an HTML file on the lab server
distance_traveled_html = "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/distance_traveled_plots.html"
hv.save(layout, distance_traveled_html)

# PNG export is currently disabled:
# hv.save(
#     layout,
#     "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/Plots/distance_traveled_plots.png",
# )

### Statistics

#### Kruskal-Wallis test and Mann-Whitney tests

In [None]:
# Per-age omnibus test (Kruskal-Wallis across genotypes) on the distance
# travelled. When it is significant, the planned pairwise Mann-Whitney U
# tests follow, Bonferroni-corrected for the number of pairs.
kruskal_results = []
posthoc_results = []

# w1118 is left out of every comparison
tested_genotypes = [genotype for genotype in genotypes if genotype != "w1118"]

for age in unique_ages:
    # Rows of the current age, without w1118
    data_age = Max_dist[(Max_dist["age"] == age) & (Max_dist["genotype"] != "w1118")]

    # One array of values per genotype
    group_values = [
        data_age.loc[data_age["genotype"] == genotype, "cumulated_distance"].values
        for genotype in tested_genotypes
    ]

    H, pval = stats.kruskal(*group_values)
    kruskal_results.append({"Age": age, "H-statistic": H, "P-Value": pval})

    # Post-hoc tests only run after a significant omnibus result
    if not pval < 0.05:
        continue

    for pair in pairs:
        group1 = data_age.loc[data_age["genotype"] == pair[0], "cumulated_distance"]
        group2 = data_age.loc[data_age["genotype"] == pair[1], "cumulated_distance"]
        posthoc = mwu(group1, group2, alternative="two-sided")

        # Bonferroni: scale by the number of planned comparisons, cap at 1
        p_val = posthoc["p-val"].item() * len(pairs)
        if p_val > 1:
            p_val = 1

        posthoc_results.append(
            {
                "Age": age,
                "Pair": f"{pair[0]} vs {pair[1]}",
                "Post-hoc P-Value": p_val,
            }
        )

# Tabulate both sets of results
kruskal_df = pd.DataFrame(kruskal_results)
posthoc_df = pd.DataFrame(posthoc_results)


def color_red_if_less_than_005(val):
    """Return the CSS colour rule for one p-value: red below 0.05, else black."""
    if val < 0.05:
        return "color: red"
    return "color: black"


def highlight_p_values(data):
    """Return one CSS rule per cell of the column `data`.

    Only the "P-Value" and "Post-hoc P-Value" columns are coloured; every
    other column gets empty rules.
    """
    if data.name in ("P-Value", "Post-hoc P-Value"):
        return [color_red_if_less_than_005(val) for val in data]
    return ["" for _ in data]


# Replace each table by its styled version (p-value columns coloured), then
# render both.
kruskal_df, posthoc_df = [
    table.style.apply(highlight_p_values) for table in (kruskal_df, posthoc_df)
]

for styled_table in (kruskal_df, posthoc_df):
    display(styled_table)

#### PERMANOVA and Adonis pairwise test

In [None]:
%%R -i Max_dist
# Two-way ANOVA on aligned ranks (ARTool): distance travelled explained by
# genotype, age and their interaction.

# Local R library from which ARTool is loaded
local_lib <- "/home/durrieu/R/x86_64-pc-linux-gnu-library/4.3"
library(ARTool, lib.loc = local_lib)

# Treat genotype and age as factors
for (column in c("genotype", "age")) {
  Max_dist[[column]] <- as.factor(Max_dist[[column]])
}

# Aligned Rank Transform, then the ANOVA on the aligned ranks
art_model <- art(cumulated_distance ~ genotype * age, data = Max_dist)
anova_results <- anova(art_model)

print(anova_results)

### Summary

Again, an effect of genotype is found with the Kruskal-Wallis test at 9 days. IF-THGal4 vs SynjRQ-Atg18 is different, and there is a trend for IF-Atg18 vs SynjRQ-Atg18 and SynjRQ-Atg18 vs SynjRQ.

## Additional data: cumulated distance over time

An interesting metric could be to look at how much flies move during the video, looking at chronology.

In [None]:
# Cumulated distance over time, to be plotted as curves: mean of the
# cumulated distance across the moving rows sharing the same time point,
# genotype and age.
Cum_dist = (
    moving.groupby(["time", "genotype", "age"])["cumulated_distance"]
    .mean()
    .reset_index()
)

Cum_dist

In [None]:
# Smooth the mean cumulated-distance curve of every (age, genotype) group
# with Processing.butter_lowpass_filter and write it back in place.
for age in Cum_dist["age"].unique():
    for genotype in Cum_dist["genotype"].unique():
        # Filter data for the current age and genotype
        data = Cum_dist[(Cum_dist["age"] == age) & (Cum_dist["genotype"] == genotype)]

        # The loops cover every age x genotype combination, including ones
        # with no rows; an empty series must not be sent to the filter.
        if data.empty:
            continue

        # Apply the filter
        filtered_data = Processing.butter_lowpass_filter(
            data["cumulated_distance"], cutoff=0.01, order=1
        )

        # Replace the original data with the filtered data
        Cum_dist.loc[data.index, "cumulated_distance"] = filtered_data

In [None]:
# Build one overlay of cumulated-distance curves per age (one curve per
# genotype) and arrange the overlays in a two-column layout.
# Note: `plots` is a list here, whereas the earlier plotting cells bind the
# same name to a dict keyed by age.

# Create an empty list to store plots
plots = []

# Create separate plots for each age
for age in Cum_dist["age"].unique():
    # Filter data for the current age
    data = Cum_dist[Cum_dist["age"] == age]

    # Create an empty overlay for this age
    age_overlay = hv.NdOverlay({})

    # Create separate curves for each genotype
    for genotype in data["genotype"].unique():
        # Filter data for the current genotype
        genotype_data = data[data["genotype"] == genotype]

        # Create curve and add to the age overlay
        curve = hv.Curve(genotype_data, kdims=["time"], vdims=["cumulated_distance"])
        age_overlay[genotype] = curve
        # NOTE(review): the options are applied to the item fetched back from
        # the overlay and the return value is discarded, so this relies on
        # .opts() modifying that element in place - confirm.
        age_overlay[genotype].opts(title = f"Age: {age}", width = 500, height = 500, active_tools = [])

    # Add to the list of plots
    plots.append(age_overlay)

# Create a layout of plots
layout = hv.Layout(plots).cols(2)

# Display the layout
layout

Nothing really pops up in this representation.