In [None]:
import matplotlib.pyplot as plt
import cv2


import scipy

from pathlib import Path

import numpy as np

import h5py
import math


import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

import holoviews as hv

import platform

import sys
sys.path.insert(0, "..")
sys.path.insert(0, "../../..")


from pathlib import Path

import cv2

import json

from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.palettes import Spectral11
from bokeh.io import output_notebook
import iqplot
import bokeh.io
bokeh.io.output_notebook()


from Utilities.Utils import *
from Utilities.Processing import *

# Get a list of the directories containing the tracking data

In [None]:
# Get the DataFolder

# Resolve the root folder of the recorded videos. The lab share is mounted at
# a different location depending on the operating system.
system_name = platform.system()
if system_name == "Darwin":
    DataPath = Path("/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos")
elif system_name == "Linux":
    DataPath = Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos")
else:
    # Without this branch DataPath stays undefined and the print below fails
    # with an unrelated-looking NameError.
    raise RuntimeError(f"No data path configured for platform {system_name!r}")

print(DataPath)

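Make a list of the folders I want to use. For this section, I use the folders that have "feedingstate" in the name, as I will explore the effect of vision and starvation state (the UAS-TNT crossings are selected the same way, with "tnt", in the Genotype section below). I'm also only getting flies tested in the afternoon ("pm") from folders that are already tracked.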

## Vision + Starvation state effect

In [None]:
# Keep every entry of DataPath whose full, lower-cased path contains all of
# the required tags. The match is done on the whole path string, not only on
# the folder name.
required_tags = ("feedingstate", "tracked", "pm")
Folders = [
    entry
    for entry in DataPath.iterdir()
    if all(tag in str(entry).lower() for tag in required_tags)
]

Folders

## Importing the tracking data and generating the dataset

In this part, we import the Metadata .json file and the tracking data .h5 files. Then we compute smoothed ball y positions and generate a time column.

In [None]:
# Build one row per tracking file (its y trajectory stored as a list) together
# with the experiment / arena / corridor identifiers and the arena metadata.
# Rows are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop copies the growing frame at every iteration.
fly_frames = []

# Running counter used to give every tracking file a unique "Fly<N>" label
Flynum = 0

for folder in Folders:
    print(f"Adding experiment {folder} to the dataset...")

    # Metadata.json lists the variable names under "Variable" and, for each
    # arena key ("Arena1" ... "Arena9"), the values in the same order.
    with open(folder / "Metadata.json", "r") as f:
        metadata = json.load(f)

    variables = metadata["Variable"]
    metadata_dict = {}
    for var in variables:
        var_index = variables.index(var)
        metadata_dict[var] = {
            f"Arena{arena}": metadata[f"Arena{arena}"][var_index]
            for arena in range(1, 10)
        }

    # The fly tracks ("*flytrack*.analysis.h5") were previously loaded here but
    # never used; that read is dropped because it failed (or silently reused
    # the previous folder's data) when a folder had no such file.
    # NOTE(review): the pattern below also matches "*flytrack*.analysis.h5"
    # files, so fly tracks would be added as if they were ball tracks —
    # confirm whether they should be filtered out.
    for file in folder.glob("**/*.analysis.h5"):
        with h5py.File(file, "r") as f:
            locations = f["tracks"][:].T

        # Index 1 of the third axis is used as the y coordinate (presumably
        # x/y on that axis — TODO confirm).
        # NOTE(review): np.flip without an axis argument reverses every axis
        # of the array, including the first one — confirm this is the
        # intended handling of "Flipped" experiments.
        if "Flipped" in folder.name:
            yball: np.ndarray = np.flip(locations[:, :, 1, :])
        else:
            yball: np.ndarray = locations[:, :, 1, :]

        foldername = folder.name

        # The corridor folder is the file's parent, the arena folder its grandparent
        arena = file.parent.parent.name
        corridor = file.parent.name

        # Metadata keys are capitalised ("Arena1")
        arena_key = arena.capitalize()
        arena_metadata = {var: pd.Categorical([metadata_dict[var][arena_key]]) for var in metadata_dict}

        Flynum += 1

        # coordinates.npy holds the start and end coordinates of the corridor
        start, end = np.load(file.parent / 'coordinates.npy')

        # NOTE(review): start / end are stored as categoricals although the
        # next cell subtracts `start` from `yball` — confirm a numeric column
        # would not be more appropriate.
        data = {"Fly": pd.Categorical(["Fly" + str(Flynum)]),
                "yball": [list(yball[:, 0, 0])],
                "experiment": pd.Categorical([foldername]),
                "arena": pd.Categorical([arena]),
                "corridor": pd.Categorical([corridor]),
                "start": pd.Categorical([start]),
                "end": pd.Categorical([end])}
        data.update(arena_metadata)

        fly_frames.append(pd.DataFrame(data))

if not fly_frames:
    raise FileNotFoundError("No .analysis.h5 tracking file found in the selected folders")

Dataset = pd.concat(fly_frames, ignore_index=True)

# Explode the yball column to have one row per timepoint
Dataset = Dataset.explode('yball')
Dataset['yball'] = Dataset['yball'].astype(float)

# Filter parameters (not used by any active code in this cell)
cutoff = 0.0015
order = 1


In [None]:

# Absolute ball displacement from the corridor start; gaps are filled by
# linear interpolation before the low-pass smoothing.
# NOTE(review): interpolation and smoothing run over the whole column, i.e.
# across the boundaries between flies — confirm this is acceptable.
ball_displacement = (Dataset['yball'] - Dataset['start']).abs()
Dataset['yball_relative'] = ball_displacement.interpolate(method='linear')

Dataset['yball_relative_SG'] = savgol_lowpass_filter(Dataset['yball_relative'], 221, 1)

print('Defining frame and time columns...')
# Per-fly frame counter starting at 0, converted to time by dividing by 30
# (presumably a 30 frames-per-second recording — TODO confirm).
Dataset["Frame"] = Dataset.groupby("Fly").cumcount()
Dataset["time"] = Dataset["Frame"] / 30

print('Removing Frame column...')
Dataset = Dataset.drop(columns=["Frame"])

print('Resetting index...')
Dataset = Dataset.reset_index(drop=True)

Dataset.head()

In [None]:
GroupedData = Dataset.groupby(["time", "Light", "FeedingState"]).mean(numeric_only=True).reset_index()

GroupedData.head()

In [None]:
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')

# Define a function to return alpha value based on Light condition
def alpha_val(light_condition):
    """Return the curve opacity for a Light value: 0.5 for 'off', 1.0 otherwise."""
    if light_condition == 'off':
        return 0.5
    return 1.0

# Apply the function to the dataset to create a new column 'alpha'
GroupedData['alpha'] = GroupedData['Light'].apply(alpha_val)

color_dict = {"fed": "blue", "starved": "orange", "starved_noWater": "green"}

# Create an empty overlay
overlay = hv.NdOverlay({})

# Create separate overlays for each FeedingState
for feeding_state, color in color_dict.items():
    # Filter data for the current FeedingState
    data = GroupedData[GroupedData['FeedingState'] == feeding_state]
    
    # Create curves and overlay them
    curves = hv.Curve(data, kdims=['time'], vdims=['yball_relative_SG', 'Light', 'alpha'])
    curves = curves.groupby('Light').overlay().opts(opts.Curve(color=color, alpha='alpha'))
    
    # Add to the overall overlay
    overlay[feeding_state] = curves

overlay = overlay.opts( 
        height=750,
        width=1000,
        #alpha=1,
        #line_width=2,
        xlabel="Time(s)",
        ylabel="Average relative distance pushed (px)",
        show_grid=True,
        fontscale=2,
        title="",)
# Display the plot
overlay


In [None]:
import holoviews as hv
from holoviews import opts

# Define a function to map FeedingState to color
def feedingstate_to_color(feedingstate):
    """Return the colour name used to draw a FeedingState.

    Known states are "fed", "starved" and "starved_noWater"; any other value
    yields the integer 0.
    NOTE(review): 0 is not a colour name — confirm the intended fallback.
    """
    palette = dict(fed="blue", starved="orange", starved_noWater="green")
    if feedingstate in palette:
        return palette[feedingstate]
    return 0

# Define a function to map Light condition to alpha
def light_to_alpha(light):
    """Return the curve opacity for a Light value: 0.5 when it is 'off', else 1.0."""
    opacity = 1.0
    if light == 'off':
        opacity = 0.5
    return opacity

# Apply the mappings to FeedingState and Light condition
GroupedData['FeedingStateColor'] = GroupedData['FeedingState'].apply(feedingstate_to_color)
#print(GroupedData['FeedingStateColor'])
GroupedData['LightAlpha'] = GroupedData['Light'].apply(light_to_alpha)

# Define a function to map FeedingState to new labels
def feedingstate_to_label(feedingstate):
    """Return the human-readable legend label of a FeedingState.

    Values without a dedicated label are returned unchanged.
    """
    readable_names = dict(
        fed="fed",
        starved="starved with water",
        starved_noWater="starved without water",
    )
    try:
        return readable_names[feedingstate]
    except KeyError:
        return feedingstate

# Apply the mapping to FeedingState
GroupedData['FeedingStateLabel'] = GroupedData['FeedingState'].apply(feedingstate_to_label)

# Calculate the sample size
sample_size = Dataset.groupby(['Light','FeedingState'])['Fly'].nunique()

# Define a function to map FeedingState, Light, and sample size to new labels
def feedingstate_light_to_label(row):
    """Build the legend entry "<label>, <light> (n=<count>)" for one row.

    The count is looked up in the module-level ``sample_size`` Series using
    the row's Light and FeedingState values.
    """
    label, light = row['FeedingStateLabel'], row['Light']
    n_flies = sample_size.loc[light, row['FeedingState']]
    return "{}, {} (n={})".format(label, light, n_flies)

# Apply the mapping to FeedingState, Light, and sample size
GroupedData['Feeding state, Light'] = GroupedData.apply(feedingstate_light_to_label, axis=1)

# Create the curves and apply the options
curves = hv.Curve(GroupedData, kdims=['time'], vdims=['yball_relative_SG','FeedingStateLabel', 'Light', 'FeedingStateColor', 'LightAlpha', 'alpha', 'Feeding state, Light']).groupby(['Feeding state, Light']).overlay()
curves.opts(opts.Curve(color='FeedingStateColor', alpha='LightAlpha', 
        height=750,
        width=1000,
        line_width=1,
        xlabel="Time(s)",
        ylabel="Average relative distance pushed (px)",
        show_grid=True,
        fontscale=1.5,
        title="",
        ))
curves.opts(legend_position='bottom_right')

curves




In [None]:
# Save the plot as a png
hv.save(
    curves,
    "/mnt/labserver/DURRIEU_Matthias/Pictures/FyssenReport/feeding_and_light.png",
    fmt="png",
)

In [None]:
# Save the plot as a html
hv.save(
    curves,
    "/mnt/labserver/DURRIEU_Matthias/Pictures/FyssenReport/feeding_and_light.html",
    fmt="html",
)

In [None]:
# Attempt to save the datasets 

DataPath = Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Datasets")

GroupedData.to_feather(DataPath / "230928_GroupedDataFeedingStateLightMean.feather")

Dataset.to_feather(DataPath / "230928_DatasetFeedingStateLight.feather")

## Genotype data

In [None]:
%reset -f

In [None]:
# Get the DataFolder

# Resolve the root folder of the recorded videos. The lab share is mounted at
# a different location depending on the operating system.
system_name = platform.system()
if system_name == "Darwin":
    DataPath = Path("/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos")
elif system_name == "Linux":
    DataPath = Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Videos")
else:
    # Without this branch DataPath stays undefined and the print below fails
    # with an unrelated-looking NameError.
    raise RuntimeError(f"No data path configured for platform {system_name!r}")

print(DataPath)

In [None]:
# Keep every entry of DataPath whose full, lower-cased path contains all of
# the required tags. The match is done on the whole path string, not only on
# the folder name.
required_tags = ("tnt", "tracked", "pm")
Folders = [
    entry
    for entry in DataPath.iterdir()
    if all(tag in str(entry).lower() for tag in required_tags)
]

Folders

## Importing the tracking data and generating the dataset

In this part, we import the Metadata .json file and the tracking data .h5 files. Then we compute smoothed ball y positions and generate a time column.

In [None]:
# Build one row per tracking file (its y trajectory stored as a list) together
# with the experiment / arena / corridor identifiers and the arena metadata.
# Rows are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop copies the growing frame at every iteration.
fly_frames = []

# Running counter used to give every tracking file a unique "Fly<N>" label
Flynum = 0

for folder in Folders:
    # Metadata.json lists the variable names under "Variable" and, for each
    # arena key ("Arena1" ... "Arena9"), the values in the same order.
    with open(folder / "Metadata.json", "r") as f:
        metadata = json.load(f)

    variables = metadata["Variable"]
    metadata_dict = {}
    for var in variables:
        var_index = variables.index(var)
        metadata_dict[var] = {
            f"Arena{arena}": metadata[f"Arena{arena}"][var_index]
            for arena in range(1, 10)
        }

    print(metadata_dict)

    # The fly tracks ("*flytrack*.analysis.h5") were previously loaded here but
    # never used; that read is dropped because it failed (or silently reused
    # the previous folder's data) when a folder had no such file.
    # NOTE(review): the pattern below also matches "*flytrack*.analysis.h5"
    # files, so fly tracks would be added as if they were ball tracks —
    # confirm whether they should be filtered out.
    for file in folder.glob("**/*.analysis.h5"):
        with h5py.File(file, "r") as f:
            locations = f["tracks"][:].T

        # Index 1 of the third axis is used as the y coordinate (presumably
        # x/y on that axis — TODO confirm).
        # NOTE(review): np.flip without an axis argument reverses every axis
        # of the array, including the first one — confirm this is the
        # intended handling of "Flipped" experiments.
        if "Flipped" in folder.name:
            yball: np.ndarray = np.flip(locations[:, :, 1, :])
        else:
            yball: np.ndarray = locations[:, :, 1, :]

        foldername = folder.name

        # The corridor folder is the file's parent, the arena folder its grandparent
        arena = file.parent.parent.name
        corridor = file.parent.name

        # Metadata keys are capitalised ("Arena1")
        arena_key = arena.capitalize()
        arena_metadata = {var: pd.Categorical([metadata_dict[var][arena_key]]) for var in metadata_dict}

        Flynum += 1

        # coordinates.npy holds the start and end coordinates of the corridor
        start, end = np.load(file.parent / 'coordinates.npy')

        # NOTE(review): start / end are stored as categoricals although the
        # next cell subtracts `start` from `yball` — confirm a numeric column
        # would not be more appropriate.
        data = {"Fly": pd.Categorical(["Fly" + str(Flynum)]),
                "yball": [list(yball[:, 0, 0])],
                "experiment": pd.Categorical([foldername]),
                "arena": pd.Categorical([arena]),
                "corridor": pd.Categorical([corridor]),
                "start": pd.Categorical([start]),
                "end": pd.Categorical([end])}
        data.update(arena_metadata)

        fly_frames.append(pd.DataFrame(data))

if not fly_frames:
    raise FileNotFoundError("No .analysis.h5 tracking file found in the selected folders")

Dataset = pd.concat(fly_frames, ignore_index=True)

# Explode the yball column to have one row per timepoint
Dataset = Dataset.explode('yball')
Dataset['yball'] = Dataset['yball'].astype(float)

# Filter parameters (not used by any active code in this cell)
cutoff = 0.0015
order = 1


In [None]:

# Absolute ball displacement from the corridor start; gaps are filled by
# linear interpolation before the low-pass smoothing.
# NOTE(review): interpolation and smoothing run over the whole column, i.e.
# across the boundaries between flies — confirm this is acceptable.
ball_displacement = (Dataset['yball'] - Dataset['start']).abs()
Dataset['yball_relative'] = ball_displacement.interpolate(method='linear')

Dataset['yball_relative_SG'] = savgol_lowpass_filter(Dataset['yball_relative'], 221, 1)

print('Defining frame and time columns...')
# Per-fly frame counter starting at 0, converted to time by dividing by 30
# (presumably a 30 frames-per-second recording — TODO confirm).
Dataset["Frame"] = Dataset.groupby("Fly").cumcount()
Dataset["time"] = Dataset["Frame"] / 30

print('Removing Frame column...')
Dataset = Dataset.drop(columns=["Frame"])

print('Resetting index...')
Dataset = Dataset.reset_index(drop=True)

Dataset.head()

In [None]:
GroupOps = Dataset.groupby(
    [
        "time",
        "Genotype",
    ]
)

In [None]:
GroupData = GroupOps.mean(numeric_only=True).reset_index()

The part below is too heavy to be executed in a notebook. I just ran it in a separate script.

In [None]:
#Confints = GroupOps["yball_relative_SG"].apply(lambda x: draw_bs_ci(x, n_reps=300))

# DataPath = Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Datasets")

# #GroupOps.to_feather(DataPath / "230928_GroupedDataTNT.feather")

# Dataset.to_feather(DataPath / "230928_DatasetTNT.feather")

It should now be possible to load the confints from the script output

In [None]:
DataPath = Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Datasets")

Confints = pd.read_feather(DataPath / "230928_Confints.feather")

In [None]:
Confints_process = Confints.reset_index()

In [None]:
Confints.head()

In [None]:
Confints[["ci_lower", "ci_upper"]] = Confints["yball_relative_SG"].tolist()

Confints.head()

In [None]:
# Attach the confidence-interval bounds to the grouped means.
# The bounds are read from a fresh reset_index() copy of Confints: the
# `Confints_process` built in an earlier cell was created before ci_lower /
# ci_upper were added to `Confints`, so it does not contain those columns.
# NOTE(review): rows are matched by index — this assumes Confints rows are in
# the same (time, Genotype) order as GroupData; confirm against the script
# that produced the feather file.
Confints_process = Confints.reset_index()
GroupData["ci_lower"] = Confints_process["ci_lower"]
GroupData["ci_upper"] = Confints_process["ci_upper"]

In [None]:
GroupData.head()

In [None]:
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.palettes import Spectral11
from bokeh.io import output_notebook

# Call once to configure Bokeh to display plots inline in the notebook.
output_notebook()

# Create a new plot with a title and axis labels
p = figure(title="yball over time", x_axis_label='time', y_axis_label='yball_relative_SG')

# Get the list of genotypes
genotypes = GroupData['Genotype'].unique()

genotypes = ['PR', 'TNTxTH', 'TNTxE-PG']


In [None]:

# For each genotype, create a line plot with confidence intervals
for i, genotype in enumerate(genotypes):
    df_genotype = GroupData[GroupData['Genotype'] == genotype]
    source = ColumnDataSource(df_genotype)
    
    # Draw the line for yball_relative_SG
    p.line('time', 'yball_relative_SG', source=source, line_width=2, color=Spectral11[i], legend_label=genotype)
    
    # Draw the upper and lower bounds for the confidence interval
    p.varea(x='time', y1='ci_lower', y2='ci_upper', source=source, fill_color=Spectral11[i], fill_alpha=0.4)

# Show the results
show(p)


## Implementing interaction metrics and plotting

In this part, we reuse the code from ConcatClips.py to get the interaction events from each video, and using these we can quantify metrics listed in 'Plotting ideas' in my notebook. These are :

* Number of events
* Event at which the ball is brought to the end
* Time at which the ball is brought to the end
* Amount of significant events
* Push vs pull 
* plot the interactions chronology
* time between interactions
* First meaningful interaction



In [None]:
from bokeh.plotting import figure, show

In [None]:
from Utilities.Ballpushing_utils import *
import traceback

# One DataFrame per successfully processed video
Dataset_list = []
# Running counter used to label each processed video as "Fly <N>"
Flycount = 0
# Video name -> formatted traceback for every video whose processing raised,
# so that failures are reported instead of being silently dropped
failed_videos = {}

for folder in Folders:
    # Metadata.json lists the variable names under "Variable" and, for each
    # arena key ("Arena1" ... "Arena9"), the values in the same order.
    with open(folder / "Metadata.json", "r") as f:
        metadata = json.load(f)

    variables = metadata["Variable"]
    metadata_dict = {}
    for var in variables:
        var_index = variables.index(var)
        # Keys are stored lower-cased ("arena1") because they are looked up
        # below with the arena folder name as-is.
        metadata_dict[var] = {
            f"arena{arena}": metadata[f"Arena{arena}"][var_index]
            for arena in range(1, 10)
        }

    files = list(folder.glob("**/*.mp4"))

    for file in files:
        # The corridor folder is the video's parent, the arena folder its grandparent
        arena = file.parent.parent.name
        corridor = file.parent.name

        Genotype = metadata_dict["Genotype"][arena]
        Date = metadata_dict["Date"][arena]
        Light = metadata_dict["Light"][arena]
        FeedingState = metadata_dict["FeedingState"][arena]
        Period = metadata_dict["Period"][arena]

        start, end = np.load(file.parent / 'coordinates.npy')

        video_dir = file.parent

        # Fly and ball tracking files sit next to the video; videos lacking
        # either of them are skipped.
        flypath = next(video_dir.glob("*flytrack*.analysis.h5"), None)
        if flypath is None:
            continue

        ballpath = next(video_dir.glob("*tracked*.analysis.h5"), None)
        if ballpath is None:
            continue

        vidpath = file
        vidname = f"{Genotype}_{Date}_Light_{Light}_{FeedingState}_{Period}_{arena}_{corridor}"

        try:
            # Extract interaction events and mark them in the DataFrame
            data = extract_interaction_events(ballpath, flypath, mark_in_df=True)
            data["start"] = start
            data["end"] = end
            data["Genotype"] = Genotype
            data["Date"] = Date
            data["arena"] = arena
            data["corridor"] = corridor
            Flycount += 1
            data["Fly"] = f'Fly {Flycount}'

            # Absolute ball displacement from the corridor start, with gaps
            # filled by linear interpolation
            data['yball_relative'] = abs(data['yball_smooth'] - data['start'])
            data['yball_relative'] = data['yball_relative'].interpolate(method='linear')

            Dataset_list.append(data)
        except Exception:
            # Best effort: keep going, but remember what failed and why
            failed_videos[vidname] = traceback.format_exc()

if failed_videos:
    print(f"{len(failed_videos)} video(s) could not be processed (see failed_videos): {sorted(failed_videos)}")

if not Dataset_list:
    raise ValueError("No video could be processed: nothing to concatenate")

# Concatenate all dataframes in the list into a single dataframe
Dataset = pd.concat(Dataset_list, ignore_index=True)

In [None]:
Dataset.head()

### Average number of events

In [None]:
# Number of distinct non-null Event labels for each fly, with Genotype kept
# as a column for plotting.
# DataFrameGroupBy.nunique has no column argument (its only parameter is
# `dropna`), so the "Event" column is selected before counting.
GroupedData = Dataset.groupby(["Fly", "Genotype"])["Event"].nunique().reset_index()

In [None]:
GroupedData.head()

In [None]:
import iqplot

p_NumbEvents = iqplot.stripbox(data=GroupedData, q="Event", cats="Genotype", title="Number of events", frame_width= 750, frame_height= 500, tooltips=[("Event", "@{Event}"),("Genotype", "@{Genotype}")], spread="jitter")

show(p_NumbEvents)

In [None]:
Savepath = Path("/mnt/labserver/DURRIEU_Matthias/Pictures/InteractionEvents/")

bokeh.io.save(p_NumbEvents, filename=Savepath / "NumbEvents.html")

In [None]:
average_unique_events = Dataset.groupby(['Fly', 'Genotype'])['Event'].nunique().groupby('Genotype').mean()

average_unique_events


### Event at which the ball is brought to the end

In [None]:
# Tolerance (same unit as yball_relative) below the furthest position that
# still counts as "at the end"
end_threshold = 0

# Furthest relative ball position reached by each fly
max_positions = Dataset.groupby('Fly')['yball_relative'].max()

# For each fly, the Event label of the first row at which the ball is within
# end_threshold of that furthest position
result = []
for fly, max_position in max_positions.items():
    fly_data = Dataset[Dataset['Fly'] == fly]
    reached_end = fly_data[fly_data['yball_relative'] >= max_position - end_threshold]
    if reached_end.empty:
        # No comparable position for this fly (e.g. only NaN values)
        continue
    result.append({'Fly': fly, 'Event': reached_end['Event'].iloc[0]})

# One row per fly, carrying an explicit flag. Merging on the keys alone adds
# no column, which made the former `Event.notna()` test flag every event row.
result_df = pd.DataFrame(result, columns=['Fly', 'Event'])
result_df['IsFinal'] = True

# Drop a flag left by a previous run so the merge does not create suffixed duplicates
Dataset = Dataset.drop(columns=['IsFinal'], errors='ignore')
Dataset = pd.merge(Dataset, result_df, on=['Fly', 'Event'], how='left')

# True only on rows of the final event. Rows without an Event are excluded
# explicitly because merge matches null keys with each other.
Dataset['IsFinal'] = Dataset['IsFinal'].eq(True) & Dataset['Event'].notna()

Dataset.head()


In [None]:
Dataset["Event"]

In [None]:
# Keep only the rows that belong to an interaction event. The copy makes the
# column assignment below act on an independent frame.
# NOTE(review): Dataset itself is overwritten here, so every later cell only
# sees event rows — the non-event ("pause") computations further down then
# have no rows left to work on; confirm this is intended.
Dataset = Dataset.dropna(subset=['Event']).copy()

# Numeric part of the Event label (raw string: '\d' is not a valid escape in
# a normal string literal)
Dataset['EventNumber'] = Dataset['Event'].str.extract(r'(\d+)', expand=False).astype(int)

# Rows flagged as belonging to the final event
final_events = Dataset[Dataset['IsFinal'] == True]

final_events.head()


In [None]:

# Lowest EventNumber among the final-event rows of each fly.
# sort=False keeps the flies in order of first appearance, and an empty
# final_events still yields a frame with both columns.
result_df = (
    final_events
    .groupby('Fly', sort=False)['EventNumber']
    .min()
    .reset_index(name='MinEventNumber')
)


In [None]:
# Merge 'result_df' with the original Dataset to get the 'Genotype' column
result_df = pd.merge(result_df, Dataset[['Fly', 'Genotype']].drop_duplicates(), on='Fly', how='left')

# Plot the data
p_finalevent = iqplot.stripbox(data=result_df, q="MinEventNumber", cats="Genotype", title="Event at which the fly reaches the end of the corridor", frame_width= 750, frame_height= 500, tooltips=[("MinEventNumber", "@{MinEventNumber}"),("Genotype", "@{Genotype}")], spread="jitter")

bokeh.io.show(p_finalevent)

#TODO: Figure out why this doesn't work properly


In [None]:

# Save the plot as html
bokeh.io.save(
    p_finalevent,
    Savepath / "FinalEvent.html",
    #fmt="html",
)

In [None]:
# Get the time at which the final event occurs

GroupFly_Time = final_events.groupby(["Fly", "Genotype"])["time"].first().reset_index()

In [None]:
p_finaltime = iqplot.stripbox(data=GroupFly_Time, q="time", cats="Genotype", title="Time of the event at which the fly reaches the end of the corridor", frame_width= 750, frame_height= 500,  tooltips=[("time", "@{time}"),("Genotype", "@{Genotype}")], spread="jitter")

bokeh.io.show(p_finaltime)


Save dataset

In [None]:
bokeh.io.save(
    p_finaltime,
    Savepath / "FinalTime.html",
    #fmt="html",
)

In [None]:
DataPath = Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/MultiMazeRecorder/Datasets")

Dataset.to_feather(DataPath / "231002_DatasetTNT_InteractionEvents.feather")

In [None]:
def check_yball_variation(event_df, threshold=10):
    """Tell whether "yball_smooth" varies by more than `threshold` in `event_df`.

    Parameters
    ----------
    event_df : object indexable by "yball_smooth", returning values with
        ``max()`` and ``min()`` (here: the rows of one Fly/Event group).
    threshold : range that has to be exceeded (strictly) for a True result.

    Returns
    -------
    True when max - min of "yball_smooth" is greater than `threshold`.
    """
    positions = event_df["yball_smooth"]
    return (positions.max() - positions.min()) > threshold

# One boolean per (Fly, Event) group: did the ball position vary by more than
# the default threshold during that event? The unnamed result column (0) is
# renamed to 'SignificantEvent'.
significant_events = (
    Dataset
    .groupby(["Fly", "Event"])
    .apply(check_yball_variation)
    .reset_index()
    .rename(columns={0: 'SignificantEvent'})
)

# Broadcast the per-event flag back onto every row of the event
Dataset = pd.merge(Dataset, significant_events, on=['Fly', 'Event'], how='left')

Dataset.head()


In [None]:
# Count the number of unique events that are significant for each 'Fly' and 'Genotype'
unique_significant_events = Dataset[Dataset['SignificantEvent'] == True].groupby(['Fly', 'Genotype'])['Event'].nunique()

print(unique_significant_events)


In [None]:
p_unique_significant_events = iqplot.stripbox(data=unique_significant_events.reset_index(), q="Event", cats="Genotype", title="Number of significant events", frame_width= 750, frame_height= 500,  tooltips=[("Event", "@{Event}"),("Genotype", "@{Genotype}")], spread="jitter")

bokeh.io.show(p_unique_significant_events)

In [None]:
bokeh.io.save(
    p_unique_significant_events,
    Savepath / "NumbSignificantEvents.html",
    #fmt="html",
)

In [None]:
# Filter the DataFrame to include only the significant events
significant_events = Dataset[Dataset['SignificantEvent'] == True]

# For each 'Fly', find the first significant event and get its time
first_significant_event_times = significant_events.groupby('Fly')['time'].idxmin()

# Use these indices to get the corresponding rows from the original DataFrame
first_significant_events = Dataset.loc[first_significant_event_times]

first_significant_events.head()


In [None]:
# For each 'Fly' and 'Genotype', find the first significant event and get its time
first_significant_event_times = significant_events.groupby(['Fly', 'Genotype'])['time'].idxmin()

# Use these indices to get the corresponding rows from the original DataFrame
first_significant_events = Dataset.loc[first_significant_event_times]

# Plot the data
p_first_significant_event_times = iqplot.stripbox(data=first_significant_events.reset_index(), q="time", cats="Genotype", title="Time of first significant event", frame_width= 750, frame_height= 500,   tooltips=[("time", "@{time}"),("Genotype", "@{Genotype}")], spread="jitter")
bokeh.io.show(p_first_significant_event_times)


In [None]:
bokeh.io.save(
    p_first_significant_event_times,
    Savepath / "FirstSignificantEvent_Time.html",
    #fmt="html",
)

In [None]:
# Rows belonging to significant events only
significant_events = Dataset[Dataset['SignificantEvent'] == True]

# Index label of the earliest significant-event row of each fly
first_significant_event_times = significant_events.groupby('Fly')['time'].idxmin()

# Corresponding rows, copied so that adding a column does not write into a
# slice of Dataset
first_significant_events = Dataset.loc[first_significant_event_times].copy()

# Numeric part of the Event label (raw string: '\d' is not a valid escape in
# a normal string literal)
first_significant_events["EventNumber"] = first_significant_events["Event"].str.extract(r'(\d+)', expand=False).astype(int)

first_significant_events.head()



In [None]:
# Plot the data
p_first_significant_event_numbers = iqplot.stripbox(data=first_significant_events.reset_index(), q="EventNumber", cats="Genotype", title="Number of first significant event", frame_width= 750, frame_height= 500, tooltips=[("EventNumber", "@{EventNumber}"),("Genotype", "@{Genotype}")], spread="jitter")
bokeh.io.show(p_first_significant_event_numbers)

In [None]:
bokeh.io.save(
    p_first_significant_event_numbers,
    Savepath / "FirstSignificantEvent_Number.html",
    #fmt="html",
)

# Pauses

In [None]:
import numpy as np
Dataset['Event'] = Dataset['Event'].replace('None', np.nan)


In [None]:
Dataset['IsEvent'] = Dataset['Event'].notna()


In [None]:
# Per fly: each row's time minus the shifted time of the rows flagged IsEvent.
# NOTE(review): the next cell assigns Dataset['TimeSinceLastEvent'] again, so
# the values computed here are overwritten — confirm this cell is still needed.
Dataset['TimeSinceLastEvent'] = Dataset.groupby('Fly').apply(lambda x: x['time'] - x.loc[x['IsEvent'], 'time'].shift()).reset_index(level=0, drop=True)


In [None]:
# Row-level column, kept as before: time elapsed since the previous event row
# of the same fly (NaN on non-event rows and on each fly's first event row)
Dataset['TimeSinceLastEvent'] = Dataset[Dataset['IsEvent']].groupby('Fly')['time'].diff()

# Averaging that row-level column mostly averages the frame-to-frame step
# inside events. The time *between* events is instead computed from the event
# boundaries: start of an event minus end of the fly's previous event.
event_bounds = (
    Dataset[Dataset['IsEvent']]
    .groupby(['Fly', 'Genotype', 'Event'], sort=False)['time']
    .agg(event_start='min', event_end='max')
    .reset_index()
    .sort_values(['Fly', 'event_start'])
)
event_bounds['TimeSinceLastEvent'] = (
    event_bounds['event_start'] - event_bounds.groupby('Fly')['event_end'].shift()
)

# Average gap for each 'Fly' and 'Genotype'; a fly's first event has no
# previous event and is left out
average_time_between_events = (
    event_bounds
    .dropna(subset=['TimeSinceLastEvent'])
    .groupby(['Fly', 'Genotype'])['TimeSinceLastEvent']
    .mean()
    .reset_index()
)

# Plot the data
p_average_time_between_events = iqplot.stripbox(data=average_time_between_events, q="TimeSinceLastEvent", cats="Genotype", title="Average time between events", frame_width= 750, frame_height= 500, tooltips=[("TimeSinceLastEvent", "@{TimeSinceLastEvent}"),("Genotype", "@{Genotype}")], spread="jitter")
bokeh.io.show(p_average_time_between_events)


In [None]:
bokeh.io.save(
    p_average_time_between_events,
    Savepath / "AverageTimeBetweenEvents.html",
    #fmt="html",
)

In [None]:
Dataset['IsNonEvent'] = Dataset['Event'].isna()


In [None]:
# Time difference between consecutive rows of the same fly
Dataset['TimeDifference'] = Dataset.groupby('Fly')['time'].diff()

# Keep these time differences on non-event rows only (NaN on every other row)
Dataset['NonEventTime'] = Dataset.loc[Dataset['IsNonEvent'], 'TimeDifference']

# Total time spent in non-events for each 'Fly' and 'Genotype'
# NOTE(review): an earlier cell reassigns Dataset to
# Dataset.dropna(subset=['Event']); once it has run, no non-event row is left
# and this sum is 0 for every fly — confirm which Dataset this should use.
cumulative_time_in_non_events = Dataset.groupby(['Fly', 'Genotype'])['NonEventTime'].sum().reset_index()

# Plot the data
p_cumulative_time_in_non_events = iqplot.stripbox(data=cumulative_time_in_non_events, q="NonEventTime", cats="Genotype", title="Cumulative time spent not interacting", frame_width= 750, frame_height= 500, tooltips=[("NonEventTime", "@{NonEventTime}"),("Genotype", "@{Genotype}")], spread="jitter")
bokeh.io.show(p_cumulative_time_in_non_events)


In [None]:
bokeh.io.save(
    p_cumulative_time_in_non_events,
    Savepath / "CumulativeTimeNonEvents.html",
    #fmt="html",
)

In [None]:
# For each 'Fly', 'Event', and 'Genotype', get the timestamp of the first frame
first_frame_timestamps = Dataset.groupby(['Fly', 'Event', 'Genotype'])['time'].first().reset_index()

# Plot the data
p_first_frame_timestamps = iqplot.histogram(data=first_frame_timestamps, q="time", cats="Genotype", title="Timestamp of first frame of each event", frame_width= 750, frame_height= 500, )
bokeh.io.show(p_first_frame_timestamps)


In [None]:
bokeh.io.save(
    p_first_frame_timestamps,
    Savepath / "FirstFrameTimestamps.html",
    #fmt="html",
)

### Pulls

In [None]:
# Calculate the difference in 'yball_relative' between the start and end of each event
yball_relative_diff = Dataset.groupby(['Fly', 'Event'])['yball_relative'].apply(lambda x: x.iloc[-1] - x.iloc[0]).reset_index(name='yball_relative_diff')

# Merge yball_relative_diff with Dataset
Dataset = pd.merge(Dataset, yball_relative_diff, on=['Fly', 'Event'], how='left')


In [None]:

Dataset.head()


In [None]:

# Identify whether each event is a 'Push', 'Pull', or 'None'
Dataset['Event_Type'] = np.where(Dataset['yball_relative_diff'] > 10, 'Push', np.where(Dataset['yball_relative_diff'] < -10, 'Pull', 'None'))

# Count the number of each event type for each 'Fly' and 'Genotype'
event_type_counts = Dataset.groupby(['Fly', 'Genotype', 'Event_Type']).size().reset_index(name='Count')

# Count the number of unique events with event type for each fly and genotype
unique_event_type_counts = event_type_counts.groupby(['Fly', 'Genotype'])['Event_Type'].nunique().reset_index(name='Count')

# Filter the DataFrame to include only the 'Pull' events
pull_events = Dataset[Dataset['Event_Type'] == 'Pull']

# Count the number of 'Pull' events for each 'Fly' and 'Genotype'
pull_event_counts = pull_events.groupby(['Fly', 'Genotype']).size().reset_index(name='Count')

# Count the number of unique 'Pull' events for each 'Fly' and 'Genotype'
unique_pull_event_counts = pull_events.groupby(['Fly', 'Genotype'])['Event'].nunique().reset_index(name='Count')

# Plot the data


p_pull_event_counts = iqplot.stripbox(data=unique_pull_event_counts, q="Count", cats="Genotype", title="Number of 'Pull' events", frame_width= 750, frame_height= 500, tooltips=[("Count", "@{Count}"),("Genotype", "@{Genotype}")], spread="jitter")
bokeh.io.show(p_pull_event_counts)



In [None]:
bokeh.io.save(
    p_pull_event_counts,
    Savepath / "PullEventCounts.html",
    #fmt="html",
)

In [None]:
#TODO : Pull and push within event

#TODO : Make a dashboard with all the plots