This notebook explores tracking-data statistics across flies. The first step builds a 'pooled' dataset in which all the per-recording dataframes are concatenated into one, while keeping track of each fly's identity and condition.

# Libraries import

In [None]:
import sys
from pathlib import Path
import matplotlib as mpl

mpl.rcParams["figure.figsize"] = (
    10,
    10,
)  # Change figure size including in the jupyter outputs.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import mpmath
import cv2

sys.modules["sympy.mpmath"] = mpmath
from scipy import signal
import datetime
import dateutil
import bokeh.io
import holoviews as hv
from holoviews import opts

hv.extension(
    "bokeh",
    "matplotlib",
)
bokeh.io.output_notebook()

import panel as pn


sys.path.insert(0, "..")

from Utilities.Utils import *
from Utilities.Processing import *

import black
import jupyter_black

jupyter_black.load()

# Path and data import

Mac Paths

In [None]:
# Dataset folders as seen from macOS (lab share mounted under /Volumes).
DataPath = [
    Path(folder)
    for folder in (
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bowtie",
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bowtie_noFood",
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bar_noFood",
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bar",
    )
]

Workstation paths

In [None]:
# Dataset folders as seen from the workstation (lab share mounted under /mnt/labserver).
# NOTE(review): on a top-to-bottom run this assignment replaces the Mac paths
# defined in the previous cell; run only the cell matching the current machine.
DataPath = [
    Path(folder)
    for folder in (
        "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bowtie",
        "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bowtie_noFood",
        "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bar_noFood",
        "/mnt/labserver/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bar",
    )
]

In [None]:
# Load every processed tracking file found under the dataset folders and pool
# them into one dataframe. Each recording is tagged with a fly identifier
# ("Fly 1", "Fly 2", ...) and a food condition derived from its path.
recordings = []
for paths in DataPath:
    # Sorting makes the fly numbering reproducible; glob order is not guaranteed.
    for dataset in sorted(paths.glob("**/BallPositions_processed.feather")):
        df = pd.read_feather(dataset)
        df["fly"] = f"Fly {len(recordings) + 1}"
        # Recordings stored under a path containing "noFood" had no food.
        df["food"] = "noFood" if "noFood" in str(dataset) else "Food"
        recordings.append(df)

# Concatenate once: DataFrame.append was removed in pandas 2.0 and copied the
# whole pooled frame on every iteration. Row indexes are kept, as before.
pooled = pd.concat(recordings) if recordings else pd.DataFrame()

# Compute the average ysmooth over time, grouped by food condition and trial

In [None]:
# Group the pooled samples by food condition, trial number and time point.
# "fly" is not a key, so aggregating these groups combines all flies.
Group = pooled.groupby(by=["food", "TrialNumber", "Time"])

In [None]:
# Mean and median of every numeric column per (food, trial, time) group.
# numeric_only=True skips the string "fly" column, which otherwise makes
# mean()/median() raise a TypeError on pandas >= 2.0.
Group_avg = Group.mean(numeric_only=True)
Group_med = Group.median(numeric_only=True)

In [None]:
# Mean ysmooth against time: one curve per trial, overlaid, for each food condition.
curve_opts = dict(
    height=1000,
    width=1000,
    invert_yaxis=True,
    color=hv.Palette("Viridis"),
    tools=["hover", "crosshair"],
    muted=True,
)
mean_curve = hv.Curve(
    data=Group_avg, kdims=["Time"], vdims=["ysmooth", "TrialNumber", "food"]
)
Curves = (
    mean_curve.groupby(["food", "TrialNumber"])
    .opts(**curve_opts)
    .overlay("TrialNumber")
)
Curves

## Plot the average ysmooth per trial, with one panel per food condition (with and without food)

In [None]:
# Mean ysmooth against time, one overlaid curve per trial for each food condition.
# NOTE(review): this cell builds exactly the same object as the previous cell;
# the section title suggests a side-by-side layout per food condition was intended.
avg_by_trial = hv.Curve(
    data=Group_avg,
    kdims=["Time"],
    vdims=["ysmooth", "TrialNumber", "food"],
).groupby(["food", "TrialNumber"])
Curves = avg_by_trial.opts(
    height=1000,
    width=1000,
    invert_yaxis=True,
    color=hv.Palette("Viridis"),
    tools=["hover", "crosshair"],
    muted=True,
).overlay("TrialNumber")
Curves

## Plot with median instead to get estimates less impacted by outliers

In [None]:
# Same figure as the mean version, built from the per-group medians instead.
median_opts = dict(
    height=1000,
    width=1000,
    invert_yaxis=True,
    color=hv.Palette("Viridis"),
    tools=["hover", "crosshair"],
    muted=True,
)
median_curve = hv.Curve(
    data=Group_med, kdims=["Time"], vdims=["ysmooth", "TrialNumber", "food"]
)
Curves = (
    median_curve.groupby(["food", "TrialNumber"])
    .opts(**median_opts)
    .overlay("TrialNumber")
)
Curves

## Pooled plots

In [None]:
# Pool both food conditions: group by trial and time point only.
# NOTE: this rebinds Group, Group_avg and Group_med from the per-food section.
Group = pooled.groupby(["TrialNumber", "Time"])
# numeric_only=True skips the string "fly" and "food" columns, which otherwise
# make mean()/median() raise a TypeError on pandas >= 2.0.
Group_avg = Group.mean(numeric_only=True)
Group_med = Group.median(numeric_only=True)

In [None]:
# Mean ysmooth against time with both food conditions pooled: one curve per trial.
pooled_curve = hv.Curve(
    data=Group_avg, kdims=["Time"], vdims=["ysmooth", "TrialNumber"]
).groupby(["TrialNumber"])
Curves = pooled_curve.opts(
    height=1000,
    width=1000,
    invert_yaxis=True,
    color=hv.Palette("Viridis"),
    tools=["hover", "crosshair"],
    muted=True,
).overlay()
Curves

In [None]:
# Export the pooled average figure as a standalone HTML file.
# NOTE(review): absolute, user-specific destination; adjust before running elsewhere.
average_pooled_html = "/Users/ulric/Downloads/Average_Pooled.html"
hv.save(Curves, average_pooled_html)

# Plot average trial duration grouped by trial and fly

In [None]:
# Duration of a trial = largest "Time" value recorded for that fly and trial.
# The result is an unnamed Series; the next cell relies on that when it renames column 0.
Trialdurations = pooled.groupby(["fly", "TrialNumber"]).apply(
    lambda trial: trial["Time"].max()
)

In [None]:
# Turn the per-(fly, trial) Series into a flat table: the index levels become
# the "fly" and "TrialNumber" columns and the values column (named 0) becomes "duration".
Trialdurations = (
    pd.DataFrame(Trialdurations).reset_index().rename(columns={0: "duration"})
)

Trialdurations

In [None]:
# Convert the durations to seconds with frame2time at 80 fps.
# frame2time is not defined in this notebook; presumably it comes from the Utilities star imports.
duration_frames = Trialdurations["duration"]
Trialdurations["duration_s"] = frame2time(duration_frames, reverse=False, fps=80)

In [None]:
# Average duration per trial number across flies.
# numeric_only=True skips the string "fly" column, which otherwise makes
# mean() raise a TypeError on pandas >= 2.0.
Grouped_avg = Trialdurations.groupby("TrialNumber").mean(numeric_only=True)

In [None]:
# Distribution of trial durations per trial (box plot) with the individual
# per-fly values scattered on top.
duration_dims = dict(data=Trialdurations, kdims="TrialNumber", vdims="duration_s")

box = hv.BoxWhisker(**duration_dims).opts(
    height=500,
    width=500,
    framewise=True,
    cmap="Viridis",
    xlabel="Trial",
    ylabel="Duration (sec)",
    box_fill_alpha=0,
)

points = hv.Scatter(**duration_dims).opts(
    framewise=True,
    cmap="Viridis",
    color="TrialNumber",
    jitter=0.4,
)

box * points

## Grouped by food condition

In [None]:
# Trial duration (largest "Time" value) per food condition, fly and trial.
# The result is an unnamed Series; the next cell relies on that when it renames column 0.
by_food_fly_trial = pooled.groupby(["food", "fly", "TrialNumber"])
Trialdurations_food = by_food_fly_trial.apply(lambda trial: trial["Time"].max())

In [None]:
# Flatten to a table with "food", "fly", "TrialNumber" and "duration" columns.
Trialdurations_food = (
    pd.DataFrame(Trialdurations_food).reset_index().rename(columns={0: "duration"})
)

Trialdurations_food

In [None]:
# Convert the durations to seconds with frame2time at 80 fps.
food_duration_frames = Trialdurations_food["duration"]
Trialdurations_food["duration_s"] = frame2time(
    food_duration_frames, reverse=False, fps=80
)

Add replicate number to the plot

In [None]:
# Number of distinct flies overall and in the noFood condition; the Food count
# is the remainder.
nflies = pooled["fly"].nunique()
nflies_noFood = pooled.loc[pooled["food"] == "noFood", "fly"].nunique()
nflies_food = nflies - nflies_noFood

# Replicate counts per condition.
replicates = dict(food=["Food", "noFood"], n=[nflies_food, nflies_noFood])

In [None]:
# Box plot of trial durations per trial, split by food condition, with the
# individual values scattered on top.
box_by_food = hv.BoxWhisker(
    data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
).groupby("food")
box = box_by_food.opts(
    height=1000,
    width=1000,
    framewise=True,
    cmap="Viridis",
    xlabel="Trial",
    ylabel="Duration (sec)",
    box_fill_alpha=0,
)

points_by_food = hv.Scatter(
    data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
).groupby("food")
points = points_by_food.opts(
    framewise=True,
    cmap="Viridis",
    color="TrialNumber",
    jitter=0.4,
)

box * points

Add one line per fly

In [None]:
# One duration-vs-trial line per fly, overlaid, for each food condition.
fly_curves = hv.Curve(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s", "food", "fly"],
).groupby(["food", "fly"])
lines = fly_curves.opts(
    framewise=True,
    color=hv.Palette("Viridis"),
    alpha=0.8,
    line_width=2,
).overlay("fly")

In [None]:
# Display the per-food box plot with the per-fly lines drawn on top.
box * lines

In [None]:
# Prints the holoviews help for Curve.
# NOTE(review): looks like leftover exploration; consider removing from the final notebook.
hv.help(hv.Curve)

Rank flies by first trial speed

In [None]:
# Rank the flies by how long their first trial lasted: keep only trial 1, then
# order the rows by duration.
# Filtering and sorting in one chain returns a new dataframe, instead of sorting
# a filtered slice of Trialdurations_food in place (SettingWithCopyWarning).
is_first_trial = Trialdurations_food["TrialNumber"] == 1
first_trial = Trialdurations_food[is_first_trial].sort_values(by="duration_s")

In [None]:
# Add a "rank" column holding each fly's rank by first-trial duration.
# assign() returns a new dataframe, so no column is written onto a filtered
# slice (avoids SettingWithCopyWarning) and the cell can be re-run safely.
first_trial = first_trial.assign(rank=first_trial["duration_s"].rank())
first_trial

In [None]:
# Copy each fly's first-trial rank onto all of its rows.
rank_by_fly = first_trial.set_index("fly")["rank"]
Trialdurations_food["rank"] = Trialdurations_food["fly"].map(rank_by_fly)
Trialdurations_food

In [None]:
# Rows ordered by the first-trial rank of their fly.
Trialdurations_food_sorted = Trialdurations_food.sort_values("rank")

In [None]:
# Display the rank-sorted durations table.
Trialdurations_food_sorted

Rename values and labels

In [None]:
# Rename the food values as follow : "Food" -> "Yes", "noFood" -> "No"
# Trialdurations_food["food"] = Trialdurations_food["food"].map(
#     {"Food": "Yes", "noFood": "No"}
# )


In [None]:
# Trial durations split by food condition: grey boxes per trial, plus one set
# of markers and one line per first-trial rank.
box = hv.BoxWhisker(
    data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
).groupby("food").opts(
    box_fill_color="lightgray",
    box_fill_alpha=1,
    outlier_alpha=0,
)

# Markers grouped by rank, without jitter.
ranked_scatter = hv.Scatter(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s", "food", "rank"],
).groupby(["food", "rank"])
points = ranked_scatter.opts(
    color=hv.Palette("RdYlBu"),
    jitter=0,
    marker="x",
).overlay("rank")

# Lighter alternative: all values as jittered, semi-transparent dots.
points_solo = hv.Scatter(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s", "food"],
).groupby(["food"]).opts(
    color=hv.Palette("RdYlBu"),
    jitter=0.2,
    size=5,
    alpha=0.5,
)

# One line per rank, using the same palette as the markers.
ranked_curves = hv.Curve(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s", "food", "rank"],
).groupby(["food", "rank"])
lines2 = ranked_curves.opts(
    color=hv.Palette("RdYlBu"),
    alpha=1,
    line_width=1.5,
).overlay("rank")

# Text element holding the number of flies in the Food condition (not added to the layouts below).
Annotation = hv.Text(0, -30, f"{nflies_food}")

# Figure options shared by the full and light versions.
figure_opts = dict(
    height=750,
    width=1000,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=1.5,
)
Layout = (box * lines2 * points).opts(**figure_opts)
Layout_Light = (box * points_solo).opts(**figure_opts)
Layout

In [None]:
# Export the per-food figure as a standalone HTML file.
# NOTE(review): absolute workstation path; adjust before running elsewhere.
split_food_html = (
    "/mnt/labserver/DURRIEU_Matthias/Pictures/230221_LabMeeting/SplitFood.html"
)
hv.save(Layout, split_food_html, fmt="html")

Line is doing something weird

In [None]:
# Debugging view: the per-rank lines alone, overlaid without naming a dimension.
test_curves = hv.Curve(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s", "food", "rank"],
).groupby(["food", "rank"])
linestest = test_curves.opts(
    color=hv.Palette("RdYlGn"),
    alpha=0.8,
    line_width=1,
).overlay()

linestest

In [None]:
# Prints the holoviews help for BoxWhisker.
# NOTE(review): looks like leftover exploration; consider removing from the final notebook.
hv.help(hv.BoxWhisker)

Pooled data

In [None]:
# Same figure as the per-food version, with both food conditions pooled.
box_pool = hv.BoxWhisker(
    data=Trialdurations_food, kdims=["TrialNumber"], vdims=["duration_s"]
).opts(
    box_fill_color="lightgray",
    box_fill_alpha=1,
)

# Markers grouped by first-trial rank, without jitter.
ranked_scatter_pool = hv.Scatter(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s", "rank"],
).groupby(["rank"])
points_pool = ranked_scatter_pool.opts(
    color=hv.Palette("RdYlBu"),
    jitter=0,
    marker="x",
).overlay("rank")

# One line per rank.
ranked_curves_pool = hv.Curve(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s", "rank"],
).groupby(["rank"])
lines2_pool = ranked_curves_pool.opts(
    color=hv.Palette("RdYlBu"),
    alpha=1,
    line_width=1.5,
).overlay("rank")

# NOTE: rebinds points_solo, Annotation, Layout and Layout_Light from the per-food section.
points_solo = hv.Scatter(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=["duration_s"],
).opts(
    color=hv.Palette("RdYlBu"),
    jitter=0.2,
    size=5,
    alpha=0.5,
)

# Text element holding the number of flies in the Food condition (not added to the layouts below).
Annotation = hv.Text(0, -30, f"{nflies_food}")

# Figure options shared by the full and light versions.
pooled_figure_opts = dict(
    height=750,
    width=1000,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=1.5,
)
Layout = (box_pool * lines2_pool * points_pool).opts(**pooled_figure_opts)
Layout_Light = (box_pool * points_solo).opts(**pooled_figure_opts)

Layout_Light

In [None]:
# Export the pooled figure as a standalone HTML file.
# NOTE(review): absolute workstation path; adjust before running elsewhere.
pooled_html = "/mnt/labserver/DURRIEU_Matthias/Pictures/230221_LabMeeting/pooled.html"
hv.save(Layout, pooled_html, fmt="html")

# Cumulated plot

In [None]:
# First row of every (fly, trial) group, reduced to the columns needed for the
# cumulative plot.
trial_starts = pooled.groupby(["fly", "TrialNumber"]).first().reset_index()
first_frame = trial_starts[["fly", "TrialNumber", "frame", "food"]]

In [None]:
# Remove the first trial of each fly from first_frame
first_frame = first_frame[first_frame["TrialNumber"] != 1]

In [None]:
# Sort first_frame by frame value
first_frame = first_frame.sort_values(by="frame")

In [None]:
# Build a column incrementing by 1 for each value of frame
first_frame["cumulated_success"] = range(1, len(first_frame) + 1)
first_frame["frametime"] = first_frame["frame"] / 80

In [None]:
# Cumulative success against time, one curve per food condition, saved to HTML.
success_curve = hv.Curve(
    data=first_frame,
    kdims=["frametime"],
    vdims=["cumulated_success", "food"],
).groupby(["food"])
cumulcurve_first = success_curve.opts(
    height=750,
    width=1000,
    alpha=0.8,
    line_width=1,
    xlabel="Time(s)",
    ylabel="Cumulative success",
    show_grid=True,
    fontscale=1.5,
)
# NOTE(review): absolute macOS path; adjust before running elsewhere.
cumulcurve_first_html = "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Pictures/230316_TeamMeeting/cumulcurve_first.html"
hv.save(cumulcurve_first, cumulcurve_first_html, fmt="html")

In [None]:
# Create a column that for each fly increments by 1 each time a new trial starts in pooled
pooled["cumulated_success"] = pooled["TrialNumber"] - 1
pooled["Time_reel"] = pooled["index"] / 80

In [None]:
# Cumulative success against time, one overlaid curve per fly for each food
# condition, saved to HTML.
per_fly_curve = hv.Curve(
    data=pooled,
    kdims=["Time_reel"],
    vdims=["cumulated_success", "food", "fly"],
).groupby(["food", "fly"])
cumulcurve = per_fly_curve.opts(
    height=750,
    width=1000,
    alpha=0.8,
    line_width=1,
    xlabel="Time(s)",
    ylabel="Cumulative success",
    show_grid=True,
    fontscale=1.5,
).overlay("fly")
# NOTE(review): absolute macOS path; adjust before running elsewhere.
cumulcurve_html = "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Pictures/230316_TeamMeeting/cumulcurve.html"
hv.save(cumulcurve, cumulcurve_html, fmt="html")

In [None]:
# Mean of every numeric column per time point and food condition, across flies.
# numeric_only=True skips the string "fly" column, which otherwise makes
# mean() raise a TypeError on pandas >= 2.0.
GroupData = (
    pooled.groupby(["Time_reel", "food"]).mean(numeric_only=True).reset_index()
)

In [None]:
# Mean cumulative success against time, one curve per food condition, saved to HTML.
mean_success_curve = hv.Curve(
    data=GroupData,
    kdims=["Time_reel"],
    vdims=["cumulated_success", "food"],
).groupby(["food"])
cumulcurve_pool = mean_success_curve.opts(
    height=750,
    width=1000,
    alpha=0.8,
    line_width=1,
    xlabel="Time(s)",
    ylabel="Cumulative success",
    show_grid=True,
    fontscale=1.5,
)
# NOTE(review): absolute macOS path; adjust before running elsewhere.
cumulcurve_pool_html = "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Pictures/230316_TeamMeeting/cumulcurve_pool.html"
hv.save(cumulcurve_pool, cumulcurve_pool_html, fmt="html")

Add error bars

The right element to use here is `hv.Spread`.

First compute bootstrapped confidence interval for the points

Then plot the points with the confidence interval

In [None]:
# Group the pooled samples by time point and food condition.
Groups = pooled.groupby(by=["Time_reel", "food"])
Groups

In [None]:
# Apply draw_bs_ci function to the pooled dataframe grouped by time_reel and food
bs_ci = Groups["cumulated_success"].apply(draw_bs_ci)

In [None]:
# Regroup by food condition only (rebinds Groups from the cells above).
Groups = pooled.groupby(by=["food"])

In [None]:
# Display the groupby object (shows only its repr, not the grouped data).
Groups

In [None]:
# get average cumulated success on Groups data
cumul_success = Groups["cumulated_success"].mean().reset_index()

cumul_success over time

In [None]:
# Mean of every numeric column per food condition and time point.
# numeric_only=True skips the string "fly" column, which otherwise makes
# mean() raise a TypeError on pandas >= 2.0.
Grp_avg = pooled.groupby(["food", "Time"]).mean(numeric_only=True)

Not a good approach. Better is just do the same as above but with the pooled data

In [None]:
# Pooled samples ordered by their "Time" value.
timesort = pooled.sort_values("Time")
timesort

In [None]:
timesort[pool_success] = 