This notebook explores tracking-data statistics across flies. The first step builds a 'pooled' dataset in which all per-fly dataframes are concatenated into one, while keeping track of each fly's identity and experimental condition.

# Libraries import

In [None]:
import sys
from pathlib import Path
import matplotlib as mpl

mpl.rcParams["figure.figsize"] = (
    10,
    10,
)  # Change figure size including in the jupyter outputs.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import mpmath
import cv2

sys.modules["sympy.mpmath"] = mpmath
from scipy import signal
import datetime
import dateutil
import bokeh.io
import holoviews as hv
from holoviews import opts

hv.extension(
    "bokeh",
    "matplotlib",
)
bokeh.io.output_notebook()

import panel as pn


sys.path.insert(0, "..")
sys.path.insert(0, "../../..")


from Utilities.Utils import *
from Utilities.Processing import *

import black
import jupyter_black

jupyter_black.load()

# Path and data import

Mac Paths

In [None]:
DataPath = [
    Path(
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bowtie"
    ),
    Path(
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bowtie_noFood"
    ),
    Path(
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bar_noFood"
    ),
    Path(
        "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot/MultiMaze_15stepped_gated_bar"
    ),
]

Workstation paths

In [None]:
# Lab-server mount points in order of preference: the Linux workstation mount
# first, then the macOS mount. The first one that exists on this machine is
# used, so this cell gives valid paths on either system.
_server_roots = [Path("/mnt/labserver"), Path("/Volumes/Ramdya-Lab")]
# Fall back to the workstation mount when neither is found (previous behaviour).
_server_root = next(
    (root for root in _server_roots if root.exists()), _server_roots[0]
)

# Folder that holds every Optobot experiment set.
_optobot_dir = (
    _server_root / "DURRIEU_Matthias/Experimental_data/Optogenetics/Optobot"
)

# One entry per experiment folder; "noFood" in the folder name is what the
# loading cell uses to label the food condition.
DataPath = [
    _optobot_dir / "MultiMaze_15stepped_gated_bowtie",
    _optobot_dir / "MultiMaze_15stepped_gated_bowtie_noFood",
    _optobot_dir / "MultiMaze_15stepped_gated_bar_noFood",
    _optobot_dir / "MultiMaze_15stepped_gated_bar",
]

In [None]:
# For each dataset, load the data and collect it for the pooled dataframe.
# Frames are gathered in a list and concatenated once at the end: calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
frames = []
for paths in DataPath:
    # sorted() makes the "Fly N" numbering reproducible; the raw glob order
    # depends on the filesystem.
    for dataset in sorted(paths.glob("**/BallPositions_processed.feather")):
        print(dataset)
        df = pd.read_feather(dataset)
        # Use the "index" column as the frame index and add all-NaN rows where
        # index values are missing, so rows run from 1 to the largest index.
        # The index is deliberately left in place (later cells read it back
        # through pooled.index); the former bare `df.reset_index()` call
        # discarded its result and has been removed.
        df = df.set_index("index").reindex(range(1, df["index"].max() + 1))
        # Running fly number across all datasets.
        df["fly"] = f"Fly {len(frames) + 1}"
        # The food condition is encoded in the dataset path.
        df["food"] = "noFood" if "noFood" in str(dataset) else "Food"
        frames.append(df)

# Number of flies loaded (kept for compatibility with the previous loop counter).
count = len(frames)
# rename_axis(None) leaves the index unnamed, as it was when frames were
# appended to an empty DataFrame; the guard avoids pd.concat([]) raising when
# no dataset was found.
pooled = pd.concat(frames).rename_axis(None) if frames else pd.DataFrame()

# Compute the average ysmooth across flies, grouped by food condition, trial and time

In [None]:
# Get average ysmooth over time grouped by trial and fly

Group = pooled.groupby(["food", "TrialNumber", "Time"])

In [None]:
# numeric_only=True: `pooled` also holds the string column "fly", which cannot
# be averaged and makes mean()/median() raise a TypeError on pandas >= 2.0.
Group_avg = Group.mean(numeric_only=True)
Group_med = Group.median(numeric_only=True)

In [None]:
# Mean ysmooth over time. Curves are split by food condition and trial, then
# the trials are overlaid; "food" remains a key dimension of the result.
curve_style = dict(
    height=1000,
    width=1000,
    invert_yaxis=True,
    color=hv.Palette("Viridis"),
    tools=["hover", "crosshair"],
    muted=True,
)
mean_traces = hv.Curve(
    data=Group_avg, kdims=["Time"], vdims=["ysmooth", "TrialNumber", "food"]
)
Curves = (
    mean_traces.groupby(["food", "TrialNumber"])
    .opts(**curve_style)
    .overlay("TrialNumber")
)
Curves

## Plot average value for ysmooth grouped by trial and fly, with a panel with and without food

In [None]:
# Same mean traces as in the previous cell, but laid out as one panel per food
# condition instead of a single view with a "food" selector.
Curves = (
    hv.Curve(
        data=Group_avg,
        kdims=["Time"],
        vdims=[
            "ysmooth",
            "TrialNumber",
            "food",
        ],
    )
    .groupby(["food", "TrialNumber"])
    .opts(
        height=1000,
        width=1000,
        invert_yaxis=True,
        color=hv.Palette("Viridis"),
        tools=[
            "hover",
            "crosshair",
        ],
        muted=True,
    )
    # One curve per trial within each food condition...
    .overlay("TrialNumber")
    # ...and one panel per remaining "food" value ("Food" / "noFood").
    .layout("food")
)
Curves

## Plot with median instead to get estimates less impacted by outliers

In [None]:
# Median ysmooth over time (Group_med), plotted like the mean traces above:
# curves are split by food condition and trial, then the trials are overlaid.
Curves = (
    hv.Curve(
        data=Group_med,
        kdims=["Time"],
        vdims=[
            "ysmooth",
            "TrialNumber",
            "food",
        ],
    )
    .groupby(["food", "TrialNumber"])
    .opts(
        height=1000,
        width=1000,
        invert_yaxis=True,
        color=hv.Palette("Viridis"),
        tools=[
            "hover",
            "crosshair",
        ],
        muted=True,
    )
    .overlay("TrialNumber")
)
# Bare expression so the notebook renders the plot.
Curves

## Pooled plots

In [None]:
# Pool both food conditions: aggregate per trial and time point only.
Group = pooled.groupby(["TrialNumber", "Time"])
# numeric_only=True skips the string columns ("fly", "food"), which would
# otherwise make mean()/median() raise a TypeError on pandas >= 2.0.
Group_avg = Group.mean(numeric_only=True)
Group_med = Group.median(numeric_only=True)

In [None]:
Curves = (
    hv.Curve(
        data=Group_avg,
        kdims=["Time"],
        vdims=[
            "ysmooth",
            "TrialNumber",
        ],
    )
    .groupby(["TrialNumber"])
    .opts(
        height=1000,
        width=1000,
        invert_yaxis=True,
        color=hv.Palette("Viridis"),
        tools=[
            "hover",
            "crosshair",
        ],
        muted=True,
    )
    .overlay()
)
Curves

In [None]:
hv.save(Curves, "/Users/ulric/Downloads/Average_Pooled.html")

# Plot average trial duration grouped by trial and fly

In [None]:
# Duration of each trial: the largest "Time" value reached by each fly in each
# trial. Selecting the column and calling max() directly gives the same result
# as apply(lambda x: x["Time"].max()) without a Python-level call per group.
Trialdurations = pooled.groupby(["fly", "TrialNumber"])["Time"].max()
# Leave the Series unnamed so that wrapping it in a DataFrame yields column 0,
# which the following cell renames to "duration".
Trialdurations.name = None

In [None]:
# convert Trialdurations to a dataframe and reset index to get fly and TrialNumber as columns
Trialdurations = pd.DataFrame(Trialdurations).reset_index()
# rename column 0 to "duration"
Trialdurations.rename(columns={0: "duration"}, inplace=True)

Trialdurations

In [None]:
# Add column with converted durations in seconds
Trialdurations["duration_s"] = frame2time(
    Trialdurations["duration"], reverse=False, fps=80
)

In [None]:
# Compute the average duration grouped by TrialNumber.
# numeric_only=True skips the string column "fly", which would otherwise make
# mean() raise a TypeError on pandas >= 2.0.
Grouped_avg = Trialdurations.groupby("TrialNumber").mean(numeric_only=True)

In [None]:
box = hv.BoxWhisker(data=Trialdurations, kdims="TrialNumber", vdims="duration_s").opts(
    height=500,
    width=500,
    framewise=True,
    cmap="Viridis",
    xlabel="Trial",
    ylabel="Duration (sec)",
    # ylim=(0, 40),
    # box_color="TrialNumber",
    box_fill_alpha=0,
    # invert_axes=True,
    # invert_yaxis=True,
    # box_line_color="gray",
)

points = hv.Scatter(data=Trialdurations, kdims="TrialNumber", vdims="duration_s").opts(
    framewise=True,
    cmap="Viridis",
    # invert_axes=True,
    # invert_yaxis=True,
    # ylim=(0, 40),
    color="TrialNumber",
    jitter=0.4,
)


box * points

## Grouped by food condition

In [None]:
# Keep only the rows recorded in the "noFood" condition.
pooled_noFood = pooled.loc[pooled["food"] == "noFood"]

# Duration of each trial: the largest "Time" value per (food, fly, trial).
# Column selection + max() replaces apply(lambda x: x["Time"].max()) with the
# equivalent vectorised aggregation.
Trialdurations_food = pooled_noFood.groupby(["food", "fly", "TrialNumber"])[
    "Time"
].max()
# Leave the Series unnamed so that wrapping it in a DataFrame yields column 0,
# which the following cell renames to "duration".
Trialdurations_food.name = None

Trialdurations_food

In [None]:
Trialdurations_food = pd.DataFrame(Trialdurations_food).reset_index()
Trialdurations_food.rename(columns={0: "duration"}, inplace=True)

Trialdurations_food

In [None]:
Trialdurations_food["duration_s"] = frame2time(
    Trialdurations_food["duration"], reverse=False, fps=80
)

Add replicate number to the plot

In [None]:
# find how many unique values there are in the fly column
nflies = len(pooled["fly"].unique())

# same but only for the noFood condition
nflies_noFood = len(pooled[pooled["food"] == "noFood"]["fly"].unique())

nflies_food = nflies - nflies_noFood

replicates = {"food": ["Food", "noFood"], "n": [nflies_food, nflies_noFood]}

In [None]:
box = (
    hv.BoxWhisker(
        data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
    )
    .groupby("food")
    .opts(
        height=1000,
        width=1000,
        framewise=True,
        cmap="Viridis",
        xlabel="Trial",
        ylabel="Duration (sec)",
        # ylim=(0, 40),
        # box_color="TrialNumber",
        box_fill_alpha=0,
        # invert_axes=True,
        # invert_yaxis=True,
        # box_line_color="gray",
    )
)

points = (
    hv.Scatter(
        data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
    )
    .groupby("food")
    .opts(
        framewise=True,
        cmap="Viridis",
        # invert_axes=True,
        # invert_yaxis=True,
        # ylim=(0, 40),
        color="TrialNumber",
        jitter=0.4,
    )
)

# annot = hv.Text()


box * points

implement fly lines

In [None]:
lines = (
    hv.Curve(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "food", "fly"],
    )
    .groupby(["food", "fly"])
    .opts(
        framewise=True,
        color=hv.Palette("Viridis"),
        alpha=0.8,
        line_width=2,
        # invert_axes=True,
        # invert_yaxis=True,
        # ylim=(0, 40),
        # line_color="rank",
        # jitter=0.4,
    )
    .overlay("fly")
)

# Curves = (
#     hv.Curve(
#         data=Group_avg,
#         kdims=["Time"],
#         vdims=[
#             "ysmooth",
#             "TrialNumber",
#         ],
#     )
#     .groupby(["TrialNumber"])
#     .opts(
#         height=1000,
#         width=1000,
#         invert_yaxis=True,
#         color=hv.Palette("Viridis"),
#         tools=[
#             "hover",
#             "crosshair",
#         ],
#         muted=True,
#     )
#     .overlay()
# )

In [None]:
box * lines

In [None]:
hv.help(hv.Curve)

Rank flies by first trial speed

In [None]:
# Rank the flies by their first trial duration.
# Take the first-trial rows, sorted by duration. .copy() makes first_trial an
# independent frame: sorting or adding columns in place on a slice of
# Trialdurations_food triggers SettingWithCopyWarning and may silently act on
# a temporary copy.
first_trial = (
    Trialdurations_food.loc[Trialdurations_food["TrialNumber"] == 1]
    .sort_values(by="duration_s")
    .copy()
)

In [None]:
# add a rank column with the rank of each fly in the first_trial dataframe
first_trial["rank"] = first_trial["duration_s"].rank()
first_trial

In [None]:
Trialdurations_food["rank"] = Trialdurations_food["fly"].map(
    first_trial.set_index("fly")["rank"]
)
Trialdurations_food

In [None]:
Trialdurations_food_sorted = Trialdurations_food.sort_values(by="rank")

In [None]:
Trialdurations_food_sorted

Rename values and labels

In [None]:
# Rename the food values as follow : "Food" -> "Yes", "noFood" -> "No"
# Trialdurations_food["food"] = Trialdurations_food["food"].map(
#     {"Food": "Yes", "noFood": "No"}
# )

In [None]:
box = (
    hv.BoxWhisker(
        data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
    )
    .groupby("food")
    .opts(
        # cmap="RdYlGn",
        box_fill_color="lightgray",
        box_fill_alpha=1,
        # box_line_color="gray",
        outlier_alpha=0,
    )
)

points = (
    hv.Scatter(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "food", "rank"],
    )
    .groupby(["food", "rank"])
    .opts(
        color=hv.Palette("RdYlBu"),
        jitter=0,
        marker="x",
    )
).overlay("rank")

points_solo = (
    hv.Scatter(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=[
            "duration_s",
            "food",
        ],
    )
    .groupby(
        [
            "food",
        ]
    )
    .opts(
        color=hv.Palette("RdYlBu"),
        jitter=0.2,
        # marker="x",
        size=5,
        alpha=0.5,
    )
)

lines2 = (
    hv.Curve(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "food", "rank"],
    )
    .groupby(["food", "rank"])
    .opts(
        color=hv.Palette("RdYlBu"),
        alpha=1,
        line_width=1.5,
        # jitter=0.4,
    )
).overlay("rank")

max_x = Trialdurations_food["TrialNumber"].max()
min_y = Trialdurations_food["duration_s"].min()
min_x = Trialdurations_food["TrialNumber"].min()
max_y = Trialdurations_food["duration_s"].max()

Annotation = hv.Text(max_x - 1, min_y - 100, f" N = {nflies_noFood}")

BoxTrialDuration = (box * lines2 * points * Annotation).opts(
    height=750,
    width=1000,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=2,
)
BoxTrialDuration_Light = (box * points_solo * Annotation).opts(
    height=750,
    width=1000,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=1.5,
)
BoxTrialDuration

In [None]:
# Save the box plot built in the previous cell. `Layout` is only defined in the
# later "Pooled data" section, so referencing it here raised a NameError when
# the notebook was run top to bottom on a fresh kernel.
hv.save(
    BoxTrialDuration,
    "/mnt/labserver/DURRIEU_Matthias/Pictures/FyssenReport/Boxplots_TrialSuccess.png",
    fmt="png",
)

Line is doing something weird

In [None]:
linestest = (
    hv.Curve(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "food", "rank"],
    )
    .groupby(["food", "rank"])
    .opts(
        color=hv.Palette("RdYlGn"),
        alpha=0.8,
        line_width=1,
        # jitter=0.4,
    )
).overlay()

linestest

In [None]:
hv.help(hv.BoxWhisker)

Pooled data

In [None]:
box_pool = hv.BoxWhisker(
    data=Trialdurations_food, kdims=["TrialNumber"], vdims=["duration_s"]
).opts(
    # cmap="RdYlGn",
    box_fill_color="lightgray",
    box_fill_alpha=1,
    # box_line_color="gray",
)

points_pool = (
    hv.Scatter(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "rank"],
    )
    .groupby(["rank"])
    .opts(
        color=hv.Palette("RdYlBu"),
        jitter=0,
        marker="x",
    )
).overlay("rank")


lines2_pool = (
    hv.Curve(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "rank"],
    )
    .groupby(["rank"])
    .opts(
        color=hv.Palette("RdYlBu"),
        alpha=1,
        line_width=1.5,
        # jitter=0.4,
    )
).overlay("rank")

points_solo = hv.Scatter(
    data=Trialdurations_food,
    kdims=["TrialNumber"],
    vdims=[
        "duration_s",
    ],
).opts(
    color=hv.Palette("RdYlBu"),
    jitter=0.2,
    # marker="x",
    size=5,
    alpha=0.5,
)

Annotation = hv.Text(0, -30, f"{nflies_food}")

Layout = (box_pool * lines2_pool * points_pool).opts(
    height=750,
    width=1000,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=1.5,
)

# Layout

Layout_Light = (box_pool * points_solo).opts(
    height=750,
    width=1000,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=1.5,
)

Layout_Light

In [None]:
hv.save(
    Layout,
    "/mnt/labserver/DURRIEU_Matthias/Pictures/230221_LabMeeting/pooled.html",
    fmt="html",
)

# Cumulated plot

In [None]:
# for each fly and each trial, get the index of the first frame of the trial
first_frame = (
    pooled.groupby(["fly", "TrialNumber"])
    .first()
    .reset_index()
    .loc[:, ["fly", "TrialNumber", "frame", "food"]]
)

In [None]:
# Remove the first trial of each fly from first_frame
first_frame = first_frame[first_frame["TrialNumber"] != 1]

In [None]:
# Sort first_frame by frame value
first_frame = first_frame.sort_values(by="frame")

In [None]:
# Build a success counter that increments by 1 at each trial start, counted
# separately for each food condition. Rows are already sorted by frame, so the
# n-th row of a condition is its n-th success. A single counter over all rows
# would mix both conditions into the per-food curves plotted below.
first_frame["cumulated_success"] = first_frame.groupby("food").cumcount() + 1
# Convert frame numbers to seconds (80 fps, the rate passed to frame2time
# elsewhere in this notebook).
first_frame["frametime"] = first_frame["frame"] / 80

In [None]:
# Plot the cumulative success

cumulcurve_first = (
    hv.Curve(
        data=first_frame,
        kdims=["frametime"],
        vdims=[
            "cumulated_success",
            "food",
        ],
    )
    .groupby(
        [
            "food",
        ]
    )
    .opts(
        height=750,
        width=1000,
        # color="black",
        alpha=0.8,
        line_width=1,
        xlabel="Time(s)",
        ylabel="Cumulative success",
        show_grid=True,
        fontscale=1.5,
    )
)
hv.save(
    cumulcurve_first,
    "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Pictures/230316_TeamMeeting/cumulcurve_first.html",
    fmt="html",
)
# cumulcurve_first

In [None]:
# Expose the frame index as a regular column.
pooled["index"] = pooled.index
# Build a column pooled["cumulated_success"] that is the number of the trial minus 1
pooled["cumulated_success"] = pooled["TrialNumber"] - 1
# Fill NaN values with the next known value. Series.bfill() replaces
# fillna(method="bfill"), which is deprecated since pandas 2.1.
pooled["cumulated_success"] = pooled["cumulated_success"].bfill()

# Index converted to seconds (divided by 80, the fps used throughout).
pooled["Time_reel"] = pooled["index"] / 80

In [None]:
cumulcurve = (
    hv.Curve(
        data=pooled,
        kdims=["Time_reel"],
        vdims=["cumulated_success", "food", "fly"],
    )
    .groupby(["food", "fly"])
    .opts(
        height=750,
        width=1000,
        # color="black",
        alpha=0.8,
        line_width=1,
        xlabel="Time(s)",
        ylabel="Cumulative success",
        show_grid=True,
        fontscale=1.5,
    )
).overlay("fly")
hv.save(
    cumulcurve,
    "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Pictures/230316_TeamMeeting/cumulcurve.html",
    fmt="html",
)
# cumulcurve

In [None]:
# Mean of every numeric column per (time point, food condition).
# numeric_only=True skips the string column "fly", which would otherwise make
# mean() raise a TypeError on pandas >= 2.0.
GroupData = (
    pooled.groupby(
        [
            "Time_reel",
            "food",
        ]
    )
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
GroupOps = pooled.groupby(
    [
        "Time_reel",
        "food",
    ]
)

In [None]:
# `GroupTest` was never defined (NameError); the grouped object built in the
# previous cell is `GroupOps`. numeric_only=True skips the string column "fly".
GroupOps.mean(numeric_only=True)

In [None]:
# Build a "ci" column for GroupData by calling draw_bs_ci (imported from the
# Utilities modules) on pooled grouped by "Time_reel" and "food", with n_reps=300.
# NOTE(review): this passes the GroupBy object itself rather than the
# "cumulated_success" values of each group; the next cell applies draw_bs_ci per
# group instead. Confirm whether draw_bs_ci accepts a GroupBy before relying on
# this column.
GroupData["ci"] = draw_bs_ci(
    pooled.groupby(
        [
            "Time_reel",
            "food",
        ]
    ),
    n_reps=300,
)

In [None]:
Confints = GroupOps["cumulated_success"].apply(lambda x: draw_bs_ci(x, n_reps=300))

In [None]:
Confints_process = Confints.reset_index()

In [None]:
# Split values of Confints_process["cumulated_success"] into two columns ci_lower and ci_upper
Confints_process[["ci_lower", "ci_upper"]] = pd.DataFrame(
    Confints_process["cumulated_success"].tolist(), index=Confints_process.index
)

In [None]:
# Copy the confidence-interval bounds from Confints_process into GroupData as
# the columns "ci_lower" and "ci_upper".
# NOTE(review): column assignment aligns on the row index, so this assumes both
# frames list the (Time_reel, food) groups in the same order - confirm.
GroupData["ci_lower"] = Confints_process["ci_lower"]
GroupData["ci_upper"] = Confints_process["ci_upper"]


In [None]:
# Save the confidence intervals in a csv file
Confints.to_csv(
    "/mnt/labserver/DURRIEU_Matthias/Code/UsefulComps/230414_Bootstrapped_Confints_OptobotsBallPushing/Confints.csv"
)

In [None]:
cumulcurve_pool = (
    hv.Curve(
        data=GroupData,
        kdims=["Time_reel"],
        vdims=[
            "cumulated_success",
            "food",
        ],
    )
    .groupby(
        [
            "food",
        ]
    )
    .opts(
        height=750,
        width=1000,
        # color="black",
        alpha=1,
        line_width=2,
        xlabel="Time(s)",
        ylabel="Cumulative success",
        show_grid=True,
        fontscale=1.5,
    )
)

# cumulcurve_pool

In [None]:
# add the confidence intervals to the plot using the holoview area function
cumulcurve_pool = cumulcurve_pool * hv.Area(
    data=GroupData,
    kdims=["Time_reel"],
    vdims=["ci_lower", "ci_upper", "food"],
).groupby(
    [
        "food",
    ]
).opts(
    height=750,
    width=1000,
    # color="black",
    alpha=0.2,
    line_width=1,
    xlabel="Time(s)",
    ylabel="Cumulative success",
    show_grid=True,
    fontscale=1.5,
)

# cumulcurve_pool

In [None]:
# Hide the Area outer lines
cumulcurve_pool.opts(
    opts.Area(
        show_legend=False,
        show_frame=False,
        fill_color="blue",
        line_color="black",  # color of the outer lines
        line_width=0,  # width of the outer lines
    )
)

cumulcurve_pool.layout()

In [None]:
hv.save(
    cumulcurve_pool,
    "/Volumes/Ramdya-Lab/DURRIEU_Matthias/Pictures/230316_TeamMeeting/cumulcurve_pool.html",
    fmt="html",
)

Add error bars

The right element to use here is `hv.Spread`.

First compute bootstrapped confidence interval for the points

Then plot the points with the confidence interval

In [None]:
GroupData.head()

In [None]:
Groups = pooled.groupby(
    [
        "Time_reel",
        "food",
    ]
)
# numeric_only=True skips the string column "fly", which would otherwise make
# mean() raise a TypeError on pandas >= 2.0.
Groups.mean(numeric_only=True)

In [None]:
# Generate SmallSample, a random subset of pooled with 500 rows
# (random_state=1 makes the draw repeatable)
SmallSample = pooled.sample(n=500, random_state=1)
SmallSample

In [None]:
SmallGroup = SmallSample.groupby(
    [
        "Time_reel",
        "food",
    ]
)

In [None]:
# Apply draw_bs_ci function to the pooled dataframe grouped by time_reel and food
bs_ci = SmallGroup["cumulated_success"].apply(draw_bs_ci)

In [None]:
Groups = pooled.groupby(["food"])

In [None]:
Groups

In [None]:
# get average cumulated success on Groups data
cumul_success = Groups["cumulated_success"].mean().reset_index()

cumul_success over time

In [None]:
# Mean of every numeric column per (food condition, time within trial).
# numeric_only=True skips the string column "fly", which would otherwise make
# mean() raise a TypeError on pandas >= 2.0.
Grp_avg = pooled.groupby(["food", "Time"]).mean(numeric_only=True)

Not a good approach. It is better to do the same as above, but with the pooled data.

In [None]:
timesort = pooled.sort_values(by="Time")
timesort

In [None]:
# FIXME: unfinished statement. The original line read
#     timesort[pool_success] =
# with no right-hand side (and `pool_success` is never defined), which is a
# SyntaxError. It is commented out so the notebook can run end to end; decide
# what the column should contain, or delete this cell.

# Recover the data to remake the plots

In [None]:
# For each dataset, load the data and collect it for the pooled dataframe.
# Frames are gathered in a list and concatenated once at the end: calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
frames = []
for paths in DataPath:
    # sorted() makes the "Fly N" numbering reproducible; the raw glob order
    # depends on the filesystem.
    for dataset in sorted(paths.glob("**/BallPositions_processed.feather")):
        # Use the "index" column as the frame index (later cells read it back
        # through pooled.index).
        df = pd.read_feather(dataset).set_index("index")
        # Running fly number across all datasets.
        df["fly"] = f"Fly {len(frames) + 1}"
        # The food condition is encoded in the dataset path.
        df["food"] = "noFood" if "noFood" in str(dataset) else "Food"
        frames.append(df)

# Number of flies loaded (kept for compatibility with the previous loop counter).
count = len(frames)
# rename_axis(None) leaves the index unnamed, as it was when frames were
# appended to an empty DataFrame; the guard avoids pd.concat([]) raising when
# no dataset was found.
pooled = pd.concat(frames).rename_axis(None) if frames else pd.DataFrame()

In [None]:
pooled.head()

In [None]:
# Expose the frame index as a regular column.
pooled["index"] = pooled.index
# Build a column pooled["cumulated_success"] that is the number of the trial minus 1
pooled["cumulated_success"] = pooled["TrialNumber"] - 1
# Fill NaN values with the next known value. Series.bfill() replaces
# fillna(method="bfill"), which is deprecated since pandas 2.1.
pooled["cumulated_success"] = pooled["cumulated_success"].bfill()

# Index converted to seconds (divided by 80, the fps used throughout).
pooled["Time_reel"] = pooled["index"] / 80

pooled.head()

In [None]:
GroupData = (
    pooled.groupby(
        [
            "Time_reel",
            "food",
        ]
    )
    .mean(numeric_only=True)
    .reset_index()
)

GroupOps = pooled.groupby(
    [
        "Time_reel",
        "food",
    ]
)

In [None]:
# import the data from the csv file
Confints = pd.read_csv(
    "/mnt/labserver/DURRIEU_Matthias/Code/UsefulComps/230414_Bootstrapped_Confints_OptobotsBallPushing/Confints.csv"
)

Confints.head()

In [None]:
def convert_string_to_list(s):
    """Parse the text form of a numeric sequence back into a list of floats.

    Accepts both the comma-free form produced when arrays are written to CSV
    (``"[0.5 1.25]"``) and the Python list form (``"[0.5, 1.25]"``).

    Parameters
    ----------
    s : str
        Bracketed numbers separated by whitespace and/or commas.

    Returns
    -------
    list of float
        The parsed values; an empty list for ``"[]"`` or an empty string.

    Raises
    ------
    ValueError
        If an element cannot be converted to ``float``.
    """
    # Strip surrounding whitespace first so the brackets are really at the
    # ends, then treat commas as whitespace so both separators split alike.
    elements = s.strip().strip("[]").replace(",", " ").split()
    return [float(e) for e in elements]


Confints["cumulated_success"] = Confints["cumulated_success"].apply(
    convert_string_to_list
)

Note: the issue here is that when Confints was saved to CSV, the lists were converted to strings and stripped of their commas, so a small hack was needed to recover the data. The data should have been saved in a different format, such as feather or pickle.

In [None]:
Confints_process = Confints.reset_index()

# Split values of Confints_process["cumulated_success"] into two columns ci_lower and ci_upper
Confints_process[["ci_lower", "ci_upper"]] = pd.DataFrame(
    Confints_process["cumulated_success"].tolist(), index=Confints_process.index
)

In [None]:
# Define GroupData["ci_minus"] and GroupData["ci_plus"] as the columns containing the values of Confints
GroupData["ci_lower"] = Confints_process["ci_lower"]
GroupData["ci_upper"] = Confints_process["ci_upper"]

# Define GroupData["ci_minus"] and GroupData["ci_plus"] as the columns containing the values of Confints

In [None]:
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from bokeh.models import HoverTool
from scipy import stats


# Logistic (sigmoid) model used for the curve fit below
def logistic(x, L, k, x0):
    """Evaluate L / (1 + exp(-k * (x - x0))) at x.

    x may be a scalar or an array-like supported by NumPy; L, k and x0 are the
    three parameters adjusted by the fit.
    """
    exponent = -k * (x - x0)
    denominator = 1 + np.exp(exponent)
    return L / denominator


# Filter GroupData to only include rows where 'food' is 'noFood'
GroupData_noFood = GroupData[GroupData["food"] == "noFood"]

# Calculate the number of unique 'flies' in the 'pooled' dataset which have the 'noFood' condition
num_replicates = pooled[pooled["food"] == "noFood"]["fly"].nunique()

# Initial guesses for L, k, x0
p0 = [
    max(GroupData_noFood["cumulated_success"]),
    1,
    np.median(GroupData_noFood["Time_reel"]),
]

# Fit logistic function to data
params, _ = curve_fit(
    logistic,
    GroupData_noFood["Time_reel"],
    GroupData_noFood["cumulated_success"],
    p0,
    maxfev=5000,
)

# params contains the fitted values for L, k, x0
L, k, x0 = params

# Create a new DataFrame for the logistic fit curve
logistic_fit = pd.DataFrame(
    {
        "Time_reel": np.linspace(
            GroupData_noFood["Time_reel"].min(),
            GroupData_noFood["Time_reel"].max(),
            100,
        ),
    }
)

# Calculate y values for the logistic fit curve
logistic_fit["cumulated_success"] = logistic(logistic_fit["Time_reel"], L, k, x0)

# Calculate R-squared for logistic fit curve
y_pred_logistic = logistic(GroupData_noFood["Time_reel"], L, k, x0)
r_squared_logistic = r2_score(GroupData_noFood["cumulated_success"], y_pred_logistic)

# Calculate linear fit for the first half of the data
first_half = GroupData_noFood.iloc[: len(GroupData_noFood) // 2]
slope, intercept, _, _, _ = stats.linregress(
    first_half["Time_reel"], first_half["cumulated_success"]
)

# Create a new DataFrame for the linear fit line
linear_fit = pd.DataFrame(
    {
        "Time_reel": np.linspace(
            GroupData_noFood["Time_reel"].min(),
            GroupData_noFood["Time_reel"].max(),
            100,
        ),
    }
)

# Calculate y values for the linear fit line
linear_fit["cumulated_success"] = slope * linear_fit["Time_reel"] + intercept

# Calculate R-squared for linear fit curve
y_pred_linear = slope * GroupData_noFood["Time_reel"] + intercept
r_squared_linear = r2_score(GroupData_noFood["cumulated_success"], y_pred_linear)

# Calculate center x value and min and maximum xy value
# The midpoint of the time axis is (min + max) / 2; the previous
# (max - min) / 2 is half the span and is only correct when min is 0.
center_x = (
    GroupData_noFood["Time_reel"].min() + GroupData_noFood["Time_reel"].max()
) / 2
# Highest value reached by either the data or the logistic fit.
max_y = max(
    max(GroupData_noFood["cumulated_success"]), max(logistic_fit["cumulated_success"])
)

max_x = GroupData_noFood["Time_reel"].max()
min_y = GroupData_noFood["cumulated_success"].min()

# Create your plot using GroupData_noFood instead of GroupData and add the fits and annotations
cumulcurve_pool = (
    hv.Curve(
        data=GroupData_noFood, kdims=["Time_reel"], vdims=["cumulated_success", "food"]
    )
    .groupby(["food"])
    .opts(
        height=1000,
        width=1200,
        alpha=1,
        line_width=2,
        xlabel="Time(s)",
        ylabel="",
        show_grid=True,
        fontscale=3,
        title="",
    )
    * hv.Area(
        data=GroupData_noFood,
        kdims=["Time_reel"],
        vdims=["ci_lower", "ci_upper", "food"],
    )
    .groupby(["food"])
    .opts(height=750, width=1000, alpha=0.2, line_width=1)
    * hv.Curve(
        data=logistic_fit, kdims=["Time_reel"], vdims=["cumulated_success"]
    ).opts(color="green")
    # * hv.Text(
    #     center_x, max_y - 1, f"Logistic fit R-squared: {r_squared_logistic:.2f}"
    # ).opts(text_color="green")
    * hv.Curve(data=linear_fit, kdims=["Time_reel"], vdims=["cumulated_success"]).opts(
        color="red"
    )
    # * hv.Text(center_x, max_y, f"Linear fit R-squared: {r_squared_linear:.2f}").opts(
    #     text_color="red"
    # )
    # * hv.Text(max_x - 100, min_y * num_replicates, f"N = {num_replicates}")
).opts(
    opts.Area(
        show_legend=False,
        show_frame=False,
        fill_color="blue",
        line_color="black",
        line_width=0,
    )
)

cumulcurve_pool.layout()

In [None]:
# Save the plot as a png
hv.save(
    cumulcurve_pool,
    "/mnt/labserver/DURRIEU_Matthias/Pictures/240426_PSRI/cumulcurve_pool_wFits.png",
    fmt="png",
)

## Boxplots

In [None]:
# Keep only the rows recorded in the "noFood" condition.
pooled_noFood = pooled.loc[pooled["food"] == "noFood"]

# Duration of each trial: the largest "Time" value per (food, fly, trial).
# Column selection + max() replaces apply(lambda x: x["Time"].max()) with the
# equivalent vectorised aggregation.
Trialdurations_food = pooled_noFood.groupby(["food", "fly", "TrialNumber"])[
    "Time"
].max()
# Leave the Series unnamed so that wrapping it in a DataFrame yields column 0,
# which the following cell renames to "duration".
Trialdurations_food.name = None

Trialdurations_food

In [None]:
Trialdurations_food = pd.DataFrame(Trialdurations_food).reset_index()
Trialdurations_food.rename(columns={0: "duration"}, inplace=True)

Trialdurations_food

In [None]:
Trialdurations_food["duration_s"] = frame2time(
    Trialdurations_food["duration"], reverse=False, fps=80
)

Add replicate number to the plot

In [None]:
# find how many unique values there are in the fly column
nflies = len(pooled["fly"].unique())

# same but only for the noFood condition
nflies_noFood = len(pooled[pooled["food"] == "noFood"]["fly"].unique())

nflies_food = nflies - nflies_noFood

replicates = {"food": ["Food", "noFood"], "n": [nflies_food, nflies_noFood]}

In [None]:
box = (
    hv.BoxWhisker(
        data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
    )
    .groupby("food")
    .opts(
        height=1000,
        width=1000,
        framewise=True,
        cmap="Viridis",
        xlabel="Trial",
        ylabel="Duration (sec)",
        # ylim=(0, 40),
        # box_color="TrialNumber",
        box_fill_alpha=0,
        # invert_axes=True,
        # invert_yaxis=True,
        # box_line_color="gray",
    )
)

points = (
    hv.Scatter(
        data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
    )
    .groupby("food")
    .opts(
        framewise=True,
        cmap="Viridis",
        # invert_axes=True,
        # invert_yaxis=True,
        # ylim=(0, 40),
        color="TrialNumber",
        jitter=0.4,
    )
)

# annot = hv.Text()


box * points

implement fly lines

In [None]:
lines = (
    hv.Curve(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "food", "fly"],
    )
    .groupby(["food", "fly"])
    .opts(
        framewise=True,
        color=hv.Palette("Viridis"),
        alpha=0.8,
        line_width=2,
        # invert_axes=True,
        # invert_yaxis=True,
        # ylim=(0, 40),
        # line_color="rank",
        # jitter=0.4,
    )
    .overlay("fly")
)

# Curves = (
#     hv.Curve(
#         data=Group_avg,
#         kdims=["Time"],
#         vdims=[
#             "ysmooth",
#             "TrialNumber",
#         ],
#     )
#     .groupby(["TrialNumber"])
#     .opts(
#         height=1000,
#         width=1000,
#         invert_yaxis=True,
#         color=hv.Palette("Viridis"),
#         tools=[
#             "hover",
#             "crosshair",
#         ],
#         muted=True,
#     )
#     .overlay()
# )

In [None]:
box * lines

In [None]:
hv.help(hv.Curve)

Rank flies by first trial speed

In [None]:
# Rank the flies by their first trial duration.
# Take the first-trial rows, sorted by duration. .copy() makes first_trial an
# independent frame: sorting or adding columns in place on a slice of
# Trialdurations_food triggers SettingWithCopyWarning and may silently act on
# a temporary copy.
first_trial = (
    Trialdurations_food.loc[Trialdurations_food["TrialNumber"] == 1]
    .sort_values(by="duration_s")
    .copy()
)

In [None]:
# add a rank column with the rank of each fly in the first_trial dataframe
first_trial["rank"] = first_trial["duration_s"].rank()
first_trial

In [None]:
Trialdurations_food["rank"] = Trialdurations_food["fly"].map(
    first_trial.set_index("fly")["rank"]
)
Trialdurations_food

In [None]:
Trialdurations_food_sorted = Trialdurations_food.sort_values(by="rank")

In [None]:
Trialdurations_food_sorted

Rename values and labels

In [None]:
# Rename the food values as follow : "Food" -> "Yes", "noFood" -> "No"
# Trialdurations_food["food"] = Trialdurations_food["food"].map(
#     {"Food": "Yes", "noFood": "No"}
# )

In [None]:
box = (
    hv.BoxWhisker(
        data=Trialdurations_food, kdims=["TrialNumber", "food"], vdims=["duration_s"]
    )
    .groupby("food")
    .opts(
        # cmap="RdYlGn",
        box_fill_color="lightgray",
        box_fill_alpha=1,
        # box_line_color="gray",
        outlier_alpha=0,
    )
)

points = (
    hv.Scatter(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "food", "rank"],
    )
    .groupby(["food", "rank"])
    .opts(
        color=hv.Palette("RdYlBu"),
        jitter=0,
        marker="x",
    )
).overlay("rank")

points_solo = (
    hv.Scatter(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=[
            "duration_s",
            "food",
        ],
    )
    .groupby(
        [
            "food",
        ]
    )
    .opts(
        color=hv.Palette("RdYlBu"),
        jitter=0.2,
        # marker="x",
        size=5,
        alpha=0.5,
    )
)

lines2 = (
    hv.Curve(
        data=Trialdurations_food,
        kdims=["TrialNumber"],
        vdims=["duration_s", "food", "rank"],
    )
    .groupby(["food", "rank"])
    .opts(
        color=hv.Palette("RdYlBu"),
        alpha=1,
        line_width=1.5,
        # jitter=0.4,
    )
).overlay("rank")

max_x = Trialdurations_food["TrialNumber"].max()
min_y = Trialdurations_food["duration_s"].min()
min_x = Trialdurations_food["TrialNumber"].min()
max_y = Trialdurations_food["duration_s"].max()

Annotation = hv.Text(max_x - 1, min_y - 100, f" N = {nflies_noFood}")

BoxTrialDuration = (box * lines2 * points).opts(  # * Annotation
    height=1000,
    width=1200,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=3,
)
BoxTrialDuration_Light = (box * points_solo).opts(  # * Annotation
    height=750,
    width=1000,
    framewise=True,
    xlabel="Trial",
    ylabel="Duration (sec)",
    show_grid=True,
    fontscale=1.5,
)
BoxTrialDuration

In [None]:
# Save the plot as a png
hv.save(
    BoxTrialDuration,
    "/mnt/labserver/DURRIEU_Matthias/Pictures/240426_PSRI/TimeBoxplots.png",
    fmt="png",
)

## Fyssen figure panel

In this part I make the learning figure for the Fyssen report.

In [None]:
# Make a two-panel figure: A) the trial-duration boxplots (BoxTrialDuration) and
# B) the cumulative-success curve with the fitted logistic and linear functions
# (cumulcurve_pool)

# Define the panel: one column per sub-figure, each with an HTML letter label
# above its plot
Fig = pn.Row(
    pn.Column(
        pn.pane.HTML(
            "<div style='font-size:25px; position:relative; top:10px; left:10px;'><b>A)</b></div>"
        ),
        pn.panel(BoxTrialDuration),
    ),
    pn.Column(
        pn.pane.HTML(
            "<div style='font-size:25px; position:relative; top:10px; left:10px;'><b>B)</b></div>"
        ),
        pn.panel(cumulcurve_pool),
    ),
)

Fig

In [None]:
# Define the panel
Fig_png = pn.Row(
    pn.Column(
        pn.pane.HTML(
            "<div style='font-size:25px; position:relative; top:10px; left:10px;'><b>A)</b></div>"
        ),
        pn.panel(BoxTrialDuration.opts(toolbar=None)),
    ),
    pn.Column(
        pn.pane.HTML(
            "<div style='font-size:25px; position:relative; top:10px; left:10px;'><b>B)</b></div>"
        ),
        pn.panel(cumulcurve_pool.opts(toolbar=None)),
    ),
)

Fig_png.save("/mnt/labserver/DURRIEU_Matthias/Pictures/FyssenReport/Fig3.png")