# Summary

Here we test the tools developed in Utilities/Optobot_utils.py. We then use these tools to analyse Irene's optobot experiments.

# Package imports

In [None]:
import sys
import importlib
from pathlib import Path
import numpy as np
sys.path.insert(0, "../Utilities")

import pickle
import pandas as pd

# sys.path.append("/home/durrieu/Tracking_Analysis/Utilities")
# sys.path.append("/Users/ulric/git/Tracking_Analysis/Utilities")
import Utils
import Optobot_utils

import holoviews as hv
hv.extension('bokeh')

# Import holoviews template
import HoloviewsTemplates

In [None]:
importlib.reload(Optobot_utils)

# Accessing the experiments

We first develop a function to find the experiments in the main directory. We then use a custom class to load the experiments, including metadata and DLC data.

In [None]:
# Root folder of Irene's optobot recordings on the lab server.
optobot_root = Utils.get_labserver() / "Experimental_data" / "Irene_Optobot"

# Collect the experiments found under that root.
Exps = Optobot_utils.find_experiments(optobot_root)

#Exps

In [None]:
TestFly = Exps[0]

In [None]:
# Load experiment_dict.npy from the experiment folder.
# allow_pickle=True lets NumPy restore the pickled Python object stored in the
# file, and .item() unwraps the single element of the loaded array.
exp_dict_path = TestFly / "experiment_dict.npy"
exp_dict = np.load(exp_dict_path, allow_pickle=True).item()

exp_dict

In [None]:
exp_dict['fps']

In [None]:
exp_dict["fly0"]

In [None]:
# genotype_dict.npy sits two directory levels above the experiment folder.
genotype_dict_path = TestFly.parent.parent / "genotype_dict.npy"
gen_dict = np.load(genotype_dict_path, allow_pickle=True).item()

gen_dict

In [None]:
# Find the .pkl files in the experiment folder. glob() yields entries in an
# arbitrary, filesystem-dependent order, so sort them to make "first" reproducible.
pkl_files = sorted(TestFly.glob("*.pkl"))

# Fail with an explicit message instead of an opaque IndexError when none exist.
if not pkl_files:
    raise FileNotFoundError(f"No .pkl file found in {TestFly}")

# Get the first .pkl file
pkl_file = pkl_files[0]

In [None]:
# Load the .pkl file.
# NOTE: pickle.load can execute arbitrary code, so only use it on trusted files
# (here, a file found in the experiment folder).
with open(pkl_file, "rb") as pkl_handle:  # avoid shadowing the built-in input()
    data = pickle.load(pkl_handle)

In [None]:
# Read the same .pkl file with pandas; this rebinds `data`, replacing whatever
# it was bound to before.
data = pd.read_pickle(pkl_file)

# Display the loaded object
data

In [None]:
# Flatten the column MultiIndex by discarding its outermost level, then turn
# the row index into ordinary column(s).
data.columns = data.columns.droplevel(0)
data = data.reset_index()

data

In [None]:
print(data.columns)

In [None]:
# Drop the top column level only when 'pos_x' or 'pos_y' is found among the
# columns; otherwise leave the columns untouched.
has_position_columns = "pos_x" in data.columns or "pos_y" in data.columns
if has_position_columns:
    data.columns = data.columns.droplevel(0)

data

# Testing velocities

In [None]:
importlib.reload(Optobot_utils)

In [None]:
# Wrap the experiment in the custom Fly class.
# NOTE: this rebinds TestFly from the experiment path to a Fly object, so cells
# that use TestFly as a path (e.g. TestFly / "experiment_dict.npy") will likely
# fail if they are re-run after this one.
TestFly = Optobot_utils.Fly(TestFly)

In [None]:
TestFly.data

In [None]:
dataset = TestFly.data

In [None]:
TestFly.directory

In [None]:
vel = TestFly.compute_velocity()

In [None]:
vel

In [None]:
# plot the velocity


hv.Curve(vel)

It looks like small movements below 0.1 mm/s could just be tracking noise. Let's use that to classify the frames as moving or not. Also, looking at the video, anything below 0.5 mm/s is actually small real movements.

In [None]:
# Quick arithmetic check.
# NOTE(review): presumably a frame count divided by an 80 fps frame rate to get
# a duration in seconds — confirm what 1400 refers to.
1400/80

# Testing dataframe loading

In [None]:
Exps[16]

In [None]:
TestFly = Optobot_utils.Fly(Exps[16])

In [None]:
len(Exps)

In [None]:
TestFly.data

In [None]:
Exps[0].parent.parent

# Try the whole dataset

In [None]:
# Load every experiment and collect the resulting DataFrames for later concatenation.
data_list = []

for i, exp in enumerate(Exps):
    try:
        # Load the data and add it to a list to be concatenated later
        data = Optobot_utils.Fly(exp).data
        # Tag the rows with the experiment's position in Exps as a unique identifier
        data["id"] = i
        data_list.append(data)
    except Exception as e:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit and
        # hide the reason for the failure, so catch Exception and report it.
        print(f"Failed to load {exp}")
        print(str(e))

In [None]:
data_list[1]

In [None]:
# Flag whether any of the loaded DataFrames contains duplicated rows.
duplicates = any(map(lambda frame: frame.duplicated().any(), data_list))
print("Duplicate datasets found." if duplicates else "No duplicate datasets found.")

In [None]:

# Concatenate the DataFrames
dataset = pd.concat(data_list, ignore_index=True)

dataset

In [None]:
# Concatenate one DataFrame at a time so that a failure can be traced back to
# the position in data_list that triggered it; stop at the first failure.
dataset = pd.DataFrame()
for i, df in enumerate(data_list):
    try:
        dataset = pd.concat([dataset, df], ignore_index=True)
    except Exception as e:
        print(f"Error occurred while concatenating DataFrame {i}.")
        print(str(e))
        break

In [None]:
data_list[16]

In [None]:
# Take a closer look at the DataFrame stored at position 16 of data_list.
problematic_df = data_list[16]

# Preview the first rows, then show the index.
print(problematic_df.head())
print(problematic_df.index)

# Report whether any index label occurs more than once.
has_duplicate_index = problematic_df.index.duplicated().any()
print(
    "The DataFrame has duplicate indices."
    if has_duplicate_index
    else "The DataFrame does not have duplicate indices."
)

# Report whether any column label occurs more than once.
has_duplicate_columns = problematic_df.columns.duplicated().any()
print(
    "The DataFrame has duplicate columns."
    if has_duplicate_columns
    else "The DataFrame does not have duplicate columns."
)

Testing nbstripout on semihg

In [None]:
print("hello")

# All flies + velocities

In [None]:
# Rebuild the list of per-experiment DataFrames, reporting any load failure.
data_list = []

for fly_id, exp_path in enumerate(Exps):
    try:
        fly_data = Optobot_utils.Fly(exp_path).data
        # The position in Exps doubles as a unique identifier for the experiment
        fly_data["id"] = fly_id
        data_list.append(fly_data)
    except Exception as err:
        # Say which experiment failed and why, then carry on with the next one
        print(f"Failed to load {exp_path}")
        print(str(err))

# Stack everything into a single DataFrame with a freshly numbered index
dataset = pd.concat(data_list, ignore_index=True)

dataset

In [None]:
# Load one specific experiment through its absolute path.
# NOTE(review): this hard-coded /mnt/labserver path only resolves on machines
# that mount the server there, whereas the experiment search above goes through
# Utils.get_labserver().
Optobot_utils.Fly(
    Path("/mnt/labserver/DURRIEU_Matthias/Experimental_data/Irene_Optobot/9 days/SynjRQ/SynjRQ-THGal4_m1_9d/221209/193753_s0a0_p6-0")
)

## Detecting resting

Resting corresponds to the frames where the velocity is below 0.1 mm/s. Let's try to detect that.

In [None]:
# Flag each frame by its velocity: "resting" below 0.1, "moving" above 0.5.
# Frames whose velocity lies between the two thresholds get False in both columns.
dataset["resting"] = dataset["velocity"].lt(0.1)
dataset["moving"] = dataset["velocity"].gt(0.5)

In [None]:
dataset

During the video there are 10 seconds of light off, 30 seconds of light on, 10 off, 30 on, 10 off, 30 on. Let's add this info to the dataframe. (Done directly in Optobot_utils.)

# Getting some summary statistics

First, let's look at the average velocities when moving = True, grouped by genotype.

In [None]:
# Get all unique values of genotype

genotypes = dataset["genotype"].unique()

genotypes

In [None]:
# Map alternative spellings (and the SynjRQ-THGal4 label) onto one canonical
# name per genotype, all in a single replacement pass.
genotype_aliases = {
    "IF_Atg18": "IF-Atg18",
    "IF_THGal4": "IF-THGal4",
    "SYnjRQ": "SynjRQ",
    "SynjRQ-THGal4": "SynjRQ",
}
dataset["genotype"] = dataset["genotype"].replace(genotype_aliases)

# List the genotype labels that remain after the clean-up
genotypes = dataset["genotype"].unique()

genotypes

In [None]:
# Exclude the genotypes that are not of interest, in this case w1118.
# The comparison has to be made on the "genotype" column: comparing the whole
# DataFrame (dataset != "w1118") builds a cell-wise mask that only blanks the
# matching cells to NaN and keeps every row.
dataset = dataset[dataset["genotype"] != "w1118"]

In [None]:
# Keep only the frames flagged as moving
moving = dataset.loc[dataset["moving"]]

# Mean velocity of each fly (id) within each genotype, over its moving frames
per_fly_velocity = moving.groupby(["genotype", "id"])["velocity"]
Avg_vel = per_fly_velocity.mean()

In [None]:
# Box-and-whisker plot of the average velocity, one box per genotype,
# styled with the "boxwhisker" options from HoloviewsTemplates
boxplot = hv.BoxWhisker(Avg_vel, "genotype", "velocity").opts(**HoloviewsTemplates.hv_main["boxwhisker"])

# Same values drawn as individual points, colored by genotype
scatter = hv.Scatter(Avg_vel, "genotype", "velocity").opts(**HoloviewsTemplates.hv_main["scatter"])
scatter.opts(color="genotype")

# Overlay the points on the boxes, apply the "plot" options and label the y axis
jitterplot = (boxplot * scatter).opts(**HoloviewsTemplates.hv_main["plot"])
jitterplot.opts(ylabel="Average velocity (mm/s)")
jitterplot

Not super conclusive. Let's look at the max velocities.

In [None]:
# Get the max velocity grouped by genotype and id
Max_vel = moving.groupby(["genotype", "id"])["velocity"].max()

# Remake the jitterplot with the max velocity

# Make a holoviews boxplot of the maximum velocity grouped by genotype
boxplot = hv.BoxWhisker(Max_vel, "genotype", "velocity").opts(
    **HoloviewsTemplates.hv_main["boxwhisker"]
)

# Same values drawn as individual points, colored by genotype
scatter = hv.Scatter(Max_vel, "genotype", "velocity").opts(
    **HoloviewsTemplates.hv_main["scatter"]
)
scatter.opts(color="genotype")

# Overlay the points on the boxes, apply the "plot" options and label the y axis
jitterplot = (boxplot * scatter).opts(**HoloviewsTemplates.hv_main["plot"])
jitterplot.opts(ylabel="Maximum velocity (mm/s)")


jitterplot

Not super conclusive either. Let's look at the proportion of time spent moving.

In [None]:
# Count the moving frames of each fly (genotype, id) and convert the count to
# seconds by dividing by 80 (the number of frames per second assumed here).
# Flies without any moving frame have no row in `moving`, so they are absent
# from this table.
num_moving = moving.groupby(["genotype", "id"]).size().reset_index(name="time moving")
num_moving["time moving"] = num_moving["time moving"].div(80)

num_moving

In [None]:
# Plot the time spent moving: one box per genotype, styled with the
# "boxwhisker" options from HoloviewsTemplates
boxplot = hv.BoxWhisker(num_moving, "genotype", "time moving").opts(
    **HoloviewsTemplates.hv_main["boxwhisker"]
)

# Same values drawn as individual points, colored by genotype
scatter = hv.Scatter(num_moving, "genotype", "time moving").opts(
    **HoloviewsTemplates.hv_main["scatter"]
)
scatter.opts(color="genotype")

# Overlay the points on the boxes and apply the "plot" options
jitterplot = (boxplot * scatter).opts(**HoloviewsTemplates.hv_main["plot"])

jitterplot

In [None]:

# Another method here where we just compute the proportion of time moving instead:
# moving frames per (genotype, id) divided by all frames per (genotype, id).
moving_counts = moving.groupby(["genotype", "id"]).size()
total_counts = dataset.groupby(["genotype", "id"]).size()

# fill_value=0 makes a fly with no moving frame (missing from moving_counts)
# score 0 instead of NaN, so it is not silently lost from the summary.
prop_moving = moving_counts.div(total_counts, fill_value=0).reset_index(name="proportion time moving")
prop_moving

In [None]:
# Plot the proportion of time spent moving: one box per genotype, styled with
# the "boxwhisker" options from HoloviewsTemplates
boxplot = hv.BoxWhisker(prop_moving, "genotype", "proportion time moving").opts(
    **HoloviewsTemplates.hv_main["boxwhisker"]
)

# Same values drawn as individual points, colored by genotype
scatter = hv.Scatter(prop_moving, "genotype", "proportion time moving").opts(
    **HoloviewsTemplates.hv_main["scatter"]
)
scatter.opts(color="genotype")

# Overlay the points on the boxes and apply the "plot" options
jitterplot = (boxplot * scatter).opts(**HoloviewsTemplates.hv_main["plot"])

jitterplot