In [None]:
import sys
import os
from icecream import ic

# Prepend parent folders to the module search path; presumably this is where the
# project modules imported below live (TODO confirm). Each insert(0, ...) goes to
# the front of sys.path, so the path inserted last is the one searched first.
sys.path.insert(0, "..")
sys.path.insert(0, "../../../Utilities")

sys.path.insert(0, "../../..")

import Ballpushing_utils
import Utils
import Processing
import HoloviewsTemplates

# importlib is used further down to reload Ballpushing_utils
import importlib

import holoviews as hv

# Render HoloViews objects with the Bokeh backend
hv.extension("bokeh")

# Get the list of experiments

In [None]:
# Get the data path
Datapath = Utils.get_data_path()

# Collect every tracked "TNT_Fine" experiment folder.
# os.listdir returns entries in arbitrary order, so the names are sorted: later
# cells index into the resulting lists by position, which must be reproducible.
Folders = sorted(
    f for f in os.listdir(Datapath) if "TNT_Fine" in f and "Tracked" in f and os.path.isdir(Datapath / f)
)

Folders

In [None]:
# Re-import Ballpushing_utils so the following cells use its current source
importlib.reload(Ballpushing_utils)

In [None]:
# Build one Experiment object per selected folder
Experiments = [
    Ballpushing_utils.Experiment(Datapath / folder_name)
    for folder_name in Folders
]

In [None]:
# Spot-check one fly nickname (experiment at position 15, fly at position 3)
sample_experiment = Experiments[15]
TestFly = sample_experiment.flies[3].nickname

In [None]:
# Display the nickname picked in the previous cell
TestFly

In [None]:
# Location, relative to the lab server root, of the file holding the Experiment objects
datasets_file = "Experimental_data/MultiMazeRecorder/Datasets/240227_TNT_Fine_Experiments.pkl"
savepath = Utils.get_labserver() / datasets_file


In [None]:
# Write the Experiment objects to `savepath` (passed as a POSIX-style string)
Ballpushing_utils.save_object(Experiments, savepath.as_posix())

In [None]:
# Load the experiments from the saved file
# NOTE(review): the .pkl extension suggests a pickle file — only load files from a trusted source.
Experiments = Ballpushing_utils.load_object(savepath.as_posix())

In [None]:
# Sanity check: type of the container returned by load_object
type(Experiments)

In [None]:
# Sanity check: type of the first loaded item
type(Experiments[0])

In [None]:
# Re-import Ballpushing_utils again before building the Dataset below
importlib.reload(Ballpushing_utils)

In [None]:
# Wrap all experiments in a single Dataset; its `flies`, `data`, `generate_dataset`
# and `jitter_boxplot` members are used in the cells below
data = Ballpushing_utils.Dataset(Experiments)

In [None]:
# Print the Dataset's string representation
print(data)

In [None]:
# For each fly in the dataset, if they have 2 nicknames, just keep the first one
# for fly in data.flies:
#     if len(fly.nickname) > 1:
#         fly.nickname = fly.nickname[0]

The commented-out loop above was a hack to drop the extra nickname carried by PR flies. This is now fixed directly in the brain region registry, so the hack is no longer needed.

In [None]:
# Build the "summary" dataset; presumably this populates `data.data`, which the next cells read (TODO confirm)
data.generate_dataset("summary")

In [None]:
# Inspect the Nickname column of the summary table
data.data["Nickname"]

In [None]:
# Shorter handle on `data.data`
mydata = data.data

In [None]:
# Keep only the rows that have a FinalEvent value; the result is a new frame bound to `noNa`
noNa = mydata.dropna(subset=["FinalEvent"])

In [None]:
import numbers

# Check for non-numeric values in FinalEvent.
# numbers.Number also recognises floats and NumPy scalars, which a plain
# isinstance(x, int) test would wrongly report as non-numeric.
noNa[noNa["FinalEvent"].apply(lambda x: not isinstance(x, numbers.Number))]

In [None]:
# Show the rows whose index label repeats an earlier one
# (only duplicated labels are detected here, not missing ones)
repeated_label_mask = noNa.index.duplicated()
noNa[repeated_label_mask]

In [None]:
# Replace the index with a fresh positional one; drop=True discards the old labels
noNa = noNa.reset_index(drop=True)

In [None]:
# Type of the first FinalEvent value.
# .iloc[0] selects by position: a label lookup with [0] raises KeyError when no row
# is labelled 0 and returns a whole Series when that label is duplicated.
type(mydata["FinalEvent"].iloc[0])

In [None]:
# Metrics to inspect, kept as a list so that several columns can be checked at once
vdim = ["FinalEvent"]

In [None]:
# Report, for every requested metric, whether the dataset has a column of that name
available_columns = data.data.columns
for metric in vdim:
    if metric not in available_columns:
        print(f"{metric} does not exist in the dataset.")
        continue
    print(f"{metric} exists in the dataset.")

In [None]:
# Report the dtype of each metric column. `vdim` is a list, so `mydata[vdim].dtypes`
# holds one dtype per listed column rather than a single dtype.
print(f"Data type for {vdim}: {mydata[vdim].dtypes}")

In [None]:
# List the distinct Python types stored in the FinalEvent column
noNa["FinalEvent"].map(type).unique()

In [None]:
# Jittered boxplot of NumberEvents on the full table, called with show=True and save=False
data.jitter_boxplot(data.data, "NumberEvents", show=True, save=False)

In [None]:
# Make a subset of the data without the rows whose "FinalEvent" is missing

subsetfinal = data.data.dropna(subset=["FinalEvent"])

In [None]:
# Jittered boxplot of FinalEvent on the subset without missing values, called with show=False and save=True
data.jitter_boxplot(subsetfinal, "FinalEvent", show=False, save=True)

# Troubleshooting the data error

When loading some flies of the current dataset, I get the error `cannot reindex on an axis with duplicate labels`.
Let's check the flies that trigger it.

The first one is: `231222_TNT_Fine_1_Videos_Tracked_arena6_corridor5`

In [None]:
# Retrieve, by name, the fly that triggers the error
flyname = "231222_TNT_Fine_1_Videos_Tracked_arena6_corridor5"

# Keep the first fly of the dataset whose name matches
flies_with_that_name = [candidate for candidate in data.flies if candidate.name == flyname]
fly = flies_with_that_name[0]

In [None]:
# Wrap this single fly in its own Dataset so the error can be investigated in isolation
problematicFly = Ballpushing_utils.Dataset(fly)

In [None]:
# List the public attributes and methods available on the single-fly Dataset.
# (A dangling `problematicFly.` is a SyntaxError and stops a full notebook run.)
[attribute for attribute in dir(problematicFly) if not attribute.startswith("_")]

In [None]:
# Build the "summary" dataset for this fly alone; presumably this is the call that
# raises the reindexing error being investigated (TODO confirm)
problematicFly.generate_dataset("summary")

In [None]:
# Inspect the arena metadata attached to the fly
fly.arena_metadata

In [None]:
# Check the nickname of the first fly held by the single-fly Dataset
problematicFly.flies[0].nickname

# PCA on the data

Here I'll try to do PCA on the data to see if I can get something interesting by reducing the dimensionality of the data, including all the summary metrics.

In [None]:
# Columns kept for the PCA: the summary metrics first, then the two label columns
pca_metric_columns = [
    "NumberEvents",
    "FinalEvent",
    "FinalTime",
    "SignificantEvents",
    "SignificantFirst",
    "SignificantFirstTime",
    "CumulatedBreaks",
    "Pushes",
    "Pulls",
]
pca_label_columns = ["Genotype", "Brain region"]

# Restrict the summary table to those columns, in that order
subset = data.data[pca_metric_columns + pca_label_columns]

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import seaborn as sns

# Build `PCA_components`, the table read by every plotting cell below. It was never
# defined in the notebook, so those cells failed with a NameError on a fresh kernel.
# NOTE(review): this pipeline (2 components on standardised metrics) is reconstructed
# from the columns the plots use ("PC1", "PC2", "Genotype", "Brain region") — confirm
# that it matches the analysis originally intended.
label_columns = ["Genotype", "Brain region"]
metric_columns = [column for column in subset.columns if column not in label_columns]

# PCA cannot handle missing values: keep only the rows where every metric is available
pca_input = subset.dropna(subset=metric_columns)

# Standardise the metrics so that none dominates the components through its scale,
# then project the rows onto the first two principal components
scaled_metrics = StandardScaler().fit_transform(pca_input[metric_columns].astype(float))
projection = PCA(n_components=2).fit_transform(scaled_metrics)

# Keep the labels next to the coordinates so the plots can colour and group by them
PCA_components = pca_input[label_columns].assign(PC1=projection[:, 0], PC2=projection[:, 1])

In [None]:
import hvplot.pandas

# Split the rows into the "TNTxZ2018" genotype and every other genotype
is_TNTxZ2018 = PCA_components["Genotype"] == "TNTxZ2018"
TNTxZ2018_data = PCA_components[is_TNTxZ2018]
other_data = PCA_components[~is_TNTxZ2018]

# One figure per brain region, shown as soon as it is built
for brain_region in PCA_components["Brain region"].unique():
    in_region = other_data["Brain region"] == brain_region
    region_rows = other_data[in_region]

    # Layer 1: the genotypes of this brain region, one colour per genotype
    genotype_layer = region_rows.hvplot.scatter(
        x="PC1",
        y="PC2",
        by="Genotype",
        hover_cols=["Genotype"],
        cmap="nipy_spectral",
    )

    # Layer 2: the "TNTxZ2018" rows, drawn as large black crosses
    TNTxZ2018_layer = TNTxZ2018_data.hvplot.scatter(
        x="PC1", y="PC2", by="Genotype", hover_cols=["Genotype"], color="black", marker="x", size=100
    )

    # Overlay both layers and display the result
    final_plot = genotype_layer * TNTxZ2018_layer
    hvplot.show(final_plot)

In [None]:
# Display the table of metrics and labels selected for the PCA
subset

In [None]:
import hvplot.pandas

# Single scatter of PC1 against PC2, coloured by genotype and grouped by brain region
plot = PCA_components.hvplot.scatter(
    x="PC1", y="PC2", by="Genotype", groupby="Brain region", hover_cols=["Genotype"], cmap="nipy_spectral", dynamic=False
)

# Style meant for the "TNTxZ2018" points: large black crosses
TNTxZ2018_style = hv.opts.Scatter("TNTxZ2018", size=100, marker="x", color="black")
plot.opts(TNTxZ2018_style)

# Show the result
hvplot.show(plot)

In [None]:
import hvplot.pandas
import holoviews as hv

# Separate the "TNTxZ2018" data from the rest of the data
TNTxZ2018_data = PCA_components[PCA_components["Genotype"] == "TNTxZ2018"]
other_data = PCA_components[PCA_components["Genotype"] != "TNTxZ2018"]

# The "TNTxZ2018" layer does not depend on the brain region: build it once and
# overlay the same object on every panel instead of recreating it in the loop
plot2 = TNTxZ2018_data.hvplot.scatter(
    x="PC1",
    y="PC2",
    by="Genotype",
    hover_cols=["Genotype"],
    color="black",
    marker="x",
    size=100,
)

# Initialize an empty Layout
plots = hv.Layout()

# Generate one plot per Brain region
for brain_region in PCA_components["Brain region"].unique():
    df_brain_region = other_data[other_data["Brain region"] == brain_region]

    # Scatter of the other genotypes found in this brain region
    plot1 = df_brain_region.hvplot.scatter(
        x="PC1", y="PC2", by="Genotype", hover_cols=["Genotype"], cmap="nipy_spectral"
    )

    # Combine the plots
    final_plot = plot1 * plot2

    # Add the plot to the Layout
    plots += final_plot.relabel(f"PCA - Brain Region: {brain_region}")

# Resolve the output file from the lab server root, as done for the dataset file,
# instead of hard-coding an absolute path that only exists on one machine.
# NOTE(review): assumes Utils.get_labserver() points to /mnt/labserver/DURRIEU_Matthias — confirm.
plots_path = Utils.get_labserver() / "Experimental_data/MultiMazeRecorder/Plots/240222_PCA_plots.html"

# Stack the panels in a single column
pca_layout = plots.cols(1)

# Save the Layout
hvplot.save(pca_layout, plots_path.as_posix())
# Display the Layout
hvplot.show(pca_layout)