In [None]:
import sys
import os
from icecream import ic

from pathlib import Path

# Prepend extra locations to the module search path. The paths are relative, so
# they are resolved against the current working directory of the kernel.
# The project modules imported below are presumably found through these
# entries (TODO confirm which module lives where).
sys.path.insert(0, "..")
sys.path.insert(0, "../../../Utilities")

# Inserted last at position 0, so "../../.." ends up first on sys.path.
sys.path.insert(0, "../../..")

# These imports must stay after the sys.path edits above.
import Ballpushing_utils
import Utils
import Processing
import HoloviewsTemplates

# Needed for the importlib.reload() calls used further down.
import importlib

import holoviews as hv

# Select the Bokeh backend for HoloViews rendering.
hv.extension("bokeh")

# Get the list of experiments

In [None]:
# Root directory holding the experiment folders (location decided by Utils.get_data_path()).
Datapath = Utils.get_data_path()

# Keep only the directories whose name contains both "TNT_Fine" and "Tracked".
# os.listdir() returns entries in arbitrary order, so the names are sorted to make
# positional access further down (e.g. Experiments[15]) reproducible across runs
# and machines.
Folders = sorted(
    name
    for name in os.listdir(Datapath)
    if "TNT_Fine" in name and "Tracked" in name and os.path.isdir(Datapath / name)
)

Folders

In [None]:
# Re-execute the Ballpushing_utils module so edits made on disk are picked up without restarting the kernel.
importlib.reload(Ballpushing_utils)

In [None]:
# Build one Experiment per selected folder, in the same order as Folders
Experiments = [
    Ballpushing_utils.Experiment(Datapath / folder_name) for folder_name in Folders
]

In [None]:
# Spot-check the nickname of one fly.
# NOTE(review): the positions 15 and 3 are hard-coded; this lookup fails if fewer
# experiments (or fewer flies in that experiment) were loaded.

TestFly = Experiments[15].flies[3].nickname

In [None]:
# Display the nickname stored in TestFly.
TestFly

In [None]:
# File on the lab server where the list of Experiment objects is saved to / loaded from.
# NOTE: `savepath` is rebound to a plots directory further down in this notebook, so
# re-run this cell before re-running the save/load cells that follow it.
savepath = Utils.get_labserver() / "Experimental_data/MultiMazeRecorder/Datasets/240306_TNT_Fine_Experiments.pkl"


In [None]:
# Save the list of Experiment objects to savepath (passed as a POSIX-style string).
Ballpushing_utils.save_object(Experiments, savepath.as_posix())

In [None]:
# Load the experiments from the saved file
# NOTE(review): the .pkl extension suggests pickle is used under the hood — only
# load files from a trusted source (TODO confirm the serialisation format).
Experiments = Ballpushing_utils.load_object(savepath.as_posix())

In [None]:
# Sanity check: type of the container returned by load_object.
type(Experiments)

In [None]:
# Sanity check: type of the first loaded element.
type(Experiments[0])

In [None]:
# Reload the module to pick up code edits. Objects created before the reload
# (e.g. the already-loaded Experiments) keep referencing the old class definitions.
importlib.reload(Ballpushing_utils)

In [None]:
# Wrap the loaded experiments in a single Dataset object.
data = Ballpushing_utils.Dataset(Experiments)

In [None]:
# Print the Dataset's string representation.
print(data)

In [None]:
# For each fly in the dataset, if they have 2 nicknames, just keep the first one
# for fly in data.flies:
#     if len(fly.nickname) > 1:
#         fly.nickname = fly.nickname[0]

The commented-out cell above was a temporary hack to drop a supplementary nickname from PR flies. This is now fixed directly in the brain region registry, so the hack is no longer needed.

In [None]:
# Generate the "summary" dataset; presumably this populates data.data, which is read in the following cells (TODO confirm).
data.generate_dataset("summary")

In [None]:
# Shorthand for data.data; used further down to look up the label of the "TNTxZ2018" genotype.
mydata = data.data

# Plotting methods

In [None]:
# Directory on the lab server that receives the CSV and HTML outputs written further down.
# NOTE: this rebinds `savepath`, which earlier in the notebook pointed at the experiments
# file; re-run the cell defining that path before re-running the save/load cells.
savepath = Utils.get_labserver() / "Experimental_data/MultiMazeRecorder/Plots/240306_summaries"

# Create the output folder if it does not exist yet, so the later to_csv()/save() calls
# do not fail on a missing directory. parents is left at False on purpose: if the parent
# folder is absent (e.g. server not mounted) this fails loudly instead of silently
# building a local directory tree.
savepath.mkdir(exist_ok=True)

In [None]:
# Re-execute the HoloviewsTemplates module so edits made on disk are picked up without restarting the kernel.
importlib.reload(HoloviewsTemplates)

In [None]:
# Preview of a single metric: shown, not saved
HoloviewsTemplates.jitter_boxplot(
    data.data,
    "InsightEffect",
    show=True,
    save=False,
    metadata=data.metadata,
    bs_controls=True,
    sort_by="median",
    hline_method="boxplot",
    readme=None,
)

In [None]:
# Summary metrics to plot, one jitter boxplot each
metrics = [
    "NumberEvents",
    "FinalEvent",
    "FinalTime",
    "SignificantEvents",
    "SignificantFirst",
    "SignificantFirstTime",
    "Pushes",
    "Pulls",
    "PushPullRatio",
    "InteractionProportion",
    "AhaMoment",
    "AhaMomentIndex",
    "InsightEffect",
    "TimeToFinish",
    "SignificantRatio",
]

# Constant options shared by every plot of the batch: save without displaying
batch_plot_options = dict(
    show=False,
    save=True,
    bs_controls=True,
    sort_by="median",
    hline_method="boxplot",
)

# One saved plot per metric
for metric in metrics:
    HoloviewsTemplates.jitter_boxplot(
        data.data, vdim=metric, metadata=data.metadata, **batch_plot_options
    )

# PCA on the data

Here I run a PCA on the summary metrics to see whether reducing the dimensionality of the data reveals any interesting structure. Note that `Pushes` and `Pulls` are not among the columns selected for the PCA below; `PushPullRatio` is.

In [None]:
# Inspect the full dataset before it is subset for the PCA.
data.data

In [None]:
# Leave the M6, M7 and PR genotypes out of the PCA input
excluded_genotypes = ["M6", "M7", "PR"]
is_excluded = data.data["Genotype"].isin(excluded_genotypes)

subset = data.data[~is_excluded]

In [None]:
# Summary metrics that go into the PCA
pca_metric_columns = [
    "NumberEvents",
    "FinalEvent",
    "FinalTime",
    "SignificantEvents",
    "SignificantFirst",
    "SignificantFirstTime",
    "PushPullRatio",
    "InteractionProportion",
    "AhaMoment",
    "AhaMomentIndex",
    "InsightEffect",
    "TimeToFinish",
    "SignificantRatio",
]

# Identifier columns carried along to annotate the PCA output
pca_annotation_columns = ["label", "Brain region", "fly", "Genotype"]

# Restrict the frame to those columns: metrics first, then the annotations
subset = subset[pca_metric_columns + pca_annotation_columns]

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Columns that annotate a row rather than measure it; they are kept out of the PCA input.
annotation_columns = ["label", "Brain region", "fly", "Genotype"]
features = subset.drop(columns=annotation_columns)

# Boolean mask of the rows that have no missing metric. A positional mask is used
# instead of re-selecting by index label, because a label-based .loc lookup would
# return duplicated rows if the index of `subset` is not unique.
complete_rows = features.notna().all(axis=1).to_numpy()

# Kept under its historical name; it holds the index labels of the rows WITHOUT NaNs.
nan_indices = features.index[complete_rows]

# Same rows, same order, for both the metrics and their annotations.
features = features[complete_rows].reset_index(drop=True)
annotations = subset.loc[complete_rows, annotation_columns]

# Standardise the metrics (StandardScaler defaults: zero mean, unit variance per column)
# so that no metric dominates the PCA through its scale alone.
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features)

# Perform PCA
pca = PCA(n_components=2)  # Adjust n_components as needed
principalComponents = pca.fit_transform(features_normalized)

# One row per retained sample, one column per principal component.
PCA_components = pd.DataFrame(principalComponents, columns=["PC1", "PC2"])

# Attach the annotations positionally (.values): PCA_components has a fresh
# RangeIndex, so index-based alignment with `subset` would not match.
for column in annotation_columns:
    PCA_components[column] = annotations[column].values

# PCA summaries

In [None]:
# Loadings table: weight of each original metric in PC1 and PC2
PCs_compo = pd.DataFrame(
    data=pca.components_,
    index=["PC1", "PC2"],
    columns=features.columns,
)

# Share of the variance explained by each of the two components
print(f"Explained variance of PC1 and PC2 : {pca.explained_variance_ratio_}")

PCs_compo

In [None]:
# Write the component loadings to disk
loadings_file = savepath / "PCs_composition.csv"
PCs_compo.to_csv(loadings_file)

# Write the explained variance ratio of each component next to it
explained_variance = pd.DataFrame(
    pca.explained_variance_ratio_,
    index=["PC1", "PC2"],
    columns=["Explained variance"],
)
explained_variance.to_csv(savepath / "PCA_Explained_variance.csv")

In [None]:
# Collect the distinct Genotype values present in `subset`
# (rows with missing metrics are still included at this point).

unique_genotypes = subset["Genotype"].unique()

# Check whether "TNTxZ2018" is among them; the cell displays True or False.

"TNTxZ2018" in unique_genotypes

In [None]:
# Count the PCA rows that belong to the "TNTxZ2018" genotype.
# The mask has to be built from PCA_components itself: `subset` keeps its original
# index and still contains the rows dropped for NaNs, so a mask derived from it
# does not align with PCA_components (pandas either raises or selects the wrong rows).
num_rows = len(PCA_components[PCA_components["Genotype"] == "TNTxZ2018"])
print(num_rows)

In [None]:
# Look up the label carried by the "TNTxZ2018" genotype (first matching row)
is_TNTxZ2018 = mydata["Genotype"] == "TNTxZ2018"
TNTxZ2018_label = mydata.loc[is_TNTxZ2018, "label"].values[0]

TNTxZ2018_label

In [None]:
import hvplot.pandas
import holoviews as hv

# Split the PCA table: rows carrying the "TNTxZ2018" label vs. all the others
has_reference_label = PCA_components["label"] == TNTxZ2018_label
TNTxZ2018_data = PCA_components[has_reference_label]
other_data = PCA_components[~has_reference_label]

# Panels are accumulated into a single Layout, one per brain region
plots = hv.Layout()

for brain_region in PCA_components["Brain region"].unique():
    region_rows = other_data[other_data["Brain region"] == brain_region]

    # Rows of this brain region, one colour per label
    region_scatter = region_rows.hvplot.scatter(
        x="PC1",
        y="PC2",
        by="label",
        hover_cols=["fly"],
        cmap="nipy_spectral",
    )

    # The "TNTxZ2018" rows, overlaid on every panel as black crosses
    reference_scatter = TNTxZ2018_data.hvplot.scatter(
        x="PC1", y="PC2", by="label", hover_cols=["fly"], color="black", marker="x", size=100
    )

    # Overlay both scatters, size the panel and title it with the brain region
    panel = (region_scatter * reference_scatter).opts(width=1000, height=750)
    plots += panel.relabel(f"PCA - Brain Region: {brain_region}")

# Arrange the panels in a single column, write them to HTML, then display them
hvplot.save(plots.cols(1), savepath / "240306_PCA_plots.html")
hvplot.show(plots.cols(1))

# Plotting the PCs separately

In [None]:
# Inspect the PC1/PC2 scores together with their annotation columns.
PCA_components

In [None]:
# Re-execute the HoloviewsTemplates module so edits made on disk are picked up before plotting the PCs.
importlib.reload(HoloviewsTemplates)

In [None]:
# Jitter boxplot of the PC1 scores (show and save both enabled)
pc1_plot_options = dict(
    show=True,
    save=True,
    metadata=[],
    bs_controls=True,
    sort_by="median",
    hline_method="boxplot",
    readme=None,
)
HoloviewsTemplates.jitter_boxplot(PCA_components, "PC1", **pc1_plot_options)

In [None]:
# Jitter boxplot of the PC2 scores (show and save both enabled)
pc2_plot_options = dict(
    show=True,
    save=True,
    metadata=[],
    bs_controls=True,
    sort_by="median",
    hline_method="boxplot",
    readme=None,
)
HoloviewsTemplates.jitter_boxplot(PCA_components, "PC2", **pc2_plot_options)

In [None]:
# TODO : Find flies with particular pulling