This notebook gathers the analyses performed on the full-body tracking data from the TNT screen.

In [None]:
from utils_behavior import Sleap_utils
from utils_behavior import HoloviewsTemplates
from utils_behavior import HoloviewsPlots
from utils_behavior import Utils
from utils_behavior import Processing

from utils_behavior import Ballpushing_utils
from utils_behavior import Seaborn_Templates

import importlib

from pathlib import Path

import json

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import pandas as pd
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
import seaborn as sns
from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui



#import lux

import pandas as pd

#lux.config.set_executor_type("Pandas")

import numpy as np
import h5py
import re

> Note: the dataclasses are too heavy (~2 GB each) to be computed directly in the notebook, so a script called "DataSetBuilder.py" was used to generate the datasets from the TNT data.

# Summaries

In [None]:
# Read the pooled summary table from its feather file.
summary_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/241217_FinalEventCutoffData_norm/summary/241209_Pooled_summary.feather")
Summary_data = pd.read_feather(summary_path)

In [None]:
# List the column names available in the summary table.
Summary_data.columns

In [None]:
# Drop the rows whose Genotype is one of the excluded lines.
excluded_genotypes = ["M6", "M7", "PR", "CS"]
is_excluded = Summary_data["Genotype"].isin(excluded_genotypes)
Summary_data = Summary_data[~is_excluded]

In [None]:
# Genotypes handed to the plotting helpers as the `control` group.
control_genotypes = ["TNTxZ2035", "TNTxZ2018", "TNTxM7"]

In [None]:
# Names of the summary metrics that are plotted one by one further down.
metrics = [
    'nb_events',
    'max_event',
    'max_event_time',
    'max_distance',
    'final_event',
    'final_event_time',
    'nb_significant_events',
    'significant_ratio',
    'first_significant_event',
    'first_significant_event_time',
    'aha_moment',
    'aha_moment_time',
    'insight_effect',
    'cumulated_breaks_duration',
    'pushed',
    'pulled',
    'pulling_ratio',
    'success_direction',
    'interaction_proportion',
    'distance_moved',
]

In [None]:
# Re-import the two Holoviews helper modules so that edits made to them on disk
# are picked up without restarting the kernel.
importlib.reload(HoloviewsTemplates)

importlib.reload(HoloviewsPlots)

In [None]:
# Jitter/box plot of the number of events per Nickname, grouped by brain region,
# with the control genotypes passed to the helper.
plot = HoloviewsPlots.jitter_boxplot(
    data=Summary_data,
    metric="nb_events",
    kdims="Nickname",
    groupby="Brain region",
    render="grouped",
    control=control_genotypes,
)

plot

In [None]:
# Build one jitter/box plot per summary metric, grouped by brain region,
# and collect them in a list for later display.
summary_plots = []

for metric in metrics:
    summary_plots.append(
        Seaborn_Templates.sns_plot(Summary_data, metric, "Nickname", "jitterboxplot", group_by="Brain region")
    )

In [None]:
# Display the plot built for the first metric in `metrics`.
summary_plots[0]

# Analysis of ball derivative for contact indices

In [None]:
# Load the pooled experiment table from CSV.
# NOTE(review): the next cell reassigns TNT_Data from a feather file, so the result
# of this CSV load is discarded when the notebook is run top to bottom.
pooled_csv_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/Experiments/241204_Pooled.csv")
TNT_Data = pd.read_csv(pooled_csv_path)

In [None]:
# Load the transformed contact dataset from its feather file into TNT_Data.
contact_data_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/241218_Transformed_contact_data_derivative.feather")
TNT_Data = pd.read_feather(contact_data_path)

In [None]:
# Preview the first rows of the contact dataset.
TNT_Data.head()

In [None]:
# Flag every Nickname whose mean ball displacement exceeds 0.2 at any contact index,
# and store that flag on each row of TNT_Data as 'Aha_Moment'.

# Mean ball displacement for each (Nickname, contact_index) pair.
mean_displacement = (
    TNT_Data.groupby(['Nickname', 'contact_index'])['ball_displacement']
    .mean()
    .reset_index()
)

# One boolean per Nickname: "some mean exceeds 0.2" is the same as "the largest mean
# exceeds 0.2", so a single max() reduction replaces the per-row transform + first().
aha_moment = mean_displacement.groupby('Nickname')['ball_displacement'].max().gt(0.2)

# Nickname -> bool lookup (kept under its original name).
aha_moment_dict = aha_moment.to_dict()

# Rows whose Nickname is missing from the lookup receive NaN.
TNT_Data['Aha_Moment'] = TNT_Data['Nickname'].map(aha_moment_dict)

# Preview the result
TNT_Data.head()

In [None]:
# Preview the first rows of TNT_Data.
# NOTE(review): the previous cell already ends with the same preview.
TNT_Data.head()

In [None]:
# Keep only the rows whose 'Aha_Moment' flag is True (rows with a missing flag are excluded).
has_aha_moment = TNT_Data["Aha_Moment"].eq(True)
TNT_Learn = TNT_Data.loc[has_aha_moment]

TNT_Learn.head()

In [None]:
# Contacts whose ball displacement exceeds 0.2.
exceeds_threshold = TNT_Learn["ball_displacement"].gt(0.2)
peaks = TNT_Learn.loc[exceeds_threshold]

In [None]:
# Scatter of every above-threshold contact, coloured by Nickname.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=peaks, x='contact_index', y='ball_displacement', hue='Nickname', ax=ax)

ax.set_xlabel('Contact Index')
ax.set_ylabel('Ball Displacement')
ax.set_title('Positions of Peaks > 0.2 of Ball Displacement')
plt.show()

In [None]:
# For each fly, keep the row of `peaks` with the smallest contact_index, i.e. the first
# contact whose ball displacement exceeded 0.2.
# Selecting the rows by label keeps the original column dtypes, which a groupby.apply
# returning one row Series per group does not guarantee.
# NOTE(review): grouping is on 'fly' alone — assumes fly identifiers are unique across
# Nicknames; confirm.
first_peak_labels = peaks.groupby('fly')['contact_index'].idxmin()
first_peak = peaks.loc[first_peak_labels].reset_index(drop=True)

# Display the first peak for each fly
first_peak.head()


In [None]:
# Order the Nicknames by their median 'ball_displacement' (ascending) ...
# NOTE(review): the ordering uses ball_displacement, whereas the boxplot that follows
# shows contact_index — confirm this is the intended sort key.
median_displacement = first_peak.groupby('Nickname')['ball_displacement'].median()
sorted_data = median_displacement.sort_values().index

# ... and rearrange the rows of first_peak to follow that Nickname order.
first_peak_by_nickname = first_peak.set_index('Nickname')
first_peak_sorted = first_peak_by_nickname.loc[sorted_data].reset_index()

In [None]:

# Horizontal boxplot of the first above-threshold contact index, one box per Nickname,
# coloured by brain region.
fig, ax = plt.subplots(figsize=(10, 6))

sns.boxplot(data=first_peak_sorted, x='contact_index', y='Nickname', hue="Brain region", ax=ax)

ax.set_xlabel('Contact Index')

# The y axis carries the Nickname categories (it was previously labelled 'Count').
ax.set_ylabel('Nickname')

ax.set_title('First Peak of Ball Displacement > 0.2')

plt.show()

In [None]:
# Re-import Seaborn_Templates so edits made to the module on disk take effect.
importlib.reload(Seaborn_Templates)

In [None]:
# Same first-peak data drawn with the shared jitter/box template, sorted by median
# and coloured by brain region.
Seaborn_Templates.sns_plot(
    data=first_peak,
    metric="contact_index",
    kdims="Nickname",
    plot_type="jitterboxplot",
    sort_by="median",
    colorby="Brain region",
)

In [None]:
# Histogram (with KDE overlay) of the first above-threshold contact index per Nickname,
# drawn on a logarithmic x axis.
fig, ax = plt.subplots(figsize=(10, 6))

sns.histplot(data=first_peak, x='contact_index', bins=range(100), kde=True, hue='Nickname', ax=ax)

ax.set_xlabel('Contact Index')
ax.set_ylabel('Count')
ax.set_title('First Peak of Ball Displacement > 0.2')
ax.set_xscale('log')

plt.show()

# Working with transformed data

In [None]:
# Read the transformed contact dataset into its own frame.
# NOTE(review): this is the same feather file that was loaded into TNT_Data earlier;
# this frame does not carry the 'Aha_Moment' column added to TNT_Data.
transformed_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/241218_Transformed_contact_data_derivative.feather")
Transformed = pd.read_feather(transformed_path)

In [None]:
# Preview the first rows of the transformed dataset.
Transformed.head()

In [None]:
# List the column names of the transformed dataset.
Transformed.columns

In [None]:
# Collect every column whose name contains "y_centre_preprocessed".
y_columns = list(filter(lambda name: 'y_centre_preprocessed' in name, Transformed.columns))

y_columns

In [None]:
# Every brain region present in Transformed except the control one.
control_region = 'Control'  # label of the control group in 'Brain region'
brain_regions = [
    region
    for region in Transformed['Brain region'].unique()
    if region != control_region
]

In [None]:
# Grid of histograms (one panel per brain region) comparing the distribution of
# 'y_centre_preprocessed_disp_mean' in the control region and in that region.

# Grid geometry: three panels per row, at least one row.
n_cols = 3
n_rows = max(1, (len(brain_regions) + n_cols - 1) // n_cols)

# squeeze=False guarantees a 2-D array of axes, so flatten() always works.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(30, 10 * n_rows), squeeze=False)
axes = axes.flatten()

# Unit-wide bins with edges -3, -2, ..., 3, covering the whole displayed range.
# (range(-3, 3, 1) stopped at edge 2, so values in [2, 3] were left out of the histogram.)
bin_edges = np.arange(-3, 4)

for ax, region in zip(axes, brain_regions):
    # Rows belonging to either the control region or the current region
    subset_data = Transformed[Transformed['Brain region'].isin([control_region, region])]

    sns.histplot(data=subset_data, x='y_centre_preprocessed_disp_mean', kde=True, hue='Nickname', ax=ax, bins=bin_edges)

    ax.set_title(f'{control_region} + {region}')
    ax.set_xlabel('y_centre_preprocessed_disp_mean')
    ax.set_ylabel('Count')

    ax.set_xlim(-3, 3)  # Adjust the limits as needed

# Remove the panels left over in the last row (does not rely on a leaked loop index).
for unused_ax in axes[len(brain_regions):]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()

# Include contacts timing

In [None]:
# Add the absolute value of the mean y displacement as a new column.
abs_disp_mean = Transformed['y_centre_preprocessed_disp_mean'].abs()
Transformed['y_centre_preprocessed_disp_mean_abs'] = abs_disp_mean

# Global range of that absolute value.
# NOTE(review): global_min / global_max are recomputed on log1p values in the raster-plot cell below.
global_min, global_max = abs_disp_mean.min(), abs_disp_mean.max()

In [None]:
# Collect every distinct 'Brain region' value present in Transformed
# (no region is filtered out here).
brain_regions = Transformed['Brain region'].unique()

In [None]:
# Order the rows by brain region, then by Nickname within each region.
Transformed = Transformed.sort_values(by=['Brain region', 'Nickname'])

In [None]:
# One raster plot per brain region: each row is a (Nickname, fly) pair and each event is a
# horizontal segment from 'start' to 'end', coloured by log1p of the chosen metric.
brain_regions = Transformed['Brain region'].unique()

# Colour scale: a linear Normalize applied to log1p(metric), shared by all figures.
metric = 'y_centre_preprocessed_disp_mean_abs'
log_values = np.log1p(Transformed[metric])
global_min = log_values.min()
global_max = log_values.max()

for brain_region in brain_regions:
    subset_data = Transformed[Transformed['Brain region'] == brain_region]
    unique_combinations = subset_data[['Nickname', 'fly']].drop_duplicates()
    fly_pairs = list(unique_combinations.itertuples(index=False, name=None))

    # Figure height grows with the number of flies (0.2 per row, minimum 4).
    fig, ax = plt.subplots(figsize=(15, max(4, len(unique_combinations) * 0.2)))

    cmap = plt.get_cmap('viridis')
    norm = mcolors.Normalize(vmin=global_min, vmax=global_max)

    # Draw every event of every fly on that fly's row.
    for row_pos, (nickname, fly) in enumerate(fly_pairs):
        belongs_to_fly = (subset_data['Nickname'] == nickname) & (subset_data['fly'] == fly)
        fly_data = subset_data[belongs_to_fly]
        for start, end, value in zip(fly_data['start'], fly_data['end'], fly_data[metric]):
            ax.plot(
                [start, end],
                [row_pos, row_pos],
                color=cmap(norm(np.log1p(value))),
                lw=4,  # thick segments for visibility
                solid_capstyle='butt',
            )

    # One tick per fly, labelled "Nickname-fly" in a small font.
    ax.set_yticks(np.arange(len(unique_combinations)))
    ax.set_yticklabels([f"{nickname}-{fly}" for nickname, fly in fly_pairs], fontsize=6)

    # Tight vertical margins, no grid.
    ax.margins(y=0.01)
    ax.grid(False)
    ax.set_xlabel('Time')
    ax.set_ylabel('Nickname-Fly')
    ax.set_title(f'Raster Plot for {brain_region}')

    # Colour bar for the shared scale.
    cbar = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax)
    cbar.set_label(f'log({metric} + 1)')

    plt.tight_layout()
    plt.show()

In [None]:
# One heatmap per brain region: rows are Nicknames, columns are time bins, and each cell
# holds log1p(metric) of an event covering that bin (NaN, masked, where no event does).
# `metric` and `brain_regions` come from the raster-plot cell above.

time_step = 0.1   # bin width, in the units of the 'start' / 'end' columns
tick_every = 100  # label one x tick every this many bins

for brain_region in brain_regions:
    subset_data = Transformed[Transformed['Brain region'] == brain_region]

    nicknames = subset_data['Nickname'].unique()
    time_points = np.arange(subset_data['start'].min(), subset_data['end'].max(), time_step)
    n_bins = len(time_points)
    if n_bins == 0:
        # No time span to draw for this region.
        continue

    matrix_values = np.full((len(nicknames), n_bins), np.nan)

    # Fill the matrix; later events overwrite earlier ones that share a row and bin.
    for i, nickname in enumerate(nicknames):
        nick_data = subset_data[subset_data['Nickname'] == nickname]
        for start, end, value in zip(nick_data['start'], nick_data['end'], nick_data[metric]):
            start_idx = int((start - time_points[0]) / time_step)
            # Clip the end to the last bin instead of discarding the whole event:
            # np.arange excludes its stop value, so an event ending at the global
            # maximum would otherwise index one past the end and be skipped.
            end_idx = min(int((end - time_points[0]) / time_step), n_bins)
            if start_idx < n_bins:
                matrix_values[i, start_idx:end_idx] = np.log1p(value)

    fig, ax = plt.subplots(figsize=(15, max(4, len(nicknames) * 0.3)))

    # xticklabels=False: a shorter label list passed to heatmap is laid on the first
    # len(labels) columns, not on every 100th one, so ticks are set explicitly below.
    sns.heatmap(matrix_values,
                cmap='viridis',
                yticklabels=nicknames,
                xticklabels=False,
                cbar_kws={'label': f'log({metric} + 1)'},
                mask=np.isnan(matrix_values),
                ax=ax)

    # Heatmap cells are centred on column index + 0.5.
    tick_bins = np.arange(0, n_bins, tick_every)
    ax.set_xticks(tick_bins + 0.5)
    ax.set_xticklabels(np.round(time_points[tick_bins], 1))

    ax.set_title(f'Activity Heatmap for {brain_region}')
    ax.set_xlabel('Time')
    ax.set_ylabel('Nickname')
    plt.tight_layout()
    plt.show()

In [None]:
# (Removed) This cell was a line-for-line copy of the per-Nickname activity heatmap cell
# directly above; it rebuilt the same matrices and rendered the same figures a second time.

In [None]:
# One heatmap per brain region: rows are individual flies (sorted by Nickname, then fly),
# columns are time bins, and each cell holds log1p(metric) of an event covering that bin.
# `metric` and `brain_regions` come from the raster-plot cell above.

time_step = 0.1   # bin width, in the units of the 'start' / 'end' columns
tick_every = 100  # label one x tick every this many bins

for brain_region in brain_regions:
    subset_data = Transformed[Transformed['Brain region'] == brain_region]

    # Distinct (Nickname, fly) pairs in display order, and their row labels.
    flies_df = subset_data[['Nickname', 'fly']].drop_duplicates()
    flies_df = flies_df.sort_values(['Nickname', 'fly'])
    fly_pairs = list(flies_df.itertuples(index=False, name=None))
    fly_labels = [f"{nickname}-{fly}" for nickname, fly in fly_pairs]

    time_points = np.arange(subset_data['start'].min(), subset_data['end'].max(), time_step)
    n_bins = len(time_points)
    if n_bins == 0:
        # No time span to draw for this region.
        continue

    matrix_values = np.full((len(fly_labels), n_bins), np.nan)

    # Fill the matrix, one row per fly.
    for i, (nickname, fly) in enumerate(fly_pairs):
        fly_data = subset_data[
            (subset_data['Nickname'] == nickname) &
            (subset_data['fly'] == fly)
        ]
        for start, end, value in zip(fly_data['start'], fly_data['end'], fly_data[metric]):
            start_idx = int((start - time_points[0]) / time_step)
            # Clip the end to the last bin instead of discarding the whole event:
            # np.arange excludes its stop value, so an event ending at the global
            # maximum would otherwise index one past the end and be skipped.
            end_idx = min(int((end - time_points[0]) / time_step), n_bins)
            if start_idx < n_bins:
                matrix_values[i, start_idx:end_idx] = np.log1p(value)

    fig, ax = plt.subplots(figsize=(15, max(4, len(fly_labels) * 0.3)))

    # xticklabels=False: a shorter label list passed to heatmap is laid on the first
    # len(labels) columns, not on every 100th one, so ticks are set explicitly below.
    sns.heatmap(matrix_values,
                cmap='viridis',
                yticklabels=fly_labels,
                xticklabels=False,
                cbar_kws={'label': f'log({metric} + 1)'},
                mask=np.isnan(matrix_values),
                ax=ax)

    # Heatmap cells are centred on column index + 0.5.
    tick_bins = np.arange(0, n_bins, tick_every)
    ax.set_xticks(tick_bins + 0.5)
    ax.set_xticklabels(np.round(time_points[tick_bins], 1))

    ax.set_title(f'Activity Heatmap for {brain_region}')
    ax.set_xlabel('Time')
    ax.set_ylabel('Fly')
    plt.tight_layout()
    plt.show()

# Contact TSNE

In [None]:
# Load the t-SNE behaviour map from its feather file.
tsne_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/TSNE/241210_behavior_map_tsne.feather")
TSNE_Data = pd.read_feather(tsne_path)

In [None]:
# Preview the first rows of the t-SNE table.
TSNE_Data.head()

In [None]:
# Scatter of the two t-SNE components, coloured by brain region.
fig, ax = plt.subplots(figsize=(10, 6))

sns.scatterplot(data=TSNE_Data, x='t-SNE Component 1', y='t-SNE Component 2', hue="Brain region", ax=ax)

ax.set_xlabel('t-SNE1')
ax.set_ylabel('t-SNE2')
ax.set_title('t-SNE Plot of Behavior Map')

plt.show()



In [None]:
# Split the t-SNE map into control rows ("Brain region" == "Control") and all other rows,
# and draw the two subsets side by side on shared axes so they can be compared directly.

control_data = TSNE_Data[TSNE_Data['Brain region'] == 'Control']

experimental_data = TSNE_Data[TSNE_Data['Brain region'] != 'Control']

panels = [('Control', control_data), ('Experimental', experimental_data)]

# One row, two panels; shared limits make positions comparable between panels.
fig, axes = plt.subplots(1, 2, figsize=(20, 6), sharex=True, sharey=True)

for ax, (panel_name, panel_data) in zip(axes, panels):
    sns.scatterplot(data=panel_data, x='t-SNE Component 1', y='t-SNE Component 2', hue="Brain region", ax=ax)
    ax.set_xlabel('t-SNE1')
    ax.set_ylabel('t-SNE2')
    ax.set_title(f't-SNE Plot of Behavior Map - {panel_name}')

plt.tight_layout()
plt.show()

In [None]:
# Filled 2-D kernel density estimate of the t-SNE map.
fig, ax = plt.subplots(figsize=(10, 6))

sns.kdeplot(data=TSNE_Data, x='t-SNE Component 1', y='t-SNE Component 2', fill=True, ax=ax)

ax.set_xlabel('t-SNE1')
ax.set_ylabel('t-SNE2')
ax.set_title('t-SNE Density Plot of Behavior Map')

plt.show()

In [None]:
# NOTE(review): pandas, seaborn, matplotlib and numpy are already imported in the first
# cell; the scipy / sklearn imports below would also be better placed there.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

# The two t-SNE coordinates of every row.
x = TSNE_Data['t-SNE Component 1']
y = TSNE_Data['t-SNE Component 2']

# Gaussian KDE over the (x, y) points with bandwidth factor 0.05.
kde = gaussian_kde(np.vstack([x, y]), bw_method=0.05)

# Evaluate the KDE on a 100 x 100 grid spanning the data range.
(xmin, xmax), (ymin, ymax) = (x.min(), x.max()), (y.min(), y.max())
grid_points = 100j  # complex step: number of grid points per axis, end points included
xx, yy = np.mgrid[xmin:xmax:grid_points, ymin:ymax:grid_points]
positions = np.vstack([xx.ravel(), yy.ravel()])
kde_values = kde(positions).reshape(xx.shape)

In [None]:
# Filled contour plot of the gridded KDE; `contour` is kept so its levels can be reused below.
fig, ax = plt.subplots(figsize=(10, 8))
contour = ax.contourf(xx, yy, kde_values, levels=20, cmap="Blues")
fig.colorbar(contour, ax=ax, label="Density")
ax.set_title("KDE Contour Plot")
ax.set_xlabel("t-SNE Component 1")
ax.set_ylabel("t-SNE Component 2")
plt.show()

In [None]:
# Keep the highest contour levels (darkest shades) and use the lowest of them as the
# high-density threshold.
n_top_levels = 12  # number of levels kept; adjust if needed
top_levels = contour.levels[-n_top_levels:]
threshold = top_levels[0]

In [None]:
# Filled contour plot restricted to the selected top density levels.
fig, ax = plt.subplots(figsize=(10, 8))

top_contour = ax.contourf(xx, yy, kde_values, levels=top_levels, cmap="Blues")

fig.colorbar(top_contour, ax=ax, label="Density")

# Report the number of levels actually selected (the title used to hard-code 5).
ax.set_title(f"KDE Contour Plot (Top {len(top_levels)} Levels)")

ax.set_xlabel("t-SNE Component 1")

ax.set_ylabel("t-SNE Component 2")

plt.show()

In [None]:
# t-SNE scatter with red contour lines drawn at each of the selected top density levels.
fig, ax = plt.subplots(figsize=(10, 6))

sns.scatterplot(data=TSNE_Data, x='t-SNE Component 1', y='t-SNE Component 2', alpha=0.5, ax=ax)
ax.contour(xx, yy, kde_values, levels=top_levels, colors='r')

ax.set_xlabel('t-SNE1')
ax.set_ylabel('t-SNE2')
ax.set_title('t-SNE Plot of Behavior Map with Density Contour')

plt.show()

In [None]:
# Same scatter, with a single red contour line at the threshold (the lowest selected level).
fig, ax = plt.subplots(figsize=(10, 6))

sns.scatterplot(data=TSNE_Data, x='t-SNE Component 1', y='t-SNE Component 2', alpha=0.5, ax=ax)
ax.contour(xx, yy, kde_values, levels=[threshold], colors='r')

ax.set_xlabel('t-SNE1')
ax.set_ylabel('t-SNE2')
ax.set_title('t-SNE Plot of Behavior Map with Density Contour')

plt.show()

In [None]:
# Sum of the KDE values falling in each band between consecutive selected levels.
# NOTE(review): despite the name, these are sums of density values over grid nodes,
# not geometric areas — confirm which quantity is wanted.
contour_areas = [
    np.sum(kde_values[(kde_values >= lower) & (kde_values < upper)])
    for lower, upper in zip(top_levels[:-1], top_levels[1:])
]

# A bare expression in the middle of a cell is not displayed, so print explicitly.
print(contour_areas)

# Indices of the grid nodes whose density reaches the threshold ...
points_in_contour = np.where(kde_values >= threshold)

# ... and the coordinates of those grid nodes.
x_in_contour = xx[points_in_contour]

y_in_contour = yy[points_in_contour]

# Plot those grid nodes together with the threshold contour line.
fig, ax = plt.subplots(figsize=(10, 6))

ax.scatter(x_in_contour, y_in_contour, alpha=0.5)
ax.contour(xx, yy, kde_values, levels=[threshold], colors='r')

ax.set_xlabel('t-SNE1')
ax.set_ylabel('t-SNE2')
ax.set_title('Points within the Density Contour')

plt.show()

In [None]:
# Show the x coordinates of the grid nodes selected in the previous cell.
x_in_contour




In [None]:
# NOTE(review): pandas, seaborn, matplotlib and numpy are already imported in the first
# cell; the scipy imports would also be better placed there.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
from scipy.ndimage import label

# Binary mask of the grid nodes whose density reaches the threshold.
high_density_mask = kde_values >= threshold

# Label the connected components of the mask (0 = background, 1..num_features = areas).
labeled_array, num_features = label(high_density_mask)

# Assign every data point the label of its nearest grid node. This tests membership in
# the area itself; the bounding box of an area also contains low-density points and can
# overlap the bounding box of a neighbouring area.
x_grid = xx[:, 0]  # grid x coordinates (first axis of the mgrid)
y_grid = yy[0, :]  # grid y coordinates (second axis of the mgrid)
point_ix = np.clip(
    np.rint((TSNE_Data['t-SNE Component 1'].to_numpy() - x_grid[0]) / (x_grid[1] - x_grid[0])),
    0, len(x_grid) - 1,
).astype(int)
point_iy = np.clip(
    np.rint((TSNE_Data['t-SNE Component 2'].to_numpy() - y_grid[0]) / (y_grid[1] - y_grid[0])),
    0, len(y_grid) - 1,
).astype(int)
point_area = labeled_array[point_ix, point_iy]

# Create one subset per non-empty area; remember which area each subset came from so the
# plot legend and printout agree with the file names.
subsets = []
subset_area_ids = []
for i in range(1, num_features + 1):
    points_in_area = TSNE_Data[point_area == i]

    # If the area is not empty, save it to the subset list and to a file
    if len(points_in_area) > 0:
        points_in_area.to_feather(f"/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/Cluster_data/241210_behavior_map_tsne_area_{i}.feather")
        subsets.append(points_in_area)
        subset_area_ids.append(i)

# Plot each distinct area with a different colour over the faded density map.
fig, ax = plt.subplots(figsize=(12, 10))
ax.contourf(xx, yy, kde_values, levels=20, cmap="Blues", alpha=0.3)

colors = plt.cm.rainbow(np.linspace(0, 1, num_features))
for area_id, subset in zip(subset_area_ids, subsets):
    ax.scatter(subset['t-SNE Component 1'], subset['t-SNE Component 2'],
               color=colors[area_id - 1], label=f'Area {area_id}', alpha=0.6)

ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.set_title('Distinct High-Density Areas in t-SNE Plot')
ax.set_xlabel('t-SNE Component 1')
ax.set_ylabel('t-SNE Component 2')
plt.tight_layout()
plt.show()

# Print the number of points in each saved area.
for area_id, subset in zip(subset_area_ids, subsets):
    print(f"Area {area_id}: {len(subset)} points")

In [None]:
# Load the pooled contact data from its feather file and preview it.
source_data_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/241209_ContactData/241209_Pooled_contact_data.feather")
source_data = pd.read_feather(source_data_path)

source_data.head()

In [None]:
# Keep the rows whose KDE density, evaluated at the point itself, exceeds the threshold.
point_density = kde([x, y])
dense_data = TSNE_Data[point_density > threshold]

# Scatter of the retained rows, coloured by brain region.
fig, ax = plt.subplots(figsize=(10, 6))

sns.scatterplot(data=dense_data, x='t-SNE Component 1', y='t-SNE Component 2', hue="Brain region", ax=ax)

ax.set_xlabel('t-SNE1')
ax.set_ylabel('t-SNE2')
ax.set_title('t-SNE Plot of Dense Data')

plt.show()

In [None]:
# Cluster the high-density grid nodes with DBSCAN, then give each data point the cluster
# of its nearest high-density node (or -1 when that node is too far away).

# Coordinates of the grid nodes whose density exceeds the threshold, shape (n_nodes, 2).
high_density_mask = kde_values > threshold
high_density_coords = positions[:, high_density_mask.ravel()].T

# Grid search: keep the (eps, min_samples) pair that yields the most clusters.
best_eps = None
best_min_samples = None
max_clusters = 0

eps_values = np.arange(1.0, 20.0, 1.0)  # Adjust step size as needed
min_samples_values = range(3, 10)

for eps in eps_values:
    for min_samples in min_samples_values:
        candidate_labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(high_density_coords)
        # Count the distinct labels, excluding the noise label -1.
        n_clusters = np.unique(candidate_labels[candidate_labels != -1]).size

        if n_clusters > max_clusters:
            max_clusters = n_clusters
            best_eps = eps
            best_min_samples = min_samples

# Fail with a clear message instead of passing eps=None to DBSCAN.
if best_eps is None:
    raise ValueError("DBSCAN found no cluster for any (eps, min_samples) combination tried")

# Apply DBSCAN with the best parameters found
dbscan = DBSCAN(eps=best_eps, min_samples=best_min_samples)
clusters = dbscan.fit_predict(high_density_coords)

# Nearest high-density node for every data point.
nn = NearestNeighbors(n_neighbors=1)
nn.fit(high_density_coords)

distances, indices = nn.kneighbors(np.column_stack([x, y]))

# Positional, vectorised assignment: points within best_eps of their nearest node take
# that node's cluster, all others are marked -1 (noise). This does not depend on
# TSNE_Data having a default 0..n-1 index.
TSNE_Data['cluster'] = np.where(distances[:, 0] < best_eps, clusters[indices[:, 0]], -1)


In [None]:
# Grid of t-SNE scatter plots, one panel per brain region (three panels per row).
g = (
    sns.FacetGrid(TSNE_Data, col='Brain region', col_wrap=3, height=4)
    .map(sns.scatterplot, 't-SNE Component 1', 't-SNE Component 2')
    .set_axis_labels('t-SNE1', 't-SNE2')
    .set_titles('Brain Region: {col_name}')
)

plt.show()

In [None]:
# Grid of filled t-SNE density plots, one panel per brain region (three panels per row).
g = (
    sns.FacetGrid(TSNE_Data, col='Brain region', col_wrap=3, height=4)
    .map(sns.kdeplot, 't-SNE Component 1', 't-SNE Component 2', fill=True)
    .set_axis_labels('t-SNE1', 't-SNE2')
    .set_titles('Brain Region: {col_name}')
)

plt.show()

In [None]:
# Show the t-SNE table; when the cells are run in order it includes the 'cluster'
# column set by the DBSCAN cell.
TSNE_Data

In [None]:
# Folder whose *.feather files are plotted by the next cell.
source_folder = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/TSNE")

In [None]:
# Draw the t-SNE scatter of every feather file found in source_folder, one panel per file.

# Sorted so that the panel order is the same on every run.
datasets = sorted(source_folder.glob("*.feather"))

# Grid geometry: three panels per row, at least one row.
n_cols = 3
n_rows = max(1, (len(datasets) + n_cols - 1) // n_cols)

# squeeze=False guarantees a 2-D array of axes, so flatten() always works.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
axes = axes.flatten()

for ax, dataset_path in zip(axes, datasets):
    # Load into a local name so the global TSNE_Data used by the cells above
    # is not replaced by whichever file happens to be read last.
    tsne_frame = pd.read_feather(dataset_path)

    sns.scatterplot(data=tsne_frame, x='t-SNE Component 1', y='t-SNE Component 2', ax=ax)

    # Title each panel with the file name (without extension).
    ax.set_title(dataset_path.stem)
    ax.set_xlabel('t-SNE1')
    ax.set_ylabel('t-SNE2')

# Remove the panels left over in the last row (does not rely on a leaked loop index).
for unused_ax in axes[len(datasets):]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()

In [None]:
# Load the PCA-transformed data from its feather file.
pca_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/PCA/241210_pca_data_transformed_New.feather")
PCA_results = pd.read_feather(pca_path)


In [None]:
# Preview the first rows of the PCA table.
PCA_results.head()

In [None]:
# Scatter of the first two PCA components, coloured by brain region.
fig, ax = plt.subplots(figsize=(10, 6))

sns.scatterplot(data=PCA_results, x='PCA Component 1', y='PCA Component 2', hue="Brain region", ax=ax)

ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_title('PCA Plot of Behavior Map')

plt.show()

In [None]:
 PCA_results.columns