In [1]:
%config IPCompleter.use_jedi = False
%pdb off
%load_ext autoreload
%autoreload 3

# required to enable non-blocking interaction:
%gui qt5

import sys
from copy import deepcopy
from typing import Dict, List, Tuple, Optional, Callable, Union, Any
from typing_extensions import TypeAlias
from neuropy.utils.result_context import IdentifyingContext
from nptyping import NDArray
import neuropy.utils.type_aliases as types
from collections import defaultdict
from functools import partial

import numpy as np
import pandas as pd
import re
from pathlib import Path
from datetime import datetime

from neuropy.utils.indexing_helpers import PandasHelpers
from neuropy.core.user_annotations import UserAnnotationsManager
from pyphocorehelpers.indexing_helpers import partition_df
# Set the maximum number of columns to display
pd.set_option('display.max_columns', 100)

import IPython
from pyphocorehelpers.programming_helpers import IPythonHelpers
from pyphocorehelpers.notebook_helpers import NotebookCellExecutionLogger
from pyphocorehelpers.assertion_helpers import Assert

# Jupyter-lab enable printing for any line on its own (instead of just the last one in the cell)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

_notebook_path:Path = Path(IPythonHelpers.try_find_notebook_filepath(IPython.extract_module_locals())).resolve() # Finds the path of THIS notebook

# Plotting
# import pylustrator # customization of figures
import matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
_bak_rcParams = mpl.rcParams.copy()

matplotlib.use('Qt5Agg')

# Switch to the desired interactivity mode
plt.interactive(True)

import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
template: str = 'plotly_dark' # set plotl template
pio.templates.default = template
from pyphocorehelpers.plotting.media_output_helpers import fig_to_clipboard
from pyphocorehelpers.Filesystem.path_helpers import file_uri_from_path, sanitize_filename_for_Windows
from pyphocorehelpers.gui.Jupyter.simple_widgets import fullwidth_path_widget, simple_path_display_widget
from pyphoplacecellanalysis.Pho2D.plotly.Extensions.plotly_helpers import plotly_helper_save_figures, _helper_build_figure, plotly_pre_post_delta_scatter, plot_across_sessions_scatter_results
from pyphocorehelpers.assertion_helpers import Assert

# from ..PendingNotebookCode import plot_across_sessions_scatter_results, plot_histograms, plot_stacked_histograms
from pyphocorehelpers.Filesystem.path_helpers import find_first_extant_path
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import find_csv_files, find_HDF5_files, find_most_recent_files
from pyphoplacecellanalysis.Pho2D.statistics_plotting_helpers import plot_histograms_across_sessions, plot_histograms, plot_stacked_histograms

from pyphoplacecellanalysis.General.Pipeline.Stages.ComputationFunctions.MultiContextComputationFunctions.DirectionalPlacefieldGlobalComputationFunctions import DecoderDecodedEpochsResult
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import load_across_sessions_exported_files, _process_and_load_exported_file, _common_cleanup_operations

from pyphocorehelpers.programming_helpers import metadata_attributes
from pyphocorehelpers.function_helpers import function_attributes

from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import build_session_t_delta, _new_process_csv_files
from pyphoplacecellanalysis.SpecificResults.PhoDiba2023Paper import DataFrameFilter
from pyphocorehelpers.plotting.media_output_helpers import figure_to_pil_image, vertical_image_stack, horizontal_image_stack, image_grid

debug_print: bool = False
enable_neptune: bool = False

_TODAY_DAY_ONLY_DATE: str = "2024-11-15"
TODAY_DAY_DATE: str = f"{_TODAY_DAY_ONLY_DATE}_Apogee"
# TODAY_DAY_DATE: str = f"{_TODAY_DAY_ONLY_DATE}_GL"
# TODAY_DAY_DATE: str = f"{_TODAY_DAY_ONLY_DATE}_Lab"
# TODAY_DAY_DATE: str = f"{_TODAY_DAY_ONLY_DATE}_rMBP"

print(f'TODAY_DAY_DATE: {TODAY_DAY_DATE}')

types.session_str: TypeAlias = str # a unique session identifier

if enable_neptune:
    import neptune # for logging progress and results
    from neptune.types import File
    from pyphoplacecellanalysis.General.Batch.NeptuneAiHelpers import Neptuner, AutoValueConvertingNeptuneRun, set_environment_variables 

    ## Gets the notebook filepath for Neptune:
    import IPython
    from pyphocorehelpers.programming_helpers import IPythonHelpers
    notebook_filepath: str = IPythonHelpers.try_find_notebook_filepath(IPython.extract_module_locals())
    assert Path(notebook_filepath).resolve().exists(), f"found notebook filepath: '{notebook_filepath}' does not exist"
    # notebook_filepath

    neptune_kwargs = {'project':"commander.pho/PhoDibaLongShortAcrossSessions",
    'api_token':"eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIxOGIxODU2My1lZTNhLTQ2ZWMtOTkzNS02ZTRmNzM5YmNjNjIifQ=="}
            
    neptuner = Neptuner(project_name=neptune_kwargs['project'], api_token=neptune_kwargs['api_token'])


    if neptuner.run is None:
        neptuner.run = AutoValueConvertingNeptuneRun(project=neptuner.project_name, api_token=neptuner.api_token, dependencies="infer", source_files=[notebook_filepath])
        params = {"TODAY_DAY_DATE": TODAY_DAY_DATE, "run_workstation": "Apogee"}
        neptuner.run["parameters"] = params
        neptuner.outputs = neptuner.run['outputs']
        neptuner.figures = neptuner.outputs['figures']

    neptuner_run: AutoValueConvertingNeptuneRun = neptuner.run
    
    # run = neptune.init_run(source_files=["**/*.dvc"])

    # # Pre-execution dataframe view:
    # run["dataset/global_batch_run_progress_df"].upload(File.as_html(global_batch_run.to_dataframe(expand_context=True, good_only=False))) # "path/to/test_preds.csv"

else:
    # no neptune:
    neptuner = None    
    neptuner_run = None



known_bad_sessions: List[IdentifyingContext] = UserAnnotationsManager.get_hardcoded_bad_sessions()
# bad_session_df: pd.DataFrame = pd.DataFrame.from_records([v.to_dict() for v in known_bad_sessions], columns=['format_name', 'animal', 'exper_name', 'session_name'])
# bad_session_df

known_bad_session_strs = [str(v.get_description()) for v in known_bad_sessions]
# known_bad_session_strs

Automatic pdb calling has been turned OFF
TODAY_DAY_DATE: 2024-11-15_Apogee


# Graphics Imports

In [2]:
import matplotlib
from neuropy.core.user_annotations import UserAnnotationsManager
from pyphocorehelpers.DataStructure.RenderPlots.MatplotLibRenderPlots import MatplotlibRenderPlots
from neuropy.utils.matplotlib_helpers import matplotlib_configuration_update
# matplotlib.use('Qt5Agg')
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import AcrossSessionsResults, AcrossSessionTables, AcrossSessionsVisualizations
from pyphoplacecellanalysis.SpecificResults.PhoDiba2023Paper import PaperFigureTwo

def _save_matplotlib_fig(matplotlib_output_container: MatplotlibRenderPlots):
    """ captures: neptuner_run
    
    """
    a_fig_context = matplotlib_output_container.context
    assert len(matplotlib_output_container.saved_figures) == 1
    a_saved_fig_path = matplotlib_output_container.saved_figures[0][0]
    assert a_saved_fig_path.exists()

    if neptuner_run is not None:
        a_full_figure_path_key: str = a_fig_context.get_description(separator='/', include_property_names=True, key_value_separator=':') # .replace(' ', '_')
        # a_full_figure_path_key: str = a_fig_context.get_description(separator=':', include_property_names=True, key_value_separator='|')
        print(f'a_full_figure_path_key: "{a_full_figure_path_key}"')
        # neptuner_run['outputs']['figures'][f"{a_full_figure_path_key}"].upload(a_fig)
        neptuner_run['outputs']['figures'][f"{a_full_figure_path_key}"].upload(a_saved_fig_path.as_posix())
        # neptuner.figures[f"{a_full_figure_path_key}"].upload(a_fig)
        

# %matplotlib inline


# with matplotlib_configuration_update(is_interactive=False, backend='nbAgg'):
# 'collected_outputs/across_session_result_long_short_recomputed_inst_firing_rate_2024-06-11_GL.pkl'

In [3]:
import plotly.io as pio
from pyphoplacecellanalysis.Pho2D.plotly.plotly_templates import PlotlyHelpers
from pyphoplacecellanalysis.Pho2D.statistics_plotting_helpers import plot_histograms_across_sessions, plot_stacked_histograms
from pyphoplacecellanalysis.Pho2D.plotly.Extensions.plotly_helpers import plotly_helper_save_figures, _helper_build_figure, plotly_pre_post_delta_scatter, plot_across_sessions_scatter_results, add_copy_save_action_buttons

# fig_size_kwargs = {'width': 1650, 'height': 480}
resolution_multiplier = 1
# fig_size_kwargs = {'width': resolution_multiplier*1650, 'height': resolution_multiplier*480}
 ## set up figure size
fig_size_kwargs = {'width': (resolution_multiplier * 1800), 'height': (resolution_multiplier*480)}
# fig_size_kwargs = {'width': (resolution_multiplier * 1080), 'height': resolution_multiplier*480}
is_dark_mode, template = PlotlyHelpers.get_plotly_template(is_dark_mode=False)
pio.templates.default = template

# figure_export_path = Path(r'E:\Dropbox (Personal)\Active\Kamran Diba Lab\Presentations\2024-05-30 - Pho iNAV Poster\Figures').resolve()
# figure_export_path = Path('/Users/pho/Dropbox (Personal)/Active/Kamran Diba Lab/Presentations/2024-05-30 - Pho iNAV Poster/Figures').resolve()

# assert figure_export_path.exists()

from PIL import Image

from pyphocorehelpers.programming_helpers import copy_image_to_clipboard

def save_plotly(a_fig, a_fig_context):
    """ 
    captures: TODAY_DAY_DATE, figures_folder, neptuner_run
    """
    fig_save_path: Path = figures_folder.joinpath('_'.join([TODAY_DAY_DATE, sanitize_filename_for_Windows(a_fig_context.get_description())])).resolve()
    figure_out_paths = {'.html': fig_save_path.with_suffix('.html'), '.png': fig_save_path.with_suffix('.png')}
    a_fig.write_html(figure_out_paths['.html'])
    display(fullwidth_path_widget(figure_out_paths['.html'], file_name_label='.html'))
    # print(file_uri_from_path(figure_out_paths['.html']))
    a_fig.write_image(figure_out_paths['.png'])
    # print(file_uri_from_path(figure_out_paths['.png']))
    display(fullwidth_path_widget(figure_out_paths['.png'], file_name_label='.png'))

    if neptuner_run is not None:
        a_full_figure_path_key: str = a_fig_context.get_description(separator='/', include_property_names=True, key_value_separator=':') # .replace(' ', '_')
        # a_full_figure_path_key: str = a_fig_context.get_description(separator=':', include_property_names=True, key_value_separator='|')
        print(f'a_full_figure_path_key: "{a_full_figure_path_key}"')
        # neptuner_run['outputs']['figures'][f"{a_full_figure_path_key}"].upload(a_fig)
        neptuner_run['outputs']['figures'][f"{a_full_figure_path_key}"].upload(figure_out_paths['.html'].as_posix())
        # neptuner.figures[f"{a_full_figure_path_key}"].upload(a_fig)
        
    return figure_out_paths


# csv_files

In [4]:
## Load across session t_delta CSV, which contains the t_delta for each session:

## INPUTS: known_bad_session_strs,

# cuttoff_date = datetime(2024, 9, 26)
cuttoff_date = datetime(2024, 10, 28)
# cuttoff_date = datetime(2024, 11, 11)
# cuttoff_date = datetime(2024, 5, 18)
# cuttoff_date = None


In [5]:

known_collected_outputs_paths = [Path(v).resolve() for v in ['/Users/pho/data/collected_outputs',
                                                            '/Volumes/SwapSSD/Data/collected_outputs', r"K:/scratch/collected_outputs", '/Users/pho/Dropbox (University of Michigan)/MED-DibaLabDropbox/Data/Pho/Outputs/output/collected_outputs', r'C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs',
                                                            '/home/halechr/cloud/turbo/Data/Output/collected_outputs',
                                                            '/home/halechr/cloud/turbo/Pho/Output/collected_outputs',
                                                            '/home/halechr/FastData/collected_outputs/',
                                                            ]]
collected_outputs_directory = find_first_extant_path(known_collected_outputs_paths)
assert collected_outputs_directory.exists(), f"collected_outputs_directory: {collected_outputs_directory} does not exist! Is the right computer's config commented out above?"
# fullwidth_path_widget(scripts_output_path, file_name_label='Scripts Output Path:')
print(f'collected_outputs_directory: {collected_outputs_directory}')


# _active_folder_widget = fullwidth_path_widget(collected_outputs_directory)
# display(_active_folder_widget)

# Create a 'figures' subfolder if it doesn't exist
figures_folder: Path = collected_outputs_directory.joinpath('figures').resolve()
figures_folder.mkdir(parents=False, exist_ok=True)
assert figures_folder.exists()
print(f'\tfigures_folder: {file_uri_from_path(figures_folder)}')

# Create an output path for the across session collected results (like the aggregate CSVs built from the individual session CSVs)
across_sessions_output_folder: Path = collected_outputs_directory.joinpath('../across_sessions').resolve()
across_sessions_output_folder.mkdir(parents=False, exist_ok=True)
assert across_sessions_output_folder.exists()
print(f'\tacross_sessions_output_folder: {file_uri_from_path(across_sessions_output_folder)}')


# ## Find the files:
# csv_files = find_csv_files(collected_outputs_directory)
# h5_files = find_HDF5_files(collected_outputs_directory)

# csv_sessions, parsed_csv_files_df  = find_most_recent_files(found_session_export_paths=csv_files, cuttoff_date=cuttoff_date) # #TODO 2024-09-27 02:01: - [ ] Note `csv_sessions` is unused, replaced by `parsed_csv_files_df`
# h5_sessions, parsed_h5_files_df = find_most_recent_files(found_session_export_paths=h5_files)


collected_outputs_directory: K:\scratch\collected_outputs
	figures_folder: file:///K:/scratch/collected_outputs/figures
	across_sessions_output_folder: file:///K:/scratch/across_sessions


In [None]:
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import parse_filename
from pyphocorehelpers.Filesystem.path_helpers import BaseMatchParser
from pyphocorehelpers.Filesystem.path_helpers import RoundedTimeParser, DayDateWithVariantSuffixParser, DayDateOnlyParser, DayDateOnlyParser, DayDateTimeParser
from pyphocorehelpers.Filesystem.path_helpers import try_iterative_parse_chain

## Parsing Testing
a_file_basename: str = Path("2024-11-15_GL-kdiba-gor01-one-2006-6-08_14-26-15__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df).csv")
_out_parsed_result_tuple = parse_filename(a_file_basename)
export_datetime, session_str, custom_replay_name, export_file_type, decoding_time_bin_size_str = _out_parsed_result_tuple
_out_parsed_result_tuple

In [None]:
a_file_basename: str = "2024-11-15_GL-kdiba-gor01-one-2006-6-08_14-26-15__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df)"
# a_file_basename: str = "2024-11-15-kdiba-gor01-one-2006-6-08_14-26-15__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df)"

final_parsed_output_dict = try_iterative_parse_chain(basename=a_file_basename, debug_print=False)
final_parsed_output_dict


In [None]:
## begin by splitting on "__"
export_datetime_session_name_half, custom_context_data_info_half = a_file_basename.split("__", maxsplit=1)
export_datetime_session_name_half # '2024-11-15_GL-kdiba-gor01-one-2006-6-08_14-26-15'
custom_context_data_info_half # 'withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df)'
## find the parenthesis wrapped part:
custom_context_data_info_half


In [None]:
filename: str = deepcopy(custom_context_data_info_half)
# matches '*-(merged_complete_epoch_stats_df)'
paren_wrapped_data_name_suffix_pattern = r"(?P<remaining_string>.+?)-\((?P<export_file_type>[A-Za-z_]+)\)"
match = re.match(paren_wrapped_data_name_suffix_pattern, filename)
parsed_output_dict = match.groupdict()
export_file_type: str = parsed_output_dict['export_file_type']
remaining_additional_context_str: str = parsed_output_dict['remaining_string']
## OUTPUTS: export_file_type, remaining_additional_context_str
export_file_type
remaining_additional_context_str
# complete_session_context_str: str = f"{remaining_additional_context_str}-"

In [6]:
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import load_across_sessions_exported_files

final_sessions, sessions_t_delta_tuple, df_results, (parsed_csv_files_df, csv_files, csv_sessions), (parsed_h5_files_df, h5_files, h5_sessions), excluded_or_outdated_files_list = load_across_sessions_exported_files(cuttoff_date=cuttoff_date, debug_print=True)
all_sessions_laps_df, all_sessions_ripple_df, all_sessions_laps_time_bin_df, all_sessions_ripple_time_bin_df, all_sessions_MultiMeasure_laps_df, all_sessions_MultiMeasure_ripple_df, all_sessions_all_scores_ripple_df, all_sessions_merged_complete_epoch_stats_df = df_results
t_delta_df, t_delta_dict, (earliest_delta_aligned_t_start, latest_delta_aligned_t_end) = sessions_t_delta_tuple ## UNPACK
all_sessions_merged_complete_epoch_stats_df = all_sessions_merged_complete_epoch_stats_df[all_sessions_merged_complete_epoch_stats_df['custom_replay_name'] != ''] # only non-blank values

collected_outputs_directory: K:\scratch\collected_outputs
earliest_delta_aligned_t_start: -2057.225948, latest_delta_aligned_t_end: 1661.856002
for file "2024-11-15_GL-kdiba-pin01-one-fet11-01_12-58-54__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df)" using more modern parse method...
for file "2024-11-15_GL-kdiba-pin01-one-fet11-01_12-58-54__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df)" using more modern parse method...
for file "2024-11-15_GL-kdiba-gor01-two-2006-6-07_16-40-19__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df)" using more modern parse method...
for file "2024-11-15_GL-kdiba-gor01-two-2006-6-07_16-40-19__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df)" using more modern parse method...
for file "2024-11-15_GL-kdiba-gor01-one-2006-6-08_14-26-15__withNormalComputedRepl

In [18]:
def _subfn_perform_add_merged_complete_epoch_stats_df(a_paired_main_ripple_df: pd.DataFrame, a_all_sessions_merged_complete_epoch_stats_df: pd.DataFrame):
    """ adds the columns ['Long_BestDir_quantile', 'Short_BestDir_quantile', 'best_overall_quantile'] to the dataframe `a_paired_main_ripple_df`
    
    """
    # all_sessions_merged_complete_epoch_stats_df['time_bin_size'] = 0.25 # 'time_bin_size', 'session_experience_rank', 'session_experience_orientation_rank', 'custom_replay_name'-- all missing for this df
    # all_sessions_merged_complete_epoch_stats_df['custom_replay_name'] = 'withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]'
    # all_sessions_merged_complete_epoch_stats_df['Long_BestDir_quantile'] # 'Long_BestDir_quantile', 'Short_BestDir_quantile'
    a_paired_main_ripple_df = deepcopy(a_paired_main_ripple_df)
    a_all_sessions_merged_complete_epoch_stats_df = deepcopy(a_all_sessions_merged_complete_epoch_stats_df)
    
    a_all_sessions_merged_complete_epoch_stats_df = a_all_sessions_merged_complete_epoch_stats_df[a_all_sessions_merged_complete_epoch_stats_df['custom_replay_name'] != ''] # only non-blank values

    ## need to match 'start' and the other
    # all_sessions_merged_complete_epoch_stats_relevant_df: pd.DataFrame = deepcopy(all_sessions_merged_complete_epoch_stats_df.epochs.matching_epoch_times_slice(epoch_times=all_sessions_all_scores_ripple_df.ripple_start_t))
    all_sessions_merged_complete_epoch_stats_relevant_df: pd.DataFrame = deepcopy(a_all_sessions_merged_complete_epoch_stats_df.epochs.matching_epoch_times_slice(epoch_times=a_paired_main_ripple_df.ripple_start_t))
    all_sessions_merged_complete_epoch_stats_relevant_df

    ## INPUTS: all_sessions_merged_complete_epoch_stats_relevant_df
    # all_sessions_merged_complete_epoch_stats_relevant_df # 'Long_BestDir_quantile', 'Short_BestDir_quantile'
    session_name_t_delta_dict = {'_'.join(k.split('_')[-2:]):v['t_delta'] for k, v in t_delta_dict.items()} # '2006-6-08_21-16-25'
    if 'delta_aligned_start_t' not in all_sessions_merged_complete_epoch_stats_relevant_df.columns:
        all_sessions_merged_complete_epoch_stats_relevant_df['delta_aligned_start_t'] = all_sessions_merged_complete_epoch_stats_relevant_df['start'] - all_sessions_merged_complete_epoch_stats_relevant_df['session_name'].map(lambda x: session_name_t_delta_dict[x])
    all_sessions_merged_complete_epoch_stats_relevant_df['best_overall_quantile'] = np.nanmax(all_sessions_merged_complete_epoch_stats_relevant_df[['Long_BestDir_quantile', 'Short_BestDir_quantile']], axis=1)
    all_sessions_merged_complete_epoch_stats_relevant_df


    ## INPUTS: df_filter, all_sessions_merged_complete_epoch_stats_relevant_df, 

    #TODO 2024-11-15 12:02: - [ ] We also need to constrain based on the time_bin_size and replay_name in addition to just searching for matching epoch start times.

    initial_row_count: int = len(a_paired_main_ripple_df)
    print(f'initial_row_count: {initial_row_count}')
    # relevent_column_names = ['start', 'stop', 'label', 'duration', 'LR_Long_ActuallyIncludedAclus', 'LR_Long_rel_num_cells', 'RL_Long_ActuallyIncludedAclus', 'RL_Long_rel_num_cells', 'LR_Short_ActuallyIncludedAclus', 'LR_Short_rel_num_cells', 'RL_Short_ActuallyIncludedAclus', 'RL_Short_rel_num_cells', 'combined_best_direction_indicies', 'long_relative_direction_likelihoods', 'short_relative_direction_likelihoods', 'long_best_direction_indices', 'short_best_direction_indices', 'LR_Short_spearman', 'LR_Long_spearman', 'RL_Short_pearson', 'RL_Long_spearman', 'LR_Short_pearson', 'RL_Long_pearson', 'LR_Long_pearson', 'RL_Short_spearman', 'LR_Short_spearman_Z', 'LR_Long_spearman_Z', 'RL_Short_pearson_Z', 'RL_Long_spearman_Z', 'LR_Short_pearson_Z', 'RL_Long_pearson_Z', 'LR_Long_pearson_Z', 'RL_Short_spearman_Z', 'Long_BestDir_spearman', 'Short_BestDir_spearman', 'LR_Short_spearman_percentile', 'LR_Long_spearman_percentile', 'RL_Short_pearson_percentile', 'RL_Long_spearman_percentile', 'LR_Short_pearson_percentile', 'RL_Long_pearson_percentile', 'LR_Long_pearson_percentile', 'RL_Short_spearman_percentile', 'LR_Long_percentile', 'RL_Long_percentile', 'LR_Short_percentile', 'RL_Short_percentile', 'Long_BestDir_quantile', 'Short_BestDir_quantile', 'LongShort_BestDir_quantile_diff', 'LongShort_LR_quantile_diff', 'LongShort_RL_quantile_diff', 'session_name', 'custom_replay_name', 'time_bin_size', 'session_experience_rank', 'session_experience_orientation_rank', 'is_novel_exposure', 'delta_aligned_start_t', 'best_overall_quantile']
    relevent_column_names = ['start', 'stop', 'label', 'duration', 'delta_aligned_start_t', 'Long_BestDir_quantile', 'Short_BestDir_quantile', 'best_overall_quantile']
    shuffle_column_names = ['Long_BestDir_quantile', 'Short_BestDir_quantile', 'best_overall_quantile']
    # paired_main_ripple_df.delta_aligned_start_t

    # paired_main_ripple_df.start

    # all_sessions_merged_complete_epoch_stats_relevant_df.start

    ## Just need to obtain shuffle scores for each of the epochs in the filtered df:
    filtered_temp: pd.DataFrame = all_sessions_merged_complete_epoch_stats_relevant_df.loc[all_sessions_merged_complete_epoch_stats_relevant_df.epochs.find_data_indicies_from_epoch_times(a_paired_main_ripple_df.start)].reset_index(drop=True)[relevent_column_names] ## get only the matching entries
    # filtered_temp
    # print(list(filtered_temp.columns))
    ## Filll NaNs first:
    a_paired_main_ripple_df[shuffle_column_names] = np.nan

    ## Need to assign to `df_filter.all_sessions_MultiMeasure_ripple_df`
    _relevent_indexes = a_paired_main_ripple_df.epochs.find_data_indicies_from_epoch_times(filtered_temp.start) # indexes into `paired_main_ripple_df`, but wait, not all indicies are guarnateed to be in here right?
    # _relevent_indexes

    ## Sanity checks:
    # len(_relevent_indexes)
    assert np.sum(np.isnan(_relevent_indexes)) == 0 # no NaNs
    assert np.sum(_relevent_indexes < 0) == 0 # no -1 (not found sentinal value)s
    ## assign the relevent values
    a_paired_main_ripple_df.loc[_relevent_indexes, shuffle_column_names] = deepcopy(filtered_temp[shuffle_column_names]) ## only copy the shuffle columns
    assert initial_row_count == len(a_paired_main_ripple_df)
    return a_paired_main_ripple_df

all_sessions_all_scores_ripple_df = _subfn_perform_add_merged_complete_epoch_stats_df(a_paired_main_ripple_df=all_sessions_all_scores_ripple_df, a_all_sessions_merged_complete_epoch_stats_df=all_sessions_merged_complete_epoch_stats_df)
all_sessions_all_scores_ripple_df
# all_sessions_MultiMeasure_ripple_df = _subfn_perform_add_merged_complete_epoch_stats_df(a_paired_main_ripple_df=all_sessions_MultiMeasure_ripple_df, a_all_sessions_merged_complete_epoch_stats_df=all_sessions_merged_complete_epoch_stats_df)


initial_row_count: 24631


Unnamed: 0,start,stop,label,duration,is_user_annotated_epoch,is_valid_epoch,session_name,delta_aligned_start_t,pre_post_delta_category,maze_id,P_LR,P_RL,P_Long,P_Short,P_Long_LR,congruent_dir_bins_ratio_long_LR,coverage_long_LR,direction_change_bin_ratio_long_LR,integral_second_derivative_long_LR,intercept_long_LR,jump_long_LR,longest_sequence_length_ratio_long_LR,pearsonr_long_LR,score_long_LR,speed_long_LR,stddev_of_diff_long_LR,total_congruent_direction_change_long_LR,total_variation_long_LR,travel_long_LR,velocity_long_LR,wcorr_long_LR,P_Long_RL,congruent_dir_bins_ratio_long_RL,coverage_long_RL,direction_change_bin_ratio_long_RL,integral_second_derivative_long_RL,intercept_long_RL,jump_long_RL,longest_sequence_length_ratio_long_RL,pearsonr_long_RL,score_long_RL,speed_long_RL,stddev_of_diff_long_RL,total_congruent_direction_change_long_RL,total_variation_long_RL,travel_long_RL,velocity_long_RL,wcorr_long_RL,P_Short_LR,congruent_dir_bins_ratio_short_LR,...,short_best_intercept,intercept_diff,long_best_speed,short_best_speed,speed_diff,long_best_wcorr,short_best_wcorr,wcorr_diff,long_best_pearsonr,short_best_pearsonr,pearsonr_diff,long_best_travel,short_best_travel,travel_diff,long_best_coverage,short_best_coverage,coverage_diff,long_best_jump,short_best_jump,jump_diff,long_best_longest_sequence_length_ratio,short_best_longest_sequence_length_ratio,longest_sequence_length_ratio_diff,long_best_direction_change_bin_ratio,short_best_direction_change_bin_ratio,direction_change_bin_ratio_diff,long_best_congruent_dir_bins_ratio,short_best_congruent_dir_bins_ratio,congruent_dir_bins_ratio_diff,long_best_total_congruent_direction_change,short_best_total_congruent_direction_change,total_congruent_direction_change_diff,long_best_total_variation,short_best_total_variation,total_variation_diff,long_best_integral_second_derivative,short_best_integral_second_derivative,integral_second_derivative_diff,long_best_stddev_of_diff,short_best_stddev_of_diff,stddev_of_diff_diff,time_bin_size,custom_replay_name,session_experience_rank,session_experience_orientation_rank,is_novel_exposure,is_filter_included,Long_BestDir_quantile,Short_BestDir_quantile,best_overall_quantile
0,40.187177,40.488202,0,0.301025,False,True,kdiba_gor01_one_2006-6-08_14-26-15,-1171.370903,pre-delta,0.0,0.664169,0.335831,0.544355,0.455645,0.361544,0.363636,0.157895,0.090909,241946.584175,-1427.1885588143907,0.004673,0.500000,0.129413,0.471575,41.378433611145994,86.039397,0.000000,439.990677,0.186912,-41.378433611145994,0.214083,0.182811,0.454545,0.350877,0.454545,540635.591373,-325.8414981018566,0.004673,0.250000,-0.360773,0.170968,13.792811203715296,142.173880,610.676716,1198.595294,0.509174,-13.792811203715296,-0.348438,0.302625,0.545455,...,-344.80661350696505,1082.3819453074257,41.378433611145994,13.792811203715296,27.585622407430698,0.214083,0.124031,0.090052,0.129413,0.147619,-0.018207,0.186912,0.191567,-0.004655,0.157895,0.210526,-0.052632,0.004673,0.004340,0.000333,0.500000,0.500000,0.000000,0.090909,0.181818,-0.090909,0.363636,0.545455,-0.181818,0.000000,163.099992,-163.099992,439.990677,303.441846,136.548831,241946.584175,84523.766547,157422.817627,86.039397,52.041595,33.997803,0.025,,2,1,False,False,,,
1,41.011916,41.359114,1,0.347197,False,True,kdiba_gor01_one_2006-6-08_14-26-15,-1170.546164,pre-delta,0.0,,,,,,0.333333,0.228070,0.416667,472095.808593,3878.4098537356113,0.004673,0.307692,-0.041734,0.284373,88.50387189050716,128.065639,603.090670,1198.595294,0.466743,88.50387189050716,0.028805,,0.500000,0.315789,0.500000,311910.682340,3348.342258350398,0.004172,0.153846,-0.184740,0.222090,75.86046162043483,108.828847,606.883693,1001.358093,0.389937,75.86046162043483,-0.416560,,0.333333,...,-823.9240892729676,2524.4181690774303,75.86046162043483,25.28682054014501,50.573641080289825,-0.416560,-0.307390,0.109170,-0.184740,-0.145443,0.039297,0.389937,0.526809,-0.136872,0.315789,0.421053,-0.105263,0.004172,0.004960,-0.000788,0.153846,0.153846,0.000000,0.500000,0.500000,0.000000,0.500000,0.583333,-0.083333,606.883693,523.437185,83.446508,1001.358093,910.325539,91.032554,311910.682340,187822.599536,124088.082804,108.828847,88.589625,20.239222,0.025,,2,1,False,False,,,
2,43.429702,43.489729,2,0.060027,False,False,kdiba_gor01_one_2006-6-08_14-26-15,-1168.128378,pre-delta,0.0,,,,,,1.000000,0.105263,0.000000,0.000000,13429.772137784617,0.000167,1.000000,0.158074,0.882221,303.4418464817533,0.000000,7.586046,7.586046,0.035449,303.4418464817533,-0.727450,,1.000000,0.175439,0.000000,0.000000,13418.39306854155,0.000167,1.000000,-0.413563,0.647253,303.4418464817533,0.000000,7.586046,7.586046,0.035449,303.4418464817533,-0.484635,,1.000000,...,19975.346844334425,-6556.953775792874,303.4418464817533,455.16276972263057,-151.72092324087726,-0.484635,-0.702376,-0.217740,-0.413563,-0.471881,-0.058318,0.035449,0.079021,-0.043573,0.175439,0.157895,0.017544,0.000167,0.000372,-0.000205,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,1.000000,0.000000,7.586046,11.379069,-3.793023,7.586046,11.379069,-3.793023,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.025,,2,1,False,False,,,
3,44.189868,44.279447,3,0.089579,False,False,kdiba_gor01_one_2006-6-08_14-26-15,-1167.368212,pre-delta,0.0,,,,,,0.500000,0.157895,0.000000,14.387024,247.59017074053855,0.000083,1.000000,0.063765,0.706546,0.0,1.896512,3.793023,3.793023,0.008862,-0.0,0.185883,,0.500000,0.368421,0.500000,51793.286735,-33500.73250737714,0.004506,0.666667,-0.587523,0.288288,758.6046162043833,113.790692,204.823246,227.581385,0.531732,-758.6046162043833,-0.721453,,1.000000,...,3570.4580223423222,29930.27448503482,758.6046162043833,75.86046162043861,682.7441545839447,-0.721453,-0.273222,0.448231,-0.587523,-0.524257,0.063265,0.531732,0.052681,0.479051,0.368421,0.245614,0.122807,0.004506,0.000372,0.004134,0.666667,1.000000,-0.333333,0.500000,0.000000,0.500000,0.500000,0.500000,0.000000,204.823246,11.379069,193.444177,227.581385,15.172092,212.409293,51793.286735,230.192385,51563.094350,113.790692,7.586046,106.204646,0.025,,2,1,False,False,,,
4,44.588736,44.829950,4,0.241213,False,True,kdiba_gor01_one_2006-6-08_14-26-15,-1166.969344,pre-delta,0.0,,,,,,0.375000,0.122807,0.250000,219416.504445,-1451.7310593105647,0.004673,0.777778,-0.281180,0.543649,37.93023081021647,105.336449,227.581385,443.783700,0.259219,-37.93023081021647,-0.379002,,0.500000,0.245614,0.375000,260750.424664,4476.928130463184,0.003672,0.333333,-0.517863,0.387876,94.82557702554111,112.531235,352.751147,682.744155,0.398799,94.82557702554111,-0.216429,,0.500000,...,-2331.735858984293,2145.192271478891,94.82557702554111,56.89534621532478,37.93023081021633,-0.216429,-0.593579,-0.377150,-0.517863,-0.442478,0.075385,0.398799,0.210724,0.188076,0.245614,0.298246,-0.052632,0.003672,0.004340,-0.000669,0.333333,0.666667,-0.333333,0.375000,0.250000,0.125000,0.500000,0.375000,0.125000,352.751147,140.341854,212.409293,682.744155,242.753477,439.990677,260750.424664,43851.649436,216898.775229,112.531235,54.728407,57.802827,0.025,,2,1,False,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24626,1429.924337,1430.429558,90,0.505221,,,kdiba_vvp01_two_2006-4-10_12-58-3,497.098131,post-delta,,0.536577,0.463423,0.383571,0.616429,0.205815,,,,,,,,0.039529,,,,,,,,0.144656,0.177755,,,,,,,,0.292715,,,,,,,,0.196344,0.330762,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",3,1,False,False,,,
24627,1435.942904,1436.334707,91,0.391803,,,kdiba_vvp01_two_2006-4-10_12-58-3,503.116698,post-delta,,0.360057,0.639943,0.374217,0.625783,0.134740,,,,,,,,-0.013777,,,,,,,,-0.161458,0.239478,,,,,,,,0.044008,,,,,,,,0.087238,0.225318,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",3,1,False,False,,,
24628,1437.783184,1437.959609,92,0.176425,,,kdiba_vvp01_two_2006-4-10_12-58-3,504.956978,post-delta,,0.691837,0.308163,0.520839,0.479161,0.360336,,,,,,,,-0.771032,,,,,,,,-0.485093,0.160503,,,,,,,,-0.684722,,,,,,,,-0.336139,0.331501,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",3,1,False,False,,,
24629,1445.197729,1445.522931,93,0.325202,,,kdiba_vvp01_two_2006-4-10_12-58-3,512.371523,post-delta,,,,,,,,,,,,,,-0.539592,,,,,,,,-0.459773,,,,,,,,,-0.476516,,,,,,,,-0.225709,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",3,1,False,False,,,


In [21]:
all_sessions_all_scores_ripple_df['best_overall_quantile'].notna().sum()

2197

In [None]:
"2024-11-15_GL-kdiba-gor01-one-2006-6-08_14-26-15__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]-(merged_complete_epoch_stats_df).csv"
excluded_or_outdated_files_list

In [None]:
parsed_csv_files_df

In [None]:
all_sessions_merged_complete_epoch_stats_relevant_df # 'Long_BestDir_quantile', 'Short_BestDir_quantile'

In [None]:
all_sessions_all_scores_ripple_df['custom_replay_name'].unique()

In [None]:
parsed_h5_files_df

In [None]:
# INPUTS: sessions_t_delta_tuple
t_delta_df, t_delta_dict, (earliest_delta_aligned_t_start, latest_delta_aligned_t_end) = sessions_t_delta_tuple ## UNPACK
t_delta_dict

In [None]:
all_sessions_all_scores_ripple_df

In [None]:
all_sessions_MultiMeasure_ripple_df

In [None]:
parsed_csv_files_df

In [None]:
parsed_csv_files_df['custom_replay_name'].unique()

In [None]:
# all_sessions_MultiMeasure_laps_df
all_columns = list(all_sessions_all_scores_ripple_df.columns)
# all_columns
bad_columns = [v for v in all_columns if ('.1' in v)]
bad_columns # ['P_Long.1', 'P_Short.1', 'P_LR.1', 'P_RL.1']

equiv_columns = [v.strip('.1') for v in bad_columns]
equiv_columns # ['P_Long', 'P_Short', 'P_LR', 'P_RL']

equiv_values = all_sessions_all_scores_ripple_df[equiv_columns]
bad_values = all_sessions_all_scores_ripple_df[bad_columns]

equiv_values
bad_values
does_row_have_bad_entry = np.any(np.isclose(equiv_values.to_numpy(), bad_values.to_numpy()), axis=1)
does_row_have_bad_entry

# h5_files

In [8]:
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import load_across_sessions_exported_h5_files

## INPUTS: h5_sessions, session_dict, cuttoff_date, known_bad_session_strs
parsed_h5_files_df, h5_contexts_paths_dict = load_across_sessions_exported_h5_files(collected_outputs_directory=collected_outputs_directory, cuttoff_date=cuttoff_date,
                                                                                    known_bad_session_strs=known_bad_session_strs)
h5_session_contexts = list(h5_contexts_paths_dict.keys())
included_h5_paths = list(h5_contexts_paths_dict.values())

if neptuner_run is not None:
    _neptuner_run_parameters = dict(parsed_h5_files_df=parsed_h5_files_df,
                                       )
    for k, v in _neptuner_run_parameters.items():
        neptuner_run[f'parsed/{k}'] = v
    _neptuner_run_parameters = {} # reset after writing

parsed_h5_files_df
# h5_contexts_paths_dict


collected_outputs_directory: K:\scratch\collected_outputs
ERR: Could not parse filename: "kdiba-gor01-one-2006-6-08_14-26-15__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2]_(first_spike_activity_data)"
ERR: Could not parse filename: "kdiba-gor01-two-2006-6-12_16-53-46__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]_(first_spike_activity_data)"
ERR: Could not parse filename: "kdiba-gor01-one-2006-6-12_15-55-31__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]_(first_spike_activity_data)"
ERR: Could not parse filename: "kdiba-vvp01-two-2006-4-09_16-40-54__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]_(first_spike_activity_data)"
ERR: Could not parse filename: "kdiba-vvp01-two-2006-4-10_12-58-3__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]_(first_spike_activity_data)"
ERR: Could not parse filename: "kdiba-vvp01-one-2006-4-10_12-25-50__withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]_(first

Unnamed: 0,export_datetime,session,custom_replay_name,file_type,decoding_time_bin_size_str,path
2,2024-11-07,11-03_12-3-25,,pipeline_results,,K:\scratch\collected_outputs\2024-11-07_GL_11-...
11,2024-11-06,11-03_12-3-25-,,withNormalComputedReplays,,K:\scratch\collected_outputs\2024-11-06_GL-11-...
15,2024-11-07,2006-4-09_16-40-54,,pipeline_results,,K:\scratch\collected_outputs\2024-11-07_GL_200...
24,2024-11-06,2006-4-09_16-40-54-,,withNormalComputedReplays,,K:\scratch\collected_outputs\2024-11-06_GL-200...
28,2024-11-07,2006-4-09_17-29-30,,pipeline_results,,K:\scratch\collected_outputs\2024-11-07_GL_200...
29,2024-10-29,2006-4-09_17-29-30-,,withNormalComputedReplays,,K:\scratch\collected_outputs\2024-10-29_GL-200...
32,2024-11-07,2006-4-10_12-25-50,,pipeline_results,,K:\scratch\collected_outputs\2024-11-07_GL_200...
41,2024-11-06,2006-4-10_12-25-50-,,withNormalComputedReplays,,K:\scratch\collected_outputs\2024-11-06_GL-200...
45,2024-11-07,2006-4-10_12-58-3,,pipeline_results,,K:\scratch\collected_outputs\2024-11-07_GL_200...
54,2024-11-06,2006-4-10_12-58-3-,,withNormalComputedReplays,,K:\scratch\collected_outputs\2024-11-06_GL-200...


In [9]:
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import AcrossSessionTables
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import AcrossSessionsVisualizations
from pyphocorehelpers.indexing_helpers import reorder_columns, reorder_columns_relative

included_session_contexts = deepcopy(h5_session_contexts)
included_h5_paths = deepcopy(included_h5_paths)
num_sessions = len(included_session_contexts)
(neuron_identities_table, long_short_fr_indicies_analysis_table, neuron_replay_stats_table), output_path_dicts = AcrossSessionTables.build_and_save_all_combined_tables(included_session_contexts, included_h5_paths,
                                                                                                                                                    override_output_parent_path=across_sessions_output_folder, output_path_suffix=f'{TODAY_DAY_DATE}',
                                                                                                                                                    should_restore_native_column_types=True, include_csv=True, include_pkl=True)


if neptuner_run is not None:
    _neptuner_run_parameters = dict(neuron_identities_table=neuron_identities_table, long_short_fr_indicies_analysis_table=long_short_fr_indicies_analysis_table, neuron_replay_stats_table=neuron_replay_stats_table,
                                       num_sessions=num_sessions)
    for k, v in _neptuner_run_parameters.items():
        neptuner_run[f'parsed/{k}'] = v
    _neptuner_run_parameters = {} # reset after writing

    for output_name, a_paths_dict in output_path_dicts.items():
        for format_extension, an_output_path in a_paths_dict.items():
            neptuner_run[f"output_files/{format_extension}/{output_name}"].upload(an_output_path.resolve().as_posix())


# {'neuron_identities_table': {'.csv': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee/neuron_identities_table.csv'),
#   '.pkl': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee/neuron_identities_table.pkl')},
#  'long_short_fr_indicies_analysis_table': {'.csv': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee/long_short_fr_indicies_analysis_table.csv'),
#   '.pkl': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee/long_short_fr_indicies_analysis_table.pkl')},
#  'neuron_replay_stats_table': {'.csv': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee/neuron_replay_stats_table.csv'),
#   '.pkl': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee/neuron_replay_stats_table.pkl')}}

output_path_dicts

## Move the "height" columns to the end
neuron_replay_stats_table = reorder_columns_relative(neuron_replay_stats_table, column_names=['neuron_uid', 'format_name', 'animal', 'exper_name', 'session_name', 'neuron_type', 'aclu', 'session_uid', 'session_datetime'], relative_mode='start')

neuron_replay_stats_table


failed for file path: K:\scratch\collected_outputs\2024-11-07_GL_2006-6-08_14-26-15_pipeline_results.h5, table_key: /kdiba/gor01/one/2006-6-08_14-26-15/neuron_identities/table. wth exception HDF5 error back trace

  File "C:\b\abs_e4nt8ttmb0\croot\hdf5_1686163951797\work\src\H5F.c", line 620, in H5Fopen
    unable to open file
  File "C:\b\abs_e4nt8ttmb0\croot\hdf5_1686163951797\work\src\H5VLcallback.c", line 3502, in H5VL_file_open
    failed to iterate over available VOL connector plugins
  File "C:\b\abs_e4nt8ttmb0\croot\hdf5_1686163951797\work\src\H5PLpath.c", line 579, in H5PL__path_table_iterate
    can't iterate over plugins in plugin path '(null)'
  File "C:\b\abs_e4nt8ttmb0\croot\hdf5_1686163951797\work\src\H5PLpath.c", line 712, in H5PL__path_table_iterate_process_path
    can't open directory
  File "C:\b\abs_e4nt8ttmb0\croot\hdf5_1686163951797\work\src\H5VLcallback.c", line 3351, in H5VL__file_open
    open failed
  File "C:\b\abs_e4nt8ttmb0\croot\hdf5_1686163951797\work\sr

{'neuron_identities_table': {'.csv': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee/neuron_identities_table.csv'),
  '.pkl': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee/neuron_identities_table.pkl')},
 'long_short_fr_indicies_analysis_table': {'.csv': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee/long_short_fr_indicies_analysis_table.csv'),
  '.pkl': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee/long_short_fr_indicies_analysis_table.pkl')},
 'neuron_replay_stats_table': {'.csv': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee/neuron_replay_stats_table.csv'),
  '.pkl': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee/neuron_replay_stats_table.pkl')}}

Unnamed: 0,neuron_uid,format_name,animal,exper_name,session_name,neuron_type,aclu,session_uid,session_datetime,long_pf_peak_x,short_pf_peak_x,track_membership,long_non_replay_mean,short_non_replay_mean,non_replay_diff,long_replay_mean,short_replay_mean,replay_diff,long_mean,short_mean,mean_diff,num_replays,long_num_replays,short_num_replays,custom_frs_index,is_rate_extrema,is_refined_exclusive,is_refined_LxC,is_refined_SxC,long_LR_pf2D_peak_x,long_LR_pf2D_peak_y,long_RL_pf2D_peak_x,long_RL_pf2D_peak_y,short_LR_pf2D_peak_x,short_LR_pf2D_peak_y,short_RL_pf2D_peak_x,short_RL_pf2D_peak_y,long_LR_pf1D_peak,long_RL_pf1D_peak,short_LR_pf1D_peak,short_RL_pf1D_peak,peak_diff_LR_pf1D_peak,peak_diff_RL_pf1D_peak,is_long_peak_left_cap,is_long_peak_right_cap,is_long_peak_either_cap,LS_pf_peak_x_diff,session_experience_rank,session_experience_orientation_rank,is_novel_exposure
0,kdiba|vvp01|one|2006-4-09_17-29-30|2,kdiba,vvp01,one,2006-4-09_17-29-30,intr,2,kdiba|vvp01|one|2006-4-09_17-29-30,2006-04-09 17:29:30,,,SHARED,23.453478,24.705966,1.252488,34.960375548062366,28.405719924833864,-6.554655623228502,29.206926798993095,26.555842786960547,-2.6510840120325483,111,56,55,0.031424,False,False,False,False,,,,,,,,,,,,,,,,,,,1,0,True
1,kdiba|vvp01|one|2006-4-09_17-29-30|3,kdiba,vvp01,one,2006-4-09_17-29-30,pyr,3,kdiba|vvp01|one|2006-4-09_17-29-30,2006-04-09 17:29:30,137.371217,137.371217,SHARED,0.938093,0.689961,-0.248132,11.841367913328904,11.587826118949987,-0.25354179437891666,6.3897304767758225,6.1388935026656375,-0.250836974110185,42,24,18,0.163378,False,False,False,False,131.775207,-29.982220,135.987868,-31.801836,137.401472,-32.827960,138.963649,-32.459796,131.643424,135.461953,135.461953,139.280482,3.818529,3.818529,,,,,1,0,True
2,kdiba|vvp01|one|2006-4-09_17-29-30|4,kdiba,vvp01,one,2006-4-09_17-29-30,pyr,4,kdiba|vvp01|one|2006-4-09_17-29-30,2006-04-09 17:29:30,57.182107,72.456223,SHARED,1.514562,0.643474,-0.871088,9.609075018772401,8.80738453033958,-0.801690488432822,5.561818503281282,4.725429376107072,-0.8363891271742103,22,10,12,0.287797,False,False,False,False,,,50.132783,-44.796867,,,65.408562,-46.172816,181.284302,143.099011,139.280482,143.099011,-42.003820,0.000000,,,,,1,0,True
3,kdiba|vvp01|one|2006-4-09_17-29-30|5,kdiba,vvp01,one,2006-4-09_17-29-30,intr,5,kdiba|vvp01|one|2006-4-09_17-29-30,2006-04-09 17:29:30,,,SHARED,27.713596,29.345487,1.631892,85.27018152411017,79.87298476234909,-5.39719676176108,56.49188852594443,54.60923612296769,-1.8826524029767455,123,59,64,0.046365,False,False,False,False,,,,,,,,,,,,,,,,,,,1,0,True
4,kdiba|vvp01|one|2006-4-09_17-29-30|6,kdiba,vvp01,one,2006-4-09_17-29-30,pyr,6,kdiba|vvp01|one|2006-4-09_17-29-30,2006-04-09 17:29:30,83.911810,,LEFT_ONLY,0.889872,0.300901,-0.588970,12.661190810918045,11.670858668085733,-0.9903321428323117,6.775531253938155,5.985880044895163,-0.7896512090429919,35,14,21,0.176469,False,False,False,False,74.018817,-34.325798,75.702684,-33.545938,,,,,,,,,,,,,,,1,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,kdiba|pin01|one|11-03_12-3-25|26,kdiba,pin01,one,11-03_12-3-25,pyr,26,kdiba|pin01|one|11-03_12-3-25,2009-11-03 12:03:25,165.446655,165.446655,SHARED,0.748178,0.645689,-0.102489,3.911364544034709,4.808702115341916,0.8973375713072067,2.3297713495761116,2.7271954629024493,0.39742411332633765,15,6,9,0.000000,False,False,False,False,157.443325,146.548543,162.097762,146.556675,,,155.594651,146.383100,,211.957258,,193.353016,,-18.604241,False,False,False,0.0,4,4,False
142,kdiba|pin01|one|11-03_12-3-25|27,kdiba,pin01,one,11-03_12-3-25,pyr,27,kdiba|pin01|one|11-03_12-3-25,2009-11-03 12:03:25,68.704600,105.913082,SHARED,3.007414,1.764802,-1.242611,10.710771389936925,5.5993103663062005,-5.111461023630724,6.8590924564766755,3.6820562766273865,-3.177036179849289,19,15,4,0.000000,False,False,False,False,75.576492,142.549499,71.022130,139.718392,108.587352,139.686117,96.350084,146.416211,,174.748775,,171.027927,,-3.720848,False,False,False,-37.20848254723059,4,4,False
143,kdiba|pin01|one|11-03_12-3-25|28,kdiba,pin01,one,11-03_12-3-25,pyr,28,kdiba|pin01|one|11-03_12-3-25,2009-11-03 12:03:25,,120.796475,RIGHT_ONLY,0.103471,0.763868,0.660397,,3.6794506612500983,,,2.221659407686447,,1,0,1,0.000000,False,False,False,False,105.915617,146.521333,,,,,117.592810,146.508780,,,,,,,False,False,False,,4,4,False
144,kdiba|pin01|one|11-03_12-3-25|29,kdiba,pin01,one,11-03_12-3-25,pyr,29,kdiba|pin01|one|11-03_12-3-25,2009-11-03 12:03:25,38.937814,76.146296,SHARED,2.650572,0.968037,-1.682535,20.23812624037057,2.68117123796121,-17.55695500240936,11.444349312357804,1.8246041401753124,-9.619745172182492,9,8,1,0.000000,False,False,False,False,39.151982,141.029097,39.180451,140.628150,75.931168,139.682106,76.310395,139.717057,,130.098596,,163.586230,,33.487634,True,False,True,-37.20848254723059,4,4,False


# 🚧🎯⌚ `first_spike_activity_data` loading and analysis - 2024-11-01

In [None]:
import pandas as pd
from pathlib import Path
from neuropy.core.neuron_identities import NeuronIdentityDataframeAccessor
from neuropy.core.flattened_spiketrains import SpikesAccessor
from pyphoplacecellanalysis.SpecificResults.PendingNotebookCode import CellsFirstSpikeTimes
from neuropy.utils.result_context import IdentifyingContext

# matching_custom_replay_name_str: str = "withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2]"
matching_custom_replay_name_str: str = "withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]"
first_spike_activity_parsed_h5_files_df = parsed_h5_files_df[parsed_h5_files_df['file_type'] == 'first_spike_activity_data']
first_spike_activity_parsed_h5_files_df = first_spike_activity_parsed_h5_files_df[first_spike_activity_parsed_h5_files_df['custom_replay_name'] == matching_custom_replay_name_str]
first_spike_activity_parsed_h5_files_df
first_spike_activity_data_h5_files = [v.as_posix() for v in first_spike_activity_parsed_h5_files_df['path'].to_list()]
first_spike_activity_data_h5_files

test_obj: CellsFirstSpikeTimes = CellsFirstSpikeTimes.init_from_batch_hdf5_exports(first_spike_activity_data_h5_files=first_spike_activity_data_h5_files)
test_obj
# INPUTS: sessions_t_delta_tuple
t_delta_df, t_delta_dict, (earliest_delta_aligned_t_start, latest_delta_aligned_t_end) = sessions_t_delta_tuple ## UNPACK
# t_delta_dict
# curr_session_t_delta = t_delta_dict.get(session_name, {}).get('t_delta', None)

# test_obj.all_cells_first_spike_time_df
# test_obj.first_spikes_dict
# test_obj.all_cells_first_spike_time_df.session_name # "kdiba-gor01-one-2006-6-08_14-26-15"
# IdentifyingContext.try_init_from_session_key(session_str="kdiba-gor01-one-2006-6-08_14-26-15", separator='-').get_description(separator='_')

test_obj.add_session_info(t_delta_dict=t_delta_dict)
test_obj.post_init_cleanup()


In [None]:
post_initial_laps_all_cells_first_spike_time_df = deepcopy(test_obj.all_cells_first_spike_time_df[test_obj.all_cells_first_spike_time_df['lap_spike_lap'] > 4].reset_index(drop=True))
post_initial_laps_all_cells_first_spike_time_df

In [None]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, Span, HoverTool
from bokeh.palettes import Category10
import pandas as pd
from copy import deepcopy

# Enable Bokeh output in the Jupyter notebook
output_notebook()

In [None]:
spike_width = 0.9
spike_height = 0.8 # Height of the tick mark (can be adjusted)
active_df: pd.DataFrame = deepcopy(post_initial_laps_all_cells_first_spike_time_df)
# active_df: pd.DataFrame = deepcopy(test_obj.all_cells_first_spike_time_df)
enable_aclu_horizontal_trace_lines: bool = False
## INPUTS: spike_width: float, spike_height: float, active_df: pd.DataFrame

# Data preparation (factorize for color)
_temp_active_spikes_df = deepcopy(active_df)[['neuron_uid', 'first_spike_PBE', 'first_spike_lap', 'session_uid']]
_temp_active_spikes_df['lap_spike_relative_first_spike'] = _temp_active_spikes_df['first_spike_PBE'] - _temp_active_spikes_df['first_spike_lap']
_temp_active_spikes_df['aclu'], _ = pd.factorize(_temp_active_spikes_df['neuron_uid'])
_temp_active_spikes_df['aclu'] = _temp_active_spikes_df['aclu'] + 1  # Optionally start aclu from 1

# Add tick_top and tick_bottom columns for short tick representation

_temp_active_spikes_df['tick_bottom'] = _temp_active_spikes_df['aclu'] - spike_height / 2
_temp_active_spikes_df['tick_top'] = _temp_active_spikes_df['aclu'] + spike_height / 2
_temp_active_spikes_df = _temp_active_spikes_df.dropna(subset=['lap_spike_relative_first_spike', 'tick_top', 'tick_bottom'])

# Bokeh specific preparation
source = ColumnDataSource(_temp_active_spikes_df)
unique_sessions = _temp_active_spikes_df['session_uid'].unique()

# Create a wider figure by setting the width to 3x the default
p = figure(title="First PBE Spike Relative to First Lap Spike (t=0)", 
           x_axis_label="First PBE Spike Time (seconds, relative to first lap spike)", 
           y_axis_label="Cell ID", 
           tools="pan,box_zoom,reset",
           width=1300)

colors = Category10[len(unique_sessions)]

# Iterate over sessions to create vertical tick "spikes" for each point
for i, session in enumerate(unique_sessions):
    session_data = _temp_active_spikes_df[_temp_active_spikes_df['session_uid'] == session]
    session_source = ColumnDataSource(session_data)
    # Remove rows with NaN values in critical columns
    
    # Render each spike as a very short vertical bar (like a tick mark)
    _ = p.vbar(x='lap_spike_relative_first_spike', bottom='tick_bottom', 
           top='tick_top', width=spike_width, 
           source=session_source, color=colors[i], legend_label=f"Session: {session}")

# Add a vertical line at x=0 to indicate the first lap spike event
vline = Span(location=0, dimension='height', line_color='red', line_width=2, line_alpha=0.5)
p.add_layout(vline)

if enable_aclu_horizontal_trace_lines:
    # Add grid lines at each aclu value
    unique_aclus = _temp_active_spikes_df['aclu'].unique()
    print(f'unique_aclus: {unique_aclus}')
    for aclu_value in unique_aclus:
        hline = Span(location=aclu_value, dimension='width', line_color='gray', line_width=1, line_dash='dotted')
        p.add_layout(hline)

# Add hover tool for interactivity
hover = HoverTool(tooltips=[("Neuron", "@neuron_uid"), ("Session", "@session_uid"), ("Relative Spike Time", "@lap_spike_relative_first_spike")])
p.add_tools(hover)

# Adjust legend properties
p.legend.click_policy = "hide"
p.legend.background_fill_alpha = 0.3  # Make the legend background more transparent
p.add_layout(p.legend[0], 'right')  # Move the legend outside the plot area to the right

# Show the plot inline in the Jupyter notebook
show(p)


In [None]:
import altair as alt
import pandas as pd
from copy import deepcopy

# Data preparation
_temp_active_spikes_df = deepcopy(post_initial_laps_all_cells_first_spike_time_df)[['neuron_uid', 'first_spike_PBE', 'first_spike_lap', 'session_uid']]
_temp_active_spikes_df['lap_spike_relative_first_spike'] = _temp_active_spikes_df['first_spike_PBE'] - _temp_active_spikes_df['first_spike_lap']
_temp_active_spikes_df['aclu'], _ = pd.factorize(_temp_active_spikes_df['neuron_uid'])
_temp_active_spikes_df['aclu'] = _temp_active_spikes_df['aclu'] + 1

# Plotting
chart = alt.Chart(_temp_active_spikes_df).mark_circle(size=60).encode(
    x='lap_spike_relative_first_spike',
    y='aclu',
    color='session_uid:N',
    tooltip=['neuron_uid', 'session_uid']
).properties(
    title='First PBE Spike Relative to First Lap Spike (t=0)'
).interactive()

chart.show()


#### Reads Matlab exported .CSV

In [None]:
# global_session.epochs
## INPUTS: global_data_root_parent_path
# matlab_exported_csv = Path('/home/halechr/repos/matlab-to-neuropy-exporter/output/2024-11-05_good_sessions_table.csv').resolve()
matlab_exported_csv = Path('C:/Users/pho/repos/matlab-to-neuropy-exporter/output/2024-11-07_good_sessions_table.csv').resolve()
Assert.path_exists(matlab_exported_csv)
session_info_matlab_df: pd.DataFrame = pd.read_csv(matlab_exported_csv)
session_info_matlab_df['session_context'] = session_info_matlab_df['session_export_path'].map(lambda v: IdentifyingContext.try_init_from_session_key('_'.join(Path(v).relative_to(global_data_root_parent_path).parts), separator='_'))
user_annotation_code_strs = []
for a_tuple in session_info_matlab_df[['session_context', 'first_valid_pos_time', 'last_valid_pos_time']].itertuples(index=False):
    _code_str: str = f"user_annotations[{a_tuple.session_context.get_initialization_code_string()}].update(track_start_t={a_tuple.first_valid_pos_time}, track_end_t={a_tuple.last_valid_pos_time})"
    user_annotation_code_strs.append(_code_str)

## OUTPUTS: user_annotation_code_strs
print(user_annotation_code_strs)

In [None]:

_out = test_obj.plot_first_lap_spike_relative_first_PBE_spike_scatter_figure()


In [None]:
test_obj.global_spikes_df
test_obj.all_cells_first_spike_time_df

In [None]:
test_obj.all_cells_first_spike_time_df['first_spike_lap']



In [None]:


_out = test_obj.plot_first_lap_spike_relative_first_PBE_spike_scatter_figure()


In [None]:
# trimmed_result_tuples_dict
# post_initial_laps_result_tuples_dict
_accumulated_global_spikes_df

In [None]:
test_obj.global_spikes_df # session_uid: "2024|11|05|kdiba-gor01-one-2006-6-08_14-26-15"

In [None]:
app, win, plots, plots_data = test_obj.plot_first_lap_spike_relative_first_PBE_spike_scatter_figure()


# Across Session CSV Outputs

In [10]:
## INPUTS: parsed_csv_files_df, TODAY_DAY_DATE
display(parsed_csv_files_df)

across_sessions_parsed_csv_files_path = across_sessions_output_folder.joinpath(f'{TODAY_DAY_DATE}_parsed_csv_files_df.csv').resolve()
# parsed_csv_files_df.to_clipboard(excel=True)
parsed_csv_files_df.to_csv(across_sessions_parsed_csv_files_path)
display(fullwidth_path_widget(across_sessions_parsed_csv_files_path, file_name_label='across_sessions_parsed_csv_files_path:'))

Unnamed: 0,export_datetime,session,custom_replay_name,file_type,decoding_time_bin_size_str,path
4,2024-11-12 00:00:00,11-03_12-3-25,,merged_complete_epoch_stats_df,,K:\scratch\collected_outputs\2024-11-12_GL-11-...
7,2024-11-06 00:00:00,11-03_12-3-25,,neuron_replay_stats_df,,K:\scratch\collected_outputs\2024-11-06_GL-11-...
12,2024-11-12 00:00:00,2006-4-09_16-40-54,,merged_complete_epoch_stats_df,,K:\scratch\collected_outputs\2024-11-12_GL-200...
15,2024-11-06 00:00:00,2006-4-09_16-40-54,,neuron_replay_stats_df,,K:\scratch\collected_outputs\2024-11-06_GL-200...
20,2024-11-12 00:00:00,2006-4-10_12-25-50,,merged_complete_epoch_stats_df,,K:\scratch\collected_outputs\2024-11-12_GL-200...
...,...,...,...,...,...,...
2207,2024-11-06 04:10:00,kdiba_vvp01_two_2006-4-10_12-58-3,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",laps_weighted_corr_merged_df,1.5,K:\scratch\collected_outputs\2024-11-06_0410AM...
2244,2024-11-06 04:10:00,kdiba_vvp01_two_2006-4-10_12-58-3,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",ripple_all_scores_merged_df,0.01,K:\scratch\collected_outputs\2024-11-06_0410AM...
2246,2024-11-06 04:10:00,kdiba_vvp01_two_2006-4-10_12-58-3,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",ripple_all_scores_merged_df,0.025,K:\scratch\collected_outputs\2024-11-06_0410AM...
2263,2024-11-05 00:00:00,kdiba_vvp01_two_2006-4-10_12-58-3,"withNormalComputedReplays-qclu_[1, 2, 4, 6, 7,...",ripple_marginals_df,,K:\scratch\collected_outputs\2024-11-05-kdiba_...


Box(children=(Label(value='across_sessions_parsed_csv_files_path:', layout=Layout(width='auto')), HTML(value="…

In [11]:
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import export_across_session_CSVs

## INPUTS: df_results
all_sessions_laps_df, all_sessions_ripple_df, all_sessions_laps_time_bin_df, all_sessions_ripple_time_bin_df, all_sessions_MultiMeasure_laps_df, all_sessions_MultiMeasure_ripple_df, all_sessions_all_scores_ripple_df, all_sessions_merged_complete_epoch_stats_df = df_results
final_across_session_summary_CSVs_output_path = across_sessions_output_folder.resolve()
display(fullwidth_path_widget(final_across_session_summary_CSVs_output_path, file_name_label='final_across_session_summary_CSVs_output_path:'))
final_csv_export_paths = export_across_session_CSVs(final_output_path=final_across_session_summary_CSVs_output_path, TODAY_DAY_DATE=TODAY_DAY_DATE,
                                                    all_sessions_laps_df=all_sessions_laps_df,  all_sessions_ripple_df=all_sessions_ripple_df,
                                                    all_sessions_laps_time_bin_df=all_sessions_laps_time_bin_df, all_sessions_ripple_time_bin_df=all_sessions_ripple_time_bin_df, 
                                                    # all_sessions_simple_pearson_laps_df=all_sessions_simple_pearson_laps_df,  all_sessions_simple_pearson_ripple_df=all_sessions_simple_pearson_ripple_df,
                                                    all_sessions_MultiMeasure_laps_df=all_sessions_MultiMeasure_laps_df,  all_sessions_MultiMeasure_ripple_df=all_sessions_MultiMeasure_ripple_df,
                                                    all_sessions_all_scores_ripple_df=all_sessions_all_scores_ripple_df,  all_sessions_all_scores_laps_df=None,
                                                )

# final_csv_export_paths: {'AcrossSession_Laps_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee_AcrossSession_Laps_per-Epoch.csv'),
#  'AcrossSession_Ripple_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee_AcrossSession_Ripple_per-Epoch.csv'),
#  'AcrossSession_Laps_per-TimeBin': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee_AcrossSession_Laps_per-TimeBin.csv'),
#  'AcrossSession_Ripple_per-TimeBin': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee_AcrossSession_Ripple_per-TimeBin.csv'),
#  'AcrossSession_SimplePearson_Laps_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee_AcrossSession_SimplePearson_Laps_per-Epoch.csv'),
#  'AcrossSession_SimplePearson_Ripple_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee_AcrossSession_SimplePearson_Ripple_per-Epoch.csv'),
#  'AcrossSession_AllScores_Ripple_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-06-05_Apogee_AcrossSession_AllScores_Ripple_per-Epoch.csv')}

if neptuner_run is not None:
    _neptuner_run_parameters = dict(across_sessions_parsed_csv_files_path=across_sessions_parsed_csv_files_path.as_posix(), final_across_session_summary_CSVs_output_path=final_across_session_summary_CSVs_output_path.as_posix(),
                                       )
    for k, v in _neptuner_run_parameters.items():
        neptuner_run[f'output_files/{k}'] = v
    _neptuner_run_parameters = {} # reset after writing

    for k, v in final_csv_export_paths.items():
        neptuner_run[f"output_files/{k}"].upload(v.resolve().as_posix())
        
    neptuner_run.sync()
    
final_csv_export_paths

Box(children=(Label(value='final_across_session_summary_CSVs_output_path:', layout=Layout(width='auto')), HTML…

{'AcrossSession_Laps_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee_AcrossSession_Laps_per-Epoch.csv'),
 'AcrossSession_Ripple_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee_AcrossSession_Ripple_per-Epoch.csv'),
 'AcrossSession_Laps_per-TimeBin': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee_AcrossSession_Laps_per-TimeBin.csv'),
 'AcrossSession_Ripple_per-TimeBin': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee_AcrossSession_Ripple_per-TimeBin.csv'),
 'AcrossSession_MultiMeasure_Laps_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee_AcrossSession_MultiMeasure_Laps_per-Epoch.csv'),
 'AcrossSession_MultiMeasure_Ripple_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee_AcrossSession_MultiMeasure_Ripple_per-Epoch.csv'),
 'AcrossSession_AllScores_Ripple_per-Epoch': WindowsPath('K:/scratch/across_sessions/2024-11-15_Apogee_AcrossSession_AllScores_Ripple_per-Epoch.csv')}

# 2024-03-02 - Get only the user-annotated ripples:

In [None]:
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import _split_user_annotated_ripple_df

## Bump
# input_df = all_sessions_simple_pearson_ripple_df
# input_df = all_sessions_all_scores_ripple_df

all_sessions_all_scores_ripple_df, (valid_ripple_df, invalid_ripple_df), (user_approved_ripple_df, user_rejected_ripple_df) = _split_user_annotated_ripple_df(all_sessions_all_scores_ripple_df)

## 2024-03-14 - 'is_valid_epoch' column
# 'is_valid_epoch'
## OUTPUTS: valid_ripple_df, invalid_ripple_df, user_approved_ripple_df, user_rejected_ripple_df, (user_annotated_epoch_unique_session_names, unannotated_session_names)
user_approved_ripple_df

### 2024-02-29 - 4pm - Filter the events for those meeting wcorr criteria:


In [None]:
# df: pd.DataFrame = deepcopy(all_sessions_user_annotated_ripple_df)
df: pd.DataFrame = deepcopy(valid_ripple_df) # valid epochs, but not just those that the user approved
# df: pd.DataFrame = deepcopy(user_approved_ripple_df)

## INPUTS: df

min_wcorr_threshold: float = 0.33
min_wcorr_diff_threshold: float = 0.2

# is_included_large_wcorr_diff = np.any((df[['wcorr_abs_diff']].abs() > min_wcorr_diff_threshold), axis=1)
is_included_large_wcorr_diff = np.any((df[['wcorr_diff']].abs() > min_wcorr_diff_threshold), axis=1)
is_included_high_wcorr = np.any((df[['long_best_wcorr', 'short_best_wcorr']].abs() > min_wcorr_threshold), axis=1)

df = df[is_included_high_wcorr]
df

# wcorr_long_LR


In [None]:
all_sessions_all_scores_ripple_df.time_bin_size.unique() # does not seem to return NaNs

In [None]:
all_sessions_ripple_time_bin_df.time_bin_size.unique()

In [None]:
all_sessions_ripple_time_bin_df

# 2024-03-28 - AcrossSessionTable (PhoDibaPaper2023 formats) .h5 and figure exports

neuron_identities_table, long_short_fr_indicies_analysis_table, neuron_replay_stats_table: all tables of the same length, one entry per neuron

In [None]:
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import AcrossSessionTables, AcrossSessionsResults, AcrossSessionsVisualizations
from neuropy.utils.mixins.HDF5_representable import HDF_Converter
from pyphoplacecellanalysis.General.Batch.runBatch import BatchResultDataframeAccessor

# output_path_suffix: str = '2024-09-26'
# output_path_suffix: str = '2024-10-22'
output_path_suffix: str = '2024-10-22'
# output_path_suffix: str = '2024-10-04'
# inst_fr_output_filename: str = f'across_session_result_long_short_recomputed_inst_firing_rate_{output_path_suffix}.pkl'
# inst_fr_output_filename: str = f'across_session_result_long_short_recomputed_inst_firing_rate_{output_path_suffix}_0.0009.pkl' # single time bin size
# inst_fr_output_filename: str = f'across_session_result_long_short_recomputed_inst_firing_rate_{output_path_suffix}_0.0015.pkl' # single time bin size
# inst_fr_output_filename: str = f'across_session_result_long_short_recomputed_inst_firing_rate_{output_path_suffix}_0.0025.pkl' # single time bin size
# inst_fr_output_filename: str = f'across_session_result_long_short_recomputed_inst_firing_rate_{output_path_suffix}_0.025.pkl' # single time bin size
inst_fr_output_filename: str = f'across_session_result_long_short_recomputed_inst_firing_rate_{output_path_suffix}_1000.0.pkl' # single time bin size

## INPUTS: included_session_contexts, included_h5_paths
neuron_identities_table, long_short_fr_indicies_analysis_table, neuron_replay_stats_table = AcrossSessionTables.build_all_known_tables(included_session_contexts, included_h5_paths, should_restore_native_column_types=True)

## different than load_all_combined_tables, which seems to work with `long_short_fr_indicies_analysis_table`
# graphics_output_dict |= AcrossSessionsVisualizations.across_sessions_firing_rate_index_figure(long_short_fr_indicies_analysis_results=long_short_fr_indicies_analysis_table, num_sessions=num_sessions, save_figure=True)

## Load all across-session tables from the pickles:
output_path_suffix: str = f'{output_path_suffix}'
neuron_identities_table, long_short_fr_indicies_analysis_table, neuron_replay_stats_table = AcrossSessionTables.load_all_combined_tables(override_output_parent_path=collected_outputs_directory, output_path_suffix=output_path_suffix) # output_path_suffix=f'2023-10-04-GL-Recomp'
# num_sessions = len(neuron_replay_stats_table.session_uid.unique().to_numpy())
# print(f'num_sessions: {num_sessions}')
num_sessions: int = len(long_short_fr_indicies_analysis_table['session_uid'].unique())
print(f'num_sessions: {num_sessions}')

inst_fr_output_load_filepath: Path = collected_outputs_directory.joinpath(inst_fr_output_filename).resolve() # single time bin size # non-instantaneous version
assert inst_fr_output_load_filepath.exists()
# inst_fr_output_filename: str = inst_fr_output_load_filepath.name
# across_session_inst_fr_computation, across_sessions_instantaneous_fr_dict, across_sessions_instantaneous_frs_list = AcrossSessionsResults.load_across_sessions_data(global_data_root_parent_path=global_data_root_parent_path, inst_fr_output_filename=inst_fr_output_filename)
across_session_inst_fr_computation, across_sessions_instantaneous_fr_dict, across_sessions_instantaneous_frs_list = AcrossSessionsResults.load_across_sessions_data(global_data_root_parent_path=inst_fr_output_load_filepath.parent, inst_fr_output_filename=inst_fr_output_filename)

graphics_output_dict = AcrossSessionsResults.post_compute_all_sessions_processing(global_data_root_parent_path=collected_outputs_directory, output_path_suffix=output_path_suffix, plotting_enabled=False, output_override_path=Path('../../output'), inst_fr_output_filename=inst_fr_output_filename)

num_sessions = len(across_sessions_instantaneous_fr_dict)
print(f'num_sessions: {num_sessions}')

# Convert byte strings to regular strings
neuron_replay_stats_table = neuron_replay_stats_table.applymap(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)
neuron_replay_stats_table

### Figure Generation

In [None]:
long_short_fr_indicies_analysis_table

In [None]:
neuron_identities_table

In [None]:
neuron_replay_stats_table

In [None]:
from pyphoplacecellanalysis.SpecificResults.PhoDiba2023Paper import pho_stats_bar_graph_t_tests

LxC_Laps_T_result, SxC_Laps_T_result, LxC_Replay_T_result, SxC_Replay_T_result = pho_stats_bar_graph_t_tests(across_session_inst_fr_computation)

# LxC_Laps_T_result: TtestResult(statistic=5.550057784140024, pvalue=4.394229331160663e-05, df=16)
# SxC_Laps_T_result: TtestResult(statistic=-4.50982955925142, pvalue=0.001125880142367611, df=10)
# LxC_Replay_T_result: TtestResult(statistic=-0.4086778656072959, pvalue=0.6881937588138113, df=16)
# SxC_Replay_T_result: TtestResult(statistic=-3.551930809035679, pvalue=0.0052513298000637825, df=10)



# LxC_Replay_T_result is NOT p<0.05 significant (pvalue=0.6882)
# SxC_Replay_T_result IS NOT p<0.05 significant (pvalue=0.0052513298000637825)

In [None]:
from pyphoplacecellanalysis.SpecificResults.PhoDiba2023Paper import PaperFigureTwo
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import AcrossSessionsVisualizations

# %matplotlib inline 
# %matplotlib qt5
## Plotting:
graphics_output_dict = {}

matplotlib_configuration_update(is_interactive=True, backend='Qt5Agg')

## 2023-10-04 - Run `AcrossSessionsVisualizations` corresponding to the PhoDibaPaper2023 figures for all sessions
## Hacks the `PaperFigureTwo` and `InstantaneousSpikeRateGroupsComputation`
global_multi_session_context, _out_aggregate_fig_2 = AcrossSessionsVisualizations.across_sessions_bar_graphs(across_session_inst_fr_computation, num_sessions=num_sessions, enable_tiny_point_labels=False, enable_hover_labels=False, enabled_point_connection_lines=True, save_figure=False)


In [None]:
plt.show()


In [None]:
# across_session_inst_fr_computation
## Document `InstantaneousSpikeRateGroupsComputation`
from pyphocorehelpers.print_helpers import DocumentationFilePrinter
from pyphocorehelpers.print_helpers import print_keys_if_possible

doc_output_parent_folder = Path(r'C:\Users\pho\repos\Spike3DWorkEnv\Spike3D\EXTERNAL\DEVELOPER_NOTES\DataStructureDocumentation').resolve()
Assert.path_exists(doc_output_parent_folder)
doc_printer = DocumentationFilePrinter(doc_output_parent_folder=doc_output_parent_folder, doc_name='InstantaneousSpikeRateGroupsComputation')
doc_printer.save_documentation('InstantaneousSpikeRateGroupsComputation', across_session_inst_fr_computation, non_expanded_item_keys=['_reverse_cellID_index_map'])

In [None]:
doc_printer = DocumentationFilePrinter(doc_output_parent_folder=doc_output_parent_folder, doc_name='SingleBarResult')
doc_printer.save_documentation('SingleBarResult', across_session_inst_fr_computation.Fig2_Replay_FR[0], non_expanded_item_keys=['_reverse_cellID_index_map'])
doc_printer.save_documentation('SingleBarResult', across_session_inst_fr_computation.Fig2_Replay_FR[1], non_expanded_item_keys=['_reverse_cellID_index_map'])
doc_printer.save_documentation('SingleBarResult', across_session_inst_fr_computation.Fig2_Replay_FR[2], non_expanded_item_keys=['_reverse_cellID_index_map'])
doc_printer.save_documentation('SingleBarResult', across_session_inst_fr_computation.Fig2_Replay_FR[3], non_expanded_item_keys=['_reverse_cellID_index_map'])

In [None]:
across_session_inst_fr_computation.get_summary_dataframe()

In [None]:
across_sessions_instantaneous_frs_list


### 2024-09-10 - TODO: Filter out the datapoints from the bar plot corresponding to the novel vs. non-novel sessions

In [None]:
import re
from typing import List, Tuple
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import ConciseSessionIdentifiers

# `across_session_inst_fr_computation.Fig2_Replay_FR`

novel_Fig2_Replay_FR = deepcopy(across_session_inst_fr_computation.Fig2_Replay_FR)
non_novel_Fig2_Replay_FR = deepcopy(across_session_inst_fr_computation.Fig2_Replay_FR)

# [0] and [1] both only use LxC_aclus
LxC_aclus = deepcopy(across_session_inst_fr_computation.Fig2_Replay_FR[0].LxC_aclus.tolist()) # ['a0s0_109', 'a0s1_3', 'a0s1_29', 'a0s1_103', 'a0s3_90', 'a0s4_91', 'a0s4_95', 'a1s1_23', 'a1s2_25', 'a1s3_14', 'a1s3_30', 'a1s3_32', 'a2s0_8', 'a2s0_27', 'a2s1_27']
_LxC_aclus_alt = deepcopy(across_session_inst_fr_computation.Fig2_Replay_FR[1].LxC_aclus.tolist()) # ['a0s0_109', 'a0s1_3', 'a0s1_29', 'a0s1_103', 'a0s3_90', 'a0s4_91', 'a0s4_95', 'a1s1_23', 'a1s2_25', 'a1s3_14', 'a1s3_30', 'a1s3_32', 'a2s0_8', 'a2s0_27', 'a2s1_27']
assert _LxC_aclus_alt == LxC_aclus

# [2] and [3] both only use SxC_aclus
SxC_aclus = deepcopy(across_session_inst_fr_computation.Fig2_Replay_FR[2].SxC_aclus.tolist()) # ['a0s0_109', 'a0s1_3', 'a0s1_29', 'a0s1_103', 'a0s3_90', 'a0s4_91', 'a0s4_95', 'a1s1_23', 'a1s2_25', 'a1s3_14', 'a1s3_30', 'a1s3_32', 'a2s0_8', 'a2s0_27', 'a2s1_27']
_SxC_aclus_alt = deepcopy(across_session_inst_fr_computation.Fig2_Replay_FR[3].SxC_aclus.tolist()) # ['a0s0_109', 'a0s1_3', 'a0s1_29', 'a0s1_103', 'a0s3_90', 'a0s4_91', 'a0s4_95', 'a1s1_23', 'a1s2_25', 'a1s3_14', 'a1s3_30', 'a1s3_32', 'a2s0_8', 'a2s0_27', 'a2s1_27']
assert _SxC_aclus_alt == SxC_aclus

parsed_LxC_aclus_df: pd.DataFrame = ConciseSessionIdentifiers.parse_concise_abbreviated_neuron_identifying_strings(LxC_aclus)
# parsed_LxC_aclus_df

parsed_SxC_aclus_df: pd.DataFrame = ConciseSessionIdentifiers.parse_concise_abbreviated_neuron_identifying_strings(SxC_aclus)
# parsed_SxC_aclus_df

# novel_LxC_indicies, non_novel_LxC_indicies = 
# partition_df(parsed_LxC_aclus_df, partitionColumn='is_session_novel')
# partition_df(parsed_SxC_aclus_df, partitionColumn='is_session_novel')

novel_LxC_indicies = np.where(parsed_LxC_aclus_df['is_session_novel'])[0]
non_novel_LxC_indicies = np.where(np.logical_not(parsed_LxC_aclus_df['is_session_novel']))[0]

(novel_LxC_indicies, non_novel_LxC_indicies)

novel_SxC_indicies = np.where(parsed_SxC_aclus_df['is_session_novel'])[0]
non_novel_SxC_indicies = np.where(np.logical_not(parsed_SxC_aclus_df['is_session_novel']))[0]

(novel_SxC_indicies, non_novel_SxC_indicies)

## OUTPUTS: (novel_LxC_indicies, non_novel_LxC_indicies), (novel_SxC_indicies, non_novel_SxC_indicies)
curr_idx: int = 0
novel_Fig2_Replay_FR[curr_idx].LxC_aclus = novel_Fig2_Replay_FR[curr_idx].LxC_aclus[novel_LxC_indicies]
novel_Fig2_Replay_FR[curr_idx].values = novel_Fig2_Replay_FR[curr_idx].values[novel_LxC_indicies]
curr_idx: int = 1
novel_Fig2_Replay_FR[curr_idx].LxC_aclus = novel_Fig2_Replay_FR[curr_idx].LxC_aclus[novel_LxC_indicies]
novel_Fig2_Replay_FR[curr_idx].values = novel_Fig2_Replay_FR[curr_idx].values[novel_LxC_indicies]
curr_idx: int = 2
novel_Fig2_Replay_FR[curr_idx].SxC_aclus = novel_Fig2_Replay_FR[curr_idx].SxC_aclus[novel_SxC_indicies]
novel_Fig2_Replay_FR[curr_idx].values = novel_Fig2_Replay_FR[curr_idx].values[novel_SxC_indicies]
curr_idx: int = 3
novel_Fig2_Replay_FR[curr_idx].SxC_aclus = novel_Fig2_Replay_FR[curr_idx].SxC_aclus[novel_SxC_indicies]
novel_Fig2_Replay_FR[curr_idx].values = novel_Fig2_Replay_FR[curr_idx].values[novel_SxC_indicies]

across_session_inst_fr_computation.Fig2_Replay_FR = novel_Fig2_Replay_FR

In [None]:
global_multi_session_context, _out_aggregate_fig_2 = AcrossSessionsVisualizations.across_sessions_bar_graphs(across_session_inst_fr_computation, num_sessions=num_sessions, enable_tiny_point_labels=True, enable_hover_labels=False, save_figure=False)

In [None]:
## group by 'is_novel_exposure'
## for `long_short_fr_indicies_analysis_table`
is_novel_partitioned_dfs = dict(zip(*partition_df(long_short_fr_indicies_analysis_table, partitionColumn='is_novel_exposure')))
novel_session_uids = is_novel_partitioned_dfs[True]['session_uid'].unique().tolist()
non_novel_session_uids = is_novel_partitioned_dfs[False]['session_uid'].unique().tolist()
## build dicts:
novel_only_inst_fr_dict = {k:v for k, v in across_sessions_instantaneous_fr_dict.items() if k.get_description(separator='|') in novel_session_uids}
non_novel_inst_fr_dict = {k:v for k, v in across_sessions_instantaneous_fr_dict.items() if k.get_description(separator='|') not in novel_session_uids}
# novel_only_inst_fr_dict

In [None]:
# all
# graphics_output_dict |= AcrossSessionsVisualizations.across_sessions_firing_rate_index_figure(long_short_fr_indicies_analysis_results=long_short_fr_indicies_analysis_table, num_sessions=num_sessions, save_figure=False)
# split based on session novelty:
graphics_output_dict |= AcrossSessionsVisualizations.across_sessions_firing_rate_index_figure(long_short_fr_indicies_analysis_results=is_novel_partitioned_dfs[True], num_sessions=len(novel_session_uids), save_figure=False)
graphics_output_dict |= AcrossSessionsVisualizations.across_sessions_firing_rate_index_figure(long_short_fr_indicies_analysis_results=is_novel_partitioned_dfs[False], num_sessions=len(non_novel_session_uids), save_figure=False)


In [None]:
graphics_output_dict |= AcrossSessionsVisualizations.across_sessions_long_and_short_firing_rate_replays_v_laps_figure(neuron_replay_stats_table=neuron_replay_stats_table, num_sessions=num_sessions, save_figure=True)


In [None]:
## Aggregate across all of the sessions to build a new combined `InstantaneousSpikeRateGroupsComputation`, which can be used to plot the "PaperFigureTwo", bar plots for many sessions.
global_multi_session_context = IdentifyingContext(format_name='kdiba', num_sessions=num_sessions) # some global context across all of the sessions, not sure what to put here.



In [None]:
_restore_previous_matplotlib_settings_callback = matplotlib_configuration_update(is_interactive=True, backend='Qt5Agg')
# _restore_previous_matplotlib_settings_callback = matplotlib_configuration_update(is_interactive=False, backend='AGG')

plt.show()


In [None]:
plt.close('all')    

In [None]:
np.sqrt(2)

In [None]:

# ax_histdiagonal.



In [None]:
max_num_bins = np.max(diagonal_hist_artist[0])
max_num_bins
# diagonal_hist_artist[1]

In [None]:

matplotlib_configuration_update(is_interactive=True, backend='Qt5Agg')
matplotlib_output_container: MatplotlibRenderPlots = AcrossSessionsVisualizations.across_sessions_firing_rate_index_figure(long_short_fr_indicies_analysis_results=long_short_fr_indicies_analysis_table, num_sessions=num_sessions, save_figure=False)

In [None]:
duplicates = long_short_fr_indicies_analysis_table[['x_frs_index', 'y_frs_index']].duplicated().sum()
print(f"Number of duplicate rows: {duplicates}")


In [None]:
print(long_short_fr_indicies_analysis_table[['x_frs_index', 'y_frs_index']].std())


In [None]:

# long_short_fr_indicies_analysis_table[['x_frs_index']].corrwith(long_short_fr_indicies_analysis_table['y_frs_index']) # long_short_fr_indicies_analysis_table[['x_frs_index']].corrwith(long_short_fr_indicies_analysis_table['y_frs_index'])
# Compute correlation and p-value
df_cleaned: pd.DataFrame = long_short_fr_indicies_analysis_table[['x_frs_index', 'y_frs_index']].dropna()
df_cleaned

print(df_cleaned.shape)
corr, p_value = pearsonr(df_cleaned['x_frs_index'], df_cleaned['y_frs_index'])
print(f"Correlation: {corr}, P-value: {p_value}")
# Correlation: 0.48548405144431905, P-value: 1.0243398539233502e-48



In [None]:
plt.close('all')

In [None]:
graphics_output_dict.saved_figures[0][0] #.plot_data['saved_figures']

In [None]:
from pyphocorehelpers.plotting.media_output_helpers import fig_to_clipboard

fig_to_clipboard(fig)


In [None]:
# copy_image_to_clipboard(graphics_output_dict['figures'][0])
fig_to_clipboard(matplotlib_output_container.figures[0])

In [None]:
matplotlib_configuration_update(is_interactive=True, backend='Qt5Agg')
graphics_output_dict = AcrossSessionsVisualizations.across_sessions_long_and_short_firing_rate_replays_v_laps_figure(neuron_replay_stats_table=neuron_replay_stats_table, num_sessions=num_sessions)

In [None]:
_save_matplotlib_fig(graphics_output_dict)


In [None]:
## Load the saved across-session results:
# Outputs: across_session_inst_fr_computation, across_sessions_instantaneous_fr_dict, across_sessions_instantaneous_frs_list, neuron_identities_table, long_short_fr_indicies_analysis_table, neuron_replay_stats_table

BATCH_DATE_TO_USE = f'2024-10-22'
inst_fr_output_filename: str = f'across_session_result_long_short_recomputed_inst_firing_rate_{BATCH_DATE_TO_USE}.pkl'

inst_fr_output_file = collected_outputs_directory.joinpath(inst_fr_output_filename).resolve()
Assert.path_exists(inst_fr_output_file)

across_session_inst_fr_computation, across_sessions_instantaneous_fr_dict, across_sessions_instantaneous_frs_list = AcrossSessionsResults.load_across_sessions_data(global_data_root_parent_path=collected_outputs_directory, inst_fr_output_filename=inst_fr_output_filename)
# across_sessions_instantaneous_fr_dict = loadData(global_batch_result_inst_fr_file_path)
num_sessions = len(across_sessions_instantaneous_fr_dict)
print(f'num_sessions: {num_sessions}')

## Load all across-session tables from the pickles:
output_path_suffix: str = f'{BATCH_DATE_TO_USE}'
neuron_identities_table, long_short_fr_indicies_analysis_table, neuron_replay_stats_table = AcrossSessionTables.load_all_combined_tables(override_output_parent_path=collected_outputs_directory, output_path_suffix=output_path_suffix) # output_path_suffix=f'2023-10-04-GL-Recomp'
num_sessions = len(neuron_replay_stats_table.session_uid.unique().to_numpy())
print(f'num_sessions: {num_sessions}')
# neuron_replay_stats_table


In [None]:
neptuner.stop()

## 2024-09-04 - Batch Output Files

In [12]:
from neuropy.core.user_annotations import UserAnnotationsManager
from pyphocorehelpers.Filesystem.metadata_helpers import FilesystemMetadata, get_file_metadata
from pyphocorehelpers.Filesystem.path_helpers import discover_data_files, generate_copydict, copy_movedict, copy_file, save_copydict_to_text_file, read_copydict_from_text_file, invert_filedict
from pyphoplacecellanalysis.General.Batch.runBatch import get_file_str_if_file_exists
from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import check_output_h5_files, copy_files_in_filelist_to_dest
from pyphoplacecellanalysis.General.Batch.runBatch import ConcreteSessionFolder, BackupMethods

# a_batch_progress_df = included_session_batch_progress_df.copy()
included_session_contexts
h5_contexts_paths_dict
# h5_session_contexts

debug_print = False
known_global_data_root_parent_paths = [Path(r'/nfs/turbo/umms-kdiba/Data'), Path(r'W:\Data'), Path(r'/home/halechr/cloud/turbo/Data'), Path(r'/media/halechr/MAX/Data'), Path(r'/Volumes/MoverNew/data')] # , Path(r'/home/halechr/FastData'), Path(r'/home/halechr/turbo/Data'), Path(r'W:\Data'), Path(r'/home/halechr/cloud/turbo/Data')
global_data_root_parent_path = find_first_extant_path(known_global_data_root_parent_paths)
assert global_data_root_parent_path.exists(), f"global_data_root_parent_path: {global_data_root_parent_path} does not exist! Is the right computer's config commented out above?"
## Build Pickle Path:
# Hardcoded included_session_contexts:
included_session_contexts = UserAnnotationsManager.get_hardcoded_good_sessions()
good_session_concrete_folders = ConcreteSessionFolder.build_concrete_session_folders(global_data_root_parent_path, included_session_contexts)

# Output Paths:
included_h5_paths = [get_file_str_if_file_exists(v.pipeline_results_h5) for v in good_session_concrete_folders]
# copy_dict = ConcreteSessionFolder.build_backup_copydict(good_session_concrete_folders, backup_mode=BackupMethods.RenameInSourceDirectory, only_include_file_types=['local_pkl', 'global_pkl'])
check_output_h5_files(included_h5_paths)

## OUTPUTS: included_h5_paths, included_session_contexts, good_session_concrete_folders

included_h5_paths

# included_file_types_paths_dict = {'h5': included_h5_paths, 'recomputed_inst_fr_comps': []}
# included_file_types_paths_dict['recomputed_inst_fr_comps'] = [get_file_path_if_file_exists(v.output_folder.joinpath(f'{RESULT_DATE_TO_USE}_recomputed_inst_fr_comps_0.0005.h5').resolve()) for v in good_session_concrete_folders]
# included_file_types_paths_dict


[Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'one', session_name= '2006-6-08_14-26-15'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'one', session_name= '2006-6-09_1-22-43'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'one', session_name= '2006-6-12_15-55-31'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'two', session_name= '2006-6-07_16-40-19'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'two', session_name= '2006-6-12_16-53-46'),
 Context(format_name= 'kdiba', animal= 'vvp01', exper_name= 'one', session_name= '2006-4-09_17-29-30'),
 Context(format_name= 'kdiba', animal= 'vvp01', exper_name= 'one', session_name= '2006-4-10_12-25-50'),
 Context(format_name= 'kdiba', animal= 'vvp01', exper_name= 'two', session_name= '2006-4-09_16-40-54'),
 Context(format_name= 'kdiba', animal= 'vvp01', exper_name= 'two', session_name= '2006-4-10_12-58-3'),
 Context(format_name= 'kdiba', animal= 'pin01', exper_name= 'one',

{Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'one', session_name= '2006-6-08_14-26-15'): WindowsPath('K:/scratch/collected_outputs/2024-11-07_GL_2006-6-08_14-26-15_pipeline_results.h5'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'one', session_name= '2006-6-09_1-22-43'): WindowsPath('K:/scratch/collected_outputs/2024-11-07_GL_2006-6-09_1-22-43_pipeline_results.h5'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'one', session_name= '2006-6-12_15-55-31'): WindowsPath('K:/scratch/collected_outputs/2024-11-07_GL_2006-6-12_15-55-31_pipeline_results.h5'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'two', session_name= '2006-6-07_16-40-19'): WindowsPath('K:/scratch/collected_outputs/2024-11-07_GL_2006-6-07_16-40-19_pipeline_results.h5'),
 Context(format_name= 'kdiba', animal= 'gor01', exper_name= 'two', session_name= '2006-6-12_16-53-46'): WindowsPath('K:/scratch/collected_outputs/2024-11-07_GL_2006-6-12_16-53-46_pipeline_result

Unnamed: 0,path,modification_time,creation_time,file_size
0,W:\Data\KDIBA\gor01\one\2006-6-08_14-26-15\out...,2024-09-05 07:52:47.956683,2023-08-31 22:00:50.256063,0.01
1,W:\Data\KDIBA\gor01\one\2006-6-09_1-22-43\outp...,2024-10-07 14:05:39.696000,2024-06-24 16:56:48.538135,2.06
2,W:\Data\KDIBA\gor01\one\2006-6-12_15-55-31\out...,2024-09-04 19:59:09.388734,2023-08-03 18:14:52.394542,0.85
3,W:\Data\KDIBA\gor01\two\2006-6-07_16-40-19\out...,2024-10-30 13:44:22.917765,2023-08-03 18:15:07.920659,2.11
4,W:\Data\KDIBA\gor01\two\2006-6-12_16-53-46\out...,2024-11-01 07:57:45.986552,2023-08-08 03:21:30.604896,0.68
5,W:\Data\KDIBA\vvp01\one\2006-4-09_17-29-30\out...,2024-06-12 10:03:53.694698,2023-08-08 03:21:51.139954,0.25
6,W:\Data\KDIBA\vvp01\one\2006-4-10_12-25-50\out...,2024-09-04 00:39:11.863333,2023-08-08 03:22:12.043931,0.75
7,W:\Data\KDIBA\vvp01\two\2006-4-09_16-40-54\out...,2024-09-04 00:41:56.797752,2023-08-08 03:22:32.722443,0.91
8,W:\Data\KDIBA\vvp01\two\2006-4-10_12-58-3\outp...,2024-09-04 00:45:10.419033,2023-08-08 03:22:53.658802,0.74
9,W:\Data\KDIBA\pin01\one\11-03_12-3-25\output\p...,2024-09-04 00:53:55.369443,2023-08-08 03:23:56.407699,0.54


['W:\\Data\\KDIBA\\gor01\\one\\2006-6-08_14-26-15\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\gor01\\one\\2006-6-09_1-22-43\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\gor01\\one\\2006-6-12_15-55-31\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\gor01\\two\\2006-6-07_16-40-19\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\gor01\\two\\2006-6-12_16-53-46\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\vvp01\\one\\2006-4-09_17-29-30\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\vvp01\\one\\2006-4-10_12-25-50\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\vvp01\\two\\2006-4-09_16-40-54\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\vvp01\\two\\2006-4-10_12-58-3\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\pin01\\one\\11-03_12-3-25\\output\\pipeline_results.h5',
 'W:\\Data\\KDIBA\\pin01\\one\\fet11-01_12-58-54\\output\\pipeline_results.h5']

# Plotting via Plotly
`!pip install kaleido=="v0.1.0.post1" `

In [13]:
from functools import partial
from pyphoplacecellanalysis.Pho2D.plotly.Extensions.plotly_helpers import add_copy_save_action_buttons
from pyphoplacecellanalysis.Pho2D.plotly.Extensions.plotly_helpers import plotly_pre_post_delta_scatter
from pyphoplacecellanalysis.SpecificResults.PhoDiba2023Paper import _perform_plot_pre_post_delta_scatter
from neuropy.utils.result_context import DisplaySpecifyingIdentifyingContext

## INPUTS: earliest_delta_aligned_t_start, latest_delta_aligned_t_end
is_dark_mode, template = PlotlyHelpers.get_plotly_template(is_dark_mode=False)
# should_save: bool = True
should_save: bool = False

_perform_plot_pre_post_delta_scatter = partial(
    _perform_plot_pre_post_delta_scatter,
    time_delta_tuple=(earliest_delta_aligned_t_start, 0.0, latest_delta_aligned_t_end),
    fig_size_kwargs=fig_size_kwargs,
    is_dark_mode=is_dark_mode,
    save_plotly=save_plotly,
)

_perform_plot_pre_post_delta_scatter_with_embedded_context = partial(
    _perform_plot_pre_post_delta_scatter,
    data_context=None,
)


# def _build_filter_changed_plotly_plotting_callback_fn(df_filter: DataFrameFilter):
#     """ captures: _perform_plot_pre_post_delta_scatter_with_embedded_context, should_save, """
#     df_filter.output_widget.clear_output(wait=True)
#     fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_MultiMeasure_ripple_df), is_dark_mode=False, should_save=should_save,
#                                                                                                                            custom_output_widget=df_filter.output_widget)
#     with df_filter.output_widget:
#         # fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_MultiMeasure_ripple_df), is_dark_mode=False, should_save=should_save)
#         display(fig)


def _build_filter_changed_plotly_plotting_callback_fn(df_filter: DataFrameFilter):
    """ `filtered_all_sessions_all_scores_ripple_df` versions - captures: _perform_plot_pre_post_delta_scatter_with_embedded_context, should_save, """
    # df_filter.output_widget.clear_output(wait=True)
    # fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_all_scores_ripple_df), is_dark_mode=False, should_save=should_save,
    #                                                                                                                        custom_output_widget=df_filter.output_widget)
    fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_all_scores_ripple_df), is_dark_mode=False, should_save=should_save)
    
    # with df_filter.output_widget:
    #     # fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_MultiMeasure_ripple_df), is_dark_mode=False, should_save=should_save)
    #     display(fig)
    display(fig)


## Set dataframe context metadata

def _perform_update_df_context_metadata(data_context: IdentifyingContext, concatenated_ripple_df: pd.DataFrame):
    """ sets the metadata in-place for the dataframe """
    concatenated_ripple_df.attrs.update(**dict(data_context=deepcopy(data_context)))

_perform_update_df_context_metadata(data_context=IdentifyingContext(epochs_name='laps', data_grain='per_epoch', title_prefix="Lap Per Epoch", dataframe_name='df'), concatenated_ripple_df=all_sessions_laps_df)
_perform_update_df_context_metadata(data_context=IdentifyingContext(epochs_name='laps', data_grain='per_time_bin', title_prefix="Lap Individual Time Bins", dataframe_name='time_bin_df'), concatenated_ripple_df=all_sessions_laps_time_bin_df)
_perform_update_df_context_metadata(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', title_prefix="PBE Per Epoch", dataframe_name='df'), concatenated_ripple_df = all_sessions_ripple_df)
_perform_update_df_context_metadata(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_time_bin', title_prefix="PBE Individual Time Bins", dataframe_name='time_bin_df'), concatenated_ripple_df = all_sessions_ripple_time_bin_df)
_perform_update_df_context_metadata(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', dataframe_name='MultiMeasure_ripple_df', title_prefix="MultiMeasure_ripple_df - PBE Per Epoch"), concatenated_ripple_df = all_sessions_MultiMeasure_ripple_df)
_perform_update_df_context_metadata(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', dataframe_name='all_scores_ripple_df', title_prefix="all_sessions_all_scores_ripple_df - PBE Per Epoch"), concatenated_ripple_df = all_sessions_all_scores_ripple_df)
_perform_update_df_context_metadata(data_context = IdentifyingContext(epochs_name='laps', data_grain='per_epoch', dataframe_name='MultiMeasure_laps_df', title_prefix="MultiMeasure_laps_df - Lap Per Epoch"), concatenated_ripple_df = all_sessions_MultiMeasure_laps_df)


# all_sessions_MultiMeasure_ripple_df.attrs


# Unfiltered

In [None]:


# grainularity_desc: str = 'by-time-bin'
# laps_df: pd.DataFrame = all_sessions_laps_time_bin_df
# ripple_df: pd.DataFrame = all_sessions_ripple_time_bin_df
# _laps_histogram_out, _ripple_histogram_out = _perform_plot_pre_post_delta_scatter(grainularity_desc='by-time-bin', laps_df=all_sessions_laps_time_bin_df, ripple_df=all_sessions_ripple_time_bin_df)

# ==================================================================================================================== #
# Produces all four hist/scatter/hist plotly figures with clear labels in one call                                     #
# ==================================================================================================================== #

# BEGIN CALL _________________________________________________________________________________________________________ #
new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='laps', data_grain='per_epoch', title_prefix="Lap Per Epoch", dataframe_name='df'), concatenated_ripple_df=deepcopy(all_sessions_laps_df))
# new_fig

new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='laps', data_grain='per_time_bin', title_prefix="Lap Individual Time Bins", dataframe_name='time_bin_df'), concatenated_ripple_df=deepcopy(all_sessions_laps_time_bin_df))
# new_fig

new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', title_prefix="PBE Per Epoch", dataframe_name='df'), concatenated_ripple_df = deepcopy(all_sessions_ripple_df))
# new_fig

new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_time_bin', title_prefix="PBE Individual Time Bins", dataframe_name='time_bin_df'), concatenated_ripple_df = deepcopy(all_sessions_ripple_time_bin_df))
# new_fig



# 🟢 Filtered (for specific qclus/firing_Hz/replays

## Get filtered for a particular type of replay:

```
## filter by specific set of replays:
# dfs_list = (all_sessions_ripple_df, all_sessions_ripple_time_bin_df, all_sessions_simple_pearson_ripple_df, all_sessions_wcorr_ripple_df, all_sessions_all_scores_ripple_df)

replay_name: str = ''
# replay_name: str = 'withNewComputedReplays-qclu_[1, 2]-frateThresh_5.0' # 4307 rows
# replay_name: str = 'withNewKamranExportedReplays-qclu_[1,2]-frateThresh_5.0' # 1417 rows, 1437 rows
# replay_name: str = 'withNormalComputedReplays-qclu_[1,2]-frateThresh_1.0' # 2802 rows, 2831 rows

time_bin_size: float = 0.025
# time_bin_size: float = 0.02
# time_bin_size: float = 0.01
filtered_all_sessions_ripple_df = deepcopy(all_sessions_ripple_df)[(all_sessions_ripple_df['custom_replay_name'] == replay_name) & (all_sessions_ripple_df['time_bin_size'] == time_bin_size)]
filtered_all_sessions_ripple_time_bin_df = deepcopy(all_sessions_ripple_time_bin_df)[(all_sessions_ripple_time_bin_df['custom_replay_name'] == replay_name) & (all_sessions_ripple_time_bin_df['time_bin_size'] == time_bin_size)]
filtered_all_sessions_MultiMeasure_ripple_df = deepcopy(all_sessions_MultiMeasure_ripple_df)[(all_sessions_MultiMeasure_ripple_df['custom_replay_name'] == replay_name) & (all_sessions_MultiMeasure_ripple_df['time_bin_size'] == time_bin_size)]
# filtered_all_sessions_simple_pearson_ripple_df = deepcopy(all_sessions_simple_pearson_ripple_df)[(all_sessions_simple_pearson_ripple_df['custom_replay_name'] == replay_name) & (all_sessions_simple_pearson_ripple_df['time_bin_size'] == time_bin_size)]
# filtered_all_sessions_wcorr_ripple_df = deepcopy(all_sessions_wcorr_ripple_df)[(all_sessions_wcorr_ripple_df['custom_replay_name'] == replay_name) & (all_sessions_wcorr_ripple_df['time_bin_size'] == time_bin_size)]
filtered_all_sessions_all_scores_ripple_df = deepcopy(all_sessions_all_scores_ripple_df)[(all_sessions_all_scores_ripple_df['custom_replay_name'] == replay_name) & (all_sessions_all_scores_ripple_df['time_bin_size'] == time_bin_size)]
## OUTPUTS: filtered_all_sessions_ripple_df, filtered_all_sessions_ripple_time_bin_df, filtered_all_sessions_simple_pearson_ripple_df, filtered_all_sessions_wcorr_ripple_df, filtered_all_sessions_all_scores_ripple_df
# filtered_all_sessions_simple_pearson_ripple_df
filtered_all_sessions_all_scores_ripple_df
```


In [22]:
all_sessions_all_scores_ripple_df['best_overall_quantile']


0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
         ..
24626   NaN
24627   NaN
24628   NaN
24629   NaN
24630   NaN
Name: best_overall_quantile, Length: 24631, dtype: float64

In [23]:
# pd.options.mode.copy_on_write = False
# pd.options.mode.chained_assignment = 'raise'
# pd.options.mode.chained_assignment = None
from pyphoplacecellanalysis.SpecificResults.PhoDiba2023Paper import DataFrameFilter


min_wcorr_threshold: float = 0.7
high_pearsonr_threshold: float = 0.9
high_shuffle_score_threshold: float = 0.9

additional_filter_predicates = {
    'high_wcorr': (lambda df: np.any((df[['long_best_wcorr', 'short_best_wcorr']].abs() > min_wcorr_threshold), axis=1)),
    'user_selected': lambda df: np.all((df[['is_user_annotated_epoch', 'is_valid_epoch']]), axis=1),
    'high_pearsonr_corr': (lambda df: np.any((df[['long_LR_pf_peak_x_pearsonr', 'long_RL_pf_peak_x_pearsonr', 'short_LR_pf_peak_x_pearsonr', 'short_RL_pf_peak_x_pearsonr']].abs() > high_pearsonr_threshold), axis=1)),
    'high_shuffle_percentile_score': (lambda df: (df[['best_overall_quantile']].abs() > high_shuffle_score_threshold)),
}

# replay_name: str = ''
# replay_name: str = 'withNewComputedReplays-qclu_[1, 2]-frateThresh_5.0' # 4307 rows
# replay_name: str = 'withNewKamranExportedReplays-qclu_[1,2]-frateThresh_5.0' # 1417 rows, 1437 rows
# replay_name: str = 'withNormalComputedReplays-qclu_[1,2]-frateThresh_1.0' # 2802 rows, 2831 rows
replay_name: str = 'withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]'

# time_bin_size: float = 0.025
time_bin_size: Tuple[float] = (0.025, 0.058)

# Assuming your DataFrames are already defined:
# all_sessions_ripple_df, all_sessions_ripple_time_bin_df, etc.

df_filter: DataFrameFilter = DataFrameFilter(
    all_sessions_ripple_df=all_sessions_ripple_df,
    all_sessions_ripple_time_bin_df=all_sessions_ripple_time_bin_df,
    all_sessions_MultiMeasure_ripple_df=all_sessions_MultiMeasure_ripple_df,
    all_sessions_all_scores_ripple_df=all_sessions_all_scores_ripple_df,
    all_sessions_laps_df=all_sessions_laps_df,
    all_sessions_laps_time_bin_df=all_sessions_laps_time_bin_df,
    all_sessions_MultiMeasure_laps_df=all_sessions_MultiMeasure_laps_df,
    additional_filter_predicates=additional_filter_predicates,
    on_filtered_dataframes_changed_callback_fns={'build_filter_changed_plotly_plotting_callback_fn': _build_filter_changed_plotly_plotting_callback_fn},
)

df_filter.replay_name = replay_name # 'withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]'
# df_filter.time_bin_size = (time_bin_size, )
df_filter.time_bin_size = time_bin_size
# df_filter.time_bin_size = (0.025, 0.058)
# df_filter.on_filtered_dataframes_changed_callback_fns['build_filter_changed_plotly_plotting_callback_fn'] = _build_filter_changed_plotly_plotting_callback_fn
# df_filter.update_filters()
df_filter.on_filtered_dataframes_changed_callback_fns['build_filter_changed_plotly_plotting_callback_fn'] = _build_filter_changed_plotly_plotting_callback_fn
df_filter.update_filters()


VBox(children=(HBox(children=(Dropdown(description='Replay Name:', layout=Layout(width='500px'), options=('', …

Please select at least one Time Bin Size.
Please select at least one Time Bin Size.
DataFrames filtered with Replay Name: 'withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]' and Time Bin Sizes: [0.025, 0.058]


{'filtered_all_sessions_ripple_df': 5942,
 'filtered_all_sessions_ripple_time_bin_df': 34835,
 'filtered_all_sessions_MultiMeasure_ripple_df': 5942,
 'filtered_all_sessions_all_scores_ripple_df': 5277,
 'filtered_all_sessions_laps_df': 1810,
 'filtered_all_sessions_laps_time_bin_df': 272188,
 'filtered_all_sessions_MultiMeasure_laps_df': 1810}

k: build_filter_changed_plotly_plotting_callback_fn
num_events: 5277
figure_sup_huge_title_text: <span style='font-weight:bold; font-size:12px;'>PBE | per_epoch | all_scores_ripple_df | None</span><br><span style='font-size:12px;'>None</span>
footer_text: "PBE|per_epoch|all_scores_ripple_df|all_sessions_all_scores_ripple_df - PBE Per Epoch"


DataFrames filtered with Replay Name: 'withNormalComputedReplays-frateThresh_5.0-qclu_[1, 2, 4, 6, 7, 9]' and Time Bin Sizes: [0.025, 0.058]


{'filtered_all_sessions_ripple_df': 5942,
 'filtered_all_sessions_ripple_time_bin_df': 34835,
 'filtered_all_sessions_MultiMeasure_ripple_df': 5942,
 'filtered_all_sessions_all_scores_ripple_df': 5277,
 'filtered_all_sessions_laps_df': 1810,
 'filtered_all_sessions_laps_time_bin_df': 272188,
 'filtered_all_sessions_MultiMeasure_laps_df': 1810}

k: build_filter_changed_plotly_plotting_callback_fn
num_events: 5277
figure_sup_huge_title_text: <span style='font-weight:bold; font-size:12px;'>PBE | per_epoch | all_scores_ripple_df | None</span><br><span style='font-size:12px;'>None</span>
footer_text: "PBE|per_epoch|all_scores_ripple_df|all_sessions_all_scores_ripple_df - PBE Per Epoch"


failed to apply predicate "high_shuffle_percentile_score" to df: all_sessions_ripple_df
failed to apply predicate "high_shuffle_percentile_score" to df: all_sessions_ripple_time_bin_df
failed to apply predicate "high_shuffle_percentile_score" to df: all_sessions_MultiMeasure_ripple_df


In [25]:
df_filter.filtered_all_sessions_all_scores_ripple_df

Unnamed: 0,start,stop,label,duration,is_user_annotated_epoch,is_valid_epoch,session_name,delta_aligned_start_t,pre_post_delta_category,maze_id,P_LR,P_RL,P_Long,P_Short,P_Long_LR,congruent_dir_bins_ratio_long_LR,coverage_long_LR,direction_change_bin_ratio_long_LR,integral_second_derivative_long_LR,intercept_long_LR,jump_long_LR,longest_sequence_length_ratio_long_LR,pearsonr_long_LR,score_long_LR,speed_long_LR,stddev_of_diff_long_LR,total_congruent_direction_change_long_LR,total_variation_long_LR,travel_long_LR,velocity_long_LR,wcorr_long_LR,P_Long_RL,congruent_dir_bins_ratio_long_RL,coverage_long_RL,direction_change_bin_ratio_long_RL,integral_second_derivative_long_RL,intercept_long_RL,jump_long_RL,longest_sequence_length_ratio_long_RL,pearsonr_long_RL,score_long_RL,speed_long_RL,stddev_of_diff_long_RL,total_congruent_direction_change_long_RL,total_variation_long_RL,travel_long_RL,velocity_long_RL,wcorr_long_RL,P_Short_LR,congruent_dir_bins_ratio_short_LR,...,short_best_intercept,intercept_diff,long_best_speed,short_best_speed,speed_diff,long_best_wcorr,short_best_wcorr,wcorr_diff,long_best_pearsonr,short_best_pearsonr,pearsonr_diff,long_best_travel,short_best_travel,travel_diff,long_best_coverage,short_best_coverage,coverage_diff,long_best_jump,short_best_jump,jump_diff,long_best_longest_sequence_length_ratio,short_best_longest_sequence_length_ratio,longest_sequence_length_ratio_diff,long_best_direction_change_bin_ratio,short_best_direction_change_bin_ratio,direction_change_bin_ratio_diff,long_best_congruent_dir_bins_ratio,short_best_congruent_dir_bins_ratio,congruent_dir_bins_ratio_diff,long_best_total_congruent_direction_change,short_best_total_congruent_direction_change,total_congruent_direction_change_diff,long_best_total_variation,short_best_total_variation,total_variation_diff,long_best_integral_second_derivative,short_best_integral_second_derivative,integral_second_derivative_diff,long_best_stddev_of_diff,short_best_stddev_of_diff,stddev_of_diff_diff,time_bin_size,custom_replay_name,session_experience_rank,session_experience_orientation_rank,is_novel_exposure,is_filter_included,Long_BestDir_quantile,Short_BestDir_quantile,best_overall_quantile
1002,40.187177,40.488202,0,0.301025,,,kdiba_gor01_one_2006-6-08_14-26-15,-1171.370903,pre-delta,,0.664169,0.335831,0.544355,0.455645,0.361544,,,,,,,,0.129413,,,,,,,,0.214083,0.182811,,,,,,,,-0.360773,,,,,,,,-0.348438,0.302625,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,,,
1003,41.011916,41.359114,1,0.347197,,,kdiba_gor01_one_2006-6-08_14-26-15,-1170.546164,pre-delta,,,,,,,,,,,,,,-0.041734,,,,,,,,0.028805,,,,,,,,,-0.184740,,,,,,,,-0.416560,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,,,
1004,43.429702,43.489729,2,0.060027,,,kdiba_gor01_one_2006-6-08_14-26-15,-1168.128378,pre-delta,,,,,,,,,,,,,,0.158074,,,,,,,,-0.727450,,,,,,,,,-0.413563,,,,,,,,-0.484635,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,,,
1005,44.189868,44.279447,3,0.089579,,,kdiba_gor01_one_2006-6-08_14-26-15,-1167.368212,pre-delta,,,,,,,,,,,,,,0.063765,,,,,,,,0.185883,,,,,,,,,-0.587523,,,,,,,,-0.721453,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,,,
1006,44.588736,44.829950,4,0.241213,,,kdiba_gor01_one_2006-6-08_14-26-15,-1166.969344,pre-delta,,,,,,,,,,,,,,-0.281180,,,,,,,,-0.379002,,,,,,,,,-0.517863,,,,,,,,-0.216429,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24265,1429.924337,1430.429558,193,0.505221,,,kdiba_vvp01_two_2006-4-10_12-58-3,497.098131,post-delta,,0.492714,0.507286,0.318115,0.681885,0.156740,,,,,,,,,,,,,,,,,0.161376,,,,,,,,,,,,,,,,,0.335974,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,3,1,False,True,,,
24266,1435.942904,1436.334707,196,0.391803,,,kdiba_vvp01_two_2006-4-10_12-58-3,503.116698,post-delta,,0.392986,0.607014,0.467267,0.532733,0.183629,,,,,,,,-0.384258,,,,,,,,,0.283638,,,,,,,,-0.244800,,,,,,,,,0.209357,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,3,1,False,True,,,
24267,1437.783184,1437.959609,197,0.176425,,,kdiba_vvp01_two_2006-4-10_12-58-3,504.956978,post-delta,,0.683787,0.316213,0.572094,0.427906,0.391190,,,,,,,,,,,,,,,,,0.180904,,,,,,,,,,,,,,,,,0.292596,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,3,1,False,True,,,
24268,1445.197729,1445.522931,200,0.325202,,,kdiba_vvp01_two_2006-4-10_12-58-3,512.371523,post-delta,,,,,,,,,,,,,,0.650036,,,,,,,,,,,,,,,,,-0.459171,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,3,1,False,True,,,


In [None]:



def add_merged_complete_epoch_stats_df(df_filter, all_sessions_merged_complete_epoch_stats_df: pd.DataFrame):
    """ adds in the percentile score values to the dataframes 
    """

    # ==================================================================================================================== #
    # BEGIN FUNCTION BODY                                                                                                  #
    # ==================================================================================================================== #
    # paired_main_ripple_df: pd.DataFrame = df_filter.all_sessions_all_scores_ripple_df
    # paired_main_ripple_df: pd.DataFrame = df_filter.all_sessions_MultiMeasure_ripple_df
    # paired_main_ripple_df = _perform_add_merged_complete_epoch_stats_df(a_paired_main_ripple_df=paired_main_ripple_df, a_all_sessions_merged_complete_epoch_stats_df=all_sessions_merged_complete_epoch_stats_df)
    
    ## re-assign to the appopriate dataframe
    # df_filter.all_sessions_all_scores_ripple_df = paired_main_ripple_df
    # df_filter.all_sessions_MultiMeasure_ripple_df = paired_main_ripple_df
    
    df_filter.all_sessions_all_scores_ripple_df = _subfn_perform_add_merged_complete_epoch_stats_df(a_paired_main_ripple_df=df_filter.all_sessions_all_scores_ripple_df, a_all_sessions_merged_complete_epoch_stats_df=all_sessions_merged_complete_epoch_stats_df)
    # df_filter.all_sessions_MultiMeasure_ripple_df = _perform_add_merged_complete_epoch_stats_df(a_paired_main_ripple_df=df_filter.all_sessions_MultiMeasure_ripple_df, a_all_sessions_merged_complete_epoch_stats_df=all_sessions_merged_complete_epoch_stats_df)
    

add_merged_complete_epoch_stats_df(df_filter=df_filter, all_sessions_merged_complete_epoch_stats_df=all_sessions_merged_complete_epoch_stats_df)

In [None]:
df_filter.update_filters()

In [None]:
df_filter.update_filters()
# # assert np.size(df_filter.all_sessions_all_scores_ripple_df)[0] == np.size(filtered_temp)[0], f"np.size(df_filter.all_sessions_all_scores_ripple_df)[0]: {np.size(df_filter.all_sessions_all_scores_ripple_df)[0]} must equal np.size(filtered_temp)[0]: {np.size(filtered_temp)[0]}."
# # assert len(filtered_temp) == len(df_filter.all_sessions_all_scores_ripple_df), f"len(filtered_temp): {len(filtered_temp)}, len(df_filter.all_sessions_all_scores_ripple_df): {len(df_filter.all_sessions_all_scores_ripple_df)}"
# filtered_temp = pd.concat([df_filter.all_sessions_all_scores_ripple_df, filtered_temp], axis='columns')
# filtered_temp
# assert len(filtered_temp) == len(df_filter.all_sessions_all_scores_ripple_df), f"len(filtered_temp): {len(filtered_temp)}, len(df_filter.all_sessions_all_scores_ripple_df): {len(df_filter.all_sessions_all_scores_ripple_df)}" ## ensure they match after the fact

# ## re-assign
# df_filter.all_sessions_all_scores_ripple_df = deepcopy(filtered_temp)
# df_filter.all_sessions_all_scores_ripple_df
# df_filter.update_filtered_dataframes()

In [None]:
# df_filter.all_sessions_MultiMeasure_ripple_df
# fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(all_sessions_merged_complete_epoch_stats_relevant_df), is_dark_mode=False, should_save=False)
# new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='PBEs', data_grain='per_epoch', title_prefix="PBEs Per Epoch", dataframe_name='df'), concatenated_ripple_df=deepcopy(all_sessions_merged_complete_epoch_stats_relevant_df), variable_name='Long_BestDir_quantile')
new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='PBEs', data_grain='per_epoch', title_prefix="PBEs Per Epoch", dataframe_name='df'),
																									    concatenated_ripple_df=deepcopy(all_sessions_merged_complete_epoch_stats_relevant_df), variable_name='best_overall_quantile')
new_fig

### 2024-11-15 - Next Steps after shuffles

In [29]:
# high_percentile_score_df: pd.DataFrame = deepcopy(all_sessions_merged_complete_epoch_stats_relevant_df[all_sessions_merged_complete_epoch_stats_relevant_df['best_overall_quantile'] >= 0.9]) ## TODO: make a filter okay? Handle NaNs
# high_percentile_score_df: pd.DataFrame = deepcopy(df_filter.all_sessions_all_scores_ripple_df) # [df_filter.all_sessions_all_scores_ripple_df['best_overall_quantile'] >= 0.9] ## TODO: make a filter okay? Handle NaNs
# high_percentile_score_df: pd.DataFrame = deepcopy(df_filter.all_sessions_all_scores_ripple_df)[df_filter.all_sessions_all_scores_ripple_df['best_overall_quantile'] >= 0.9] ## TODO: make a filter okay? Handle NaNs
high_percentile_score_df: pd.DataFrame = deepcopy(df_filter.filtered_all_sessions_all_scores_ripple_df)[df_filter.filtered_all_sessions_all_scores_ripple_df['best_overall_quantile'] >= 0.9] ## TODO: make a filter okay? Handle NaNs
high_percentile_score_df
# df_filter.filtered_all_sessions_all_scores_ripple_df

# high_percentile_score_df['start']


Unnamed: 0,start,stop,label,duration,is_user_annotated_epoch,is_valid_epoch,session_name,delta_aligned_start_t,pre_post_delta_category,maze_id,P_LR,P_RL,P_Long,P_Short,P_Long_LR,congruent_dir_bins_ratio_long_LR,coverage_long_LR,direction_change_bin_ratio_long_LR,integral_second_derivative_long_LR,intercept_long_LR,jump_long_LR,longest_sequence_length_ratio_long_LR,pearsonr_long_LR,score_long_LR,speed_long_LR,stddev_of_diff_long_LR,total_congruent_direction_change_long_LR,total_variation_long_LR,travel_long_LR,velocity_long_LR,wcorr_long_LR,P_Long_RL,congruent_dir_bins_ratio_long_RL,coverage_long_RL,direction_change_bin_ratio_long_RL,integral_second_derivative_long_RL,intercept_long_RL,jump_long_RL,longest_sequence_length_ratio_long_RL,pearsonr_long_RL,score_long_RL,speed_long_RL,stddev_of_diff_long_RL,total_congruent_direction_change_long_RL,total_variation_long_RL,travel_long_RL,velocity_long_RL,wcorr_long_RL,P_Short_LR,congruent_dir_bins_ratio_short_LR,...,short_best_intercept,intercept_diff,long_best_speed,short_best_speed,speed_diff,long_best_wcorr,short_best_wcorr,wcorr_diff,long_best_pearsonr,short_best_pearsonr,pearsonr_diff,long_best_travel,short_best_travel,travel_diff,long_best_coverage,short_best_coverage,coverage_diff,long_best_jump,short_best_jump,jump_diff,long_best_longest_sequence_length_ratio,short_best_longest_sequence_length_ratio,longest_sequence_length_ratio_diff,long_best_direction_change_bin_ratio,short_best_direction_change_bin_ratio,direction_change_bin_ratio_diff,long_best_congruent_dir_bins_ratio,short_best_congruent_dir_bins_ratio,congruent_dir_bins_ratio_diff,long_best_total_congruent_direction_change,short_best_total_congruent_direction_change,total_congruent_direction_change_diff,long_best_total_variation,short_best_total_variation,total_variation_diff,long_best_integral_second_derivative,short_best_integral_second_derivative,integral_second_derivative_diff,long_best_stddev_of_diff,short_best_stddev_of_diff,stddev_of_diff_diff,time_bin_size,custom_replay_name,session_experience_rank,session_experience_orientation_rank,is_novel_exposure,is_filter_included,Long_BestDir_quantile,Short_BestDir_quantile,best_overall_quantile
1034,146.624765,146.741746,34,0.116982,,,kdiba_gor01_one_2006-6-08_14-26-15,-1064.933315,pre-delta,,,,,,,,,,,,,,-0.869983,,,,,,,,-0.323436,,,,,,,,,-0.826586,,,,,,,,-0.170160,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,0.996094,0.976562,0.996094
1042,172.619462,172.848480,42,0.229017,,,kdiba_gor01_one_2006-6-08_14-26-15,-1038.938618,pre-delta,,,,,,,,,,,,,,0.024051,,,,,,,,-0.290709,,,,,,,,,0.141592,,,,,,,,0.097829,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,0.883789,0.925781,0.925781
1048,188.797251,189.046114,48,0.248863,,,kdiba_gor01_one_2006-6-08_14-26-15,-1022.760829,pre-delta,,0.999878,0.000122,0.866134,0.133866,0.866028,,,,,,,,-0.165838,,,,,,,,0.011035,0.000106,,,,,,,,0.217475,,,,,,,,0.127308,0.133850,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,0.872070,0.925781,0.925781
1062,218.107403,218.506609,62,0.399206,,,kdiba_gor01_one_2006-6-08_14-26-15,-993.450677,pre-delta,,0.770004,0.229996,0.514188,0.485812,0.395927,,,,,,,,-0.000959,,,,,,,,-0.171656,0.118261,,,,,,,,0.289950,,,,,,,,0.440802,0.374077,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,0.953125,0.854492,0.953125
1077,306.009751,306.232778,78,0.223027,,,kdiba_gor01_one_2006-6-08_14-26-15,-905.548329,pre-delta,,0.633552,0.366448,0.548837,0.451163,0.347717,,,,,,,,0.167154,,,,,,,,0.298177,0.201120,,,,,,,,0.272604,,,,,,,,-0.035848,0.285835,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.025,withNormalComputedReplays-frateThresh_5.0-qclu...,2,1,False,True,0.983398,0.990234,0.990234
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19587,2640.086170,2640.625920,1155,0.539750,,,kdiba_pin01_one_fet11-01_12-58-54,582.860222,post-delta,,0.381356,0.618644,0.446054,0.553946,0.170105,,,,,,,,0.241009,,,,,,,,,0.275948,,,,,,,,-0.075753,,,,,,,,,0.211251,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,0,0,True,True,0.992188,0.954102,0.992188
19602,2775.125414,2775.277325,1197,0.151910,,,kdiba_pin01_one_fet11-01_12-58-54,717.899466,post-delta,,0.408134,0.591866,0.263112,0.736888,0.107385,,,,,,,,,,,,,,,,,0.155727,,,,,,,,,,,,,,,,,0.300749,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,0,0,True,True,0.963867,0.969727,0.969727
19617,2850.943257,2851.506508,1238,0.563251,,,kdiba_pin01_one_fet11-01_12-58-54,793.717309,post-delta,,0.398731,0.601269,0.452213,0.547787,0.180311,,,,,,,,,,,,,,,,,0.271902,,,,,,,,,,,,,,,,,0.218419,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,0,0,True,True,0.930664,0.944336,0.944336
19637,2945.068920,2945.482657,1294,0.413737,,,kdiba_pin01_one_fet11-01_12-58-54,887.842972,post-delta,,0.519432,0.480568,0.459393,0.540607,0.238623,,,,,,,,-0.433849,,,,,,,,,0.220769,,,,,,,,-0.374267,,,,,,,,,0.280809,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.058,withNormalComputedReplays-frateThresh_5.0-qclu...,0,0,True,True,0.861328,0.924805,0.924805


In [30]:

# new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='PBEs', data_grain='per_epoch', title_prefix="PBEs Per Epoch", dataframe_name='df'),
# 																									    concatenated_ripple_df=deepcopy(high_percentile_score_df), variable_name='best_overall_quantile')
new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='PBEs', data_grain='per_epoch', title_prefix="PBEs Per Epoch", dataframe_name='df'),
																									    concatenated_ripple_df=deepcopy(high_percentile_score_df), variable_name='P_Short')
new_fig



num_events: 265
figure_sup_huge_title_text: <span style='font-weight:bold; font-size:12px;'>PBEs | per_epoch | df | None</span><br><span style='font-size:12px;'>None</span>
footer_text: "PBEs|per_epoch|PBEs Per Epoch|df"


Box(children=(Label(value='.html', layout=Layout(width='auto')), HTML(value="<b style='font-size: smaller;'>K:…

Box(children=(Label(value='.png', layout=Layout(width='auto')), HTML(value="<b style='font-size: smaller;'>K:\…

In [None]:
## Sub-select for high-shuffle score events (0.9 or higher), use as a filter
## Plot these bad boys on the scatter

## Look at Long-like events on the filtered df that occur after delta, these should be the good examples of long-like after delta.
## Confirm that I'm shuffling appropriately for the 


In [None]:
# df_filter.all_sessions_MultiMeasure_ripple_df
fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_MultiMeasure_ripple_df), is_dark_mode=False, should_save=False)

In [None]:
fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_MultiMeasure_ripple_df), is_dark_mode=False, should_save=should_save)


In [None]:

n_events: int = len(df_filter.filtered_all_sessions_MultiMeasure_ripple_df)
df: pd.DataFrame = df_filter.filtered_all_sessions_MultiMeasure_ripple_df

df.attrs.update(**dict(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', dataframe_name='MultiMeasure_ripple_df', filter='user-annotated', n_events=f'{n_events} events',
                                            title_prefix=f"<b>user-annotated FILTERED</b> MultiMeasure_ripple_df - <b>PBE</b> <i>Per <b>Epoch</b></i>"),
                        ))


fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter_with_embedded_context(concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_MultiMeasure_ripple_df), is_dark_mode=False, should_save=should_save)

In [None]:
min_wcorr_threshold: float = 0.7

df_filter.additional_filter_predicates = {
    'high_wcorr': (lambda df: np.any((df[['long_best_wcorr', 'short_best_wcorr']].abs() > min_wcorr_threshold), axis=1)),
    # 'user_selected': lambda df: np.all((df[['is_user_annotated_epoch', 'is_valid_epoch']]), axis=1),
}
df_filter.update_filters()

# df_filter.update_filtered_dataframes
fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', dataframe_name='MultiMeasure_ripple_df', filter='high-wcorr', title_prefix="<b>high-wcorr FILTERED</b> MultiMeasure_ripple_df - <b>PBE</b> <i>Per <b>Epoch</b></i>"),
                                                                                                concatenated_ripple_df = deepcopy(df_filter.filtered_all_sessions_MultiMeasure_ripple_df), is_dark_mode=False)

In [None]:
df_filter.time_bin_size
df_filter.replay_name

filter_context = df_filter.filter_context # IdentifyingContext(time_bin_sizes=df_filter.time_bin_size, custom_suffix=df_filter.replay_name)
filter_context

_out_figs_dict = {}

## INPUTS: df_filter

# BEGIN CALL _________________________________________________________________________________________________________ #
new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='laps', data_grain='per_epoch', title_prefix="FILTERED Lap Per Epoch", dataframe_name='df', **filter_context.to_dict()), concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_laps_df))
_out_figs_dict[new_fig_context] = new_fig


new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context=IdentifyingContext(epochs_name='laps', data_grain='per_time_bin', title_prefix="FILTERED Lap Individual Time Bins", dataframe_name='time_bin_df', **filter_context.to_dict()), concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_laps_time_bin_df))
_out_figs_dict[new_fig_context] = new_fig


new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', title_prefix="FILTERED PBE Per Epoch", dataframe_name='df', **filter_context.to_dict()), concatenated_ripple_df = deepcopy(df_filter.filtered_all_sessions_ripple_df))
_out_figs_dict[new_fig_context] = new_fig

new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_time_bin', title_prefix="FILTERED PBE Individual Time Bins", dataframe_name='time_bin_df', **filter_context.to_dict()), concatenated_ripple_df = deepcopy(df_filter.filtered_all_sessions_ripple_time_bin_df))
_out_figs_dict[new_fig_context] = new_fig


new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', dataframe_name='all_scores_ripple_df', filter_status='filtered', title_prefix="FILTERED  - Filtered PBE Per Epoch"),
                                                                                                        concatenated_ripple_df = deepcopy(df_filter.filtered_all_sessions_all_scores_ripple_df))
_out_figs_dict[new_fig_context] = new_fig

# new_fig, new_fig_context, _extras_output_dict, figure_out_paths = _perform_plot_pre_post_delta_scatter(data_context = IdentifyingContext(epochs_name='PBE', data_grain='per_epoch', dataframe_name='all_scores_ripple_df', filter_status='filtered', title_prefix="filtered_all_sessions_all_scores_ripple_df - Filtered PBE Per Epoch"),
# 																										concatenated_ripple_df = deepcopy(df_filter.filtered_all_sessions_all_scores_ripple_df))
# _out_figs_dict[new_fig_context] = new_fig



# OUTPUTS: _out_figs_dict

In [None]:
## INPUTS: _out_figs_dict
all_img_list = []
laps_list = []
PBEs_list = []

for a_context, a_fig in _out_figs_dict.items():
    fig_img = figure_to_pil_image(a_fig=a_fig)
    epochs_name: str = a_context.epochs_name

    if 'laps' in epochs_name:
        laps_list.append(fig_img)
        
    if 'PBE' in epochs_name:
        PBEs_list.append(fig_img)
        
    
# all_img_list
## OUTPUTS: laps_list, PBEs_list

In [None]:

PBEs_img = vertical_image_stack(PBEs_list)
PBEs_img
copy_image_to_clipboard(combined_all_img)


In [None]:
laps_img = vertical_image_stack(laps_list)
laps_img
copy_image_to_clipboard(laps_img)


In [None]:
# # combined_all_img = horizontal_image_stack([figure_to_pil_image(a_fig=a_fig) for a_fig in [fig_laps, fig_ripples]])
# combined_all_img = vertical_image_stack([
# 	horizontal_image_stack([fig_laps, fig_ripples]),
# 	# horizontal_image_stack(PBEs_list)
# ])
# combined_all_img

copy_image_to_clipboard(combined_all_img)

In [None]:

# Example usage:
all_session_figures = plot_across_sessions_scatter_results(collected_outputs_directory, concatenated_laps_df=deepcopy(df_filter.filtered_all_sessions_laps_df), concatenated_ripple_df=deepcopy(df_filter.filtered_all_sessions_ripple_df),
                                                           enabled_time_bin_sizes=df_filter.time_bin_size,
                                                           earliest_delta_aligned_t_start=earliest_delta_aligned_t_start, latest_delta_aligned_t_end=latest_delta_aligned_t_end,
                                                           laps_title_prefix=f"FILTERED_Laps", ripple_title_prefix=f"FILTERED_Ripples", save_figures=True, figure_save_extension=['.html','.png'], is_dark_mode=is_dark_mode)
fig_laps, fig_ripples = all_session_figures[0]
# fig_laps.update_layout(fig_size_kwargs)
# fig_ripples.update_layout(fig_size_kwargs)

fig = fig_ripples


In [None]:
df_filter.time_bin_size
df_filter.replay_name

filter_context = df_filter.filter_context # IdentifyingContext(time_bin_sizes=df_filter.time_bin_size, custom_suffix=df_filter.replay_name)
filter_context

footer_text: str = filter_context.get_description(separator='|', subset_excludelist=['time_bin_sizes'])

print(f'footer_text: "{footer_text}"')

# Add footer annotation
fig = fig.update_layout(
    annotations=[
        dict(
            text=footer_text,
            **{'x': 0.5, 'y': -0.19, 'xref': 'paper', 'yref': 'paper', 'showarrow': False, 'font': {'size': 11, 'color': 'gray'}, 'textangle': 0, 'xanchor': 'center', 'yanchor': 'middle'},
        )
    ],
    margin=dict(b=60)  # Increase bottom margin to accommodate the footer
)


# fig_laps.show()
# fig_ripples.show()
# fig_laps.write_html(f"../output/{TODAY_DAY_DATE}_AcrossSession_fig_laps.html")
# fig_ripples.write_html(f"../output/{TODAY_DAY_DATE}_AcrossSession_fig_ripples.html")
full_fig = fig.full_figure_for_development()
full_fig.to_json(collected_outputs_directory.joinpath('figures/11-Sessions_filtered_ripples_marginal.json'))


In [None]:
# variable_name = 'short_best_direction_change_bin_ratio'
# variable_name = 'long_best_congruent_dir_bins_ratio'
variable_name = 'short_best_wcorr'
# 'color':'is_user_annotated_epoch'
# 'color': 'is_user_annotated_epoch', 
px_scatter_kwargs = {'x': 'delta_aligned_start_t', 'y': variable_name, 'title': f"'{variable_name}'"} # , 'color': 'time_bin_size', 'range_y': [-1.0, 1.0], , 'labels': {'session_name': 'Session', 'time_bin_size': 'tbin_size', 'is_user_annotated_epoch':'user_sel'}
# hist_kwargs = dict(color="time_bin_size")
hist_kwargs = dict(color="pre_post_delta_category") # , histnorm='probability density'
new_fig_ripples, figure_context = plotly_pre_post_delta_scatter(data_results_df=concatenated_ripple_df, out_scatter_fig=None, histogram_bins=histogram_bins,
                        px_scatter_kwargs=px_scatter_kwargs, histogram_variable_name=variable_name, hist_kwargs=hist_kwargs, forced_range_y=None,
                        time_delta_tuple=(earliest_delta_aligned_t_start, 0.0, latest_delta_aligned_t_end), is_dark_mode=is_dark_mode)
_extras_output_dict = {}
_extras_output_dict["y_mid_line"] = new_fig_ripples.add_hline(y=0.0, line=dict(color="rgba(0.8,0.8,0.8,.75)", width=2), row='all', col='all')
new_fig_ripples.update_layout(fig_size_kwargs)
new_fig_ripples.show()
figure_out_paths = save_plotly(a_fig=new_fig_ripples, a_fig_context=figure_context)


In [None]:
# variable_name = 'total_congruent_direction_change_diff'
# variable_name = 'long_best_congruent_dir_bins_ratio'
# variable_name = 'long_best_total_congruent_direction_change'
variable_name = 'wcorr_diff'
# variable_name = 'long_best_wcorr'
# 'color':'is_user_annotated_epoch'
# 'color': 'is_user_annotated_epoch', 
px_scatter_kwargs = {'x': 'delta_aligned_start_t', 'y': variable_name, 'color':"is_user_annotated_epoch", 'title': f"'{variable_name}'", 'labels': {'session_name': 'Session', 'time_bin_size': 'tbin_size', 'is_user_annotated_epoch':'user_sel'}} # , 'color': 'time_bin_size', 'range_y': [-1.0, 1.0], 
# hist_kwargs = dict(color="time_bin_size")
hist_kwargs = dict(color="is_user_annotated_epoch") # , histnorm='probability density'
new_fig_ripples, figure_context = plotly_pre_post_delta_scatter(data_results_df=concatenated_ripple_df, out_scatter_fig=None, histogram_bins=histogram_bins,
                        px_scatter_kwargs=px_scatter_kwargs, histogram_variable_name=variable_name, hist_kwargs=hist_kwargs, forced_range_y=None,
                        time_delta_tuple=(earliest_delta_aligned_t_start, 0.0, latest_delta_aligned_t_end), is_dark_mode=is_dark_mode)
_extras_output_dict = {}
_extras_output_dict["y_mid_line"] = new_fig_ripples.add_hline(y=0.0, line=dict(color="rgba(0.8,0.8,0.8,.75)", width=2), row='all', col='all')
new_fig_ripples.update_layout(fig_size_kwargs)
new_fig_ripples.show()

figure_context = figure_context.adding_context_if_missing(num_sessions=num_sessions, plot_type='scatter+hist', comparison='pre-post-delta', variable_name=variable_name)
figure_out_paths = save_plotly(a_fig=new_fig_ripples, a_fig_context=figure_context)


## Stats Tests

In [None]:
from pyphoplacecellanalysis.Pho2D.statistics_plotting_helpers import _perform_stats_tests

stats_variable_name: str = 'P_Short'
# stats_variable_name: str = 'short_best_direction_change_bin_ratio'
# stats_variable_name: str = 'short_best_wcorr'

shuffle_results, p_value, f_value, (dof1, dof2), (variance1, variance2) = _perform_stats_tests(deepcopy(concatenated_ripple_df), stats_variable_name=stats_variable_name)


In [None]:
# Show that wcorr in both periods is higher than shuffles

stats_variable_name: str = 'P_Short'
stats_variable_name = 'short_best_wcorr'
# stats_variable_name = 'long_best_wcorr'
# stats_variable_name = 'long_best_wcorr'



shuffle_results, p_value, f_value, (dof1, dof2), (variance1, variance2) = _perform_stats_tests(deepcopy(concatenated_ripple_df), stats_variable_name=stats_variable_name)


# stats_variable_name: "short_best_wcorr" -- actual_diff_means: -0.00983910691641765
# stats_variable_name: short_best_wcorr
# Statistics=72308.00, p=0.73
# Do not Reject Null Hypothesis (No significant difference between two samples)
# Variance 1: 0.1395112660465373
# Variance 2: 0.18436187114204847
# Degree of freedom 1: 395
# Degree of freedom 2: 359
# F-statistic: 0.756725157877388
# p-value: 0.003419223265796241

# stats_variable_name: "long_best_wcorr" -- actual_diff_means: -0.0028337579937901397
# stats_variable_name: long_best_wcorr
# Statistics=71529.00, p=0.93
# Do not Reject Null Hypothesis (No significant difference between two samples)
# Variance 1: 0.1659575407149896
# Variance 2: 0.20687539745971859
# Degree of freedom 1: 395
# Degree of freedom 2: 359
# F-statistic: 0.8022101359215698
# p-value: 0.016221081810852238

In [None]:
## User non-selected:
scatter_title = f'user_approved_ripple_df Several Sessions {variable_name}'
# variable_name = 'wcorr_abs_diff'
px_scatter_kwargs = {'x': 'delta_aligned_start_t', 'y': variable_name, 'title': scatter_title, 'range_y': [0.0, 1.0], 'labels': {'session_name': 'Session', 'time_bin_size': 'tbin_size'}} # , 'color': 'time_bin_size'
new_fig_ripples, figure_context = plotly_pre_post_delta_scatter(data_results_df=deepcopy(user_approved_ripple_df), out_scatter_fig=None, histogram_bins=histogram_bins,
                        px_scatter_kwargs=px_scatter_kwargs, histogram_variable_name=variable_name, forced_range_y=None,
                        time_delta_tuple=(earliest_delta_aligned_t_start, 0.0, latest_delta_aligned_t_end), is_dark_mode=is_dark_mode)
_extras_output_dict["y_mid_line"] = new_fig_ripples.add_hline(y=0.5, line=dict(color="rgba(0.8,0.8,0.8,.75)", width=2), row='all', col='all')
new_fig_ripples.update_layout(fig_size_kwargs)
new_fig_ripples

In [None]:
# IDEA: The ones with clear replays (diagonal sequences in the decoded posteriors) are by definiition ambiguous, because there's not much difference between the long/short decoders.


In [None]:
## User non-selected:
scatter_title = f'Non-selected Several Sessions {variable_name}'
# variable_name = 'wcorr_abs_diff'
px_scatter_kwargs = {'x': 'delta_aligned_start_t', 'y': variable_name, 'title': scatter_title, 'range_y': [0.0, 1.0], 'labels': {'session_name': 'Session', 'time_bin_size': 'tbin_size'}} # , 'color': 'time_bin_size'
new_fig_ripples, figure_context = plotly_pre_post_delta_scatter(data_results_df=deepcopy(user_rejected_ripple_df), out_scatter_fig=None, histogram_bins=histogram_bins,
                        px_scatter_kwargs=px_scatter_kwargs, histogram_variable_name=variable_name, forced_range_y=None,
                        time_delta_tuple=(earliest_delta_aligned_t_start, 0.0, latest_delta_aligned_t_end))
new_fig_ripples.update_layout(fig_size_kwargs)
new_fig_ripples


In [None]:
# Laps test
concatenated_ripple_df = deepcopy(all_sessions_MultiMeasure_laps_df)

scatter_title = 'Several Sessions'
variable_name = 'wcorr_abs_diff'
px_scatter_kwargs = {'x': 'delta_aligned_start_t', 'y': variable_name, 'title': scatter_title, 'range_y': [0.0, 1.0], 'labels': {'session_name': 'Session', 'time_bin_size': 'tbin_size'}} 
new_fig_ripples, figure_context = plotly_pre_post_delta_scatter(data_results_df=deepcopy(concatenated_ripple_df), out_scatter_fig=None, histogram_bins=histogram_bins,
                        px_scatter_kwargs=px_scatter_kwargs, histogram_variable_name=variable_name, forced_range_y=None,
                        time_delta_tuple=(earliest_delta_aligned_t_start, 0.0, latest_delta_aligned_t_end))
new_fig_ripples.update_layout(fig_size_kwargs)
new_fig_ripples


In [None]:
num_unique_sessions: int = len(all_sessions_laps_df['session_name'].unique())
num_unique_sessions


In [None]:
num_unique_sessions: int = len(all_sessions_ripple_df['session_name'].unique())
num_unique_sessions

# Plotting functions

In [None]:
from pyphoplacecellanalysis.General.Model.Configs.LongShortDisplayConfig import PlottingHelpers
# from pyphoplacecellanalysis.SpecificResults.AcrossSessionResults import plot_across_sessions_scatter_results

# Example usage:
all_session_figures = plot_across_sessions_scatter_results(collected_outputs_directory, concatenated_laps_df=all_sessions_laps_df, concatenated_ripple_df=all_sessions_ripple_df,
                                                           enabled_time_bin_sizes=[0.03, 0.10],
                                                           earliest_delta_aligned_t_start=earliest_delta_aligned_t_start, latest_delta_aligned_t_end=latest_delta_aligned_t_end,
                                                           laps_title_prefix=f"Laps", ripple_title_prefix=f"Ripples", save_figures=True, figure_save_extension=['.html','.png'], is_dark_mode=is_dark_mode)
fig_laps, fig_ripples = all_session_figures[0]
# fig_laps.update_layout(fig_size_kwargs)
# fig_ripples.update_layout(fig_size_kwargs)

# fig_laps.show()
fig_ripples.show()
# fig_laps.write_html(f"../output/{TODAY_DAY_DATE}_AcrossSession_fig_laps.html")
# fig_ripples.write_html(f"../output/{TODAY_DAY_DATE}_AcrossSession_fig_ripples.html")


In [None]:
figure_context = figure_context.adding_context_if_missing(num_sessions=num_sessions, plot_type='scatter+hist', comparison='pre-post-delta', variable_name=variable_name)



In [None]:
fig_to_clipboard(fig_ripples)

In [None]:
## time_bin version:
all_time_bin_session_figures = plot_across_sessions_scatter_results(collected_outputs_directory, concatenated_laps_df=all_sessions_laps_time_bin_df, concatenated_ripple_df=all_sessions_ripple_time_bin_df,
                                                        #    enabled_time_bin_sizes=[0.03, 0.10],
                                                           earliest_delta_aligned_t_start=earliest_delta_aligned_t_start, latest_delta_aligned_t_end=latest_delta_aligned_t_end,
                                                           main_plot_mode='separate_row_per_session',
                                                           laps_title_prefix=f"Laps_per_time_bin", ripple_title_prefix=f"Ripples_per_time_bin", save_figures=True, figure_save_extension=['.html','.png'], is_dark_mode=is_dark_mode)
fig_time_bin_laps, fig_time_bin_ripples = all_time_bin_session_figures[0]
# fig_time_bin_laps.show()
fig_time_bin_ripples.show()

In [None]:
from pyphoplacecellanalysis.Pho2D.plotly.Extensions.plotly_helpers import plot_across_sessions_scatter_results

## Test collapsed histograms-only results:
histograms_only_all_time_bin_session_figures = plot_across_sessions_scatter_results(collected_outputs_directory, concatenated_laps_df=all_sessions_laps_time_bin_df, concatenated_ripple_df=all_sessions_ripple_time_bin_df,
                                                        #    enabled_time_bin_sizes=[0.03, 0.10],
                                                            # enabled_time_bin_sizes=[0.03, 0.058, 0.10], # [0.03 , 0.044, 0.058, 0.072, 0.086, 0.1]
                                                           earliest_delta_aligned_t_start=earliest_delta_aligned_t_start, latest_delta_aligned_t_end=latest_delta_aligned_t_end,
                                                           main_plot_mode='default',
                                                           laps_title_prefix=f"Laps_per_time_bin", ripple_title_prefix=f"Ripples_per_time_bin", save_figures=False, figure_save_extension=['.html','.png'])
histograms_only_fig_time_bin_laps, histograms_only_fig_time_bin_ripples = histograms_only_all_time_bin_session_figures[0]
# histograms_only_fig_time_bin_laps.show()
histograms_only_fig_time_bin_ripples.show()

# Matplotlib-based versions:

In [15]:
from pyphoplacecellanalysis.Pho2D.statistics_plotting_helpers import plot_histograms_across_sessions, plot_stacked_histograms
from pyphoplacecellanalysis.SpecificResults.PhoDiba2023Paper import _perform_dual_hist_plot
matplotlib.use('Qt5Agg')


_perform_dual_hist_plot = partial(
    _perform_dual_hist_plot,
    # time_delta_tuple=(earliest_delta_aligned_t_start, 0.0, latest_delta_aligned_t_end),
    legend_groups_to_solo=[0.025, 0.058], legend_groups_to_hide=None,
    # legend_groups_to_solo=None, legend_groups_to_hide=[0.03, 0.0444, 0.05],
)



## 2024-09-27 - Really goood Matplotlib plotting code
![image.png](attachment:image.png)

In [None]:
matplotlib.use('Qt5Agg')
# grainularity_desc: str = 'by-time-bin'
# laps_df: pd.DataFrame = all_sessions_laps_time_bin_df
# ripple_df: pd.DataFrame = all_sessions_ripple_time_bin_df
_out_figs_dict = {}
_laps_histogram_out, _ripple_histogram_out = _perform_dual_hist_plot(grainularity_desc='by-time-bin', laps_df=all_sessions_laps_time_bin_df, ripple_df=all_sessions_ripple_time_bin_df,
                                                                    #   legend_groups_to_solo=[0.025, 0.058], legend_groups_to_hide=None,
                                                                    #   legend_groups_to_solo=None, legend_groups_to_hide=[0.03, 0.0444, 0.05],
                                                                      )

_out_figs_dict[_laps_histogram_out.context.descriptor_str] = _laps_histogram_out.figures[0]
_out_figs_dict[_ripple_histogram_out.context.descriptor_str] = _ripple_histogram_out.figures[0]

# grainularity_desc: str = 'by-epoch'
# laps_df: pd.DataFrame = all_sessions_laps_df
# ripple_df: pd.DataFrame = all_sessions_ripple_df
_laps_histogram_out, _ripple_histogram_out = _perform_dual_hist_plot(grainularity_desc='by-epoch', laps_df=all_sessions_laps_df, ripple_df=all_sessions_ripple_df)
_out_figs_dict[_laps_histogram_out.context.descriptor_str] = _laps_histogram_out.figures[0]
_out_figs_dict[_ripple_histogram_out.context.descriptor_str] = _ripple_histogram_out.figures[0]
_out_figs_dict

In [None]:

# display(_laps_histogram_out)
# display(_ripple_histogram_out)
fig_to_clipboard(_laps_histogram_out.figures[0], bbox_inches='tight')


In [None]:
fig_to_clipboard(_ripple_histogram_out.figures[0], bbox_inches='tight')

In [None]:

fig_to_clipboard(_laps_histogram_out.figures[0], bbox_inches='tight')

In [None]:
fig_to_clipboard(_ripple_histogram_out.figures[0], bbox_inches='tight')

In [None]:
from pyphocorehelpers.plotting.media_output_helpers import figure_to_pil_image, vertical_image_stack, horizontal_image_stack, image_grid

# fig_img = figure_to_pil_image(a_fig=_ripple_histogram_out.figures[0])
# fig_img

all_img_list = []
laps_list = []
PBEs_list = []

for a_key, a_fig in _out_figs_dict.items():
    fig_img = figure_to_pil_image(a_fig=a_fig)

    if 'Laps' in a_key:
        laps_list.append(fig_img)
        
    if 'PBEs' in a_key:
        PBEs_list.append(fig_img)
        
    all_img_list.append(fig_img)
    
# all_img_list

# vertical_image_stack(laps_list)
combined_all_img = vertical_image_stack([
    horizontal_image_stack(laps_list),
    horizontal_image_stack(PBEs_list)
])
combined_all_img

copy_image_to_clipboard(combined_all_img)