In [1]:
%config IPCompleter.use_jedi = False
%pdb off
%load_ext autoreload
%autoreload 3
import sys
from copy import deepcopy
from typing import List, Dict, Tuple, Optional, Union, Callable
import pandas as pd
import re
from pathlib import Path
from datetime import datetime
import plotly.express as px

def find_csv_files(directory: str):
    """Recursively collect every ``.csv`` file located under `directory`.

    Args:
        directory: Path (as a string) of the folder to search; its subfolders are searched too.

    Returns:
        A list of `Path` objects, one per matching file, in the order the filesystem walk yields them.
    """
    search_root = Path(directory)
    # `rglob(pattern)` is the recursive form of glob, i.e. equivalent to `glob('**/' + pattern)`.
    return [csv_path for csv_path in search_root.rglob('*.csv')]

# found_session_export_paths = [Path(v).resolve() for v in  ["C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0420PM-kdiba_pin01_one_fet11-01_12-58-54-(laps_marginals_df).csv",
# "C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0420PM-kdiba_pin01_one_fet11-01_12-58-54-(ripple_marginals_df).csv",
# "C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0645PM-kdiba_pin01_one_fet11-01_12-58-54-(laps_marginals_df).csv",
# "C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0645PM-kdiba_pin01_one_fet11-01_12-58-54-(ripple_marginals_df).csv",
# "C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0828PM-kdiba_pin01_one_fet11-01_12-58-54-(laps_marginals_df).csv",
# "C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0828PM-kdiba_pin01_one_fet11-01_12-58-54-(ripple_marginals_df).csv",
# ]]

def parse_filename(path: Path, debug_print:bool=False) -> Tuple[datetime, str, str]:
    """Split an exported CSV filename into its export timestamp, session name, and export type.

    Two filename layouts are recognised (the file extension is ignored):

        "<date>_<HHMM><AM|PM>-<session_str>-(<export_file_type>)"
            e.g. "2024-01-12_0420PM-kdiba_pin01_one_fet11-01_12-58-54-(laps_marginals_df).csv"
              -> (datetime(2024, 1, 12, 16, 20), "kdiba_pin01_one_fet11-01_12-58-54", "laps_marginals_df")

        "<YYYY-MM-DD>-<session_str>-(<export_file_type>)"   (date only, no time of day)
            e.g. "2024-01-04-kdiba_gor01_one_2006-6-08_14-26-(laps_marginals_df).csv"

    The result is intended for picking the most recent export per (session, file type) by
    comparing the returned `export_datetime`.

    Args:
        path: Path of the exported file; only its stem (name without extension) is inspected.
        debug_print: When True, prints a note whenever the date+time layout did not match.

    Returns:
        `(export_datetime, session_str, export_file_type)`, where `export_file_type` has its
        surrounding parentheses removed.

    Raises:
        ValueError: if the filename matches neither layout, or its timestamp is not a valid date/time.
    """
    filename = path.stem   # Get filename without extension

    # The timestamp group is spelled out explicitly (instead of a greedy `.*` prefix) and the
    # meridiem is restricted to AM/PM, so malformed names are rejected here with a clear error
    # rather than surfacing later as an opaque `strptime` failure.
    datetime_pattern = r"(\d{4}-\d{1,2}-\d{1,2}_\d{4}[AP]M)-(.*)-(\(.+\))"
    day_date_only_pattern = r"(\d{4}-\d{2}-\d{2})-(.*)-(\(.+\))" # e.g. '2024-01-04-kdiba_gor01_one_2006-6-08_14-26-(...)'

    match = re.match(datetime_pattern, filename)
    if match is not None:
        datetime_format = "%Y-%m-%d_%I%M%p"
    else:
        if debug_print:
            print('did not match pattern with time.')
        match = re.match(day_date_only_pattern, filename)
        if match is None:
            raise ValueError(f'Could not parse filename: {filename}')
        datetime_format = "%Y-%m-%d"

    export_datetime_str, session_str, export_file_type = match.groups()
    try:
        # parse the datetime from the export_datetime_str and convert it to datetime object
        export_datetime = datetime.strptime(export_datetime_str, datetime_format)
    except ValueError as err:
        # The digits matched the layout but do not form a real date/time (e.g. hour "13" with a 12-hour clock).
        raise ValueError(f'Could not parse filename: {filename}') from err

    # Trim the brackets from the file type
    export_file_type = export_file_type[1:-1]
    return export_datetime, session_str, export_file_type


# Notebook-wide verbosity flag: later cells check it before printing the parsed paths and the session being processed.
debug_print: bool = False

Automatic pdb calling has been turned OFF


In [2]:

## BEGIN
# Locate every exported CSV, keep only the most recent laps/ripple export for each session,
# load them, and concatenate them into one laps DataFrame and one ripple DataFrame.
csv_files = find_csv_files("C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs")
found_session_export_paths = csv_files

# Parse all filenames into (export_datetime, session_str, file_type, path) tuples.
# The search above is recursive, so CSVs that do not follow the export naming scheme may be
# present; those are reported and skipped instead of aborting the whole cell.
parsed_paths = []
for p in found_session_export_paths:
    try:
        parsed_paths.append((*parse_filename(p), p))
    except ValueError as err:
        print(f'WARNING: skipping file with unparseable name "{p}": {err}')
# sort the list by datetime, descending
parsed_paths.sort(reverse=True)

if debug_print:
    print(f'parsed_paths: {parsed_paths}')

sessions = {}
for export_datetime, session_str, file_type, path in parsed_paths:
    if session_str not in sessions:
        sessions[session_str] = {}

    # if this file type hasn't been set for this session, or if this file is more recent, set this file as the most recent
    if (file_type not in sessions[session_str]) or (sessions[session_str][file_type][-1] < export_datetime):
        sessions[session_str][file_type] = (path, export_datetime)

# now sessions is a dictionary where the key is the session_str and the value is another dictionary.
# This inner dictionary's key is the file type and the value is the (path, export_datetime) of the most recent export for this combination of session and file type
# Thus, laps_csv and ripple_csv can be obtained from the dictionary for each session

# Both export types are needed to load a session; sessions lacking either one are reported and skipped.
required_file_types = ('laps_marginals_df', 'ripple_marginals_df')

final_sessions = {}
final_sessions_loaded_laps_dict = {}
final_sessions_loaded_ripple_dict = {}
for session_str, session_dict in sessions.items():
    final_sessions[session_str] = {}
    for file_type, (a_path, an_export_datetime) in session_dict.items():
        final_sessions[session_str][file_type] = a_path

    session_name = str(session_str)  # Extract session name from the filename
    if debug_print:
        print(f'processing session_name: {session_name}')

    missing_file_types = [a_type for a_type in required_file_types if a_type not in final_sessions[session_str]]
    if missing_file_types:
        print(f'WARNING: session "{session_name}" has no export for {missing_file_types}; skipping it.')
        continue

    laps_file = final_sessions[session_str]['laps_marginals_df']
    ripple_file = final_sessions[session_str]['ripple_marginals_df']

    laps_df = pd.read_csv(laps_file)
    laps_df['session_name'] = session_name # fixed session name for all rows in df
    final_sessions_loaded_laps_dict[session_str] = laps_df

    ripple_df = pd.read_csv(ripple_file)
    ripple_df['session_name'] = session_name # fixed session name for all rows in df
    final_sessions_loaded_ripple_dict[session_str] = ripple_df

# Example of the resulting `final_sessions` mapping:
# {'kdiba_gor01_one_2006-6-08_14-26-15': {'ripple_marginals_df': WindowsPath('C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-17_0540PM-kdiba_gor01_one_2006-6-08_14-26-15-(ripple_marginals_df).csv'),
#   'laps_marginals_df': WindowsPath('C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-17_0540PM-kdiba_gor01_one_2006-6-08_14-26-15-(laps_marginals_df).csv')},
#  'kdiba_gor01_one_2006-6-09_1-22-43': {'ripple_marginals_df': WindowsPath('C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0838PM-kdiba_gor01_one_2006-6-09_1-22-43-(ripple_marginals_df).csv'),
#   'laps_marginals_df': WindowsPath('C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0838PM-kdiba_gor01_one_2006-6-09_1-22-43-(laps_marginals_df).csv')},
#  'kdiba_pin01_one_fet11-01_12-58-54': {'ripple_marginals_df': WindowsPath('C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0828PM-kdiba_pin01_one_fet11-01_12-58-54-(ripple_marginals_df).csv'),
#   'laps_marginals_df': WindowsPath('C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs/2024-01-12_0828PM-kdiba_pin01_one_fet11-01_12-58-54-(laps_marginals_df).csv')}}

# NOTE: `pd.concat` raises ValueError when given an empty list, i.e. when no complete session was found.
all_sessions_laps_df: pd.DataFrame = pd.concat(list(final_sessions_loaded_laps_dict.values()), axis='index', ignore_index=True)
all_sessions_ripple_df: pd.DataFrame = pd.concat(list(final_sessions_loaded_ripple_dict.values()), axis='index', ignore_index=True)
all_sessions_laps_df

Unnamed: 0.1,Unnamed: 0,P_LR,P_RL,P_Long,P_Short,lap_idx,lap_start_t,session_name
0,0,0.159733,0.840267,0.785922,0.214078,0,5.635867,kdiba_gor01_one_2006-6-08_14-26-15
1,1,0.812745,0.187255,0.892090,0.107910,1,31.862536,kdiba_gor01_one_2006-6-08_14-26-15
2,2,0.155300,0.844700,0.808182,0.191818,2,135.801698,kdiba_gor01_one_2006-6-08_14-26-15
3,3,0.868285,0.131715,0.894672,0.105328,3,161.458825,kdiba_gor01_one_2006-6-08_14-26-15
4,4,0.211556,0.788444,0.673036,0.326964,4,234.465983,kdiba_gor01_one_2006-6-08_14-26-15
...,...,...,...,...,...,...,...,...
1241,73,0.134574,0.865426,0.192422,0.807578,73,1131.634107,kdiba_gor01_two_2006-6-08_21-16-25
1242,74,0.894798,0.105202,0.309724,0.690276,74,1143.011285,kdiba_gor01_two_2006-6-08_21-16-25
1243,75,0.120073,0.879927,0.267458,0.732542,75,1157.693675,kdiba_gor01_two_2006-6-08_21-16-25
1244,76,0.807875,0.192125,0.332365,0.667635,76,1164.367187,kdiba_gor01_two_2006-6-08_21-16-25


In [5]:
def _resolve_session_label(*dataframes) -> str:
    """Return the one session name shared by all frames, or 'AcrossSession' when there are several (or none)."""
    found_names = {}  # dict used as an insertion-ordered set
    for a_df in dataframes:
        if 'session_name' in a_df.columns:
            found_names.update(dict.fromkeys(str(a_name) for a_name in a_df['session_name'].unique()))
    if len(found_names) == 1:
        return next(iter(found_names))
    return 'AcrossSession'


def plot_across_sessions_results(directory, concatenated_laps_df, concatenated_ripple_df, save_figures=False, figure_save_extension='.png', session_name: Optional[str]=None):
    """Build the laps and ripples scatter figures for the concatenated across-session marginals.

    Takes the directory containing the .csv pairs that were exported by `export_marginals_df_csv`
    and, when `save_figures` is True, saves the figures to the f'{directory}/figures/' subfolder.

    Args:
        directory: Folder containing the exported CSVs (str or Path); must already exist.
        concatenated_laps_df: DataFrame providing the 'lap_start_t', 'P_Long' and 'session_name' columns.
        concatenated_ripple_df: DataFrame providing the 'ripple_start_t', 'P_Long' and 'session_name' columns.
        save_figures: When True, each figure is also written as an image into the 'figures' subfolder.
        figure_save_extension: File extension (including the dot) used for the saved images.
        session_name: Label used in the figure titles and saved filenames. When None it is derived
            from the 'session_name' columns: the session's own name if the data holds exactly one
            session, otherwise 'AcrossSession'. (Previously this was read from an unrelated global.)

    Returns:
        A list holding a single `(fig_laps, fig_ripples)` tuple.

    Raises:
        AssertionError: if `directory` does not exist (or the figures folder could not be created).
    """
    if not isinstance(directory, Path):
        directory = Path(directory).resolve()
    assert directory.exists()
    print(f'plot_across_sessions_results(directory: {directory})')

    if session_name is None:
        session_name = _resolve_session_label(concatenated_laps_df, concatenated_ripple_df)

    if save_figures:
        # Create a 'figures' subfolder if it doesn't exist
        figures_folder = Path(directory, 'figures')
        figures_folder.mkdir(parents=False, exist_ok=True)
        assert figures_folder.exists()
        print(f'\tfigures_folder: {figures_folder}')

    # Create a scatter chart for laps, one colour per session
    fig_laps = px.scatter(concatenated_laps_df, x='lap_start_t', y='P_Long', title=f"Laps - Session: {session_name}", color='session_name')
    # Create a scatter chart for ripples, one colour per session
    fig_ripples = px.scatter(concatenated_ripple_df, x='ripple_start_t', y='P_Long', title=f"Ripples - Session: {session_name}", color='session_name')

    if save_figures:
        # Save the figures to the 'figures' subfolder
        # NOTE(review): static image export presumably needs plotly's image engine (e.g. kaleido) installed - confirm.
        print('\tsaving figures...')
        fig_laps_name = Path(figures_folder, f"{session_name}_laps_marginal{figure_save_extension}").resolve()
        print(f'\tsaving "{fig_laps_name}"...')
        fig_laps.write_image(fig_laps_name)
        fig_ripple_name = Path(figures_folder, f"{session_name}_ripples_marginal{figure_save_extension}").resolve()
        print(f'\tsaving "{fig_ripple_name}"...')
        fig_ripples.write_image(fig_ripple_name)

    # Callers iterate over (fig_laps, fig_ripples) pairs, so the single pair is returned inside a list.
    return [(fig_laps, fig_ripples)]

# Driver: build the across-session figures from the concatenated DataFrames, then display and export them.
# Alternative export folders used previously:
# directory = '/home/halechr/FastData/collected_outputs/'
# directory = r'C:\Users\pho\Desktop\collected_outputs'
directory = r'C:/Users/pho/repos/Spike3DWorkEnv/Spike3D/output/collected_outputs'
all_session_figures = plot_across_sessions_results(
    directory,
    concatenated_laps_df=all_sessions_laps_df,
    concatenated_ripple_df=all_sessions_ripple_df,
    save_figures=False,
)

# Each entry is a (laps figure, ripples figure) pair: show both inline, then write each out as standalone HTML.
for figure_pair in all_session_figures:
    fig_laps, fig_ripples = figure_pair
    fig_laps.show()
    fig_ripples.show()
    fig_laps.write_html("../output/2024-01-18_AcrossSession_fig_laps.html")
    fig_ripples.write_html("../output/2024-01-18_AcrossSession_fig_ripples.html")


plot_across_sessions_results(directory: C:\Users\pho\repos\Spike3DWorkEnv\Spike3D\output\collected_outputs)
