# Prototype code for summary analysis of sparsely labeled data

* 12/14/2024 Setup

In [None]:
import metadata
from os.path import join, exists
from os import walk
from glob import glob

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def find_cell_folders(data_folder: str) -> list:
    """
    Collect every directory below a data folder whose name starts with 'cell'.

    The whole tree under ``data_folder`` is walked, so matching directories at
    any depth are reported (including ones nested inside another match).

    Parameters:
    - data_folder (str): Root folder to search.

    Returns:
    - list: Paths (root joined with directory name) of the matching
      directories. Empty when ``data_folder`` does not exist, in which case a
      message is printed as well.
    """
    if exists(data_folder):
        return [
            join(parent, name)
            for parent, subdirs, _ in walk(data_folder)
            for name in subdirs
            if name.startswith('cell')
        ]

    print(f"Path does not exist: {data_folder}")
    return []

def find_matching_files(folder: str, pattern: str = "pearson_awake_NREM_*.csv") -> list:
    """
    Finds files in a given folder that match a specific pattern.
    By default, matches files that start with 'pearson_awake_NREM_' and end with '.csv'.

    The folder path is taken literally: glob metacharacters in ``folder``
    (such as '[', ']', '*' or '?') are escaped, so only ``pattern`` is
    interpreted as a glob expression.

    Parameters:
    - folder (str): The folder to search in.
    - pattern (str): The pattern to match (default: 'pearson_awake_NREM_*.csv').

    Returns:
    - list: A sorted list of full paths to the matching files. Empty when the
      folder does not exist (a message is printed) or nothing matches.
    """
    # Local import keeps this function self-contained; the file only imports
    # `glob` itself from the glob module.
    from glob import escape

    if not exists(folder):
        print(f"Path does not exist: {folder}")
        return []

    # Escape the folder so that e.g. 'cell[1]' is not read as a character
    # class, which would otherwise make the search silently return nothing.
    # glob returns results in arbitrary order, so sort them to make
    # index-based access by callers reproducible.
    return sorted(glob(join(escape(folder), pattern)))

## Compiling a single list for all the data

In [None]:
# Root folders to scan come from the project's metadata module.
clean_data_paths = metadata.CLEAN_DATA_PATHS
# One list of cell folders per data folder, in the same order as clean_data_paths.
cell_folders_nested = list(map(find_cell_folders, clean_data_paths))
# Flatten the per-data-folder lists into a single list of cell folder paths.
cell_folders = [path for group in cell_folders_nested for path in group]
# Bare expression so the notebook displays the resulting list.
cell_folders

In [None]:
# Search the first discovered cell folder using find_matching_files' default
# pattern ('pearson_awake_NREM_*.csv'). Raises IndexError if cell_folders is empty.
corr_file = find_matching_files(cell_folders[0])
# Bare expression so the notebook displays the list of matched paths.
corr_file

In [None]:
# Load the first matched CSV into a DataFrame. Raises IndexError if no file matched.
corr_data = pd.read_csv(corr_file[0])
# Print a summary of the DataFrame to check what was loaded.
corr_data.info()

In [None]:
# Plot the 'awake pearson' and 'NREM pearson' columns side by side as a heatmap,
# one row per row of corr_data. center=0 anchors the diverging 'coolwarm'
# colormap at zero (presumably so positive and negative correlations get
# opposite colors; confirm the columns hold signed Pearson coefficients).
sns.heatmap(corr_data[['awake pearson', 'NREM pearson']], cmap='coolwarm', center=0)