In [3]:
from phasic_tonic.detect_phasic import detect_phasic_v2
from phasic_tonic.helper import get_metadata
from phasic_tonic.runtime_logger import logger_setup
from phasic_tonic.utils import get_sequences, get_segments

import numpy as np
import pandas as pd
import pynapple as nap

from pathlib import Path
from tqdm.auto import tqdm
from scipy.io import loadmat

# Sampling rate: passed to detect_phasic_v2 and used to divide the indices it
# returns when building the phasic intervals (presumably Hz — TODO confirm).
fs = 500

# Logger created by the project's runtime_logger helper.
logger = logger_setup()

# NOTE(review): absolute, machine-specific locations; adjust when running elsewhere.
# CONFIG_DIR points at a YAML file (despite the "_DIR" suffix).
CONFIG_DIR = "/home/nero/phasic_tonic/data/dataset_loading.yaml"
# Directory that is searched for the compressed *.npz datasets.
DATASET_DIR = "/home/nero/datasets/preprocessed"
# Output locations, kept as plain strings with a trailing slash; the save cell
# builds its CSV file paths from OUTPUT_DIR2.
OUTPUT_DIR1 = "/home/nero/phasic_tonic/data/analysis_output/whole_posttrial5/"
OUTPUT_DIR2 = "/home/nero/phasic_tonic/data/analysis_output/segmented_posttrial5/"

def str_to_tuple(string):
    """
    Parse the text form of a tuple of integers, e.g. ``"(12, 34)"`` -> ``(12, 34)``.

    Surrounding whitespace and the enclosing parentheses are ignored, and the
    forms Python itself produces for short tuples are accepted:
    ``"(5,)"`` -> ``(5,)`` and ``"()"`` -> ``()``.

    Parameters
    ----------
    string : str
        Text such as ``str((start, end))``.

    Returns
    -------
    tuple of int

    Raises
    ------
    ValueError
        If any comma-separated component is not an integer literal.
    """
    inner = string.strip().strip("()")
    parts = inner.split(",")
    # A trailing comma ("(5,)") or empty contents ("()") leaves one empty
    # trailing component; drop it instead of feeding "" to int().
    if not parts[-1].strip():
        parts.pop()
    return tuple(int(part) for part in parts)

def load_data(fname):
    """
    Load every array stored in an ``.npz`` archive into a plain dict.

    Parameters
    ----------
    fname : str or path-like
        Path of the ``.npz`` file to read.

    Returns
    -------
    dict
        Maps each archive key, converted with ``str_to_tuple``, to the array
        stored under that key. Empty if the archive holds no arrays.
    """
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once every member has been read into memory, so the
    # per-file handle is not leaked when looping over many datasets.
    with np.load(fname) as archive:
        return {str_to_tuple(key): archive[key] for key in archive.files}

# Collect every compressed dataset (*.npz) under DATASET_DIR. The paths are
# sorted so that the processing order -- and with it the row order of the
# resulting tables -- is reproducible instead of following whatever order the
# filesystem happens to enumerate the directory in.
compressed_datasets = sorted(Path(DATASET_DIR).glob('*.npz'))

len(compressed_datasets)

699

In [16]:
def compute_stats(rem_interval, phasic_interval, tonic_interval) -> pd.DataFrame:
    """
    Summarise phasic and tonic activity inside each REM epoch.

    Each element obtained by iterating ``rem_interval`` is intersected with
    the phasic set and with the tonic set; one row per (REM epoch, state)
    is then recorded via ``_compute_interval_stats``, phasic first.

    Parameters
    ----------
    rem_interval
        Iterable of REM epochs; each element must provide ``intersect``.
    phasic_interval
        Phasic intervals passed to ``intersect``.
    tonic_interval
        Tonic intervals passed to ``intersect``.

    Returns
    -------
    pd.DataFrame
        Columns: rem_start, rem_end, state, num_bouts, mean_duration,
        total_duration, percent_of_rem.
    """
    column_names = (
        "rem_start", "rem_end", "state",
        "num_bouts", "mean_duration",
        "total_duration", "percent_of_rem",
    )
    collected = {name: [] for name in column_names}
    labelled_sets = (("phasic", phasic_interval), ("tonic", tonic_interval))

    for rem_epoch in rem_interval:
        # Restrict both state sets to the current REM epoch before recording.
        clipped = [
            (label, rem_epoch.intersect(full_set))
            for label, full_set in labelled_sets
        ]
        for label, bouts in clipped:
            _compute_interval_stats(rem_epoch, label, bouts, collected)

    return pd.DataFrame(collected)

def _compute_interval_stats(rem_idx, state, intervals, stats):
    """
    Compute statistics for a given interval.
    """
    num_bouts = len(intervals)
    durations = np.diff(intervals, 1)
    total_duration = np.sum(durations)
    percent_of_rem = total_duration / rem_idx.tot_length()

    stats["rem_start"].append(int(rem_idx["start"].item()))
    stats["rem_end"].append(int(rem_idx["end"].item()))
    stats["state"].append(state)
    stats["num_bouts"].append(num_bouts)
    stats["mean_duration"].append(durations.mean())
    stats["total_duration"].append(total_duration)
    stats["percent_of_rem"].append(percent_of_rem)

In [31]:
# One DataFrame per processed file (output of compute_stats plus metadata columns).
per_trial_stats = []
# Column-oriented accumulator with one entry per individual phasic/tonic bout.
# The first five keys are filled from the file's metadata, so the metadata
# mapping is assumed to provide exactly these keys -- TODO confirm against
# get_metadata (any other key would raise KeyError in the bout loop below).
per_epochs_stats = {
    'rat_id': [],
    'study_day': [],
    'condition': [],
    'treatment': [],
    'trial_num': [],
    'epoch_id' : [],
    'state' : [],
    'duration' : []
}

with tqdm(compressed_datasets) as datasets:
    for fname in datasets:
        # The file stem is both the progress-bar label and the metadata source.
        metaname = str(fname.stem)

        datasets.set_postfix_str(metaname)
        metadata = get_metadata(metaname)

        rem_epochs = load_data(fname)

        # load_data returns an empty dict for an archive without arrays: skip it.
        if not rem_epochs:
            continue
        
        # NOTE(review): from the way it is consumed below, phrem appears to map
        # each REM-epoch key (start, end) to an iterable of (start, end) pairs
        # for the phasic bouts -- confirm against detect_phasic_v2.
        phrem = detect_phasic_v2(rem_epochs, fs)
        
        start, end = [], []
        rem_start, rem_end = [], []
        for rem_idx in phrem:
            # REM bounds are used as-is (presumably already in seconds -- TODO confirm).
            rem_start.append(rem_idx[0])
            rem_end.append(rem_idx[1])

            # Phasic bounds are divided by the sampling rate before use.
            for s, e in phrem[rem_idx]:
                start.append(s / fs)
                end.append(e / fs)
        
        rem_interval = nap.IntervalSet(rem_start, rem_end)
        phasic_interval = nap.IntervalSet(start, end)
        # Tonic REM is defined here as whatever part of REM is not phasic.
        tonic_interval = rem_interval.set_diff(phasic_interval)

        df = compute_stats(rem_interval, phasic_interval, tonic_interval)

        # Relabel trial numbers '5-0'..'5-3' as '5.1'..'5.4' (suffix shifted to
        # 1-based). metadata is mutated, so the bout table below gets the new label too.
        if metadata['trial_num'] in ['5-0', '5-1', '5-2', '5-3']:
            a, b = metadata['trial_num'].split('-')
            metadata['trial_num'] = a + '.' + str(int(b)+1)

        # Inserting at position 0 in reverse order leaves the metadata columns
        # first, in metadata's own key order.
        for key in reversed(metadata):
            df.insert(0, column=key, value=metadata[key])
        
        per_trial_stats.append(df)
        
        # Record the duration of every individual bout. np.diff's second
        # positional argument is the order n (=1), taken along the last axis
        # (assumes each interval row is [start, end] -- TODO confirm).
        # epoch_id restarts at 0 for each state and counts bouts across the
        # whole file, not per REM epoch.
        for state, interval in [("phasic", phasic_interval), ("tonic", tonic_interval)]:
            for i, duration in enumerate(np.diff(interval, 1)):
                for condition in metadata.keys():
                    per_epochs_stats[condition].append(metadata[condition])
                per_epochs_stats['state'].append(state)
                per_epochs_stats['epoch_id'].append(i)
                per_epochs_stats['duration'].append(duration.item())

df_epochs = pd.DataFrame(per_epochs_stats)
# Concatenated without ignore_index, so each file's 0-based row labels repeat
# in df_trial's index.
df_trial = pd.concat(per_trial_stats, axis=0)

  0%|          | 0/699 [00:00<?, ?it/s]

  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.mean())
  ret = ret.dtype.type(ret / rcount)
  stats["mean_duration"].append(durations.m

In [32]:
# Display the per-bout table: one row per individual phasic/tonic bout with its duration.
df_epochs

Unnamed: 0,rat_id,study_day,condition,treatment,trial_num,epoch_id,state,duration
0,2,2,OR,2,5.4,0,phasic,0.992
1,2,2,OR,2,5.4,1,phasic,1.998
2,2,2,OR,2,5.4,2,phasic,2.048
3,2,2,OR,2,5.4,3,phasic,1.586
4,2,2,OR,2,5.4,0,tonic,36.996
...,...,...,...,...,...,...,...,...
8513,6,4,CON,4,3,0,phasic,3.518
8514,6,4,CON,4,3,1,phasic,3.118
8515,6,4,CON,4,3,0,tonic,5.458
8516,6,4,CON,4,3,1,tonic,23.352


In [33]:
# Display the per-REM-epoch table: one phasic and one tonic summary row per REM epoch.
df_trial

Unnamed: 0,rat_id,study_day,condition,treatment,trial_num,rem_start,rem_end,state,num_bouts,mean_duration,total_duration,percent_of_rem
0,2,2,OR,2,5.4,9121,9168,phasic,1,0.992000,0.992,0.021106
1,2,2,OR,2,5.4,9121,9168,tonic,2,23.004000,46.008,0.978894
2,2,2,OR,2,5.4,9430,9468,phasic,0,,0.000,0.000000
3,2,2,OR,2,5.4,9430,9468,tonic,1,38.000000,38.000,1.000000
4,2,2,OR,2,5.4,9738,9820,phasic,3,1.877333,5.632,0.068683
...,...,...,...,...,...,...,...,...,...,...,...,...
7,1,1,OD,4,5.3,8002,8034,tonic,2,15.376000,30.752,0.961000
0,5,3,OD,0,5.1,1866,1943,phasic,2,1.567000,3.134,0.040701
1,5,3,OD,0,5.1,1866,1943,tonic,3,24.622000,73.866,0.959299
0,6,4,CON,4,3,2305,2443,phasic,2,3.318000,6.636,0.048087


In [34]:
# Create the destination first so the export does not fail on a machine where
# the output directory has not been set up yet.
Path(OUTPUT_DIR2).mkdir(parents=True, exist_ok=True)

# Build the file paths with Path joins rather than string concatenation, so
# the result no longer depends on OUTPUT_DIR2 ending with a slash.
# df_trial: per-REM-epoch phasic/tonic summary; df_epochs: per-bout durations.
df_trial.to_csv(Path(OUTPUT_DIR2) / "phasic_tonic_per_rem_epochs.csv", index=False)
df_epochs.to_csv(Path(OUTPUT_DIR2) / "phasic_tonic_per_durations.csv", index=False)