In [1]:
from phasic_tonic.detect_phasic import detect_phasic, detect_phasic_v2
from phasic_tonic.DatasetLoader import DatasetLoader
from phasic_tonic.helper import get_metadata
from phasic_tonic.runtime_logger import logger_setup
from phasic_tonic.utils import get_sequences, load_data, preprocess

import math
import numpy as np
import pandas as pd

from pathlib import Path
from tqdm.auto import tqdm
from scipy.io import loadmat

# Native sampling rates of the recordings, one per dataset family
# (presumably in Hz, the same unit as targetFs -- TODO confirm).
fs_cbd = 2500
fs_os = 2500
fs_rgs = 1000

# Sampling rate handed to detect_phasic after downsampling.
targetFs = 500
# Downsampling factors (native rate / target rate).
# NOTE: "/" is true division, so these are floats (5.0, 2.0, 5.0), not ints.
n_down_cbd = fs_cbd/targetFs
n_down_rgs = fs_rgs/targetFs
n_down_os = fs_os/targetFs

logger = logger_setup()

# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
CONFIG_DIR = "/home/nero/phasic_tonic/data/dataset_loading.yaml"
DATASET_DIR = "/home/nero/datasets/preprocessed"
# Output directories for the whole and the segmented post-trial 5 analyses.
OUTPUT_DIR1 = "/home/nero/phasic_tonic/data/analysis_output/whole_posttrial5/"
OUTPUT_DIR2 = "/home/nero/phasic_tonic/data/analysis_output/segmented_posttrial5/"

def half_round_up(n):
    """Round ``n`` to the nearest integer, sending exact halves upward.

    Unlike the built-in ``round`` (which rounds halves to the nearest even
    integer), a fractional part of exactly 0.5 always goes to the ceiling,
    e.g. 2.5 -> 3 and -1.5 -> -1.
    """
    lower = math.floor(n)
    fractional_part = n - lower
    return math.ceil(n) if fractional_part >= 0.5 else lower

# All compressed (.npz) files found directly under DATASET_DIR.
compressed_datasets = list(Path(DATASET_DIR).glob('*.npz'))

# mapped_datasets is iterated by recording name below; each entry unpacks into
# three items, the first two being the sleep-states file and the HPC LFP file.
Datasets = DatasetLoader(CONFIG_DIR)
mapped_datasets = Datasets.load_datasets()

# Displayed as the cell output: number of .npz files found.
len(compressed_datasets)

699

In [19]:
# Hypnogram state code -> one-letter symbol used in the state strings.
# Code 5 is REM (see the `hypno == 5` checks below); the other letters are
# presumably W = wake, N = NREM, I = intermediate -- TODO confirm.
sleep_dict = {
    1:"W",
    3:"N",
    4:"I",
    5:"R"
    }

# Same encoding, but REM is split into the two sub-state codes written into
# the hypnogram by the analysis loop: 6 = tonic ("T"), 7 = phasic ("P").
phasic_tonic_dict = {
    1:"W",
    3:"N",
    4:"I",
    6:"T",
    7:"P"
}

def array_to_string(array, mapping_dict, unknown="_"):
    """Encode a sequence of state codes as a string, one character per element.

    Parameters
    ----------
    array : iterable
        State codes (e.g. a 1-D numpy array of ints or floats).
    mapping_dict : dict
        Maps a state code to its symbol. It is only read, never modified, so
        the module-level dictionaries can be shared safely between calls.
    unknown : str, optional
        Symbol emitted for codes that are missing from ``mapping_dict``.

    Returns
    -------
    str
        Concatenation of the symbols, in the order of ``array``.
    """
    # dict.get avoids inserting placeholder entries into the caller's dict,
    # and str.join avoids repeated string concatenation in the loop.
    return "".join(mapping_dict.get(code, unknown) for code in array)

# Whole post-trial 5

In [38]:
# Build one record per recording: its metadata plus two state strings, one
# with REM as a single state and one with REM split into tonic/phasic.
container = []

with tqdm(mapped_datasets) as mapped_tqdm:
    for name in mapped_tqdm:
        metadata = get_metadata(name)
        mapped_tqdm.set_postfix_str(name)
        states_fname, hpc_fname, _ = mapped_datasets[name]
        logger.debug("Loading: {0}".format(name))

        # Pick the downsampling factor that matches the recording's treatment code.
        treatment = metadata["treatment"]
        if treatment == 0 or treatment == 1:
            n_down = n_down_cbd
        elif treatment == 2 or treatment == 3:
            n_down = n_down_rgs
        elif treatment == 4:
            n_down = n_down_os
        else:
            # Without this branch an unknown code would silently reuse the
            # factor left over from the previous recording.
            raise ValueError(
                "Unknown treatment code {0!r} for {1}".format(treatment, name)
            )
        
        # Load the LFP data
        lfpHPC = loadmat(hpc_fname)['HPC'].flatten()
        # Load the sleep states
        hypno = loadmat(states_fname)['states'].flatten()

        metadata["string_rem"] = array_to_string(np.nan_to_num(hypno), sleep_dict)
        metadata["string_phasic_tonic"] = ""

        # Skip if no REM epoch is detected
        if(not (np.any(hypno == 5))):
            logger.debug("No REM detected. Skipping.")
            continue
        # NOTE(review): this sums np.diff over everything get_sequences
        # returns, so it looks like a test on the total REM amount rather than
        # on the longest REM bout -- confirm against get_sequences.
        elif(np.sum(np.diff(get_sequences(np.where(hypno == 5)[0]))) < 10):
            logger.debug("No REM longer than 10s. Skipping.")
            continue
        
        # Detect phasic intervals
        lfpHPC_down = preprocess(lfpHPC, n_down)
        phREM = detect_phasic(lfpHPC_down, hypno, targetFs)

        # Classify each REM time window as tonic or phasic event.
        # phREM is keyed by (start, end) REM windows; each value is the
        # collection of (start, end) phasic intervals found in that window.
        for rem_idx in phREM:
            rem_start, rem_end = rem_idx[0], rem_idx[1]
            phasic_idx = phREM[rem_idx]
            
            # Initialize the REM epoch as tonic states (6)
            hypno[rem_start:(rem_end+1)] = 6

            if phasic_idx:
                for s, e in phasic_idx:
                    # Convert the phasic bounds to hypnogram indices by dividing
                    # by targetFs and rounding to the nearest integer (a
                    # fractional part of 0.5 or more rounds up).
                    ph_start, ph_end = half_round_up(s/targetFs), half_round_up(e/targetFs)
                    
                    # Mark the phasic interval with state code 7
                    hypno[ph_start:ph_end] = 7
            
        metadata["string_phasic_tonic"] = array_to_string(np.nan_to_num(hypno), phasic_tonic_dict)
        container.append(metadata)

df = pd.DataFrame(container)

  0%|          | 0/539 [00:00<?, ?it/s]

In [40]:
# Write into the whole post-trial 5 output directory, which is where the
# latency section reads this file back from. A bare relative file name would
# land in the working directory and collide with the segmented export.
whole_dir = Path(OUTPUT_DIR1)
whole_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(whole_dir / "string_analysis_hypno.csv")

# Segmented post-trial 5

In [72]:
def partition_string(s, part_size=2700, n_parts=4):
    """Cut ``s`` into ``n_parts`` consecutive chunks of ``part_size`` characters.

    Parameters
    ----------
    s : str
        String to split.
    part_size : int, optional
        Length of every chunk.
    n_parts : int, optional
        Number of chunks to return.

    Returns
    -------
    list of str
        Always exactly ``n_parts`` items. If ``s`` is shorter than
        ``n_parts * part_size`` the trailing chunks are shorter or empty;
        characters beyond ``n_parts * part_size`` are dropped.
    """
    return [s[start:start + part_size]
            for start in range(0, n_parts * part_size, part_size)]

In [126]:
# Work on a copy so the whole-recording table `df` stays untouched.
df_seg = df.copy()
# Preview the first rows as the cell output.
df_seg.head()

Unnamed: 0,rat_id,study_day,condition,treatment,trial_num,string_rem,string_phasic_tonic
0,5,8,HC,0,3,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...
1,5,8,HC,0,2,WWWWWWWWWWWWWWWWWWWWWWWWWWWWNNNNNNNNNNNNNNNNNN...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWNNNNNNNNNNNNNNNNNN...
2,5,8,HC,0,4,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...
3,5,8,HC,0,5,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...
4,5,16,OR,1,5,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...


In [121]:
# Split every post-trial 5 row into four consecutive segments ("5.1" .. "5.4");
# all other rows are carried over unchanged. `new_df` stays a list of
# single-row DataFrames so it can be concatenated afterwards.
new_df = []
for i, row in df_seg.iterrows():
    # Compare as text: trial_num may be a str or an int depending on whether
    # the table was built in memory or re-read from CSV.
    if str(row['trial_num']) == '5':
        parts_rem = partition_string(row['string_rem'])
        parts_phrem = partition_string(row['string_phasic_tonic'])
        for j, (seg_rem, seg_phrem) in enumerate(zip(parts_rem, parts_phrem), start=1):
            # Fill in a copy so each segment is independent of the source row.
            segment = row.copy()
            segment['trial_num'] = '5.' + str(j)
            segment['string_rem'] = seg_rem
            segment['string_phasic_tonic'] = seg_phrem
            new_df.append(pd.DataFrame([segment]))
    else:
        new_df.append(pd.DataFrame([row]))

10914
10938
9481
10801
10800
10807
10800
10800
10800
10800
10800
10800
10804
10803
10801
10800
10801
10802
10802
10800
10801
10820
10799
10800
10801
10800
10815
10800
10801
10800
10800
10800
10801
10799
10800
10800
10825
10799
10818
10789
10800
10796
10801
10805
9459
10797
10547
10800
10800
10799
10080
10799
10799
10800
10012
10800
10815
10799
10800
10800
10799
10758
10800
10471
10813
10827
10749
10168
10284
10195
10419
10799
10801
10799
10217
10838
10804
10805
10800
10800
10800
10762
10781
9617
10800
10834
10830
10913
10542
10928
10807
10243
10802
10914
10817
10806
10813
10812
10739
10800
10800
10349
10832


In [133]:
# Stack the single-row frames into one table; join='inner' keeps only the
# columns that are present in every frame.
new_df2=pd.concat(new_df, join='inner')

In [137]:
# Write into the segmented post-trial 5 output directory, which is where the
# latency section reads this file back from. A bare relative file name would
# overwrite the whole-recording export of the same name.
segmented_dir = Path(OUTPUT_DIR2)
segmented_dir.mkdir(parents=True, exist_ok=True)
new_df2.to_csv(segmented_dir / 'string_analysis_hypno.csv')

# Latency

In [138]:
# Reload both exported tables from their output directories. The locations
# come from the OUTPUT_DIR constants instead of repeating the literal paths.
df_seg = pd.read_csv(Path(OUTPUT_DIR2) / "string_analysis_hypno.csv", index_col=0)
df_whole = pd.read_csv(Path(OUTPUT_DIR1) / "string_analysis_hypno.csv", index_col=0)

In [178]:
def compute_latency(string):
    """Return the index of the first occurrence of each state symbol.

    When the string contains phasic ("P") or tonic ("T") symbols, the states
    looked up are W, N, I, P, T; otherwise they are W, N, I, R. A state that
    never occurs gets -1 (the ``str.find`` convention).
    """
    has_rem_substates = "P" in string or "T" in string
    states = ("W", "N", "I", "P", "T") if has_rem_substates else ("W", "N", "I", "R")
    return {symbol: string.find(symbol) for symbol in states}

In [190]:
# Latency table for the segmented recordings: index of the first occurrence
# of every state in each row's strings (-1 when the state never occurs).
container = []
for i, row in df_seg.iterrows():
    latencies = compute_latency(row['string_rem'])
    for state in latencies:
        row[state] = latencies[state]
    # Phasic / tonic latencies come from the string with REM split up.
    row['P'] = row['string_phasic_tonic'].find("P")
    row['T'] = row['string_phasic_tonic'].find("T")
    container.append(row)
lat_df = pd.DataFrame(container)
# Write into the segmented output directory: the whole-recording cell uses the
# same file name, so a bare relative path would be overwritten by it.
segmented_dir = Path(OUTPUT_DIR2)
segmented_dir.mkdir(parents=True, exist_ok=True)
lat_df.to_csv(segmented_dir / "string_analysis_latency.csv")

In [192]:
# Latency table for the whole recordings: index of the first occurrence of
# every state in each row's strings (-1 when the state never occurs).
container = []
for i, row in df_whole.iterrows():
    latencies = compute_latency(row['string_rem'])
    for state in latencies:
        row[state] = latencies[state]
    # Phasic / tonic latencies come from the string with REM split up.
    row['P'] = row['string_phasic_tonic'].find("P")
    row['T'] = row['string_phasic_tonic'].find("T")
    container.append(row)
lat_df = pd.DataFrame(container)
# Write into the whole-recording output directory: the segmented cell uses the
# same file name, so a bare relative path would clobber its result.
whole_dir = Path(OUTPUT_DIR1)
whole_dir.mkdir(parents=True, exist_ok=True)
lat_df.to_csv(whole_dir / "string_analysis_latency.csv")