In [5]:
import sys
sys.path.append('/home/nero/AutomaticSleepScoring/Tuguldur') # change this to your local path

In [9]:
from pipeline.detect_phasic import detect_phasic, detect_phasic_v2
from pipeline.DatasetLoader import DatasetLoader
from pipeline.helper import get_metadata
from pipeline.runtime_logger import logger_setup
from pipeline.utils import get_sequences, preprocess

import math
import numpy as np
import pandas as pd

from pathlib import Path
from tqdm.auto import tqdm
from scipy.io import loadmat

# Native sampling rates of the recordings, one per dataset family.
# NOTE(review): presumably in Hz and named after the CBD, OS and RGS datasets — confirm.
fs_cbd = 2500
fs_os = 2500
fs_rgs = 1000

# Sampling rate the LFP is brought down to; it is the rate later handed to detect_phasic.
targetFs = 500
# Downsampling factors later handed to preprocess(). "/" is true division, so these
# are floats (5.0, 2.0 and 5.0), not ints.
n_down_cbd = fs_cbd/targetFs
n_down_rgs = fs_rgs/targetFs
n_down_os = fs_os/targetFs

# Project logger used for the debug messages in the processing loop.
logger = logger_setup()

# Despite the "_DIR" suffix, CONFIG_DIR points at a YAML file (given to DatasetLoader).
CONFIG_DIR = "/home/nero/AutomaticSleepScoring/Tuguldur/data/dataset_loading.yaml"
# Folder that is searched for *.npz files.
DATASET_DIR = "/home/nero/datasets/preprocessed"
# Output folders: OUTPUT_DIR1 receives the whole-recording tables, OUTPUT_DIR2 the
# segmented ones. Both are used via plain string concatenation, so the trailing "/"
# is required.
OUTPUT_DIR1 = "/home/nero/AutomaticSleepScoring/Tuguldur/data/analysis_output/whole_posttrial5/"
OUTPUT_DIR2 = "/home/nero/AutomaticSleepScoring/Tuguldur/data/analysis_output/segmented_posttrial5/"

def half_round_up(n):
    """Round ``n`` to the nearest integer, with exact halves going up.

    Unlike the built-in ``round`` (which rounds halves to the nearest even
    integer), a fractional part of 0.5 or more always selects the ceiling.

    Args:
        n: A finite real number.

    Returns:
        int: ``floor(n)`` when the fractional part is below 0.5, otherwise
        ``ceil(n)``.
    """
    lower = math.floor(n)
    fraction = n - lower
    return lower if fraction < 0.5 else math.ceil(n)

# Every *.npz file found directly inside DATASET_DIR (non-recursive glob).
compressed_datasets = list(Path(DATASET_DIR).glob('*.npz'))

# Build the dataset mapping from the YAML configuration. Later cells index it by
# recording name and unpack each value into three items (states file, HPC file, and
# a third that is ignored).
Datasets = DatasetLoader(CONFIG_DIR)
mapped_datasets = Datasets.load_datasets()

# NOTE(review): this bare expression is followed by more statements in the same cell,
# so its value is not displayed; it has no effect as written.
len(compressed_datasets)

def str_to_tuple(string):
    """Parse text such as ``"(1194, 1435)"`` into a tuple of ints.

    Leading and trailing parentheses are stripped, the rest is split on
    commas, and every piece is converted with ``int`` (which tolerates
    surrounding whitespace).

    Args:
        string: Comma-separated integers, optionally wrapped in parentheses.

    Returns:
        tuple[int, ...]: The parsed integers in their original order.

    Raises:
        ValueError: If any comma-separated piece is not a valid integer.
    """
    inner = string.strip("()")
    return tuple(int(piece) for piece in inner.split(","))

def load_data(fname):
    """Load an ``.npz`` archive into a dict keyed by integer tuples.

    Each array name in the archive is parsed with ``str_to_tuple`` (for
    example ``"(1194, 1435)"`` becomes ``(1194, 1435)``) and mapped to the
    array stored under that name.

    The archive is opened in a ``with`` block so the underlying file handle is
    released as soon as all arrays have been read; the returned arrays are
    already in memory and stay valid afterwards.

    Args:
        fname: Path to the ``.npz`` file.

    Returns:
        dict[tuple[int, ...], numpy.ndarray]: Parsed key -> stored array.

    Raises:
        ValueError: If an array name cannot be parsed as a tuple of ints.
    """
    with np.load(fname) as loaded_data:
        return {str_to_tuple(key): loaded_data[key] for key in loaded_data.files}

In [10]:
from pipeline.utils import get_rem_epochs

# Single example recording used to sanity-check the REM-epoch extraction.
states_fname, hpc_fname, _ = mapped_datasets['Rat1_SD1_OD_4_posttrial5']

# Hippocampal LFP trace, flattened to one dimension
lfpHPC = loadmat(hpc_fname)['HPC'].flatten()
# Hypnogram (sleep-state codes), flattened to one dimension
hypno = loadmat(states_fname)['states'].flatten()

rem_epochs = get_rem_epochs(lfpHPC, hypno, fs=2500)

# For every epoch print its (start, end) key, the difference end - start, and the
# length of the extracted signal in whole seconds at 2500 samples per second.
for epoch_key in rem_epochs:
    first, last = epoch_key[0], epoch_key[1]
    n_seconds = rem_epochs[epoch_key].shape[0]//2500
    print(f"{first, last} : {last-first} ----> {n_seconds}")

(1194, 1435) : 241 ----> 242
(2460, 2528) : 68 ----> 69
(2552, 2631) : 79 ----> 80
(2676, 2756) : 80 ----> 81
(3266, 3313) : 47 ----> 48
(4168, 4454) : 286 ----> 287
(5090, 5125) : 35 ----> 36
(5886, 6053) : 167 ----> 168
(7379, 7428) : 49 ----> 50
(7850, 7866) : 16 ----> 17
(8002, 8034) : 32 ----> 33
(8628, 8732) : 104 ----> 105
(10204, 10485) : 281 ----> 282


In [11]:
# Hypnogram state code -> one-letter label used in the "string_rem" column.
# Code 5 is the REM state (the processing loop tests ``hypno == 5``).
sleep_dict = {1: "W", 3: "N", 4: "I", 5: "R"}

# Same labels once REM has been re-coded by the processing loop:
# 6 -> "T" (tonic) and 7 -> "P" (phasic) take the place of 5.
phasic_tonic_dict = {1: "W", 3: "N", 4: "I", 6: "T", 7: "P"}

def array_to_string(array, mapping_dict):
    """Encode a sequence of state codes as a string, one character per element.

    Every element is looked up in ``mapping_dict``; codes without an entry
    (including NaN) are written as ``"_"``.

    The mapping is only read, never modified, so the shared module-level
    dictionaries keep exactly the entries they were defined with no matter
    which arrays have been encoded before.

    Args:
        array: Iterable of hashable state codes (e.g. a 1-D numpy array).
            Float codes such as ``1.0`` match integer keys such as ``1``.
        mapping_dict: Mapping from state code to its label.

    Returns:
        str: Concatenation of the labels, in input order; ``""`` for an empty
        input.
    """
    # str.join builds the result in one pass instead of repeated concatenation.
    return "".join(mapping_dict.get(code, "_") for code in array)

# Whole post-trial 5

In [12]:
container = []

# Downsampling factor for each treatment code found in the recording metadata.
n_down_by_treatment = {
    0: n_down_cbd,
    1: n_down_cbd,
    2: n_down_rgs,
    3: n_down_rgs,
    4: n_down_os,
}

with tqdm(mapped_datasets) as mapped_tqdm:
    for name in mapped_tqdm:
        metadata = get_metadata(name)
        mapped_tqdm.set_postfix_str(name)
        states_fname, hpc_fname, _ = mapped_datasets[name]
        logger.debug("Loading: {0}".format(name))

        # Load the sleep states
        hypno = loadmat(states_fname)['states'].flatten()

        metadata["string_rem"] = array_to_string(np.nan_to_num(hypno), sleep_dict)
        metadata["string_phasic_tonic"] = ""

        # Skip recordings without usable REM (state code 5). Skipped recordings are
        # not appended to `container`, so they do not appear in the output table.
        if not np.any(hypno == 5):
            logger.debug("No REM detected. Skipping.")
            continue
        if np.sum(np.diff(get_sequences(np.where(hypno == 5)[0]))) < 10:
            logger.debug("No REM longer than 10s. Skipping.")
            continue

        # Resolve the downsampling factor explicitly. An unknown treatment code must
        # not silently reuse the factor of the previously processed recording.
        treatment = metadata["treatment"]
        if treatment not in n_down_by_treatment:
            raise ValueError(
                "Unknown treatment {0!r} for dataset {1}".format(treatment, name)
            )
        n_down = n_down_by_treatment[treatment]

        # Load the LFP data only now, so skipped recordings never pay for the read
        lfpHPC = loadmat(hpc_fname)['HPC'].flatten()

        # Detect phasic intervals
        lfpHPC_down = preprocess(lfpHPC, n_down)
        phREM = detect_phasic(lfpHPC_down, hypno, targetFs)

        # Classify each REM time window as tonic or phasic event
        for rem_idx in phREM:
            rem_start, rem_end = rem_idx[0], rem_idx[1]
            phasic_idx = phREM[rem_idx]

            # Initialize the whole REM epoch (end index included) as tonic (6)
            hypno[rem_start:(rem_end+1)] = 6

            if phasic_idx:
                for s, e in phasic_idx:
                    # Convert sample indices to hypnogram bins, rounding to the
                    # nearest bin with fractional parts of 0.5 or more rounded up
                    ph_start, ph_end = half_round_up(s/targetFs), half_round_up(e/targetFs)

                    # Mark the phasic bins as 7; an event whose start and end round
                    # to the same bin marks nothing
                    hypno[ph_start:ph_end] = 7

        metadata["string_phasic_tonic"] = array_to_string(np.nan_to_num(hypno), phasic_tonic_dict)
        container.append(metadata)

df = pd.DataFrame(container)

  0%|          | 0/539 [00:00<?, ?it/s]

In [13]:
# Save the whole-recording table; the "Latency" section reads this file back.
df.to_csv(OUTPUT_DIR1+"string_analysis_hypno.csv")

# Segmented post-trial 5

In [14]:
def partition_string(s, part_size=2700, n_parts=4):
    """Cut ``s`` into ``n_parts`` consecutive slices of ``part_size`` characters.

    Exactly ``n_parts`` slices are always returned. If ``s`` is shorter than
    ``n_parts * part_size`` the trailing slices are shorter or empty; any
    characters beyond ``n_parts * part_size`` are dropped.

    Args:
        s: Sequence to split (a state string in this notebook).
        part_size: Length of each slice. Defaults to 2700.
        n_parts: Number of slices to return. Defaults to 4, the value that was
            previously hard-coded.

    Returns:
        list: ``n_parts`` slices of ``s`` in order.
    """
    return [s[k * part_size:(k + 1) * part_size] for k in range(n_parts)]

In [15]:
# Work on a copy so that the segmentation step leaves `df` untouched.
df_seg = df.copy()
df_seg.head()

Unnamed: 0,rat_id,study_day,condition,treatment,trial_num,string_rem,string_phasic_tonic
0,5,8,HC,0,3,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...
1,5,8,HC,0,2,WWWWWWWWWWWWWWWWWWWWWWWWWWWWNNNNNNNNNNNNNNNNNN...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWNNNNNNNNNNNNNNNNNN...
2,5,8,HC,0,4,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...
3,5,8,HC,0,5,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...
4,5,16,OR,1,5,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...,WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW...


In [None]:
# Split every post-trial 5 row into four consecutive segments (trial_num "5.1" to
# "5.4"); all other rows are passed through unchanged. `new_df` is a list of
# one-row DataFrames that is concatenated in the next cell.
new_df = []
for i, row in df_seg.iterrows():
    # Compare as text so the test also matches when trial_num is stored as the
    # integer 5 (e.g. after the table has been read back from CSV).
    if str(row['trial_num']) == '5':
        parts_rem = partition_string(row['string_rem'])
        parts_phrem = partition_string(row['string_phasic_tonic'])
        for j, string_rem in enumerate(parts_rem):
            # Build each segment on its own copy instead of overwriting the row
            # that is being iterated.
            segment = row.copy()
            segment['trial_num'] = '5.' + str(j+1)
            segment['string_rem'] = string_rem
            segment['string_phasic_tonic'] = parts_phrem[j]
            new_df.append(pd.DataFrame([segment]))
    else:
        new_df.append(pd.DataFrame([row]))

In [17]:
# Stack the one-row frames into a single table; join='inner' keeps only the columns
# that are present in every frame.
new_df2=pd.concat(new_df, join='inner')

In [19]:
# Save the segmented table; the "Latency" section reads this file back.
new_df2.to_csv(OUTPUT_DIR2+'string_analysis_hypno.csv')

# Latency

In [20]:
# Reload the segmented and whole-recording tables from disk; index_col=0 restores the
# index column that to_csv wrote as the first column.
# NOTE(review): read_csv parses empty fields as NaN by default, so an empty state
# string comes back as a float NaN rather than "" — confirm downstream handling.
df_seg = pd.read_csv(OUTPUT_DIR2+"string_analysis_hypno.csv", index_col=0)
df_whole = pd.read_csv(OUTPUT_DIR1+"string_analysis_hypno.csv", index_col=0)

In [21]:
def compute_latency(string):
    """Return the index of the first occurrence of each state letter.

    When the string contains a phasic ("P") or tonic ("T") label, the states
    reported are W, N, I, P and T; otherwise they are W, N, I and R.

    Args:
        string: State string with one character per hypnogram entry.

    Returns:
        dict[str, int]: State letter -> index of its first occurrence, or -1
        when the letter does not occur (the ``str.find`` convention).
    """
    has_phasic_tonic = ("P" in string) or ("T" in string)
    states = ("W", "N", "I", "P", "T") if has_phasic_tonic else ("W", "N", "I", "R")
    return {letter: string.find(letter) for letter in states}

In [22]:
# First-occurrence latency of every state for each row of the segmented table.
container = []
for i, row in df_seg.iterrows():
    # A segment can be an empty string when the recording is shorter than four
    # full parts. Empty strings are written as empty CSV fields and come back
    # from read_csv as NaN (a float), which has no .find(). Treat any non-string
    # value as an empty state string so that every state is reported as -1.
    string_rem = row['string_rem'] if isinstance(row['string_rem'], str) else ""
    string_phasic_tonic = (
        row['string_phasic_tonic'] if isinstance(row['string_phasic_tonic'], str) else ""
    )

    latencies = compute_latency(string_rem)
    for state in latencies:
        row[state] = latencies[state]
    # Phasic/tonic latencies come from the phasic-tonic string, not the REM string
    row['P'] = string_phasic_tonic.find("P")
    row['T'] = string_phasic_tonic.find("T")
    container.append(row)
lat_df = pd.DataFrame(container)
lat_df.to_csv(OUTPUT_DIR2+"string_analysis_latency.csv")

In [23]:
# First-occurrence latency of every state for each row of the whole-recording table.
container = []
for _, record in df_whole.iterrows():
    # W / N / I / R latencies come from the REM-level state string
    for label, position in compute_latency(record['string_rem']).items():
        record[label] = position
    # Phasic and tonic latencies come from the phasic-tonic state string
    phasic_tonic = record['string_phasic_tonic']
    record['P'] = phasic_tonic.find("P")
    record['T'] = phasic_tonic.find("T")
    container.append(record)
lat_df = pd.DataFrame(container)
lat_df.to_csv(OUTPUT_DIR1+"string_analysis_latency.csv")