# MozzieDrome analysis
### Linhan Dong, Duvall Lab

### Generate files needed for further analysis and behavior classification

In [None]:
import os
import re
import h5py
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from natsort import natsorted
import pandas as pd

def getfile(file_path):
    """Load one tracking file and return the gap-filled positions of node 0.

    Opens the HDF5 file at ``file_path`` read-only, loads its ``"tracks"``
    dataset into memory, transposes it, fills NaN samples with
    ``fill_missing`` and returns the slice at index 0 of the second axis.

    NOTE(review): assumes the transposed ``tracks`` array is laid out as
    (frames, nodes, xy, tracks), so the result is (frames, 2, tracks) --
    confirm against the exporter that wrote the file.

    Args:
        file_path: Path of the HDF5 file to read.

    Returns:
        The gap-filled array ``locations[:, 0, :, :]``.
    """
    BODY_INDEX = 0
    with h5py.File(file_path, "r") as f:
        # ``[:]`` copies the dataset into memory, so the file can be closed
        # before any further processing.
        locations = f["tracks"][:].T
    locations = fill_missing(locations)
    return locations[:, BODY_INDEX, :, :]

def fill_missing(Y, kind="linear"):
    """Fill NaN samples along axis 0, independently for each trailing column.

    The array is flattened to 2-D (axis 0 kept, everything else merged), each
    column is interpolated over its NaN positions with
    ``scipy.interpolate.interp1d`` and any NaNs still left at the start or
    end are filled with the nearest valid value via ``np.interp``.

    A column with no valid sample at all cannot be interpolated and is left
    as NaN instead of raising.

    Args:
        Y: Float array whose first axis is the one to interpolate along.
            ``reshape`` may return a view, so ``Y`` can be modified in place.
        kind: Interpolation kind passed through to ``interp1d``.

    Returns:
        An array with the same shape as ``Y`` with the gaps filled.
    """
    initial_shape = Y.shape
    Y = Y.reshape((initial_shape[0], -1))
    for i in range(Y.shape[-1]):
        y = Y[:, i]
        missing = np.isnan(y)
        valid_idx = np.flatnonzero(~missing)
        # Nothing to fill, or nothing to fill from: leave the column as is.
        if valid_idx.size == 0 or valid_idx.size == y.size:
            continue
        f = interp1d(valid_idx, y[valid_idx], kind=kind, fill_value=np.nan, bounds_error=False)
        missing_idx = np.flatnonzero(missing)
        y[missing_idx] = f(missing_idx)
        # Fill leading or trailing NaNs with the nearest non-NaN values
        mask = np.isnan(y)
        y[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), y[~mask])
        Y[:, i] = y
    return Y.reshape(initial_shape)

def individual_velocity(file_path, delay):
    """Record per-frame and per-window path length for every track in a file.

    For each track returned by ``getfile(file_path)`` this computes the
    Euclidean distance moved between consecutive frames, prepends
    ``int(delay * 60)`` zeros and stores the padded trace as a new column of
    the module-level ``all_individual_distances``.  The padded trace is then
    summed over consecutive 300-sample windows; the first window is dropped
    and at most 359 windows are stored in the module-level
    ``all_collapsed_individual_distances``.  Columns are named
    ``str(file_path) + str(track_index)``.

    NOTE(review): ``delay * 60`` presumably converts a start delay in seconds
    into frames at 60 fps -- confirm against the recording settings.

    Args:
        file_path: Path of the HDF5 tracking file.
        delay: Start delay; multiplied by 60 to get the number of zero
            samples prepended to each trace.

    Returns:
        None.  Results are written to the module-level DataFrames.
    """
    body_loc = getfile(file_path)
    delay_frame = int(delay * 60)
    window_size = 300
    for i in range(body_loc.shape[2]):
        track_xy = body_loc[:, :, i]
        # Step length between consecutive frames, computed for all frames at
        # once instead of one Python iteration per frame.
        step = np.diff(track_xy, axis=0)
        frame_distances = np.sqrt(step[:, 0] ** 2 + step[:, 1] ** 2)
        column_name = str(file_path) + str(i)
        print(column_name)
        # Leading zeros stand in for the frames before the recording started.
        corrected_distances_array = np.concatenate(
            [np.zeros(max(delay_frame, 0)), frame_distances]
        )
        all_individual_distances[column_name] = pd.Series(corrected_distances_array)
        sum_windows = len(corrected_distances_array) // window_size
        distances_collapsed = np.array([
            np.sum(corrected_distances_array[w * window_size:(w + 1) * window_size])
            for w in range(sum_windows)
        ])
        # Drop the first window and keep at most 359 of the remaining ones.
        distances_collapsed = distances_collapsed[1:360]
        all_collapsed_individual_distances[column_name] = distances_collapsed

def vector_length(file_path, delay):
    """Record the net displacement per 300-frame window for every track.

    For each track returned by ``getfile(file_path)`` the position is sampled
    every 300 frames starting at frame ``300 - int(delay * 60)`` and the
    straight-line distance between consecutive samples is stored as a column
    of the module-level ``all_vector_lengths``.  Columns are named
    ``str(file_path) + str(track_index)``.

    The number of windows is derived from the last valid frame index.  The
    previous expression ``(len + delay_frame - 241) // 300`` could ask for
    one window too many and index past the end of the track; for every input
    that did not raise, both expressions give the same count.

    NOTE(review): when ``int(delay * 60)`` exceeds 300 the start frame is
    negative and indexing wraps around to the end of the track (unchanged
    from the original behaviour) -- confirm such delays never occur.

    Args:
        file_path: Path of the HDF5 tracking file.
        delay: Start delay; multiplied by 60 to get an offset in frames.

    Returns:
        None.  Results are written to ``all_vector_lengths``.
    """
    body_loc = getfile(file_path)
    delay_frame = int(delay * 60)
    window_size = 300
    start_frame = 300 - delay_frame
    n_frames = body_loc.shape[0]
    # The last sampled index is start_frame + windows * window_size and must
    # not exceed n_frames - 1.
    vector_windows = max((n_frames - 1 - start_frame) // window_size, 0)
    sample_idx = start_frame + window_size * np.arange(vector_windows + 1)
    for i in range(body_loc.shape[2]):
        track_xy = body_loc[:, :, i]
        if vector_windows:
            endpoints = track_xy[sample_idx]
            delta = np.diff(endpoints, axis=0)
            window_displacement = np.sqrt(delta[:, 0] ** 2 + delta[:, 1] ** 2)
        else:
            # Track too short for a single complete window.
            window_displacement = np.array([], dtype=float)
        column_name = str(file_path) + str(i)
        print(column_name)
        distances = pd.Series(window_displacement)
        all_vector_lengths[column_name] = distances

def max_velocity(file_path, delay):
    """Record the largest single-frame step per window for every track.

    For each track returned by ``getfile(file_path)`` this computes the
    Euclidean distance moved between consecutive frames, prepends
    ``int(delay * 60)`` zeros and takes the maximum of every consecutive
    300-sample window.  The first window is dropped and at most 359 windows
    are stored in the module-level ``all_max_velocity``.  The padded
    per-frame trace is also written to ``all_individual_distances``.
    Columns are named ``str(file_path) + str(track_index)``.

    NOTE(review): ``delay * 60`` presumably converts a start delay in seconds
    into frames at 60 fps -- confirm against the recording settings.

    Args:
        file_path: Path of the HDF5 tracking file.
        delay: Start delay; multiplied by 60 to get the number of zero
            samples prepended to each trace.

    Returns:
        None.  Results are written to the module-level DataFrames.
    """
    body_loc = getfile(file_path)
    delay_frame = int(delay * 60)
    window_size = 300
    for i in range(body_loc.shape[2]):
        track_xy = body_loc[:, :, i]
        # Step length between consecutive frames for the whole track at once.
        step = np.diff(track_xy, axis=0)
        frame_distances = np.sqrt(step[:, 0] ** 2 + step[:, 1] ** 2)
        column_name = str(file_path) + str(i)
        print(column_name)
        corrected_distances_array = np.concatenate(
            [np.zeros(max(delay_frame, 0)), frame_distances]
        )
        all_individual_distances[column_name] = pd.Series(corrected_distances_array)
        max_windows = len(corrected_distances_array) // window_size
        max_velocity_array = np.array([
            np.max(corrected_distances_array[w * window_size:(w + 1) * window_size])
            for w in range(max_windows)
        ])
        # Drop the first window and keep at most 359 of the remaining ones.
        # (Named so it does not shadow this function.)
        window_max = max_velocity_array[1:360]
        all_max_velocity[column_name] = window_max

# Batch-process every .h5 file in the folder (in natural sort order) and
# export the per-window tables to Excel.
folder_path = '/Users/donglinhan/Desktop/SLEAP/FinalH5/WT-DD2'
h5_files = natsorted(os.listdir(folder_path))

# Module-level accumulators filled by individual_velocity, vector_length and
# max_velocity (one column per track).
all_individual_distances = pd.DataFrame()
all_collapsed_individual_distances = pd.DataFrame()
all_vector_lengths = pd.DataFrame(index = range(370))
all_max_velocity = pd.DataFrame()

for file_name in h5_files:
    if not file_name.endswith('.h5'):
        continue
    # The delay is read from the first six characters of the second
    # underscore-separated field of the file name.
    file_parts = file_name.split('_')
    first_six_chars = file_parts[1][:6]
    # float() accepts leading zeros directly; stripping them first turned an
    # all-zero field into '' and raised ValueError.
    delay = float(first_six_chars)
    print(delay)
    file_path = os.path.join(folder_path, file_name)
    print(file_name)
    individual_velocity(file_path, delay)
    vector_length(file_path, delay)
    max_velocity(file_path, delay)

all_collapsed_individual_distances.to_excel('WT_DD2_collapsed_distances_300.xlsx', index=False)
all_vector_lengths.to_excel('WT_DD2_vector_lengths_300.xlsx', index=False)
all_max_velocity.to_excel('WT_DD2_max_velocity_300.xlsx', index=False)

### Generate behavior classification

In [None]:
import h5py
import numpy as np
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

# Classify every window of every track as 0, 1 or 2 from the three per-window
# tables and export the result.  (A later cell counts 1 as walk and 2 as
# flight.)
vector_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_vector_lengths_300.xlsx'
distance_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_collapsed_distances_300.xlsx'
max_velocity_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_max_velocity_300.xlsx'
distances = pd.read_excel(distance_input)
vector_lengths = pd.read_excel(vector_input)
max_velocities = pd.read_excel(max_velocity_input)
total = len(distances.columns)

# Only the first 143 windows of each track are classified.
n_bins = 143
for table_name, table in (
    ('distances', distances),
    ('vector_lengths', vector_lengths),
    ('max_velocities', max_velocities),
):
    if len(table) < n_bins:
        raise ValueError(
            f'{table_name} has {len(table)} rows; at least {n_bins} are required'
        )

behavior_by_column = {}
for i, column_name in enumerate(distances.columns):
    distance = distances[column_name].to_numpy()[:n_bins]
    vector = vector_lengths[column_name].to_numpy()[:n_bins]
    peak = max_velocities[column_name].to_numpy()[:n_bins]
    # Comparisons with NaN are False, so a window with a missing distance
    # falls through to class 0 exactly as in the scalar version.
    far = distance >= 175
    displaced = (vector > 5) & (distance < 175)
    # np.select takes the first matching condition, mirroring if/elif/else.
    behavior = np.select(
        [far, displaced & (peak > 4), displaced],
        [2, 2, 1],
        default=0,
    )
    print(i)
    behavior_by_column[column_name] = behavior
# Build the table once instead of inserting one column at a time.
all_behavior = pd.DataFrame(behavior_by_column)
all_behavior.to_excel('WT_DD2_all_behavior_modified_again.xlsx', index=False)

### All LD activity analysis

In [2]:
#LD acute vs sustained activity analysis
import pandas as pd
import h5py
import numpy as np
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

# Behaviour classifications per genotype (one column per track).
wt_behavior_input = '/Users/donglinhan/Desktop/SLEAP/WT_LDfull_all_behavior_modified_again.xlsx'
het_behavior_input = '/Users/donglinhan/Desktop/SLEAP/Het_LD_all_behavior_modified_again.xlsx'
hom_behavior_input = '/Users/donglinhan/Desktop/SLEAP/PDF_LDfull_all_behavior_modified_again.xlsx'
wt_behaviors_original = pd.read_excel(wt_behavior_input)
het_behaviors_original = pd.read_excel(het_behavior_input)
hom_behaviors_original = pd.read_excel(hom_behavior_input)
# Binarise: every non-zero class becomes 1.  The vectorised comparison gives
# the same result as the element-wise lambda without relying on
# DataFrame.applymap, which newer pandas releases deprecate.
wt_behaviors = (wt_behaviors_original != 0).astype(int)
het_behaviors = (het_behaviors_original != 0).astype(int)
hom_behaviors = (hom_behaviors_original != 0).astype(int)

# Per-window summed distances per genotype (one column per track).
wt_activity_input = '/Users/donglinhan/Desktop/SLEAP/WT_LDfull_all_collapsed_distances_300.xlsx'
het_activity_input = '/Users/donglinhan/Desktop/SLEAP/HET_LD_all_collapsed_distances_300.xlsx'
hom_activity_input = '/Users/donglinhan/Desktop/SLEAP/PDF_LDfull_all_collapsed_distances_300.xlsx'
wt_activities = pd.read_excel(wt_activity_input)
het_activities = pd.read_excel(het_activity_input)
hom_activities = pd.read_excel(hom_activity_input)

def _activity_bounds(activity_series, quiet_bins=48):
    """Return (start, end) positions of the first activity episode, or None.

    ``start`` is the position of the first non-zero entry.  ``end`` is the
    first non-zero entry that is followed by ``quiet_bins`` consecutive
    zeros; if no entry qualifies, it is the last non-zero entry.  ``None``
    is returned when every entry is zero.  Positions are positional (0-based),
    independent of any pandas index on the input.
    """
    active = np.asarray(activity_series) != 0
    active_positions = np.flatnonzero(active)
    if active_positions.size == 0:
        return None
    start = int(active_positions[0])
    # Only positions with a complete look-ahead window can end the episode.
    last_candidate = len(active) - quiet_bins
    for position in active_positions:
        if position >= last_candidate:
            break
        if not active[position + 1:position + 1 + quiet_bins].any():
            return start, int(position)
    return start, int(active_positions[-1])


def calculate_end_index(activity_series, quiet_bins=48):
    """Return the position at which the first activity episode ends.

    Args:
        activity_series: Sequence of numbers; non-zero means active.
        quiet_bins: Number of consecutive zero entries that must follow an
            active entry for it to count as the end of the episode.

    Returns:
        The end position, or 0 when the series contains no activity.
    """
    bounds = _activity_bounds(activity_series, quiet_bins)
    if bounds is None:
        return 0  # No activity in the series
    return bounds[1]


def calculate_total_activity(activity_series, quiet_bins=48):
    """Return the sum of the entries from the first activity to its end.

    The episode runs from the first non-zero entry to the position reported
    by ``calculate_end_index`` (inclusive).

    Args:
        activity_series: Sequence of numbers; non-zero means active.
        quiet_bins: Number of consecutive zero entries that must follow an
            active entry for it to count as the end of the episode.

    Returns:
        The summed values, or 0 when the series contains no activity.
    """
    bounds = _activity_bounds(activity_series, quiet_bins)
    if bounds is None:
        return 0  # No activity in the series
    start_index, end_index = bounds
    return np.asarray(activity_series)[start_index:end_index + 1].sum()

def longest_activity_bout(activity_series):
    """Return the length of the longest run of consecutive entries equal to 1.

    Any entry with another value ends the current run.  An input without a
    single entry equal to 1 (including an empty input) gives 0.
    """
    best_run = 0
    run_length = 0
    for value in activity_series:
        # Extend the run on a 1, otherwise start counting again from zero.
        run_length = run_length + 1 if value == 1 else 0
        best_run = max(best_run, run_length)
    return best_run

def average_activity_bout_length(activity_series):
    """Return the mean length of the runs of consecutive entries equal to 1.

    A run is a maximal stretch of entries equal to 1; any other value ends
    it.  Returns 0 when the input contains no such run.
    """
    bout_count = 0      # number of runs seen so far
    active_total = 0    # total number of entries equal to 1
    in_bout = False
    for value in activity_series:
        if value == 1:
            active_total += 1
            if not in_bout:
                # First entry of a new run.
                bout_count += 1
            in_bout = True
        else:
            in_bout = False

    if bout_count == 0:
        return 0
    # Sum of all run lengths divided by the number of runs.
    return active_total / bout_count


# Labels of the eight groups, in column order.
_ZT_LABELS = ('0', '3', '6', '9', '12', '15', '18', '21')


def _zt_column_ranges(edges):
    """Map each ZT label to the column range between consecutive edges."""
    return {
        label: range(first, stop)
        for label, first, stop in zip(_ZT_LABELS, edges, edges[1:])
    }


# Column positions (for DataFrame.iloc) of each ZT group, per genotype table.
WT_ZT_timepoints = _zt_column_ranges([0, 70, 150, 220, 300, 380, 460, 530, 600])

HET_ZT_timepoints = _zt_column_ranges([0, 60, 120, 180, 240, 300, 360, 420, 480])

HOM_ZT_timepoints = _zt_column_ranges([0, 58, 116, 174, 232, 290, 347, 404, 462])

# Result tables, one per metric and genotype.  Each starts with 100 empty
# rows and receives one column per ZT group further down.
(
    wt_total_sustained_activity_2_6,
    wt_total_sustained_activity_6_10,
    wt_total_sustained_time,
    wt_total_longest_bout,
    wt_total_acute_activity,
    wt_total_average_bout,
    wt_total_walk,
    wt_total_flight,
    wt_total_flight_ratio,
) = (pd.DataFrame(index=range(100)) for _ in range(9))

(
    het_total_sustained_activity,
    het_total_sustained_activity_4,
    het_total_sustained_time,
    het_total_longest_bout,
    het_total_acute_activity,
    het_total_average_bout,
    het_total_walk,
    het_total_flight,
    het_total_flight_ratio,
) = (pd.DataFrame(index=range(100)) for _ in range(9))

(
    hom_total_sustained_activity,
    hom_total_sustained_activity_4,
    hom_total_sustained_time,
    hom_total_longest_bout,
    hom_total_acute_activity,
    hom_total_average_bout,
    hom_total_walk,
    hom_total_flight,
    hom_total_flight_ratio,
) = (pd.DataFrame(index=range(100)) for _ in range(9))

# Per-ZT persistence metrics for the WT tracks.
#
# A track contributes the full set of metrics only when rows 0-22 of its
# binarised behaviour are all zero and rows 23-46 contain activity.  A track
# that is silent in both row ranges contributes only its acute activity.
# Every other track is recorded as None throughout.
# NOTE(review): rows 0-22 are presumably the pre-CO2 period and rows 23-46 the
# first two minutes after CO2 (names taken from the later cells) -- confirm.
for ZT, cols in WT_ZT_timepoints.items():
    wt_behavior_data = wt_behaviors.iloc[:, cols]
    wt_activity_data = wt_activities.iloc[:, cols]
    wt_behavior_type = wt_behaviors_original.iloc[:, cols]
    wt_walk = []
    wt_flight = []
    wt_flight_ratio = []
    wt_sustained_activity_2_6 = []
    wt_sustained_activity_6_10 = []
    wt_sustained_time = []
    wt_longest_bout = []
    wt_acute_activity = []
    wt_average_bout = []
    for i in wt_behavior_data:
        active_flags = wt_behavior_data[i]
        bin_activity = wt_activity_data[i]
        pre_active_bins = active_flags[0:23].sum()
        acute_active_bins = active_flags[23:47].sum()

        # Start from "excluded" and fill in only what the branch provides.
        sustained_time = None
        sustained_activity_2_6 = None
        sustained_activity_6_10 = None
        longest_bout = None
        average_bout = None
        acute_activity = None
        walk = None
        flight = None
        flight_ratio = None

        if acute_active_bins > 0 and pre_active_bins == 0:
            sustained_time = calculate_total_activity(pd.Series(active_flags))
            sustained_activity_2_6 = bin_activity[47:95].sum()
            sustained_activity_6_10 = bin_activity[95:143].sum()
            longest_bout = longest_activity_bout(pd.Series(active_flags))
            average_bout = average_activity_bout_length(pd.Series(active_flags))
            acute_activity = bin_activity[23:47].sum()
            # Class counts over rows 23-142: 1 is counted as walk, 2 as flight.
            type_counts = wt_behavior_type[i][23:143].value_counts()
            walk = type_counts.get(1, 0)
            flight = type_counts.get(2, 0)
            # At least one non-zero class exists in rows 23-46 here, so the
            # denominator is not zero as long as classes are only 0, 1 or 2.
            flight_ratio = flight/(walk + flight)
        elif acute_active_bins == 0 and pre_active_bins == 0:
            # The walk/flight counts previously computed in this branch were
            # immediately overwritten with None, so they are no longer
            # computed; the unused calculate_end_index call is gone as well.
            acute_activity = bin_activity[23:47].sum()

        wt_sustained_time.append(sustained_time)
        wt_sustained_activity_2_6.append(sustained_activity_2_6)
        wt_sustained_activity_6_10.append(sustained_activity_6_10)
        wt_longest_bout.append(longest_bout)
        wt_average_bout.append(average_bout)
        wt_acute_activity.append(acute_activity)
        wt_walk.append(walk)
        wt_flight.append(flight)
        wt_flight_ratio.append(flight_ratio)
    wt_total_sustained_activity_2_6[ZT] = pd.Series(wt_sustained_activity_2_6)
    wt_total_sustained_activity_6_10[ZT] = pd.Series(wt_sustained_activity_6_10)
    wt_total_sustained_time[ZT] = pd.Series(wt_sustained_time)
    wt_total_longest_bout[ZT] = pd.Series(wt_longest_bout)
    wt_total_average_bout[ZT] = pd.Series(wt_average_bout)
    wt_total_acute_activity[ZT] = pd.Series(wt_acute_activity)
    wt_total_walk[ZT] = pd.Series(wt_walk)
    wt_total_flight[ZT] = pd.Series(wt_flight)
    wt_total_flight_ratio[ZT] = pd.Series(wt_flight_ratio)

# Per-ZT persistence metrics for the HET tracks.
#
# A track contributes the full set of metrics only when rows 0-22 of its
# binarised behaviour are all zero and rows 23-46 contain activity.  A track
# that is silent in both row ranges contributes only its acute activity.
# Every other track is recorded as None throughout.
for ZT, cols in HET_ZT_timepoints.items():
    het_behavior_data = het_behaviors.iloc[:, cols]
    het_activity_data = het_activities.iloc[:, cols]
    het_behavior_type = het_behaviors_original.iloc[:, cols]
    (
        het_walk,
        het_flight,
        het_sustained_activity,
        het_sustained_activity_4,
        het_sustained_time,
        het_longest_bout,
        het_acute_activity,
        het_average_bout,
        het_flight_ratio,
    ) = ([] for _ in range(9))
    for i in het_behavior_data:
        active_flags = het_behavior_data[i]
        bin_activity = het_activity_data[i]
        sustained_index = calculate_end_index(pd.Series(active_flags))
        acute_active_bins = active_flags[23:47].sum()
        pre_active_bins = active_flags[0:23].sum()

        # Start from "excluded"; the branches below overwrite what applies.
        sustained_time = sustained_activity = sustained_activity_4 = None
        longest_bout = average_bout = acute_activity = None
        walk = flight = flight_ratio = None

        if pre_active_bins == 0:
            if acute_active_bins > 0:
                sustained_time = calculate_total_activity(pd.Series(active_flags))
                # Activity from row 47 up to the end of the episode.
                sustained_activity = bin_activity[47:sustained_index+1].sum()
                sustained_activity_4 = bin_activity[95:143].sum()
                longest_bout = longest_activity_bout(pd.Series(active_flags))
                average_bout = average_activity_bout_length(pd.Series(active_flags))
                acute_activity = bin_activity[23:47].sum()
                # Class counts from row 23 to the end of the episode:
                # 1 is counted as walk, 2 as flight.
                walk = het_behavior_type[i][23:sustained_index+1].value_counts().get(1, 0)
                flight = het_behavior_type[i][23:sustained_index+1].value_counts().get(2, 0)
                flight_ratio = flight/(walk + flight)
            elif acute_active_bins == 0:
                acute_activity = bin_activity[23:47].sum()

        het_sustained_time.append(sustained_time)
        het_sustained_activity.append(sustained_activity)
        het_sustained_activity_4.append(sustained_activity_4)
        het_longest_bout.append(longest_bout)
        het_average_bout.append(average_bout)
        het_acute_activity.append(acute_activity)
        het_walk.append(walk)
        het_flight.append(flight)
        het_flight_ratio.append(flight_ratio)
    het_total_sustained_activity[ZT] = pd.Series(het_sustained_activity)
    het_total_sustained_activity_4[ZT] = pd.Series(het_sustained_activity_4)
    het_total_sustained_time[ZT] = pd.Series(het_sustained_time)
    het_total_longest_bout[ZT] = pd.Series(het_longest_bout)
    het_total_average_bout[ZT] = pd.Series(het_average_bout)
    het_total_acute_activity[ZT] = pd.Series(het_acute_activity)
    het_total_walk[ZT] = pd.Series(het_walk)
    het_total_flight[ZT] = pd.Series(het_flight)
    het_total_flight_ratio[ZT] = pd.Series(het_flight_ratio)

# Per-ZT persistence metrics for the HOM tracks.
#
# A track contributes the full set of metrics only when rows 0-22 of its
# binarised behaviour are all zero and rows 23-46 contain activity.  A track
# that is silent in both row ranges contributes only its acute activity.
# Every other track is recorded as None throughout.
for ZT, cols in HOM_ZT_timepoints.items():
    hom_behavior_data = hom_behaviors.iloc[:, cols]
    hom_activity_data = hom_activities.iloc[:, cols]
    hom_behavior_type = hom_behaviors_original.iloc[:, cols]
    (
        hom_walk,
        hom_flight,
        hom_sustained_activity,
        hom_sustained_activity_4,
        hom_sustained_time,
        hom_longest_bout,
        hom_acute_activity,
        hom_average_bout,
        hom_flight_ratio,
    ) = ([] for _ in range(9))
    for i in hom_behavior_data:
        active_flags = hom_behavior_data[i]
        bin_activity = hom_activity_data[i]
        sustained_index = calculate_end_index(pd.Series(active_flags))
        acute_active_bins = active_flags[23:47].sum()
        pre_active_bins = active_flags[0:23].sum()

        # Start from "excluded"; the branches below overwrite what applies.
        sustained_time = sustained_activity = sustained_activity_4 = None
        longest_bout = average_bout = acute_activity = None
        walk = flight = flight_ratio = None

        if pre_active_bins == 0:
            if acute_active_bins > 0:
                sustained_time = calculate_total_activity(pd.Series(active_flags))
                # Activity from row 47 up to the end of the episode.
                sustained_activity = bin_activity[47:sustained_index+1].sum()
                sustained_activity_4 = bin_activity[95:143].sum()
                longest_bout = longest_activity_bout(pd.Series(active_flags))
                average_bout = average_activity_bout_length(pd.Series(active_flags))
                acute_activity = bin_activity[23:47].sum()
                # Class counts from row 23 to the end of the episode:
                # 1 is counted as walk, 2 as flight.
                walk = hom_behavior_type[i][23:sustained_index+1].value_counts().get(1, 0)
                flight = hom_behavior_type[i][23:sustained_index+1].value_counts().get(2, 0)
                flight_ratio = flight/(walk + flight)
            elif acute_active_bins == 0:
                acute_activity = bin_activity[23:47].sum()

        hom_sustained_time.append(sustained_time)
        hom_sustained_activity.append(sustained_activity)
        hom_sustained_activity_4.append(sustained_activity_4)
        hom_longest_bout.append(longest_bout)
        hom_average_bout.append(average_bout)
        hom_acute_activity.append(acute_activity)
        hom_walk.append(walk)
        hom_flight.append(flight)
        hom_flight_ratio.append(flight_ratio)
    hom_total_sustained_activity[ZT] = pd.Series(hom_sustained_activity)
    hom_total_sustained_activity_4[ZT] = pd.Series(hom_sustained_activity_4)
    hom_total_sustained_time[ZT] = pd.Series(hom_sustained_time)
    hom_total_longest_bout[ZT] = pd.Series(hom_longest_bout)
    hom_total_average_bout[ZT] = pd.Series(hom_average_bout)
    hom_total_acute_activity[ZT] = pd.Series(hom_acute_activity)
    hom_total_walk[ZT] = pd.Series(hom_walk)
    hom_total_flight[ZT] = pd.Series(hom_flight)
    hom_total_flight_ratio[ZT] = pd.Series(hom_flight_ratio)

import os

# DataFrame.to_excel does not create missing folders, so make sure the output
# folder exists before the first write.
os.makedirs('persistence_analysis', exist_ok=True)

wt_total_sustained_activity_2_6_output_file_path = 'persistence_analysis/wt_ld_total_sustained_activity_2_6.xlsx'
wt_total_sustained_activity_6_10_output_file_path = 'persistence_analysis/wt_ld_total_sustained_activity_6_10.xlsx'
wt_total_sustained_time_output_file_path = 'persistence_analysis/wt_ld_total_sustained_time.xlsx'
wt_total_longest_bout_output_file_path = 'persistence_analysis/wt_ld_total_longest_bout.xlsx'
wt_total_average_bout_output_file_path = 'persistence_analysis/wt_ld_total_average_bout.xlsx'
wt_total_acute_activity_output_file_path = 'persistence_analysis/wt_ld_total_acute_activity.xlsx'
wt_total_walk_output_file_path = 'persistence_analysis/wt_ld_total_walk.xlsx'
wt_total_flight_output_file_path = 'persistence_analysis/wt_ld_total_flight.xlsx'
wt_total_flight_ratio_output_file_path = 'persistence_analysis/wt_ld_total_flight_ratio.xlsx'
het_total_sustained_activity_output_file_path = 'persistence_analysis/het_ld_total_sustained_activity.xlsx'
het_total_sustained_activity_4_output_file_path = 'persistence_analysis/het_ld_total_sustained_activity_last_4_min.xlsx'
het_total_sustained_time_output_file_path = 'persistence_analysis/het_ld_total_sustained_time.xlsx'
het_total_longest_bout_output_file_path = 'persistence_analysis/het_ld_total_longest_bout.xlsx'
het_total_average_bout_output_file_path = 'persistence_analysis/het_ld_total_average_bout.xlsx'
het_total_acute_activity_output_file_path = 'persistence_analysis/het_ld_total_acute_activity.xlsx'
het_total_walk_output_file_path = 'persistence_analysis/het_ld_total_walk.xlsx'
het_total_flight_output_file_path = 'persistence_analysis/het_ld_total_flight.xlsx'
het_total_flight_ratio_output_file_path = 'persistence_analysis/het_ld_total_flight_ratio.xlsx'
hom_total_sustained_activity_output_file_path = 'persistence_analysis/hom_ld_total_sustained_activity.xlsx'
hom_total_sustained_activity_4_output_file_path = 'persistence_analysis/hom_ld_total_sustained_activity_last_4_min.xlsx'
hom_total_sustained_time_output_file_path = 'persistence_analysis/hom_ld_total_sustained_time.xlsx'
hom_total_longest_bout_output_file_path = 'persistence_analysis/hom_ld_total_longest_bout.xlsx'
hom_total_average_bout_output_file_path = 'persistence_analysis/hom_ld_total_average_bout.xlsx'
hom_total_acute_activity_output_file_path = 'persistence_analysis/hom_ld_total_acute_activity.xlsx'
hom_total_walk_output_file_path = 'persistence_analysis/hom_ld_total_walk.xlsx'
hom_total_flight_output_file_path = 'persistence_analysis/hom_ld_total_flight.xlsx'
hom_total_flight_ratio_output_file_path = 'persistence_analysis/hom_ld_total_flight_ratio.xlsx'

# Write every result table to its workbook, without the row index.
for result_table, output_path in (
    (wt_total_sustained_activity_2_6, wt_total_sustained_activity_2_6_output_file_path),
    (wt_total_sustained_activity_6_10, wt_total_sustained_activity_6_10_output_file_path),
    (wt_total_sustained_time, wt_total_sustained_time_output_file_path),
    (wt_total_longest_bout, wt_total_longest_bout_output_file_path),
    (wt_total_average_bout, wt_total_average_bout_output_file_path),
    (wt_total_acute_activity, wt_total_acute_activity_output_file_path),
    (wt_total_walk, wt_total_walk_output_file_path),
    (wt_total_flight, wt_total_flight_output_file_path),
    (wt_total_flight_ratio, wt_total_flight_ratio_output_file_path),
    (het_total_sustained_activity, het_total_sustained_activity_output_file_path),
    (het_total_sustained_activity_4, het_total_sustained_activity_4_output_file_path),
    (het_total_sustained_time, het_total_sustained_time_output_file_path),
    (het_total_longest_bout, het_total_longest_bout_output_file_path),
    (het_total_average_bout, het_total_average_bout_output_file_path),
    (het_total_acute_activity, het_total_acute_activity_output_file_path),
    (het_total_walk, het_total_walk_output_file_path),
    (het_total_flight, het_total_flight_output_file_path),
    (het_total_flight_ratio, het_total_flight_ratio_output_file_path),
    (hom_total_sustained_activity, hom_total_sustained_activity_output_file_path),
    (hom_total_sustained_activity_4, hom_total_sustained_activity_4_output_file_path),
    (hom_total_sustained_time, hom_total_sustained_time_output_file_path),
    (hom_total_longest_bout, hom_total_longest_bout_output_file_path),
    (hom_total_average_bout, hom_total_average_bout_output_file_path),
    (hom_total_acute_activity, hom_total_acute_activity_output_file_path),
    (hom_total_walk, hom_total_walk_output_file_path),
    (hom_total_flight, hom_total_flight_output_file_path),
    (hom_total_flight_ratio, hom_total_flight_ratio_output_file_path),
):
    result_table.to_excel(output_path, index=False)


In [None]:
import pandas as pd
import h5py
import numpy as np
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

import os

# Summed activity of the WT tracks over fixed row windows, one output column
# per ZT group and one workbook per window.
behavior_input = '/Users/donglinhan/Desktop/SLEAP/WT_LDfull_all_collapsed_distances_300.xlsx'
behaviors = pd.read_excel(behavior_input)
# Column positions (for DataFrame.iloc) of each ZT group.
ZT_timepoints = {
    '0': range(0, 70),
    '3': range(70, 150),
    '6': range(150, 220),
    '9': range(220, 300),
    '12': range(300, 380),
    '15': range(380, 460),
    '18': range(460, 530),
    '21': range(530, 600),
}

total_activity = pd.DataFrame(index = range(100))
pre_CO2_activity = pd.DataFrame(index = range(100))
post_CO2_activity = pd.DataFrame(index = range(100))
post_CO2_first2_activity = pd.DataFrame(index = range(100))
post_CO2_last8_activity = pd.DataFrame(index = range(100))
post_CO2_last6_activity = pd.DataFrame(index = range(100))
post_CO2_last4_activity = pd.DataFrame(index = range(100))
post_CO2_last2_activity = pd.DataFrame(index = range(100))

# Each metric is the sum of a row window divided by a fixed number.
# NOTE(review): the divisors presumably are the window lengths in minutes --
# confirm.
# NOTE(review): these windows end at row 144 (exclusive) while the persistence
# analysis cell ends its windows at row 143; with 24 rows per two-minute
# window (rows 23-46) the ":144" slices hold one extra row.  Left unchanged
# so results stay comparable with the other genotype cells -- confirm which
# end is intended.
for ZT, cols in ZT_timepoints.items():
    ZT_data = behaviors.iloc[:, cols]
    total_sum = [ZT_data[i][0:144].sum()/12 for i in ZT_data]
    pre_CO2_sum = [ZT_data[i][0:23].sum()/2 for i in ZT_data]
    post_CO2_sum = [ZT_data[i][23:144].sum()/10 for i in ZT_data]
    post_CO2_first2_sum = [ZT_data[i][23:47].sum()/2 for i in ZT_data]
    post_CO2_last8_sum = [ZT_data[i][47:144].sum()/8 for i in ZT_data]
    post_CO2_last6_sum = [ZT_data[i][71:144].sum()/6 for i in ZT_data]
    post_CO2_last4_sum = [ZT_data[i][95:144].sum()/4 for i in ZT_data]
    post_CO2_last2_sum = [ZT_data[i][119:144].sum()/2 for i in ZT_data]
    total_activity[ZT] = pd.Series(total_sum)
    pre_CO2_activity[ZT] = pd.Series(pre_CO2_sum)
    post_CO2_activity[ZT] = pd.Series(post_CO2_sum)
    post_CO2_first2_activity[ZT] = pd.Series(post_CO2_first2_sum)
    post_CO2_last8_activity[ZT] = pd.Series(post_CO2_last8_sum)
    post_CO2_last6_activity[ZT] = pd.Series(post_CO2_last6_sum)
    post_CO2_last4_activity[ZT] = pd.Series(post_CO2_last4_sum)
    post_CO2_last2_activity[ZT] = pd.Series(post_CO2_last2_sum)
total_activity_df = pd.DataFrame(total_activity)
pre_CO2_activity_df = pd.DataFrame(pre_CO2_activity)
post_CO2_activity_df = pd.DataFrame(post_CO2_activity)
post_CO2_first2_activity_df = pd.DataFrame(post_CO2_first2_activity)
post_CO2_last8_activity_df = pd.DataFrame(post_CO2_last8_activity)
post_CO2_last6_activity_df = pd.DataFrame(post_CO2_last6_activity)
post_CO2_last4_activity_df = pd.DataFrame(post_CO2_last4_activity)
post_CO2_last2_activity_df = pd.DataFrame(post_CO2_last2_activity)

total_activity_output_file_path = 'wt_activities/total_activity.xlsx'
pre_CO2_activity_output_file_path = 'wt_activities/pre_CO2_activity.xlsx'
post_CO2_activity_output_file_path = 'wt_activities/post_CO2_activity.xlsx'
post_CO2_first2_activity_output_file_path = 'wt_activities/post_CO2_first2_activity.xlsx'
post_CO2_last8_activity_output_file_path = 'wt_activities/post_CO2_last8_activity.xlsx'
post_CO2_last6_activity_output_file_path = 'wt_activities/post_CO2_last6_activity.xlsx'
post_CO2_last4_activity_output_file_path = 'wt_activities/post_CO2_last4_activity.xlsx'
post_CO2_last2_activity_output_file_path = 'wt_activities/post_CO2_last2_activity.xlsx'

# DataFrame.to_excel does not create missing folders, so make sure the output
# folder exists before the first write.
os.makedirs('wt_activities', exist_ok=True)

for result_table, output_path in (
    (total_activity_df, total_activity_output_file_path),
    (pre_CO2_activity_df, pre_CO2_activity_output_file_path),
    (post_CO2_activity_df, post_CO2_activity_output_file_path),
    (post_CO2_first2_activity_df, post_CO2_first2_activity_output_file_path),
    (post_CO2_last8_activity_df, post_CO2_last8_activity_output_file_path),
    (post_CO2_last6_activity_df, post_CO2_last6_activity_output_file_path),
    (post_CO2_last4_activity_df, post_CO2_last4_activity_output_file_path),
    (post_CO2_last2_activity_df, post_CO2_last2_activity_output_file_path),
):
    result_table.to_excel(output_path, index=False)



In [None]:
import pandas as pd
import h5py
import numpy as np
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

# HET / LD cell: for every column of the collapsed-distance table, sum the
# values inside fixed row windows, scale each sum by a fixed divisor, group
# the results by ZT label and save one workbook per window.
behavior_input = '/Users/donglinhan/Desktop/SLEAP/HET_LD_all_collapsed_distances_300.xlsx'
behaviors = pd.read_excel(behavior_input)

# Column positions (consumed by .iloc below) for each ZT label: eight
# consecutive runs of 60 columns, labelled '0', '3', ..., '21'.
ZT_timepoints = {
    str(zt): range(60 * group, 60 * (group + 1))
    for group, zt in enumerate(range(0, 24, 3))
}

# One {ZT label: [one value per column]} mapping per row window.
total_activity = {}
pre_CO2_activity = {}
post_CO2_activity = {}
post_CO2_first2_activity = {}
post_CO2_last8_activity = {}
post_CO2_last6_activity = {}
post_CO2_last4_activity = {}
post_CO2_last2_activity = {}

# Each entry is "sum of rows start:stop of one column, divided by a constant".
# NOTE(review): the divisors look like window lengths in minutes (144 rows / 12
# = 12 rows per unit) -- confirm. Also note that rows 0:23 hold 23 entries
# while rows 23:47 hold 24; confirm the intended pre/post boundary.
for ZT, cols in ZT_timepoints.items():
    ZT_data = behaviors.iloc[:, cols]
    total_activity[ZT] = [ZT_data[col][0:144].sum() / 12 for col in ZT_data]
    pre_CO2_activity[ZT] = [ZT_data[col][0:23].sum() / 2 for col in ZT_data]
    post_CO2_activity[ZT] = [ZT_data[col][23:144].sum() / 10 for col in ZT_data]
    post_CO2_first2_activity[ZT] = [ZT_data[col][23:47].sum() / 2 for col in ZT_data]
    post_CO2_last8_activity[ZT] = [ZT_data[col][47:144].sum() / 8 for col in ZT_data]
    post_CO2_last6_activity[ZT] = [ZT_data[col][71:144].sum() / 6 for col in ZT_data]
    post_CO2_last4_activity[ZT] = [ZT_data[col][95:144].sum() / 4 for col in ZT_data]
    post_CO2_last2_activity[ZT] = [ZT_data[col][119:144].sum() / 2 for col in ZT_data]

# Turn every mapping into a table (one column per ZT label) and write it out.
total_activity_df = pd.DataFrame(total_activity)
total_activity_output_file_path = 'het_activities/total_activity.xlsx'
total_activity_df.to_excel(total_activity_output_file_path, index=False)

pre_CO2_activity_df = pd.DataFrame(pre_CO2_activity)
pre_CO2_activity_output_file_path = 'het_activities/pre_CO2_activity.xlsx'
pre_CO2_activity_df.to_excel(pre_CO2_activity_output_file_path, index=False)

post_CO2_activity_df = pd.DataFrame(post_CO2_activity)
post_CO2_activity_output_file_path = 'het_activities/post_CO2_activity.xlsx'
post_CO2_activity_df.to_excel(post_CO2_activity_output_file_path, index=False)

post_CO2_first2_activity_df = pd.DataFrame(post_CO2_first2_activity)
post_CO2_first2_activity_output_file_path = 'het_activities/post_CO2_first2_activity.xlsx'
post_CO2_first2_activity_df.to_excel(post_CO2_first2_activity_output_file_path, index=False)

post_CO2_last8_activity_df = pd.DataFrame(post_CO2_last8_activity)
post_CO2_last8_activity_output_file_path = 'het_activities/post_CO2_last8_activity.xlsx'
post_CO2_last8_activity_df.to_excel(post_CO2_last8_activity_output_file_path, index=False)

post_CO2_last6_activity_df = pd.DataFrame(post_CO2_last6_activity)
post_CO2_last6_activity_output_file_path = 'het_activities/post_CO2_last6_activity.xlsx'
post_CO2_last6_activity_df.to_excel(post_CO2_last6_activity_output_file_path, index=False)

post_CO2_last4_activity_df = pd.DataFrame(post_CO2_last4_activity)
post_CO2_last4_activity_output_file_path = 'het_activities/post_CO2_last4_activity.xlsx'
post_CO2_last4_activity_df.to_excel(post_CO2_last4_activity_output_file_path, index=False)

post_CO2_last2_activity_df = pd.DataFrame(post_CO2_last2_activity)
post_CO2_last2_activity_output_file_path = 'het_activities/post_CO2_last2_activity.xlsx'
post_CO2_last2_activity_df.to_excel(post_CO2_last2_activity_output_file_path, index=False)

In [None]:
import pandas as pd
import h5py
import numpy as np
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

# PDF / LD cell: same per-window summary as the other LD cells, but the ZT
# groups here have unequal numbers of columns.
behavior_input = '/Users/donglinhan/Desktop/SLEAP/PDF_LDfull_all_collapsed_distances_300.xlsx'
behaviors = pd.read_excel(behavior_input)

# Boundaries of the eight consecutive column groups (58 or 57 columns each);
# group k spans edges[k]..edges[k + 1] and is labelled str(3 * k).
zt_column_edges = [0, 58, 116, 174, 232, 290, 347, 404, 462]
ZT_timepoints = {
    str(3 * group): range(first, last)
    for group, (first, last) in enumerate(zip(zt_column_edges, zt_column_edges[1:]))
}

# Tables pre-indexed 0..99: each ZT column is assigned as a Series, so index
# alignment pads groups shorter than 100 entries with NaN.
total_activity = pd.DataFrame(index=range(100))
pre_CO2_activity = pd.DataFrame(index=range(100))
post_CO2_activity = pd.DataFrame(index=range(100))
post_CO2_first2_activity = pd.DataFrame(index=range(100))
post_CO2_last8_activity = pd.DataFrame(index=range(100))
post_CO2_last6_activity = pd.DataFrame(index=range(100))
post_CO2_last4_activity = pd.DataFrame(index=range(100))
post_CO2_last2_activity = pd.DataFrame(index=range(100))

# Each entry is "sum of rows start:stop of one column, divided by a constant".
# NOTE(review): the divisors look like window lengths in minutes -- confirm.
# Rows 0:23 hold 23 entries while rows 23:47 hold 24; confirm the boundary.
for ZT, cols in ZT_timepoints.items():
    ZT_data = behaviors.iloc[:, cols]
    total_activity[ZT] = pd.Series([ZT_data[col][0:144].sum() / 12 for col in ZT_data])
    pre_CO2_activity[ZT] = pd.Series([ZT_data[col][0:23].sum() / 2 for col in ZT_data])
    post_CO2_activity[ZT] = pd.Series([ZT_data[col][23:144].sum() / 10 for col in ZT_data])
    post_CO2_first2_activity[ZT] = pd.Series([ZT_data[col][23:47].sum() / 2 for col in ZT_data])
    post_CO2_last8_activity[ZT] = pd.Series([ZT_data[col][47:144].sum() / 8 for col in ZT_data])
    post_CO2_last6_activity[ZT] = pd.Series([ZT_data[col][71:144].sum() / 6 for col in ZT_data])
    post_CO2_last4_activity[ZT] = pd.Series([ZT_data[col][95:144].sum() / 4 for col in ZT_data])
    post_CO2_last2_activity[ZT] = pd.Series([ZT_data[col][119:144].sum() / 2 for col in ZT_data])

# Re-wrap every table and write it to its own workbook.
total_activity_df = pd.DataFrame(total_activity)
total_activity_output_file_path = 'pdf_activities/total_activity.xlsx'
total_activity_df.to_excel(total_activity_output_file_path, index=False)

pre_CO2_activity_df = pd.DataFrame(pre_CO2_activity)
pre_CO2_activity_output_file_path = 'pdf_activities/pre_CO2_activity.xlsx'
pre_CO2_activity_df.to_excel(pre_CO2_activity_output_file_path, index=False)

post_CO2_activity_df = pd.DataFrame(post_CO2_activity)
post_CO2_activity_output_file_path = 'pdf_activities/post_CO2_activity.xlsx'
post_CO2_activity_df.to_excel(post_CO2_activity_output_file_path, index=False)

post_CO2_first2_activity_df = pd.DataFrame(post_CO2_first2_activity)
post_CO2_first2_activity_output_file_path = 'pdf_activities/post_CO2_first2_activity.xlsx'
post_CO2_first2_activity_df.to_excel(post_CO2_first2_activity_output_file_path, index=False)

post_CO2_last8_activity_df = pd.DataFrame(post_CO2_last8_activity)
post_CO2_last8_activity_output_file_path = 'pdf_activities/post_CO2_last8_activity.xlsx'
post_CO2_last8_activity_df.to_excel(post_CO2_last8_activity_output_file_path, index=False)

post_CO2_last6_activity_df = pd.DataFrame(post_CO2_last6_activity)
post_CO2_last6_activity_output_file_path = 'pdf_activities/post_CO2_last6_activity.xlsx'
post_CO2_last6_activity_df.to_excel(post_CO2_last6_activity_output_file_path, index=False)

post_CO2_last4_activity_df = pd.DataFrame(post_CO2_last4_activity)
post_CO2_last4_activity_output_file_path = 'pdf_activities/post_CO2_last4_activity.xlsx'
post_CO2_last4_activity_df.to_excel(post_CO2_last4_activity_output_file_path, index=False)

post_CO2_last2_activity_df = pd.DataFrame(post_CO2_last2_activity)
post_CO2_last2_activity_output_file_path = 'pdf_activities/post_CO2_last2_activity.xlsx'
post_CO2_last2_activity_df.to_excel(post_CO2_last2_activity_output_file_path, index=False)

### All DD activity analysis

In [106]:
#DD activity
import pandas as pd
import h5py
import numpy as np
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

# Workbook locations for the DD analysis: one behavior table and one
# collapsed-distance ("activity") table per genotype.
wt_behavior_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_all_behavior_modified_again.xlsx'
het_behavior_input = '/Users/donglinhan/Desktop/SLEAP/Het_DD2_all_behavior_modified_again.xlsx'
hom_behavior_input = '/Users/donglinhan/Desktop/SLEAP/PDF_DD2_all_behavior_modified_again.xlsx'
wt_activity_input = '/Users/donglinhan/Desktop/SLEAP/WT_DD2_collapsed_distances_300.xlsx'
het_activity_input = '/Users/donglinhan/Desktop/SLEAP/HET_DD2_all_collapsed_distances_300.xlsx'
hom_activity_input = '/Users/donglinhan/Desktop/SLEAP/PDF_DD2_all_collapsed_distances_300.xlsx'


def _as_active_flag(x):
    """Return 0 for a cell equal to 0 and 1 for anything else.

    Because NaN != 0 is true, empty (NaN) cells also map to 1.
    """
    return 1 if x != 0 else 0


# Behavior tables: keep the raw version and derive a 0/1 version cell by cell.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour of
# DataFrame.map; kept here so older pandas versions keep working.
wt_behaviors_original = pd.read_excel(wt_behavior_input)
het_behaviors_original = pd.read_excel(het_behavior_input)
hom_behaviors_original = pd.read_excel(hom_behavior_input)
wt_behaviors = wt_behaviors_original.applymap(_as_active_flag)
het_behaviors = het_behaviors_original.applymap(_as_active_flag)
hom_behaviors = hom_behaviors_original.applymap(_as_active_flag)

# Activity tables are used as read.
wt_activities = pd.read_excel(wt_activity_input)
het_activities = pd.read_excel(het_activity_input)
hom_activities = pd.read_excel(hom_activity_input)

def calculate_end_index(activity_series):
    """Return the index at which the first run of activity ends.

    The run ends at the first non-zero entry that is followed by 48
    consecutive zero entries. If no such entry exists, the index of the last
    non-zero entry is returned. A sequence with no non-zero entry yields 0.

    ``activity_series`` must support ``len``, iteration, integer indexing and
    slicing (a list or a pandas Series with a default integer index).
    """
    first_active = None
    for position, value in enumerate(activity_series):
        if value != 0:
            first_active = position
            break
    if first_active is None:
        return 0

    # Only positions with a full 48-entry look-ahead window are candidates.
    for position in range(len(activity_series) - 48):
        if activity_series[position] != 0:
            look_ahead = activity_series[position + 1:position + 49]
            if all(value == 0 for value in look_ahead):
                return position

    # No 48-entry quiet stretch found: fall back to the last non-zero entry.
    for position in range(len(activity_series) - 1, -1, -1):
        if activity_series[position] != 0:
            return position
    return first_active

def calculate_total_activity_5(activity_series):
    """Sum the entries from the first non-zero value to the end of the first run.

    The run ends at the first non-zero entry followed by 48 consecutive
    zeros, or at the last non-zero entry when no such quiet stretch exists.
    Both end points are included in the sum. Returns 0 when every entry is 0.
    """
    first_active = None
    for position, value in enumerate(activity_series):
        if value != 0:
            first_active = position
            break
    if first_active is None:
        return 0

    last_active = None
    for position in range(len(activity_series) - 48):
        if activity_series[position] != 0:
            look_ahead = activity_series[position + 1:position + 49]
            if all(value == 0 for value in look_ahead):
                last_active = position
                break

    if last_active is None:
        # No 48-entry quiet stretch: use the last non-zero entry instead.
        last_active = first_active
        for position in range(len(activity_series) - 1, -1, -1):
            if activity_series[position] != 0:
                last_active = position
                break

    return sum(activity_series[first_active:last_active + 1])

def longest_activity_bout(activity_series):
    """Return the length of the longest run of consecutive entries equal to 1.

    Any entry that is not equal to 1 ends the current run. Returns 0 when the
    sequence is empty or contains no 1.
    """
    best = 0
    run = 0
    for value in activity_series:
        run = run + 1 if value == 1 else 0
        best = max(best, run)
    return best

def average_activity_bout_length(activity_series):
    """Return the mean length of the runs of consecutive entries equal to 1.

    Any entry that is not equal to 1 ends the current run. Returns 0 when the
    sequence contains no run at all.
    """
    bouts = []
    run = 0
    for value in activity_series:
        if value == 1:
            run += 1
            continue
        if run > 0:
            bouts.append(run)
        run = 0
    # A run that reaches the end of the sequence still counts.
    if run > 0:
        bouts.append(run)

    if not bouts:
        return 0
    return sum(bouts) / len(bouts)

# Column positions (consumed by .iloc) for each CT label, per genotype. Every
# table is split into eight consecutive, equally sized column groups labelled
# '0', '3', ..., '21'; only the group width differs between genotypes.

# WT: 30 columns per group (columns 0..239).
WT_CT_timepoints = {
    str(ct): range(30 * group, 30 * (group + 1))
    for group, ct in enumerate(range(0, 24, 3))
}

# HET: 20 columns per group (columns 0..159).
HET_CT_timepoints = {
    str(ct): range(20 * group, 20 * (group + 1))
    for group, ct in enumerate(range(0, 24, 3))
}

# HOM: 29 columns per group (columns 0..231).
HOM_CT_timepoints = {
    str(ct): range(29 * group, 29 * (group + 1))
    for group, ct in enumerate(range(0, 24, 3))
}

# Result tables, five per genotype. Each starts with rows 0..99 and no
# columns; CT columns are added later as Series, so index alignment pads
# groups with fewer than 100 entries with NaN.
(
    wt_total_sustained_activity,
    wt_total_sustained_time,
    wt_total_longest_bout,
    wt_total_acute_activity,
    wt_total_average_bout,
) = (pd.DataFrame(index=range(100)) for _ in range(5))

(
    het_total_sustained_activity,
    het_total_sustained_time,
    het_total_longest_bout,
    het_total_acute_activity,
    het_total_average_bout,
) = (pd.DataFrame(index=range(100)) for _ in range(5))

(
    hom_total_sustained_activity,
    hom_total_sustained_time,
    hom_total_longest_bout,
    hom_total_acute_activity,
    hom_total_average_bout,
) = (pd.DataFrame(index=range(100)) for _ in range(5))

def _summarise_ct_columns(behavior_data, activity_data):
    """Compute the five per-column metrics for one CT column group.

    ``behavior_data`` holds the 0/1 behavior flags and ``activity_data`` the
    matching distance values; both are looked up by the same column labels.

    For each column, three cases are distinguished using the flag rows 0:23
    ("before") and 23:47 ("after"):

    * no flag before, at least one after: all five metrics are computed;
    * no flag before and none after: only ``acute_activity`` is computed;
    * any flag before: every metric is None.

    Returns a dict mapping metric name to a list with one entry per column.
    """
    summary = {
        'sustained_activity': [],
        'sustained_time': [],
        'longest_bout': [],
        'average_bout': [],
        'acute_activity': [],
    }
    for label in behavior_data:
        flags = behavior_data[label]
        quiet_before = flags[0:23].sum() == 0
        flags_after = flags[23:47].sum()

        sustained_time = None
        sustained_activity = None
        longest_bout = None
        average_bout = None
        acute_activity = None

        if flags_after > 0 and quiet_before:
            distances = activity_data[label]
            # Row index where the first run of flags ends (whole column).
            sustained_time = calculate_end_index(flags)
            sustained_activity = distances[23:sustained_time + 1].sum()
            longest_bout = longest_activity_bout(flags)
            average_bout = average_activity_bout_length(flags)
            acute_activity = distances[23:47].sum()
        elif flags_after == 0 and quiet_before:
            acute_activity = activity_data[label][23:47].sum()

        summary['sustained_time'].append(sustained_time)
        summary['sustained_activity'].append(sustained_activity)
        summary['longest_bout'].append(longest_bout)
        summary['average_bout'].append(average_bout)
        summary['acute_activity'].append(acute_activity)
    return summary


def _fill_genotype_tables(behaviors, activities, timepoints, tables):
    """Add one column per CT label to each result table of a genotype.

    ``timepoints`` maps a CT label to the column positions of that group and
    ``tables`` maps a metric name to the DataFrame that receives it. The
    tables are modified in place; values are assigned as Series so they are
    aligned on the tables' existing row index.
    """
    for CT, cols in timepoints.items():
        summary = _summarise_ct_columns(behaviors.iloc[:, cols], activities.iloc[:, cols])
        for metric, table in tables.items():
            table[CT] = pd.Series(summary[metric])


# The same summary is produced for the three genotypes; only the inputs, the
# column grouping and the destination tables differ.
_fill_genotype_tables(wt_behaviors, wt_activities, WT_CT_timepoints, {
    'sustained_activity': wt_total_sustained_activity,
    'sustained_time': wt_total_sustained_time,
    'longest_bout': wt_total_longest_bout,
    'average_bout': wt_total_average_bout,
    'acute_activity': wt_total_acute_activity,
})

_fill_genotype_tables(het_behaviors, het_activities, HET_CT_timepoints, {
    'sustained_activity': het_total_sustained_activity,
    'sustained_time': het_total_sustained_time,
    'longest_bout': het_total_longest_bout,
    'average_bout': het_total_average_bout,
    'acute_activity': het_total_acute_activity,
})

_fill_genotype_tables(hom_behaviors, hom_activities, HOM_CT_timepoints, {
    'sustained_activity': hom_total_sustained_activity,
    'sustained_time': hom_total_sustained_time,
    'longest_bout': hom_total_longest_bout,
    'average_bout': hom_total_average_bout,
    'acute_activity': hom_total_acute_activity,
})

# Write every DD result table to its own workbook (row index omitted). Each
# destination is defined right next to the write that uses it.

# --- WT ---
wt_total_sustained_activity_output_file_path = 'persistence_analysis/wt_dd_total_sustained_activity.xlsx'
wt_total_sustained_activity.to_excel(wt_total_sustained_activity_output_file_path, index=False)
wt_total_sustained_time_output_file_path = 'persistence_analysis/wt_dd_total_sustained_time.xlsx'
wt_total_sustained_time.to_excel(wt_total_sustained_time_output_file_path, index=False)
wt_total_longest_bout_output_file_path = 'persistence_analysis/wt_dd_total_longest_bout.xlsx'
wt_total_longest_bout.to_excel(wt_total_longest_bout_output_file_path, index=False)
wt_total_average_bout_output_file_path = 'persistence_analysis/wt_dd_total_average_bout.xlsx'
wt_total_average_bout.to_excel(wt_total_average_bout_output_file_path, index=False)
wt_total_acute_activity_output_file_path = 'persistence_analysis/wt_dd_total_acute_activity.xlsx'
wt_total_acute_activity.to_excel(wt_total_acute_activity_output_file_path, index=False)

# --- HET ---
het_total_sustained_activity_output_file_path = 'persistence_analysis/het_dd_total_sustained_activity.xlsx'
het_total_sustained_activity.to_excel(het_total_sustained_activity_output_file_path, index=False)
het_total_sustained_time_output_file_path = 'persistence_analysis/het_dd_total_sustained_time.xlsx'
het_total_sustained_time.to_excel(het_total_sustained_time_output_file_path, index=False)
het_total_longest_bout_output_file_path = 'persistence_analysis/het_dd_total_longest_bout.xlsx'
het_total_longest_bout.to_excel(het_total_longest_bout_output_file_path, index=False)
het_total_average_bout_output_file_path = 'persistence_analysis/het_dd_total_average_bout.xlsx'
het_total_average_bout.to_excel(het_total_average_bout_output_file_path, index=False)
het_total_acute_activity_output_file_path = 'persistence_analysis/het_dd_total_acute_activity.xlsx'
het_total_acute_activity.to_excel(het_total_acute_activity_output_file_path, index=False)

# --- HOM ---
hom_total_sustained_activity_output_file_path = 'persistence_analysis/hom_dd_total_sustained_activity.xlsx'
hom_total_sustained_activity.to_excel(hom_total_sustained_activity_output_file_path, index=False)
hom_total_sustained_time_output_file_path = 'persistence_analysis/hom_dd_total_sustained_time.xlsx'
hom_total_sustained_time.to_excel(hom_total_sustained_time_output_file_path, index=False)
hom_total_longest_bout_output_file_path = 'persistence_analysis/hom_dd_total_longest_bout.xlsx'
hom_total_longest_bout.to_excel(hom_total_longest_bout_output_file_path, index=False)
hom_total_average_bout_output_file_path = 'persistence_analysis/hom_dd_total_average_bout.xlsx'
hom_total_average_bout.to_excel(hom_total_average_bout_output_file_path, index=False)
hom_total_acute_activity_output_file_path = 'persistence_analysis/hom_dd_total_acute_activity.xlsx'
hom_total_acute_activity.to_excel(hom_total_acute_activity_output_file_path, index=False)

### Calculate reaction time

In [None]:
import os
import re
import h5py
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from natsort import natsorted
import pandas as pd

def getfile(file_path):
    """Load one tracking file and return the gap-filled coordinates of node 0.

    Args:
        file_path: Path of an HDF5 file that contains a "tracks" dataset.

    Returns:
        The transposed "tracks" array after ``fill_missing``, indexed at
        position ``BODY_INDEX`` along its second axis. The source array is
        four-dimensional, so the result is three-dimensional; callers in this
        file treat it as (frame, coordinate, track).
    """
    # Position of the node to keep along axis 1 (presumably the body
    # keypoint, going by the constant's name -- confirm against the skeleton).
    BODY_INDEX = 0
    # Read the whole dataset into memory so the file can be closed before the
    # interpolation runs.
    with h5py.File(file_path, "r") as f:
        locations = f["tracks"][:].T
    locations = fill_missing(locations)
    return locations[:, BODY_INDEX, :, :]

def fill_missing(Y, kind="linear"):
    """Fill NaN gaps along the first axis of ``Y``, one series at a time.

    ``Y`` is flattened to two dimensions (first axis kept), and every
    resulting column is treated as an independent series: interior gaps are
    interpolated with ``interp1d`` using ``kind``, and NaNs that remain at
    either end are replaced by the nearest valid value. The result has the
    shape of the input.
    """
    original_shape = Y.shape
    flat = Y.reshape((original_shape[0], -1))

    for column in range(flat.shape[-1]):
        series = flat[:, column]

        # Interpolate between the valid samples; positions outside their
        # range come back as NaN.
        valid = np.flatnonzero(~np.isnan(series))
        interpolator = interp1d(valid, series[valid], kind=kind, fill_value=np.nan, bounds_error=False)
        gaps = np.flatnonzero(np.isnan(series))
        series[gaps] = interpolator(gaps)

        # np.interp clamps to the end values, which fills leading and
        # trailing NaNs with the nearest valid sample.
        still_missing = np.isnan(series)
        series[still_missing] = np.interp(
            np.flatnonzero(still_missing),
            np.flatnonzero(~still_missing),
            series[~still_missing],
        )

        flat[:, column] = series

    return flat.reshape(original_shape)


def individual_velocity(file_path, delay):
    """Store frame-to-frame and 300-frame-binned distances for each track of a file.

    For every track returned by ``getfile`` the Euclidean distance between
    consecutive frames is computed, ``int(delay * 60)`` zeros are prepended,
    and the result is stored in the module-level table
    ``all_individual_distances``. The same values summed over consecutive,
    non-overlapping windows of 300 samples (an incomplete last window is
    dropped) are stored in ``all_collapsed_individual_distances``. Both tables
    must exist before the call. Columns are named ``str(file_path)`` followed
    by the track number.

    Args:
        file_path: Tracking file to read.
        delay: Offset of this recording; multiplied by 60 to get the number
            of leading zero samples (presumably seconds at 60 frames per
            second -- confirm).

    NOTE(review): the per-frame table receives a Series (aligned on the
    table's existing index, so longer columns are cut and shorter ones padded)
    while the collapsed table receives a plain array (its length must match).
    Confirm this is intended when recordings differ in length.
    """
    body_loc = getfile(file_path)
    delay_frame = int(delay * 60)
    window_size = 300

    for track in range(body_loc.shape[2]):
        track_xy = body_loc[:, :, track]

        # Same formula as a per-frame loop, applied to whole columns at once.
        steps = np.diff(track_xy, axis=0)
        distances = np.sqrt(steps[:, 0] ** 2 + steps[:, 1] ** 2)

        column_name = str(file_path) + str(track)
        print(column_name)

        # Float zeros keep the column numeric even when there is no delay.
        corrected_distances_array = np.concatenate([np.zeros(max(delay_frame, 0)), distances])
        corrected_distances = pd.Series(corrected_distances_array)
        all_individual_distances[column_name] = corrected_distances

        sum_windows = len(corrected_distances_array) // window_size
        distances_collapsed = np.array([
            np.sum(corrected_distances_array[w * window_size:(w + 1) * window_size])
            for w in range(sum_windows)
        ])
        all_collapsed_individual_distances[column_name] = distances_collapsed


# Process every .h5 file of the WT-LD-full folder in natural sort order and
# save the per-frame distances of all tracks to one workbook.
folder_path = '/Users/donglinhan/Desktop/SLEAP/FinalH5/WT-LD-full'
h5_files = natsorted(os.listdir(folder_path))

# Filled in place by individual_velocity().
all_individual_distances = pd.DataFrame()
all_collapsed_individual_distances = pd.DataFrame()

for file_name in h5_files:
    if not file_name.endswith('.h5'):
        continue
    # The delay is encoded in the first six characters of the second
    # underscore-separated field of the file name.
    file_parts = file_name.split('_')
    first_six_chars = file_parts[1][:6]
    # Stripping the zero padding leaves an empty string when the field is all
    # zeros; treat that as a delay of 0 instead of failing in float('').
    delay = float(first_six_chars.lstrip("0") or "0")
    print(delay)
    file_path = os.path.join(folder_path, file_name)
    print(file_name)
    individual_velocity(file_path, delay)

all_individual_distances.to_excel('WT-LDfull_individual_distances.xlsx', index=False)

In [None]:
import os
import re
import h5py
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from natsort import natsorted
import pandas as pd

# Load the per-frame distances and smooth every column.
# (The path gets its own name so the builtin input() is not shadowed.)
raw_distances_path = '/Users/donglinhan/Desktop/SLEAP/WT-LDfull_individual_distances.xlsx'
raw_individual_distances = pd.read_excel(raw_distances_path)

# Width, in samples, of the pre/post comparison windows used further down.
window_size = 15

# Savitzky-Golay filter with window length 30 and polynomial order 3, applied
# column by column; the table is built in one step rather than grown a column
# at a time.
smoothed_individual_distances = pd.DataFrame({
    c: savgol_filter(raw_individual_distances[c], 30, 3)
    for c in raw_individual_distances.columns
})

def no_repeats(numbers):
    """Return the first element of every run of consecutive integers.

    A run continues while each element equals the previous element plus one,
    so ``[1, 2, 3, 7, 8, 10]`` becomes ``[1, 7, 10]``. An element that does
    not extend the run before it starts a new run.
    """
    return [
        value
        for position, value in enumerate(numbers)
        if position == 0 or value != numbers[position - 1] + 1
    ]

    
# Detect activity onsets in every smoothed column. A sample position counts
# as an onset when the summed movement in the window_size samples starting
# there is more than 3 times the sum of the window_size samples before it and
# also exceeds 100. Runs of consecutive positions are reduced to their first
# position by no_repeats(). Columns whose total movement is 500 or less get
# no onsets.
detected_starts = []
for d in smoothed_individual_distances.columns:
    print(d)
    trace = smoothed_individual_distances[d]
    activity_starts = []
    if trace.sum() > 500:
        for i in range(len(trace) - window_size*2):
            window_pre_movement = trace[i:i+window_size].sum()
            window_post_movement = trace[i+window_size:i+window_size+window_size].sum()
            if window_post_movement > 3 * window_pre_movement and window_post_movement > 100:
                activity_starts.append(i+window_size)
    detected_starts.append((d, no_repeats(activity_starts)))

# Columns are assigned as Series and therefore aligned on the table's index.
# Keep the 50 rows used so far as a minimum, but grow the table when a column
# has more onsets so that none are silently dropped.
row_count = max([50] + [len(starts) for _, starts in detected_starts])
all_activity_starts = pd.DataFrame(index=range(row_count))
for d, starts in detected_starts:
    all_activity_starts[d] = pd.Series(starts)

all_activity_starts.to_excel('WT-LDfull_all_activity_starts_15frames.xlsx', index=False)

In [None]:
import pandas as pd

# Derive one reaction time per column of the activity-start table.
# (The path gets its own name so the builtin input() is not shadowed.)
activity_starts_path = '/Users/donglinhan/Desktop/SLEAP/WT-LDfull_all_activity_starts_15frames.xlsx'
all_activity_starts = pd.read_excel(activity_starts_path)

# Rules, per column:
#   * no start at all                                  -> no reaction time
#   * first start strictly between frames 5400 and 7200 -> no reaction time
#   * otherwise take the first start at or after frame 7200 and convert
#     (start - 7200) / 60; values above 60 are discarded.
# NOTE(review): presumably frame 7200 is stimulus onset and 60 is the frame
# rate, making the result seconds after onset -- confirm.
reaction_rows = []
for i in all_activity_starts.columns:
    start_times = all_activity_starts[i].dropna()
    reaction_time = None
    if not start_times.empty and not (5400 < start_times.iloc[0] < 7200):
        print(start_times)
        filtered_start_times = start_times[start_times >= 7200]
        if not filtered_start_times.empty:
            reaction_time = (filtered_start_times.iloc[0] - 7200) * 1/60
            if reaction_time > 60:
                reaction_time = None
    reaction_rows.append({'Exp': i, 'Reaction Time': reaction_time})

# Build the table once from the collected rows instead of concatenating a
# one-row frame per iteration.
all_reaction_time = pd.DataFrame(reaction_rows)

print(all_reaction_time)

output_file = 'WT-LDfull_all_reaction_times_15frames.xlsx'
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    all_reaction_time.to_excel(writer, index=False)

print(f'Reaction times saved to {output_file}')