In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import loadmat
from scipy import stats

import math

%matplotlib inline

In [None]:
# Quick look at the exported .mat file: load it and list the top-level variable names it contains.
# The same file is loaded again at the start of the preprocessing cell below.
filepath = '/home/ds/DCL/Defensive_states/session_data_ceiling/OF-EPM-PreExp-CD1-CD2-Ext-ExtCont-ExtHC_Export_26-May-2021_18-50-37.mat'
data = loadmat(filepath)

data.keys()

In [None]:
# Load the .mat file that contains the data from all sessions
filepath = '/home/ds/DCL/Defensive_states/session_data_ceiling/OF-EPM-PreExp-CD1-CD2-Ext-ExtCont-ExtHC_Export_26-May-2021_18-50-37.mat'
data = loadmat(filepath)

# Every column of the exported matrices is one recording. Its header contains information about
# the mouse ('Animal_ID'), the date and the type of recording session, e.g.: '175_F4-31_190823_OF'
l_all_column_names = [s[0] for s in list(data['Headers'][0])]

# The data was recorded from the following sessions:
l_sessions = ['OF', 'EPM', 'PreExp', 'CD1', 'CD2', 'Ext', 'ExtCont', 'ExtHC']

# DataFrame column -> name of the top-level matrix in the .mat file.
# The order below defines the column order of the resulting DataFrame; later cells
# select measurement columns by position, so keep it stable.
top_level_signals = [('Times', 'Times'),                         # Timestamps
                     ('HeartRate', 'HeartRate'),                 # Heart rate related measures
                     ('HR_Low_Signal', 'HR_Low_Signal'),         # Low frequency band heart rate
                     ('HR_Med_Amp', 'HR_Medium_Amp'),            # Medium frequency band heart rate
                     ('HR_Med_Delta', 'HR_Medium_Delta'),
                     ('HR_High_Amp', 'HR_High_Amp'),             # High frequency band heart rate
                     ('Ceiling', 'Ceiling'),
                     ('DistanceToCeiling', 'DistanceToCeiling'),
                     ('Motion', 'Motion'),                       # Motion-related measures
                     ('AreaExplored', 'AreaExplored'),
                     ('Speed', 'Speed'),
                     ('Temperature', 'Temperature')]

# Fields of the nested 'Behaviours' and 'Events' structs (accessed as data[struct][field][0][0])
behaviour_signals = ['Immobility', 'Rearing', 'StretchAttend', 'TailRattling', 'Grooming',
                     'Flight', 'HeadDips', 'Remaining', 'AreaBound']
event_signals = ['PureTone', 'WhiteNoise', 'Shock']


all_dfs_all_sessions = []

for session in l_sessions:
    # Pair every matching header with its own column position. Looking the position up
    # with list.index() would always return the first match for duplicated headers.
    session_columns = [(column_id, column_name)
                       for column_id, column_name in enumerate(l_all_column_names)
                       if column_name.endswith(session)]
    l_session_column_ids = [column_id for column_id, _ in session_columns]
    l_session_column_names = [column_name for _, column_name in session_columns]

    if not session_columns:
        # pd.concat raises on an empty list, so skip sessions without any recording
        continue

    all_dfs_of_one_session = []

    # Extract the data for each animal individually
    for column_id, column_name in session_columns:

        # Extract remaining metadata: the first two '_'-separated tokens are the animal id,
        # everything between them and the trailing session token is the date
        name_parts = column_name.split('_')
        animal_id = '_'.join(name_parts[:2])
        date = '_'.join(name_parts[2:-1])

        # Extract measurements, behaviors and events (as plain lists, one value per time bin)
        dict_animal = {}
        for df_column, mat_key in top_level_signals:
            dict_animal[df_column] = data[mat_key][:, column_id].tolist()
        for behaviour in behaviour_signals:
            dict_animal[behaviour] = data['Behaviours'][behaviour][0][0][:, column_id].tolist()
        for event in event_signals:
            dict_animal[event] = data['Events'][event][0][0][:, column_id].tolist()
        # Exclusion ranges go last:
        dict_animal['ExclusionRanges'] = data['ExclusionRanges'][:, column_id].tolist()

        # Create DataFrame that contains all measurements from one animal of this session & add metadata.
        # The former row index becomes the 'Bin' column.
        df_temp = pd.DataFrame(data = dict_animal).reset_index().rename(columns = {'index': 'Bin'})
        df_temp.insert(0, 'Session', session)
        df_temp.insert(1, 'Animal_ID', animal_id)
        df_temp.insert(2, 'Date', date)

        # Append to list of dfs from the other mice of this session that will be concatenated
        all_dfs_of_one_session.append(df_temp)

    # Concatenate the dfs of all animals of this session and append it to the list of dfs of all sessions
    df_one_session = pd.concat(all_dfs_of_one_session, ignore_index = True)
    all_dfs_all_sessions.append(df_one_session)

# Concatenate all session dfs to a single df that contains all data:
df = pd.concat(all_dfs_all_sessions, ignore_index = True)
#df = df.dropna()

# Every bin starts out as usable; the steps below flag the bins that must not enter the analysis.
df['Exclude'] = False

# Time bins that are marked as to be excluded carry NaN in ExclusionRanges:
df.loc[df['ExclusionRanges'].isna(), 'Exclude'] = True

# In addition, data exploration revealed some issues with the data.
# Each mask below selects one problematic stretch of a single recording.
manual_exclusions = [
    # Strongly different Signal values (HR_Low and HR_Med) compared to all other mice:
    (df['Animal_ID'] == '175_F4-39') & (df['Session'] == 'ExtHC'),
    # Linear increase of HR at end of CD1 after 887.0 seconds:
    (df['Animal_ID'] == '175_F4-25') & (df['Session'] == 'CD1') & (df['Times'] > 887.0),
    # Linear increase of Temperature at start of CD2 during the first 187.0 seconds:
    (df['Animal_ID'] == '175_F6-8') & (df['Session'] == 'CD2') & (df['Times'] < 187.0),
]

for rows_to_exclude in manual_exclusions:
    df.loc[rows_to_exclude, 'Exclude'] = True


# Collapse the individual behavior columns into a single label column to make plotting easier
scored_behaviors = ['Immobility', 'Rearing', 'StretchAttend', 'TailRattling', 'Grooming', 'Flight', 'Remaining', 'HeadDips', 'AreaBound']

df['behaviors'] = 'No score'

# Later entries of the list overwrite earlier ones when several behaviors are scored in one bin ...
for behavior in scored_behaviors:
    is_scored = df[behavior] == 1
    df.loc[is_scored, 'behaviors'] = behavior

# ... and bins whose behavior columns sum to more than 1 are finally labelled 'multiple'
behavior_sum = df[scored_behaviors].sum(axis=1)
df.loc[behavior_sum > 1, 'behaviors'] = 'multiple'

df

## Add information about bout duration

# Add information about bout duration. These loops will take some time, so let's print some information on the progress:
print('Done with most of the preprocessing - only bout durations are missing!\n')
print('...starting with bout duration processing now...')

# For this we need two additional functions: get_ & write_duration
def get_duration(behavior_to_match, idx, idx_max):
    """Count how many consecutive bins, starting at ``idx``, carry ``behavior_to_match``.

    Labels are read from the 'behaviors' column of the module-level ``df``.

    Parameters:
        behavior_to_match: label that the bins of the bout must carry.
        idx: index label of the first bin to inspect.
        idx_max: last index label that may belong to the bout (inclusive).

    Returns:
        (duration, idx_after): number of matching bins and the index label of the
        first bin that is no longer part of the bout.
    """
    idx_after = idx
    current_label = df.loc[idx_after, 'behaviors']
    while current_label == behavior_to_match and idx_after <= idx_max:
        idx_after += 1
        if idx_after > idx_max:
            # Reached the end of the recording; do not read beyond it
            break
        current_label = df.loc[idx_after, 'behaviors']
    return idx_after - idx, idx_after

def write_duration(idx_start, idx_last, duration):
    """Write bout information for the bins ``idx_start`` .. ``idx_last - 1`` into the module-level ``df``.

    'Bout_bin' receives the running position of each bin within the bout (starting at 1),
    'Bout_duration' receives ``duration`` for every bin of the bout.

    Parameters:
        idx_start: index label of the first bin of the bout.
        idx_last: index label of the first bin after the bout (exclusive end).
        duration: bout length in bins, as returned by ``get_duration``.

    The whole bout is written with two slice assignments instead of two single-cell
    writes per bin. Label slicing assumes that ``df`` has a sorted integer index.
    """
    if idx_last <= idx_start:
        # Empty bout: nothing to write
        return

    # .loc label slices include their end point, hence the - 1
    last_label = idx_last - 1
    n_bins = len(df.loc[idx_start:last_label, 'Bout_bin'])
    df.loc[idx_start:last_label, 'Bout_bin'] = np.arange(1, n_bins + 1)
    df.loc[idx_start:last_label, 'Bout_duration'] = duration

# Bins that do not belong to a bout of a relevant behavior (e.g. bins classified as 'Remaining',
# 'No score', or 'multiple') keep the value 0 in both columns.
# Open question: use np.nan instead of 0 here, if both UMAP and HDBSCAN can handle this?
df['Bout_bin'] = 0
df['Bout_duration'] = 0

# Behaviors for which bouts are tracked
bout_behaviors = ['Immobility', 'Rearing', 'StretchAttend', 'Grooming', 'Flight', 'TailRattling', 'HeadDips']

for Animal_ID in list(df['Animal_ID'].unique()):
    is_animal = df['Animal_ID'] == Animal_ID
    for Session in list(df.loc[is_animal, 'Session'].unique()):
        print('Processing bout data of: ' + Animal_ID + ' during ' + Session)
        df_bouts = df.loc[is_animal & (df['Session'] == Session)].copy()

        # Index range of this recording (the rows of one recording are expected to be consecutive)
        idx_start = df_bouts.index[0]
        idx_max = idx_start + df_bouts.shape[0] - 1

        idx = idx_start
        while idx_start <= idx <= idx_max:
            behavior_to_match = df_bouts.loc[idx, 'behaviors']
            if behavior_to_match not in bout_behaviors:
                idx = idx + 1
                continue
            # Measure the bout that starts here, store it and jump to the first bin after it
            duration, idx_last = get_duration(behavior_to_match, idx, idx_max)
            write_duration(idx, idx_last, duration)
            idx = idx_last

print('All done!')

## Identify and remove mice that don't have recorded data for all required sessions (OF, EPM, CD1 and CD2)

In [None]:
# Sessions that a mouse must have been recorded in to stay in the data set
sessions_to_check = ['OF', 'EPM', 'CD1', 'CD2']

# Map every mouse to the list of sessions it was recorded in
dict_sessions_per_mouse = {}
l_mice_to_keep = []
l_mice_to_remove = []

for mouse in df['Animal_ID'].unique():
    recorded_sessions = list(df.loc[df['Animal_ID'] == mouse, 'Session'].unique())
    dict_sessions_per_mouse[mouse] = recorded_sessions

    # A mouse is kept only if none of the required sessions is missing
    has_all_sessions = all(required in recorded_sessions for required in sessions_to_check)
    target_list = l_mice_to_keep if has_all_sessions else l_mice_to_remove
    target_list.append(mouse)

In [None]:
# Number of rows (time bins) in df before mice and sessions are filtered in the next cell.
df.shape[0]

In [None]:
# Keep only the mice that were recorded in all required sessions, and only those sessions.
# The explicit .copy() makes df an independent frame, so that the columns added in the
# following cells are not written to a slice of the unfiltered data (SettingWithCopyWarning).
rows_to_keep = df['Animal_ID'].isin(l_mice_to_keep) & df['Session'].isin(sessions_to_check)
df = df.loc[rows_to_keep].copy()
df.shape[0]

# Define min and max

In [None]:
# Column names of df, for reference when choosing the measures to scale below.
df.columns

In [None]:
# Measures that are scaled globally (across all mice and sessions)
l_measures_to_scale =  ['HeartRate',
                        'HR_Low_Signal',
                        'HR_Med_Delta', 'HR_Med_Amp',
                        'HR_High_Amp',
                        'Ceiling', 'DistanceToCeiling',
                        'Motion',
                        'AreaExplored', 'Speed',
                        'Temperature']

# The scaling limits are the 0.05th and the 99.95th percentile instead of the true
# minimum / maximum of each measure.
LOWER_PERCENTILE, UPPER_PERCENTILE = 0.05, 99.95

min_maxes = {}

# Percentiles are computed on rows without any NaN that are not marked for exclusion.
# The mask is taken from df_check_nans itself rather than from df, so that the selection
# does not depend on implicit index alignment between two different frames.
df_check_nans = df.dropna().copy()
df_valid_complete = df_check_nans.loc[df_check_nans['Exclude'] == False]

for measure in l_measures_to_scale:

    # Get actual values of min and max
    min_, max_ = np.percentile(df_valid_complete[measure].values, [LOWER_PERCENTILE, UPPER_PERCENTILE])

    # Pass them into the dictionary
    min_maxes[measure] = [min_, max_]

print('These are the calculated Min-Maxes for each measure:')
min_maxes

## Global scaling:

In [None]:
# Min-max scale every measure to [0, 1] using the global limits from min_maxes.
# Values outside the limits get no scaled value (NaN) and their bins are marked for exclusion.
for measure, (min_, max_) in min_maxes.items():
    raw_values = df[measure]
    scaled_values = (raw_values - min_) / (max_ - min_)

    within_limits = (raw_values >= min_) & (raw_values <= max_)
    df.loc[within_limits, 'norm_' + measure] = scaled_values

    outside_limits = (raw_values < min_) | (raw_values > max_)
    df.loc[outside_limits, 'Exclude'] = True

In [None]:
# Animals that are still part of df after filtering.
df['Animal_ID'].unique()

## Add Coefficient of variation with different sliding window sizes:

In [None]:
def CoVs_from_sliding_windows(array, half_window_size):
    """Coefficient of variation of ``array`` in a sliding window around every bin.

    For bin ``i`` the window is ``array[i - half_window_size : i + half_window_size]``,
    i.e. it spans up to ``2 * half_window_size`` bins and excludes the bin at
    ``i + half_window_size``. Near both ends of the array the window is clipped
    to the available data. NaNs inside a window are omitted.

    Parameters:
        array: 1-D sequence of values (sliced per window).
        half_window_size: number of bins taken on either side of the window start.

    Returns:
        np.ndarray with one coefficient of variation per bin of ``array``.
    """
    n_bins = len(array)
    l_CoVs = []
    for center_bin in range(n_bins):
        # Clip the window to the valid index range instead of branching on the position
        window_start = max(center_bin - half_window_size, 0)
        window_end = min(center_bin + half_window_size, n_bins)
        window = array[window_start:window_end]
        l_CoVs.append(stats.variation(window, nan_policy='omit'))

    return np.array(l_CoVs)

# Heart-rate CoV columns and the half window size (in bins) that is used for each of them.
# NOTE(review): half windows of 20 and 10 bins are labelled '10s' and '5s', but the '2s'
# column uses 8 bins rather than 4 - confirm whether the label or the window is intended.
cov_half_windows = {'HR_CoV_10s': 20,
                    'HR_CoV_5s': 10,
                    'HR_CoV_2s': 8}

for cov_column, half_window_size in cov_half_windows.items():
    # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0
    df[cov_column] = np.nan

    # The sliding window must not run across recordings, so every mouse / session is processed separately
    for mouse in df['Animal_ID'].unique():
        for session in df.loc[df['Animal_ID'] == mouse, 'Session'].unique():
            recording_rows = (df['Animal_ID'] == mouse) & (df['Session'] == session)
            heart_rate_array = df.loc[recording_rows, 'HeartRate'].values
            df.loc[recording_rows, cov_column] = CoVs_from_sliding_windows(heart_rate_array, half_window_size)

    # Scale it globally, using the min and max of all bins that are not marked for exclusion:
    valid_covs = df.loc[df['Exclude'] == False, cov_column]
    min_, max_ = valid_covs.min(), valid_covs.max()
    df['norm_' + cov_column] = (df[cov_column] - min_) / (max_ - min_)

In [None]:
# Display df including the newly added CoV columns.
df

## Square-root transformation of AreaExplored and DistanceToCeiling after scaling:

# Square-root transformation of the globally scaled AreaExplored values.
# (Motion and Speed are not square-root transformed.)
area_explored_scaled = df['norm_AreaExplored']
df['norm_AreaExplored_sqrt'] = np.sqrt(area_explored_scaled)

# DistanceToCeiling is inverted (1 - scaled value) before the square root is taken.
ceiling_distance_inverted = 1 - df['norm_DistanceToCeiling']
df['norm_DistanceToCeiling_sqrt'] = np.sqrt(ceiling_distance_inverted)

## Keep all NaNs in order to visualize ranges of missing values

In [None]:
# Snapshot that still contains the rows with missing values (used later for the trace plots)
df_with_nans = df.copy()

# From here on df only holds rows without any NaN:
df = df.dropna(axis=0, how='any')
len(df)

# Save the processed and scaled data

In [None]:
# Columns that will be written to the CSV file below.
df.columns

In [None]:
# Number of rows that will be written to the CSV file below.
df.shape[0]

In [None]:
# Write the NaN-free, scaled frame to a CSV file; the relative path resolves against the
# current working directory.
df.to_csv('States_ceiling_reduced.csv')

In [None]:
# Quick look at the scaled heart rate trace of a single recording.
# `mouse` and `session` still hold the last values of the CoV loops above.
# df_with_nans is used here because df_nan_check is only created further down in the
# notebook, so referring to it at this point fails when the notebook is run top to bottom.
trace_rows = ((df_with_nans['Exclude'] == False)
              & (df_with_nans['Animal_ID'] == mouse)
              & (df_with_nans['Session'] == session))
plt.plot(df_with_nans.loc[trace_rows, 'norm_HeartRate'], color='blue', alpha=0.75)


# Inspect distributions with calculated min - maxes:

In [None]:
# Column of df whose distribution is inspected in the next cell.
MEASURE = 'norm_DistanceToCeiling'

# Number of histogram bins used by the distribution plots.
BIN_COUNT = 1000

In [None]:
# Limits that are marked in the plots: scaled measures range from 0 to 1,
# raw measures use the globally calculated limits.
if MEASURE.startswith('norm_'):
    MIN, MAX = 0, 1
else:
    MIN, MAX = min_maxes[MEASURE][0], min_maxes[MEASURE][1]

# Only bins that are not marked for exclusion are plotted
df_valid = df.loc[df['Exclude'] == False]

# Width of one histogram bin; derived from BIN_COUNT so that it stays in sync with the
# number of bins that is actually passed to histplot.
BINWIDTH = (df_valid[MEASURE].max() - df_valid[MEASURE].min()) / BIN_COUNT

fig = plt.figure(figsize=(18, 10), facecolor='white')
gs = fig.add_gridspec(2,2)

# One entry per panel: (grid position, title, y-limits, x-limits, limits marked by dashed lines)
panels = [
    (gs[0,0], 'All data', None, None, [MIN, MAX]),
    (gs[0,1], 'All data - focus on less frequent values', (0, 75), None, [MIN, MAX]),
    (gs[1,0], 'Focus on lower range', (0, 20), (MIN - 25*BINWIDTH, MIN + 25*BINWIDTH), [MIN]),
    (gs[1,1], 'Focus on upper range', (0, 20), (MAX - 25*BINWIDTH, MAX + 25*BINWIDTH), [MAX]),
]

for grid_position, title, y_limits, x_limits, marked_limits in panels:
    ax = fig.add_subplot(grid_position)
    sns.histplot(data=df_valid, x=MEASURE, bins=BIN_COUNT, ax=ax)
    if y_limits is not None:
        ax.set_ylim(*y_limits)
    if x_limits is not None:
        ax.set_xlim(*x_limits)
    ax.set_title(title)
    for limit in marked_limits:
        ax.axvline(x=limit, linestyle='dashed', color='r')

filename = 'Global_limits_for_' + MEASURE + '.pdf'
#plt.savefig(filename)

plt.tight_layout()
plt.show()



In [None]:
MEASURE = 'norm_HR_Med_Amp'

# Scaled measures range from 0 to 1, raw measures use the globally calculated limits
MIN, MAX = (0, 1) if MEASURE.startswith('norm_') else (min_maxes[MEASURE][0], min_maxes[MEASURE][1])

fig = plt.figure(figsize=(15, 8), facecolor='white')
gs = fig.add_gridspec(1,1)

# Histogram of all bins that are not marked for exclusion, colored by animal
ax1 = fig.add_subplot(gs[0,0])
df_not_excluded = df.loc[df['Exclude'] == False]
sns.histplot(data=df_not_excluded, x=MEASURE, bins=BIN_COUNT, ax=ax1, hue='Animal_ID')
ax1.set_title('All data')
ax1.axvline(x=MIN, linestyle='dashed', color='r')
ax1.axvline(x=MAX, linestyle='dashed', color='r')

In [None]:
l_measures = ['norm_HeartRate', 'norm_HR_Low_Signal', 'norm_HR_Med_Delta',
              'norm_HR_Med_Amp', 'norm_HR_High_Amp', 'norm_Ceiling',
              'norm_DistanceToCeiling', 'norm_Motion', 'norm_AreaExplored',
              'norm_Speed', 'norm_Temperature', 'norm_HR_CoV_10s',
              'norm_HR_CoV_5s', 'norm_HR_CoV_2s']

# 21 evenly spaced threshold values from 0 to 1
l_thresholds = np.linspace(0,1,21)

# For every dimension and threshold: how many mice have at least one value above the threshold?
dict_distributions = {'step': [],
                      'number_of_mice': [],
                      'dimension': []}

for dimension in l_measures:
    for step in l_thresholds:
        mice_above_threshold = df.loc[df[dimension] > step, 'Animal_ID'].unique()
        dict_distributions['step'].append(step.round(2))
        dict_distributions['number_of_mice'].append(mice_above_threshold.shape[0])
        dict_distributions['dimension'].append(dimension)

df_distributions = pd.DataFrame(data=dict_distributions)

In [None]:
# Number of mice with values above each threshold, one line per dimension
fig_thresholds, ax_thresholds = plt.subplots(figsize=(18,10))

sns.pointplot(data=df_distributions, x='step', y='number_of_mice', hue='dimension', palette='Spectral', dodge=True, ax=ax_thresholds)
ax_thresholds.set_xlabel('threshold value', fontsize=15)
ax_thresholds.set_ylabel('Number of mice with data exceeding threshold value', fontsize=15)

In [None]:
# NOTE(review): `l_new_default` is not assigned anywhere in the visible part of this notebook;
# unless it is defined elsewhere, this cell raises a NameError on a fresh kernel - confirm or remove.
l_new_default

In [None]:
# Dimensions for which at least 9 mice have values above the threshold of 0.65.
df_distributions.loc[(df_distributions['number_of_mice'] >= 9) & (df_distributions['step'] == 0.65), 'dimension'].unique()

In [None]:
# NOTE(review): neither `l_new_default` nor `l_check` is assigned anywhere in the visible part of
# this notebook; unless they are defined elsewhere, this cell raises a NameError - confirm or remove.
l_new_default == l_check

In [None]:
# Scratch check: type of the number of distinct animals with norm_HR_Low_Signal above 0.8.
type(df.loc[df['norm_HR_Low_Signal'] > 0.8, 'Animal_ID'].unique().shape[0])

In [None]:
# Column names of df, for reference when selecting the measures for the trace plots below.
df.columns

# Plot traces of all dimensions for each mouse and each recorded session

Excluded areas are left blank in the traces

In [None]:
# Copy of the data that still contains the rows with missing values
df_nan_check = df_with_nans.copy()

# Count the NaNs per row in all columns from position 5 onwards
df_nan_check['NaN_count'] = df_nan_check.iloc[:, 5:].isnull().sum(axis=1)
exclude_column_id = list(df_nan_check.columns).index('Exclude')

# Rows with at least one NaN are blanked completely, so that they show up as gaps in the traces.
# Only the first five columns, 'Exclude' and 'NaN_count' keep their values.
# np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
rows_with_nans = df_nan_check['NaN_count'] > 0
df_nan_check.loc[rows_with_nans, df_nan_check.columns[5:exclude_column_id]] = np.nan
df_nan_check.loc[rows_with_nans, df_nan_check.columns[exclude_column_id+1:-1]] = np.nan

## Temperatures of tail segments 1 and 3

In [None]:
fig = plt.figure(figsize=(60,100), facecolor='white')
gs = fig.add_gridspec(38, 7)

# Only a single temperature signal is loaded and scaled in this notebook ('norm_Temperature').
# The per-segment columns 'norm_Temperature_s1' / 'norm_Temperature_s3' are never created,
# so selecting them raises a KeyError.
l_measures = ['norm_Temperature']
legend_title = 'Temperatures:'
l_labels = ['Temperature']
l_colors = ['blue']
l_alphas = [0.75]

# One column of panels per session; 'Ext' falls back to 'ExtHC' if a mouse has no 'Ext' data
l_panel_sessions = ['OF', 'EPM', 'PreExp', 'CD1', 'CD2', 'Ext', 'ExtCont']

row = 0
for mouse in df_nan_check['Animal_ID'].unique():
    valid_mouse_rows = (df_nan_check['Exclude'] == False) & (df_nan_check['Animal_ID'] == mouse)
    for column in range(7):
        session = l_panel_sessions[column]
        panel_rows = valid_mouse_rows & (df_nan_check['Session'] == session)
        if session == 'Ext' and df_nan_check.loc[panel_rows, l_measures[0]].shape[0] == 0:
            session = 'ExtHC'
            panel_rows = valid_mouse_rows & (df_nan_check['Session'] == session)

        fig.add_subplot(gs[row, column])
        for measure, label, color, alpha in zip(l_measures, l_labels, l_colors, l_alphas):
            plt.plot(df_nan_check.loc[panel_rows, measure], color=color, alpha=alpha, label=label)
        plt.ylim(0,1)

        if column == 0:
            plt.ylabel(mouse)
        if row == 0:
            if session in ('Ext', 'ExtHC'):
                plt.title('Ext / ExtHC')
            else:
                plt.title(session)

        # Only the last column shows a legend; it is hidden in all other panels
        if column == 6:
            plt.legend(title= legend_title, loc='center left', bbox_to_anchor=(1.2, 0.5))
        else:
            plt.legend('', frameon=False)
    row = row + 1

plt.savefig('Temperatures_all_sessions_v10_NaNs.png')
plt.show()

## Motion related measures: Speed, AreaExplored (sqrt. transformed), and Motion

In [None]:
fig = plt.figure(figsize=(60,100), facecolor='white')
gs = fig.add_gridspec(38, 7)


l_measures = ['norm_Speed', 'norm_AreaExplored_sqrt', 'norm_Motion']
legend_title = 'Motion measures:'
l_labels = ['Speed', 'AreaExplored sqrt.', 'Motion']
l_colors = ['blue', 'darkorange', 'k']
l_alphas = [0.75, 0.75, 0.5]

# One column of panels per session; 'Ext' falls back to 'ExtHC' if a mouse has no 'Ext' data
l_panel_sessions = ['OF', 'EPM', 'PreExp', 'CD1', 'CD2', 'Ext', 'ExtCont']

row = 0
for mouse in df_nan_check['Animal_ID'].unique():
    valid_mouse_rows = (df_nan_check['Exclude'] == False) & (df_nan_check['Animal_ID'] == mouse)
    for column in range(7):
        session = l_panel_sessions[column]
        panel_rows = valid_mouse_rows & (df_nan_check['Session'] == session)
        # The fallback check is done on df_nan_check, the frame that is plotted
        # (and the frame that the sibling trace cells check), not on the NaN-free df.
        if session == 'Ext' and df_nan_check.loc[panel_rows, l_measures[0]].shape[0] == 0:
            session = 'ExtHC'
            panel_rows = valid_mouse_rows & (df_nan_check['Session'] == session)

        fig.add_subplot(gs[row, column])
        for measure, label, color, alpha in zip(l_measures, l_labels, l_colors, l_alphas):
            plt.plot(df_nan_check.loc[panel_rows, measure], color=color, alpha=alpha, label=label)
        plt.ylim(0,1)

        if column == 0:
            plt.ylabel(mouse)
        if row == 0:
            if session in ('Ext', 'ExtHC'):
                plt.title('Ext / ExtHC')
            else:
                plt.title(session)

        # Only the last column shows a legend; it is hidden in all other panels
        if column == 6:
            plt.legend(title= legend_title, loc='center left', bbox_to_anchor=(1.2, 0.5))
        else:
            plt.legend('', frameon=False)
    row = row + 1

plt.savefig('Speed_Area_Motion_all_sessions_v10_NaNs.png')
plt.show()

## HR related measures: HeartRate, Amplitude of the high frequency band (globally scaled), and the CoV with a sliding window of 10s

In [None]:
fig = plt.figure(figsize=(60, 100), facecolor='white')
gs = fig.add_gridspec(38, 7)


l_measures = ['norm_HR_High_Amp', 'norm_HR_CoV_10s', 'norm_HeartRate']
legend_title = 'HR_Measures:'
l_labels = ['High freq. Amp.', 'CoV 10s', 'HeartRate']
l_colors = ['blue', 'darkorange', 'k']
l_alphas = [0.75, 0.75, 0.5]

# One column of panels per session; 'Ext' falls back to 'ExtHC' if a mouse has no 'Ext' data
l_panel_sessions = ['OF', 'EPM', 'PreExp', 'CD1', 'CD2', 'Ext', 'ExtCont']

row = 0
for mouse in df_nan_check['Animal_ID'].unique():
    valid_mouse_rows = (df_nan_check['Exclude'] == False) & (df_nan_check['Animal_ID'] == mouse)
    for column in range(7):
        session = l_panel_sessions[column]
        panel_rows = valid_mouse_rows & (df_nan_check['Session'] == session)
        if session == 'Ext' and df_nan_check.loc[panel_rows, l_measures[0]].shape[0] == 0:
            session = 'ExtHC'
            panel_rows = valid_mouse_rows & (df_nan_check['Session'] == session)

        fig.add_subplot(gs[row, column])
        for measure, label, color, alpha in zip(l_measures, l_labels, l_colors, l_alphas):
            plt.plot(df_nan_check.loc[panel_rows, measure], color=color, alpha=alpha, label=label)
        plt.ylim(0,1)

        if column == 0:
            plt.ylabel(mouse)
        if row == 0:
            if session in ('Ext', 'ExtHC'):
                plt.title('Ext / ExtHC')
            else:
                plt.title(session)

        # Only the last column shows a legend; it is hidden in all other panels
        if column == 6:
            plt.legend(title= legend_title, loc='center left', bbox_to_anchor=(1.2, 0.5))
        else:
            plt.legend('', frameon=False)
    row = row + 1

plt.savefig('HR_Amps_all_sessions_v10_NaNs.png')
plt.show()

## How much of the data is classified as behavioral events?

In [None]:
# Share of the non-excluded bins that falls into each behavioral category
df_not_excluded = df.loc[df['Exclude'] == False]
all_data = df_not_excluded.shape[0]

print('Percentage of datapoints for each behavioral category: \n')
for behavior in df['behaviors'].unique():
    bin_count = df_not_excluded.loc[df_not_excluded['behaviors'] == behavior].shape[0]
    percentage = round(bin_count / all_data *100, 2)

    print(f'{behavior}: {percentage}% [{bin_count} bins]')
    

## *Side note:* still 'multiple' as behavioral score

In [None]:
# Bins that are labelled 'multiple' although Flight is not scored, shown with a selection of
# the behavior columns to see which combination of behaviors caused the label.
df.loc[(df['behaviors'] == 'multiple') & (df['Flight'] == 0), ['Animal_ID', 'Session', 'Bin', 'Remaining', 'Immobility', 'Rearing', 'StretchAttend', 'Grooming', 'Flight', 'TailRattling']]