# __Analysis pipeline for Open Track experiments__ $ ^{M.S. Bachelor\ Thesis\ edition}$

### Disclaimer: 
This repository is intended to provide open source access to the code that was used for the analyses and does not contain the original files.

# __Preprocessing__

First of all, we will load all dependencies and specify all functions that will be used throughout this notebook. They could be organized in a custom-written package or at least grouped in a single cell, but to make it easier to continue working on this code, let's keep them in place and clearly separated until we are happy with all the output we are getting.

In [None]:
# import dependencies if not done already
%matplotlib inline

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import math

### Specify all functions

#### `get_time()`

In [None]:
def get_time(df):
    """Add a 'time' column holding the timestamp of every frame.

    The timestamp is the row index divided by the module-level ``framerate``,
    i.e. the index is presumably the frame number and the result is in
    seconds if ``framerate`` is given in frames per second.

    Args:
        df: DataFrame with one row per frame.

    Returns:
        The same DataFrame (modified in place) with the new 'time' column.
    """
    # Selecting a column group never changes the row index, so df.index is
    # exactly what `df['EarRight'].index` returned - without requiring that
    # an 'EarRight' column exists.
    df['time'] = df.index / framerate

    # in future version: check for NaN
    return df

#### `identify_duplicates()`

In [None]:
def identify_duplicates(df):
    """Flag all frames whose index label occurs more than once.

    Rows are not dropped: every row that shares its index label with at
    least one other row gets ``('all', 'exclude')`` set to True.

    Args:
        df: DataFrame with one row per frame.

    Returns:
        The same DataFrame (modified in place).
    """
    # keep=False marks *every* occurrence of a repeated label, not only the
    # later ones; this is a single pass instead of one list.count() per label.
    is_duplicated = df.index.duplicated(keep=False)

    # Only touch the DataFrame if there is something to flag
    if is_duplicated.any():
        df.loc[is_duplicated, ('all', 'exclude')] = True

    return df

#### `get_center_of_gravity()`

In [None]:
def get_center_of_gravity(df):
    """Add the centroid of EarRight, EarLeft and TailBase as bodypart 'CenterOfGravity'.

    The centroid is the arithmetic mean of the three bodyparts' 'x' and 'y'
    coordinates. It is only computed for frames in which neither the whole
    frame nor any of the three bodyparts is marked as excluded; all other
    frames keep NaN coordinates and get ``('CenterOfGravity', 'exclude')``
    set to True.

    Side effect: 'CenterOfGravity' is added to the module-level
    ``l_bodyparts`` (once).

    Args:
        df: DataFrame holding the 'x', 'y' and 'exclude' columns of the three
            bodyparts and the ``('all', 'exclude')`` column.

    Returns:
        The same DataFrame (modified in place).
    """
    # Frames in which all three source bodyparts are usable.
    # `== False` is kept on purpose: `~` would misbehave on object-dtype columns.
    usable = ((df[('all', 'exclude')] == False)
              & (df[('EarRight', 'exclude')] == False)
              & (df[('EarLeft', 'exclude')] == False)
              & (df[('TailBase', 'exclude')] == False))

    # Same computation for both coordinates
    for coordinate in ('x', 'y'):
        df.loc[usable, ('CenterOfGravity', coordinate)] = (df.loc[usable, ('EarRight', coordinate)]
                                                           + df.loc[usable, ('EarLeft', coordinate)]
                                                           + df.loc[usable, ('TailBase', coordinate)]) / 3

    # Determines whether row in CenterOfGravity coordinates needs to be excluded
    df[('CenterOfGravity', 'exclude')] = False
    df.loc[(df[('CenterOfGravity', 'x')].isnull()) | (df[('CenterOfGravity', 'y')].isnull()), ('CenterOfGravity', 'exclude')] = True

    # Adds CenterOfGravity to l_bodyparts - only once, so that calling this
    # function again does not add a second entry
    if 'CenterOfGravity' not in l_bodyparts:
        l_bodyparts.append('CenterOfGravity')
    return df

#### `exclude_frames()`

In [None]:
def exclude_frames(df):
    """Flag, per bodypart, the frames that must not be used.

    For every bodypart in the module-level ``l_bodyparts`` a column
    ``(bodypart, 'exclude')`` is created. A frame is flagged (True) when the
    bodypart's 'likelihood' is below ``DLC_likelihood_threshold`` or when the
    frame is already flagged in ``('all', 'exclude')``.

    The DataFrame is modified in place and returned.
    """
    for part in l_bodyparts:
        flag_column = (part, 'exclude')

        # Start out with "keep every frame" ...
        df.loc[:, flag_column] = False

        # ... then collect both reasons for an exclusion:
        # tracking likelihood below the threshold
        poorly_tracked = df[part]['likelihood'] < DLC_likelihood_threshold
        # frame excluded as a whole (e.g. duplicated frames)
        excluded_everywhere = df[('all', 'exclude')] == True

        df.loc[poorly_tracked | excluded_everywhere, flag_column] = True
    return df

#### `normalize_coordinates()`

Including `rotate()` and `translate()`

In [None]:
def rotate(xy, theta):
    """Rotate the point(s) ``xy`` about the origin by the angle ``theta``.

    ``xy`` is indexed as ``xy[0]`` (x) and ``xy[1]`` (y); ``theta`` is passed
    to ``math.cos`` / ``math.sin`` and is therefore given in radians.
    Returns the rotated coordinates as an ``(x, y)`` tuple.
    """
    # Standard 2D rotation matrix:
    # https://en.wikipedia.org/wiki/Rotation_matrix#In_two_dimensions
    x, y = xy[0], xy[1]
    cosine = math.cos(theta)
    sine = math.sin(theta)

    x_rotated = x * cosine - y * sine
    y_rotated = x * sine + y * cosine
    return (x_rotated, y_rotated)

# Shift coordinates by an offset (corrects for the rectangle not starting at (0, 0)):
def translate(xy, offset):
    """Return ``xy`` shifted by ``offset`` as an ``(x, y)`` tuple.

    Both arguments are indexed as ``[0]`` (x) and ``[1]`` (y).
    """
    shift_x, shift_y = offset[0], offset[1]
    moved_x = xy[0] + shift_x
    moved_y = xy[1] + shift_y
    return moved_x, moved_y


def normalize_coordinates(df):
    """Transform all bodypart coordinates into the standard maze space.

    The reference values ('length', 'offset_x', 'offset_y', 'theta') of the
    current video are read from the module-level ``d_reference_coordinates``
    using the module-level ``filename_avi`` as key. For every bodypart in
    ``l_bodyparts`` (plus 'CenterOfGravity') the following columns are added:

    * 'x_norm' / 'y_norm': coordinates rotated by ``-theta`` and then shifted
      by ``(-offset_x, -offset_y)``, in px
    * 'x_norm_cm' / 'y_norm_cm': the normalized coordinates converted to cm
      and inverted

    Args:
        df: DataFrame holding the 'x' and 'y' column of every bodypart.

    Returns:
        The same DataFrame (modified in place).

    Raises:
        KeyError: if there is no reference entry for ``filename_avi``.
    """
    # Extract all reference information:
    reference = d_reference_coordinates[filename_avi]
    offset_to_standard = (-reference['offset_x'], -reference['offset_y'])
    theta_to_standard = -reference['theta']

    # The annotated length (in px) corresponds to a maze length of 50 cm
    maze_length_in_cm = 50
    cm_per_px = maze_length_in_cm / reference['length']
    # Offset used to invert the x axis; presumably the maze width in cm - TODO confirm
    x_extent_in_cm = 3

    # 'CenterOfGravity' may already be part of l_bodyparts; dict.fromkeys
    # removes the duplicate while keeping the order, so nothing is computed twice
    for bodypart in dict.fromkeys(l_bodyparts + ['CenterOfGravity']):
        # Translate all coordinates into the standard space, with: x = 0px & y = 0px being the right end corner of the maze
        x_norm, y_norm = translate(rotate((df[(bodypart, 'x')], df[(bodypart, 'y')]), theta_to_standard), offset_to_standard)
        df[(bodypart, 'x_norm')] = x_norm
        df[(bodypart, 'y_norm')] = y_norm
        # In the standard space, convert all coordinates into cm and correct for the inversion - now: x = 0cm & y = 0cm is the left start corner of the maze
        df[(bodypart, 'x_norm_cm')] = x_extent_in_cm - (x_norm * cm_per_px)
        df[(bodypart, 'y_norm_cm')] = maze_length_in_cm - (y_norm * cm_per_px)

    return df

#### `get_speed_and_rolling_speed()`

In [None]:
def get_speed_and_rolling_speed(df):
    """Add the speed and a smoothed (rolling mean) speed of every bodypart.

    For every bodypart in the module-level ``l_bodyparts`` two columns are
    added:

    * 'speed_px_per_s': Euclidean distance to the previous usable frame
      divided by the time that passed since that frame
    * 'rolling_speed_px_per_s': centered rolling mean of the speed over
      5 frames (at least 3 valid values required)

    Frames that are excluded as a whole or for the respective bodypart keep
    NaN in both columns.

    Args:
        df: DataFrame holding 'x', 'y' and 'exclude' for every bodypart, the
            ``('all', 'exclude')`` column and the 'time' column.

    Returns:
        The same DataFrame (modified in place).
    """
    # Does not depend on the bodypart, so it is computed only once
    not_excluded_as_a_whole = df[('all', 'exclude')] == False

    for bodypart in l_bodyparts:
        # Create columns
        df[(bodypart, 'speed_px_per_s')] = np.nan
        df[(bodypart, 'rolling_speed_px_per_s')] = np.nan

        usable = not_excluded_as_a_whole & (df[(bodypart, 'exclude')] == False)

        # Calculate speed
        # Calculates speed from time passed & distance moved in reference to the previous usable frame
        # Limitation: since excluded frames are skipped, not every value is a true frame-by-frame speed (most are)
        distance_moved = (df.loc[usable, (bodypart, 'x')].diff()**2
                          + df.loc[usable, (bodypart, 'y')].diff()**2)**(1/2)
        # df['time'] always yields a Series, also with two-level columns
        time_passed = df['time'].loc[usable].diff()
        df.loc[usable, (bodypart, 'speed_px_per_s')] = distance_moved / time_passed

        # The rolling window runs over all frames that are not excluded as a
        # whole (frames excluded for this bodypart contribute NaN), but the
        # result is only stored for the usable frames
        df.loc[usable, (bodypart, 'rolling_speed_px_per_s')] = df.loc[not_excluded_as_a_whole,
                                                                      (bodypart, 'speed_px_per_s')].rolling(5, min_periods=3, center=True).mean()

    return df

#### `get_direction()`

In [None]:
def get_direction(df):
    """Add the boolean column ``('moving_towards_maze_end', '')``.

    A frame is True when the normalized y-coordinate of both ears is smaller
    than the one of the CenterOfGravity, otherwise False.
    The DataFrame is modified in place and returned.
    """
    direction_column = ('moving_towards_maze_end', '')
    center_y = df[('CenterOfGravity', 'y_norm')]

    right_ear_ahead = df[('EarRight', 'y_norm')] < center_y
    left_ear_ahead = df[('EarLeft', 'y_norm')] < center_y

    df[direction_column] = False
    df.loc[right_ear_ahead & left_ear_ahead, direction_column] = True
    return df

#### `get_immobility()`

In [None]:
def get_immobility(df):
    """Determine for every bodypart in which frames it is immobile.

    For every bodypart in the module-level ``l_bodyparts`` the boolean column
    ``(bodypart, 'immobility')`` is added. A frame is True when the
    bodypart's 'rolling_speed_px_per_s' is below the module-level
    ``immobility_threshold``; frames without a rolling speed (NaN) stay False.

    Args:
        df: DataFrame holding 'rolling_speed_px_per_s' for every bodypart.

    Returns:
        The same DataFrame (modified in place).
    """
    for bodypart in l_bodyparts:
        # create 'immobility' column and set base value to false
        df.loc[:, (bodypart, 'immobility')] = False
        df.loc[df[(bodypart, 'rolling_speed_px_per_s')] < immobility_threshold, (bodypart, 'immobility')] = True
    return df

#### `get_gait_disruption_bouts()`

In [None]:
def get_gait_disruption_bouts(df):
    """Identify gait disruption bouts and add their properties to the DataFrame.

    A candidate interval is a run of frames in which
    'all_freezing_bodyparts_immobile' is True for at least (approximately)
    ``TARGET_TIME_GAIT_DISRUPTION``. It only counts as a gait disruption bout
    if none of the frames within ``TIME_OF_GAIT_BEFORE_DISRUPT`` right before
    the interval is immobile.

    For all frames of a bout the 'GaitDisruption_bout' columns hold: the flag
    (''), the running bout number ('count'), the 'duration', the mean
    position of the CenterOfGravity ('mean_x_norm_cm', 'mean_y_norm_cm') and
    the direction ('direction_bool': all frames move towards the maze end,
    'direction_mean': fraction of frames moving towards the maze end).

    Reads the module-level ``framerate``, ``TARGET_TIME_GAIT_DISRUPTION`` and
    ``TIME_OF_GAIT_BEFORE_DISRUPT`` and calls ``find_nearest``. Requires the
    columns 'time', 'all_freezing_bodyparts_immobile',
    ('moving_towards_maze_end', '') and the normalized CenterOfGravity
    coordinates. The index arithmetic assumes consecutive integer row labels.

    Args:
        df: DataFrame with one row per frame.

    Returns:
        The same DataFrame (modified in place).
    """
    df[('GaitDisruption_bout', '')] = False
    df[('GaitDisruption_bout', 'count')] = np.nan
    df[('GaitDisruption_bout', 'duration')] = np.nan
    df[('GaitDisruption_bout', 'mean_x_norm_cm')] = np.nan
    df[('GaitDisruption_bout', 'mean_y_norm_cm')] = np.nan
    df[('GaitDisruption_bout', 'direction_bool')] = ''
    df[('GaitDisruption_bout', 'direction_mean')] = np.nan

    # All time offsets (within one second) that can be expressed as a whole number of frames
    l_timesteps = [i / framerate for i in range(framerate)]

    # Express both target durations as a number of frames
    time_gait_disruption = find_nearest(np.asarray(l_timesteps), TARGET_TIME_GAIT_DISRUPTION)
    frames_difference = l_timesteps.index(time_gait_disruption)
    frames_prior_to_interval_start = l_timesteps.index(find_nearest(np.asarray(l_timesteps), TIME_OF_GAIT_BEFORE_DISRUPT))

    # df['time'] always yields a Series, also with two-level columns
    times = df['time']
    immobile_times = times.loc[df['all_freezing_bodyparts_immobile'] == True]

    # Among the immobile frames: if the frame `frames_difference` entries earlier
    # is exactly the target time ago, all frames in between were immobile as well.
    # Rounding guards against floating point noise.
    threshold_positions = np.where(np.round(immobile_times.diff(frames_difference).values, 7) == round(time_gait_disruption, 7))[0]
    gait_disruption_threshold_reached = immobile_times.iloc[threshold_positions].values

    # A single matching frame is fine as well: the masks below then consist of one True each
    if gait_disruption_threshold_reached.shape[0] > 0:
        indices = np.asarray(df.loc[times.isin(gait_disruption_threshold_reached)].index)
        # A gap between two neighbouring indices marks the border between two intervals
        lower_end = (indices+1)[:-1]
        upper_end = indices[1:]
        mask = lower_end < upper_end
        mask_last = np.concatenate([mask, np.array([True])])
        mask_first = np.concatenate([np.array([True]), mask])

        last_value_of_intervals = indices[mask_last]
        first_value_of_intervals = indices[mask_first]
        # Since this is only the first value that matches the criterion, we have to subtract the corresponding frames
        # where the criterion was already fulfilled, to get the actual first index of the interval:
        first_value_of_intervals = first_value_of_intervals - frames_difference

        interval_ranges = np.column_stack([first_value_of_intervals, last_value_of_intervals])

        bout_count = 0

        for first_idx, last_idx in interval_ranges:
            start_idx_gait_check = first_idx - frames_prior_to_interval_start
            # Only a bout if the mouse was not immobile right before the interval started
            if df.loc[start_idx_gait_check:first_idx-1, 'all_freezing_bodyparts_immobile'].any() == False:
                bout_count = bout_count + 1
                # Label based lookup on the 'time' Series (no positional [0] on a label-indexed Series)
                bout_duration = times.loc[last_idx] - times.loc[first_idx]
                mean_pos_x_norm_cm = df.loc[first_idx:last_idx, ('CenterOfGravity', 'x_norm_cm')].mean()
                mean_pos_y_norm_cm = df.loc[first_idx:last_idx, ('CenterOfGravity', 'y_norm_cm')].mean()
                direction_bool = df.loc[first_idx:last_idx, ('moving_towards_maze_end', '')].all()
                direction_mean = df.loc[first_idx:last_idx, ('moving_towards_maze_end', '')].mean()

                df.loc[first_idx:last_idx, ('GaitDisruption_bout', '')] = True
                df.loc[first_idx:last_idx, ('GaitDisruption_bout', 'count')] = bout_count
                df.loc[first_idx:last_idx, ('GaitDisruption_bout', 'duration')] = bout_duration
                df.loc[first_idx:last_idx, ('GaitDisruption_bout', 'mean_x_norm_cm')] = mean_pos_x_norm_cm
                df.loc[first_idx:last_idx, ('GaitDisruption_bout', 'mean_y_norm_cm')] = mean_pos_y_norm_cm
                df.loc[first_idx:last_idx, ('GaitDisruption_bout', 'direction_bool')] = direction_bool
                df.loc[first_idx:last_idx, ('GaitDisruption_bout', 'direction_mean')] = direction_mean

    return df



#### `get_freezing_bouts()`

Including `find_nearest()`

In [None]:
def find_nearest(array, value):
    """Return the element of ``array`` that is closest to ``value``.

    If several elements are equally close, the first one is returned.
    """
    candidates = np.asarray(array)
    distances = np.abs(candidates - value)
    position_of_closest = distances.argmin()
    return candidates[position_of_closest]

def get_freezing_bouts(df):
    """Identify freezing bouts and add their properties to the DataFrame.

    A frame counts as immobile when EarRight, EarLeft and CenterOfGravity are
    all immobile (stored in 'all_freezing_bodyparts_immobile'). A freezing
    bout is a run of immobile frames that lasts at least one second, i.e.
    ``framerate`` frames (module-level variable).

    For all frames of a bout the 'Freezing_bout' columns hold: the flag (''),
    the running bout number ('count'), the 'duration' and the mean position
    of the CenterOfGravity ('mean_x_norm_cm', 'mean_y_norm_cm').

    Requires the 'time' column, the 'immobility' column of the three
    bodyparts and the normalized CenterOfGravity coordinates. The index
    arithmetic assumes consecutive integer row labels.

    Args:
        df: DataFrame with one row per frame.

    Returns:
        The same DataFrame (modified in place).
    """
    df[('Freezing_bout', '')] = False
    df[('Freezing_bout', 'count')] = np.nan
    df[('Freezing_bout', 'duration')] = np.nan
    df[('Freezing_bout', 'mean_x_norm_cm')] = np.nan
    df[('Freezing_bout', 'mean_y_norm_cm')] = np.nan

    df['all_freezing_bodyparts_immobile'] = df[[('EarRight', 'immobility'), ('EarLeft', 'immobility'), ('CenterOfGravity', 'immobility')]].all(axis=1)

    # df['time'] always yields a Series, also with two-level columns
    times = df['time']
    immobile_times = times.loc[df['all_freezing_bodyparts_immobile'] == True]

    # Among the immobile frames: if the frame `framerate` entries earlier is
    # exactly one second ago, all frames in between were immobile as well.
    # The difference is rounded before the comparison, because e.g.
    # 14/10 - 4/10 is not exactly 1.0 in floating point arithmetic - an exact
    # comparison silently misses such frames and splits bouts.
    one_second_positions = np.where(np.round(immobile_times.diff(framerate).values, 7) == 1)[0]
    times_where_freezing_threshold_was_reached = immobile_times.iloc[one_second_positions].values

    # A single matching frame is fine as well: the masks below then consist of one True each
    if times_where_freezing_threshold_was_reached.shape[0] > 0:
        indices = np.asarray(df.loc[times.isin(times_where_freezing_threshold_was_reached)].index)
        # A gap between two neighbouring indices marks the border between two intervals
        lower_end = (indices+1)[:-1]
        upper_end = indices[1:]
        mask = lower_end < upper_end
        mask_last = np.concatenate([mask, np.array([True])])
        mask_first = np.concatenate([np.array([True]), mask])

        last_value_of_intervals = indices[mask_last]
        first_value_of_intervals = indices[mask_first]
        # Since this is only the first value that matches the criterion, we have to subtract the corresponding frames
        # where the criterion was already fulfilled, to get the actual first index of the interval:
        first_value_of_intervals = first_value_of_intervals - framerate

        interval_ranges = np.column_stack([first_value_of_intervals, last_value_of_intervals])

        bout_count = 0

        for first_idx, last_idx in interval_ranges:
            bout_count = bout_count + 1
            # Label based lookup on the 'time' Series (no positional [0] on a label-indexed Series)
            bout_duration = times.loc[last_idx] - times.loc[first_idx]
            mean_pos_x_norm_cm = df.loc[first_idx:last_idx, ('CenterOfGravity', 'x_norm_cm')].mean()
            mean_pos_y_norm_cm = df.loc[first_idx:last_idx, ('CenterOfGravity', 'y_norm_cm')].mean()

            df.loc[first_idx:last_idx, ('Freezing_bout', '')] = True
            df.loc[first_idx:last_idx, ('Freezing_bout', 'count')] = bout_count
            df.loc[first_idx:last_idx, ('Freezing_bout', 'duration')] = bout_duration
            df.loc[first_idx:last_idx, ('Freezing_bout', 'mean_x_norm_cm')] = mean_pos_x_norm_cm
            df.loc[first_idx:last_idx, ('Freezing_bout', 'mean_y_norm_cm')] = mean_pos_y_norm_cm

    return df


#### `get_session_averages()`

In [None]:
def _add_bout_summary(df, bout_name, percentage_column, duration_column, x_column, y_column):
    """Add the whole-session summary of one bout type ('whole_session' columns) to df."""
    bout_counts = df[(bout_name, 'count')]
    # Exactly one row per bout: the first frame that carries the bout number.
    # Using one value per bout (instead of the unique values of a column)
    # keeps bouts apart that happen to share the same duration or position.
    first_frame_of_bout = bout_counts.notna() & ~bout_counts.duplicated()

    if first_frame_of_bout.any():
        df[('whole_session', percentage_column)] = ((df[(bout_name, '')] == True).sum() / df.shape[0]) * 100
        df[('whole_session', duration_column)] = np.nanmedian(df.loc[first_frame_of_bout, (bout_name, 'duration')].to_numpy(dtype=float))
        df[('whole_session', x_column)] = np.nanmedian(df.loc[first_frame_of_bout, (bout_name, 'mean_x_norm_cm')].to_numpy(dtype=float))
        df[('whole_session', y_column)] = np.nanmedian(df.loc[first_frame_of_bout, (bout_name, 'mean_y_norm_cm')].to_numpy(dtype=float))
    else:
        # No bout in this session
        df[('whole_session', percentage_column)] = 0
        df[('whole_session', duration_column)] = np.nan
        df[('whole_session', x_column)] = np.nan
        df[('whole_session', y_column)] = np.nan


def get_session_averages(df):
    """Add whole-session summaries of the freezing and gait disruption bouts.

    For both bout types the following 'whole_session' columns are added (the
    same value in every row):

    * percentage of frames that belong to a bout
    * median bout duration
    * median of the bouts' mean x and mean y position (in cm)

    The medians use one value per bout. Whether there is a bout at all is
    derived from the non-NaN bout numbers, so a session in which every frame
    belongs to a bout is handled correctly. Without any bout the percentage
    is 0 and the medians are NaN.

    Args:
        df: DataFrame holding the 'Freezing_bout' and 'GaitDisruption_bout'
            columns ('', 'count', 'duration', 'mean_x_norm_cm',
            'mean_y_norm_cm').

    Returns:
        The same DataFrame (modified in place).
    """
    _add_bout_summary(df, 'Freezing_bout',
                      'percentage_time_spent_freezing',
                      'median_freezing_bout_duration',
                      'median_x_norm_cm_all_freezing_bouts',
                      'median_y_norm_cm_all_freezing_bouts')

    _add_bout_summary(df, 'GaitDisruption_bout',
                      'percentage_time_spent_gait_disrupted',
                      'median_gait_disruption_bout_duration',
                      'median_x_norm_cm_all_gait_disruption_bouts',
                      'median_y_norm_cm_all_gait_disruption_bouts')

    return df

## 1) Bl6 cohort

Computations are performed for the Bl6 cohort:

In [None]:
# Folder inside the current working directory that holds all files of the Bl6 cohort:
directory = '{}/bl6/'.format(os.getcwd())

In [None]:
# Collect the names of all .csv files found in the data directory:
l_filenames = list(filter(lambda name: name.endswith('.csv'), os.listdir(directory)))

### Quickly confirm whether the files in l_filenames are the correct ones:

In [None]:
# Bare expression: in a notebook this displays the list of collected file names
l_filenames

In [None]:
# Cohort label that is stored with every session
groupID = 'bl6'

# Minimal DLC likelihood; bodyparts tracked with a lower likelihood are excluded in that frame
DLC_likelihood_threshold = 0.9

# Rolling speed (px/s) below which a bodypart counts as immobile.
# This value is still arbitrary; it will be checked once the basic code is in place.
immobility_threshold = 16

# NOTE(review): not read by any of the functions visible in this part of the
# notebook - get_freezing_bouts() works with a fixed duration of one second
min_freezing_duration = 1

# Durations used by get_gait_disruption_bouts(); they are matched against
# multiples of 1/framerate
TIME_OF_GAIT_BEFORE_DISRUPT = 0.5
TARGET_TIME_GAIT_DISRUPTION = 0.2

# Load annotation data
# Make sure that the file is located in the current working directory
# NOTE: unpickling can execute arbitrary code - only load reference files from a trusted source
with open('reference_coordinates.p', 'rb') as fp:
    d_reference_coordinates = pickle.load(fp)

# Might have to be removed. Was required to compensate for some naming errors
l_files_to_rename = [('OpenTrack-210506_279_R1_1_Vid1.avi', 'OpenTrack-210506_279_R1_Vid1.avi'),
                     ('OpenTrack-210510_282_C1_2_Vid1.avi', 'OpenTrack-210510_282_C1_Vid1.avi'),
                     ('OpenTrack-210510_280_C2_2_Vid1.avi', 'OpenTrack-210510_280_C2_Vid1.avi')]

for old_key, new_key in l_files_to_rename:
    # Keys that are not present (e.g. in an already corrected reference file)
    # are skipped instead of raising a KeyError
    if old_key in d_reference_coordinates:
        d_reference_coordinates[new_key] = d_reference_coordinates.pop(old_key)

# Name of the .avi file that belongs to each .csv file: everything up to and including 'Vid1'
l_keys_we_need = ['{}.avi'.format(elem[:elem.index('Vid1') + 4]) for elem in os.listdir(os.getcwd() + '/bl6/') if elem.endswith('.csv')]

# The former check `all([True for elem in ... if ...])` only collected True
# values and could therefore never report a missing file
l_missing_keys = [elem for elem in l_keys_we_need if elem not in d_reference_coordinates]

print('Are there reference coordinates for all .csv files?\n\n' + str(not l_missing_keys))
if l_missing_keys:
    print('Missing reference coordinates for: {}'.format(l_missing_keys))

In [None]:
# Specify all columns that shall be included into the master_df
# Each entry is a pair: (column name in the master_df, column key in the processed DataFrame)
col_in_master_df = [
                    # Meta data of the session
                    ('mouse_ID', ('mouseID', '')),
                    ('group_ID', ('groupID', '')),
                    ('mazetype', ('mazetype', '')),
                    ('trialnumber', ('trialnumber', '')),
                    # Frame-wise information
                    ('time', ('time', '')),
                    ('exclude', ('all', 'exclude')),
                    ('freezing', ('Freezing_bout', '')),
                    ('gaitdisruption', ('GaitDisruption_bout', '')),
                    # Properties of the freezing bouts
                    ('freezing_bout_count', ('Freezing_bout', 'count')),
                    ('freezing_bout_duration', ('Freezing_bout', 'duration')),
                    ('freezing_bout_mean_x_norm_cm', ('Freezing_bout', 'mean_x_norm_cm')),
                    ('freezing_bout_mean_y_norm_cm', ('Freezing_bout', 'mean_y_norm_cm')),
                    # Properties of the gait disruption bouts
                    ('gaitdisruption_bout_count', ('GaitDisruption_bout', 'count')),
                    ('gaitdisruption_bout_duration', ('GaitDisruption_bout', 'duration')),
                    ('gaitdisruption_bout_mean_x_norm_cm', ('GaitDisruption_bout', 'mean_x_norm_cm')),
                    ('gaitdisruption_bout_mean_y_norm_cm', ('GaitDisruption_bout', 'mean_y_norm_cm')),
                    ('gaitdisruption_bout_direction_bool', ('GaitDisruption_bout', 'direction_bool')),
                    ('gaitdisruption_bout_direction_mean', ('GaitDisruption_bout', 'direction_mean')),
                    # Position and speed of the CenterOfGravity
                    ('CenterOfGravity_x_norm_cm', ('CenterOfGravity', 'x_norm_cm')),
                    ('CenterOfGravity_y_norm_cm', ('CenterOfGravity', 'y_norm_cm')),                    
                    ('CenterOfGravity_rolling_speed_px_per_s', ('CenterOfGravity', 'rolling_speed_px_per_s')),
                    # Whole-session summaries (same value in every row)
                    ('Percentage_time_spent_freezing_session', ('whole_session', 'percentage_time_spent_freezing')),
                    ('Median_freezing_bout_duration_session', ('whole_session', 'median_freezing_bout_duration')),
                    ('Median_x_norm_cm_all_freezing_bouts_session', ('whole_session', 'median_x_norm_cm_all_freezing_bouts')),
                    ('Median_y_norm_cm_all_freezing_bouts_session', ('whole_session', 'median_y_norm_cm_all_freezing_bouts')),
                    ('Percentage_time_spent_gaitdisrupted_session', ('whole_session', 'percentage_time_spent_gait_disrupted')),
                    ('Median_gaitdisruption_bout_duration_session', ('whole_session', 'median_gait_disruption_bout_duration')),
                    ('Median_x_norm_cm_all_gaitdisruption_bouts_session', ('whole_session', 'median_x_norm_cm_all_gait_disruption_bouts')),
                    ('Median_y_norm_cm_all_gaitdisruption_bouts_session', ('whole_session', 'median_y_norm_cm_all_gait_disruption_bouts'))    
                   ]

### Loop over all files:

In [None]:
def _first_row_per_bout(master_df, count_column):
    """Return a DataFrame with the first row of every bout listed in ``count_column``.

    If the column holds a single unique value (nothing but NaN = no bout, or
    one bout that covers every frame), the very first row of ``master_df`` is
    returned, so that every session is represented by at least one row.
    """
    unique_counts = master_df[count_column].unique()
    if len(unique_counts) == 1:
        l_rows = [master_df.iloc[0].to_frame().T]
    else:
        # `NaN >= 0` is False, which removes the NaN from the unique values
        l_rows = [master_df.loc[master_df[count_column] == bout_idx].iloc[0].to_frame().T
                  for bout_idx in unique_counts if bout_idx >= 0]
    return pd.concat(l_rows, axis=0)


# Not filled by this cell; kept in case other cells refer to it
l_dfs_for_masterdf = []

l_freezing_bout_dfs = []
l_gait_disruption_bout_dfs = []
l_location_dfs = []

# Letter in the session tag -> mazetype; checked in this order, the first match wins
l_mazetype_codes = [('e', 'exponential'),
                    ('t', 'triangle'),
                    ('r', 'rectangle'),
                    ('c', 'shortened_rectangle')]

# Sessions ('<mouse_ID>_<session>') with a framerate that differs from the default of 42
d_deviating_framerates = {'282_R3': 57, '285_E3': 57, '273_R3': 54}

# Make sure that the output folders exist before anything is written to them
file_path_dicts = os.getcwd() + '/processed_dicts/'
file_path_dfs = os.getcwd() + '/master_dfs/'
os.makedirs(file_path_dicts, exist_ok=True)
os.makedirs(file_path_dfs, exist_ok=True)

# Lower borders of the 100 position bins (0.5 cm each) along the maze
lower_borders = np.linspace(0, 49.5, 100)

for filename in l_filenames:

    # Get the date of the recording
    date = filename[filename.index('-')+1:filename.index('_')]

    # Get the mouse_ID
    cropped_filename = filename[filename.index('_')+1:]
    mouse_ID = cropped_filename[:cropped_filename.index('_')]

    # Get the mazetype and trial count
    cropped_filename = cropped_filename[cropped_filename.index('_')+1:]
    session = cropped_filename[:cropped_filename.index('_')]
    trial = session[-1:]

    mazetype = None
    for code, name_of_mazetype in l_mazetype_codes:
        if code in session.lower():
            mazetype = name_of_mazetype
            break
    if mazetype is None:
        # Without this check the mazetype of the previous file would silently be reused
        print('Could not determine the mazetype of: {} - skipping this file'.format(filename))
        continue

    # Get the name of the corresponding .avi file:
    filename_avi = 'OpenTrack-{}_{}_{}_Vid1.avi'.format(date, mouse_ID, session)

    # Without reference coordinates the normalized coordinates cannot be
    # computed and all subsequent steps would fail, so the file is skipped
    if filename_avi not in d_reference_coordinates:
        print('No reference coordinates for: {}'.format(filename_avi))
        continue

    framerate = d_deviating_framerates.get('{}_{}'.format(mouse_ID, session), 42)

    df_session = pd.read_csv(directory + filename, skiprows=1, index_col=0, header=[0, 1])

    # Collect everything that belongs to this session in one dictionary
    d_session_data = {'filename_DLC_csv': filename,
                      'filename_avi': filename_avi,
                      'mouse_ID': mouse_ID,
                      'group_ID': groupID,
                      'mazetype': mazetype,
                      'trialnumber': trial,
                      'date': date,
                      'processed_DataFrame': df_session}

    # Create a list with all bodyparts: every third column starts a new bodypart
    # (presumably x, y and likelihood per bodypart)
    l_bodyparts = [elem[0] for elem in df_session.columns[::3]]

    # Add Meta data columns
    df_session['mouseID'] = mouse_ID
    df_session['groupID'] = groupID
    df_session['trialnumber'] = trial
    df_session['DateOfRecording'] = date
    df_session['mazetype'] = mazetype

    # Set exclusion by default to False for every frame:
    df_session[('all', 'exclude')] = False

    # Processing of the data
    # Get a timestamp for each frame
    df_session = get_time(df_session)
    # Identify potential duplicates in the index col (error DLC?) and mark these frames as to be excluded
    df_session = identify_duplicates(df_session)
    # Mark for each bodypart individually those frames, in which this bodypart has to be excluded, based on the DLC likelihood
    df_session = exclude_frames(df_session)
    # Calculate the coordinates of the CenterOfGravity
    df_session = get_center_of_gravity(df_session)
    # Translate all coordinates into the reference space:
    df_session = normalize_coordinates(df_session)
    # Calculate speed and rolling speed
    df_session = get_speed_and_rolling_speed(df_session)
    # Identify in which frame the individual bodyparts are immobile (moving with less speed than the immobility_threshold)
    df_session = get_immobility(df_session)
    # Identify freezing bouts
    df_session = get_freezing_bouts(df_session)
    # Get the direction in which the mouse is moving:
    df_session = get_direction(df_session)
    # Identify gait disruption bouts:
    df_session = get_gait_disruption_bouts(df_session)
    # Calculate some session averages:
    df_session = get_session_averages(df_session)

    d_session_data['processed_DataFrame'] = df_session

    # Now that all processing is done, create a DF with all relevant columns
    d_for_master_df = {key: df_session[col_in_processed_df].values for key, col_in_processed_df in col_in_master_df}
    master_df = pd.DataFrame(data=d_for_master_df)

    # One row per bout for the cohort-wide summaries
    df_freezing_bouts = _first_row_per_bout(master_df, 'freezing_bout_count')
    df_gait_disruption_bouts = _first_row_per_bout(master_df, 'gaitdisruption_bout_count')

    # Number of frames and average speed within each 0.5 cm bin along the maze
    l_frame_count = []
    l_avg_speed = []

    for lower_border in lower_borders:
        in_bin = ((master_df['CenterOfGravity_y_norm_cm'] >= lower_border) &
                  (master_df['CenterOfGravity_y_norm_cm'] < lower_border + 0.5))
        l_frame_count.append(int(in_bin.sum()))
        l_avg_speed.append(master_df.loc[in_bin, 'CenterOfGravity_rolling_speed_px_per_s'].mean())

    d_location_df = {'mouse_ID': [mouse_ID]*100,
                     'group_ID': [groupID]*100,
                     'mazetype': [mazetype]*100,
                     'trialnumber': [trial]*100,
                     'total_frames': [master_df.shape[0]]*100,
                     'distance_from_start_cm': lower_borders,
                     'frame_count': l_frame_count,
                     'average_speed': l_avg_speed}

    df_location = pd.DataFrame(d_location_df)

    l_freezing_bout_dfs.append(df_freezing_bouts)
    l_gait_disruption_bout_dfs.append(df_gait_disruption_bouts)
    l_location_dfs.append(df_location)

    # Save all results
    with open('{}{}_{}_{}_{}_processed_dict.p'.format(file_path_dicts, groupID, mouse_ID, mazetype, trial), 'wb') as fp_out:
        pickle.dump(d_session_data, fp_out, protocol=pickle.HIGHEST_PROTOCOL)

    master_df.to_csv('{}{}_{}_{}_{}_master_df.csv'.format(file_path_dfs, groupID, mouse_ID, mazetype, trial))

    print('Done with {}'.format(filename))


    
# Merge the per-session summaries of the whole cohort (with a fresh running
# index) and store each of them as one .csv file in the current working directory

# One row per freezing bout
df_all_freezing_bouts = pd.concat(l_freezing_bout_dfs, ignore_index=True)
df_all_freezing_bouts.to_csv('master_freezing_bouts_df_{}.csv'.format(groupID))

# One row per gait disruption bout
df_all_gait_disruption_bouts = pd.concat(l_gait_disruption_bout_dfs, ignore_index=True)
df_all_gait_disruption_bouts.to_csv('master_gait_disruption_bouts_df_{}.csv'.format(groupID))

# One row per session and position bin
df_all_locations = pd.concat(l_location_dfs, ignore_index=True)
df_all_locations.to_csv('master_location_df_{}.csv'.format(groupID))

## 2) Parkinsonian cohort

In [None]:
# Folder inside the current working directory that holds all files of this cohort:
directory = '{}/chat/'.format(os.getcwd())

In [None]:
# Collect the names of all .csv files found in the data directory:
l_filenames = list(filter(lambda name: name.endswith('.csv'), os.listdir(directory)))

Quickly confirm whether the files in l_filenames are the correct ones:

In [None]:
# Bare expression: in a notebook this displays the list of collected file names
l_filenames

In [None]:
# Mouse IDs of the two groups, needed to assign every file to its group.
# Note: F5-17 is listed with both spellings ('Chat-...' and 'chat-...').
l_empty_vector = [
    'Chat-ires-cre-F5-9',
    'Chat-ires-cre-F5-11',
    'Chat-ires-cre-F5-17',
    'chat-ires-cre-F5-17',
]
l_a53 = [
    'Chat-ires-cre-F5-10',
    'Chat-ires-cre-F5-12',
    'Chat-ires-cre-F5-18',
]

In [None]:
# Minimal DLC likelihood; bodyparts tracked with a lower likelihood are excluded in that frame
DLC_likelihood_threshold = 0.9

# Rolling speed (px/s) below which a bodypart counts as immobile.
# This value is still arbitrary; it will be checked once the basic code is in place.
immobility_threshold = 16

# NOTE(review): not read by any of the functions visible in this part of the
# notebook - get_freezing_bouts() works with a fixed duration of one second
min_freezing_duration = 1

# Durations used by get_gait_disruption_bouts(); they are matched against
# multiples of 1/framerate
TIME_OF_GAIT_BEFORE_DISRUPT = 0.5
TARGET_TIME_GAIT_DISRUPTION = 0.2

# Load annotation data
# Make sure that the file is located in the current working directory
# NOTE: unpickling can execute arbitrary code - only load pickle files that you created yourself
with open('reference_coordinate_sawako.p', 'rb') as fp:
    d_reference_coordinates = pickle.load(fp)

# Might have to be removed. Was required to compensate for some naming errors
l_files_to_rename = [('OpenTrack-210511_Chat-ires-cre-F5-11_C1_Vid1DLC_resnet152_CD_ExtMay11shuffle1_800000.avi', 'OpenTrack-210511_Chat-ires-cre-F5-11_C1_Vid1.avi'),
                     ('OpenTrack-210511_Chat-ires-cre-F5-9_C1_Vid1DLC_resnet152_CD_ExtMay11shuffle1_800000.avi', 'OpenTrack-210511_Chat-ires-cre-F5-9_C1_Vid1.avi')]

for old_key, new_key in l_files_to_rename:
    # Only rename keys that are present, so that a corrected annotation file does not raise a KeyError
    if old_key in d_reference_coordinates:
        d_reference_coordinates[new_key] = d_reference_coordinates.pop(old_key)

# Name of the .avi file that belongs to each .csv file: everything up to and including 'Vid1'
l_keys_we_need = ['{}.avi'.format(elem[:elem.index('Vid1') + 4]) for elem in os.listdir(os.getcwd() + '/chat/') if elem.endswith('.csv')]

# Collect the videos without reference coordinates. The previous check built a list that only
# contained True and could therefore never report a missing file.
l_missing_keys = [elem for elem in l_keys_we_need if elem not in d_reference_coordinates]

print('Are there reference coordinates for all .csv files?\n\n' + str(not l_missing_keys))
if l_missing_keys:
    print('Missing reference coordinates for:\n' + '\n'.join(l_missing_keys))

In [None]:
# Columns that shall be included into the master_df.
# Key: column name in the master_df; value: (two-level) column of the processed DataFrame it is read from.
d_col_in_master_df = {
    # Meta data
    'mouse_ID': ('mouseID', ''),
    'group_ID': ('groupID', ''),
    'mazetype': ('mazetype', ''),
    'trialnumber': ('trialnumber', ''),
    'time': ('time', ''),
    'exclude': ('all', 'exclude'),
    # Bout flags
    'freezing': ('Freezing_bout', ''),
    'gaitdisruption': ('GaitDisruption_bout', ''),
    # Freezing bouts
    'freezing_bout_count': ('Freezing_bout', 'count'),
    'freezing_bout_duration': ('Freezing_bout', 'duration'),
    'freezing_bout_mean_x_norm_cm': ('Freezing_bout', 'mean_x_norm_cm'),
    'freezing_bout_mean_y_norm_cm': ('Freezing_bout', 'mean_y_norm_cm'),
    # Gait disruption bouts
    'gaitdisruption_bout_count': ('GaitDisruption_bout', 'count'),
    'gaitdisruption_bout_duration': ('GaitDisruption_bout', 'duration'),
    'gaitdisruption_bout_mean_x_norm_cm': ('GaitDisruption_bout', 'mean_x_norm_cm'),
    'gaitdisruption_bout_mean_y_norm_cm': ('GaitDisruption_bout', 'mean_y_norm_cm'),
    'gaitdisruption_bout_direction_bool': ('GaitDisruption_bout', 'direction_bool'),
    'gaitdisruption_bout_direction_mean': ('GaitDisruption_bout', 'direction_mean'),
    # Center of gravity
    'CenterOfGravity_x_norm_cm': ('CenterOfGravity', 'x_norm_cm'),
    'CenterOfGravity_y_norm_cm': ('CenterOfGravity', 'y_norm_cm'),
    'CenterOfGravity_rolling_speed_px_per_s': ('CenterOfGravity', 'rolling_speed_px_per_s'),
    # Session summaries: freezing
    'Percentage_time_spent_freezing_session': ('whole_session', 'percentage_time_spent_freezing'),
    'Median_freezing_bout_duration_session': ('whole_session', 'median_freezing_bout_duration'),
    'Median_x_norm_cm_all_freezing_bouts_session': ('whole_session', 'median_x_norm_cm_all_freezing_bouts'),
    'Median_y_norm_cm_all_freezing_bouts_session': ('whole_session', 'median_y_norm_cm_all_freezing_bouts'),
    # Session summaries: gait disruption
    'Percentage_time_spent_gaitdisrupted_session': ('whole_session', 'percentage_time_spent_gait_disrupted'),
    'Median_gaitdisruption_bout_duration_session': ('whole_session', 'median_gait_disruption_bout_duration'),
    'Median_x_norm_cm_all_gaitdisruption_bouts_session': ('whole_session', 'median_x_norm_cm_all_gait_disruption_bouts'),
    'Median_y_norm_cm_all_gaitdisruption_bouts_session': ('whole_session', 'median_y_norm_cm_all_gait_disruption_bouts'),
}

# The processing loop iterates over (master_df column, processed_DataFrame column) pairs
col_in_master_df = list(d_col_in_master_df.items())

### Loop over all files:

In [None]:
l_dfs_for_masterdf = []

l_freezing_bout_dfs = []
l_gait_disruption_bout_dfs = []
l_location_dfs = []

# Frame rate of the recordings; get_time() reads this global to convert frame indices into time
framerate = 42

# Letter in the session code (case-insensitive) -> maze type.
# The order matters: the first letter that is found in the session code wins.
l_session_codes = [('e', 'exponential'),
                   ('t', 'triangle'),
                   ('r', 'rectangle'),
                   ('c', 'shortened_rectangle')]

# Lower borders of the 100 bins (0.5 cm wide) along the normalized y-axis used for the location table
l_bin_borders = np.linspace(0, 49.5, 100)
n_bins = len(l_bin_borders)


def _first_row_per_bout(df, count_col):
    """Return a DataFrame with one row per bout: the first frame that carries each bout index.

    If `count_col` holds only a single unique value (treated as "no bout in this session,
    all values are NaN"), the very first row of `df` is returned instead, presumably so that
    the session is still represented in the bout table.
    """
    l_bout_indices = df[count_col].unique()
    if len(l_bout_indices) == 1:
        return pd.concat([df.iloc[0].to_frame().T], axis=0)
    # `NaN >= 0` is False, which removes the NaN of all frames outside of a bout
    l_rows = [df.loc[df[count_col] == bout_idx].iloc[0].to_frame().T
              for bout_idx in l_bout_indices if bout_idx >= 0]
    return pd.concat(l_rows, axis=0)


for filename in l_filenames:

    # Get the date of the recording (between the first '-' and the first '_')
    date = filename[filename.index('-')+1:filename.index('_')]

    # Get the mouse_ID (second '_'-separated element)
    cropped_filename = filename[filename.index('_')+1:]
    mouse_ID = cropped_filename[:cropped_filename.index('_')]

    # Every mouse that is not explicitly listed is assigned to the parkinsonian group
    if mouse_ID in l_empty_vector:
        groupID = 'empty_vector'
    elif mouse_ID in l_a53:
        groupID = 'a53'
    else:
        groupID = 'parkinsonian'

    # Get the mazetype and trial count (third '_'-separated element, the last character is the trial)
    cropped_filename = cropped_filename[cropped_filename.index('_')+1:]
    session = cropped_filename[:cropped_filename.index('_')]
    trial = session[-1:]

    # Reset the maze type for every file. Otherwise an unknown session code would silently
    # reuse the maze type of the previously processed file.
    mazetype = None
    for code, mazetype_name in l_session_codes:
        if code in session.lower():
            mazetype = mazetype_name
            break
    if mazetype is None:
        print('Skipping {}: cannot derive the maze type from session "{}"'.format(filename, session))
        continue

    # Get the name of the corresponding .avi file:
    filename_avi = 'OpenTrack-{}_{}_{}_Vid1.avi'.format(date, mouse_ID, session)

    # Collect the meta data and the raw DLC output of this session in one dictionary
    d_session_data = {'filename_DLC_csv': filename,
                      'filename_avi': filename_avi,
                      'mouse_ID': mouse_ID,
                      'group_ID': groupID,
                      'mazetype': mazetype,
                      'trialnumber': trial,
                      'date': date,
                      'processed_DataFrame': pd.read_csv(directory + filename, skiprows=1, index_col=0, header=[0, 1])}

    # Create a list with all bodyparts (every third column starts a new bodypart)
    l_bodyparts = [elem[0] for elem in d_session_data['processed_DataFrame'].columns[::3]]

    # Add Meta data columns
    d_session_data['processed_DataFrame']['mouseID'] = mouse_ID
    d_session_data['processed_DataFrame']['groupID'] = groupID
    d_session_data['processed_DataFrame']['trialnumber'] = trial
    d_session_data['processed_DataFrame']['DateOfRecording'] = date
    d_session_data['processed_DataFrame']['mazetype'] = mazetype

    # Set exclusion by default to False for every frame:
    d_session_data['processed_DataFrame'][('all', 'exclude')] = False

    # Processing of the data
    # Get a timestamp for each frame
    d_session_data['processed_DataFrame'] = get_time(d_session_data['processed_DataFrame'])
    # Identify potential duplicates in the index col (error DLC?) and mark these frames as to be excluded
    d_session_data['processed_DataFrame'] = identify_duplicates(d_session_data['processed_DataFrame'])
    # Mark for each bodypart individually those frames, in which this bodypart has to be excluded, based on the DLC likelihood
    d_session_data['processed_DataFrame'] = exclude_frames(d_session_data['processed_DataFrame'])
    # Calculate the coordinates of the CenterOfGravity
    d_session_data['processed_DataFrame'] = get_center_of_gravity(d_session_data['processed_DataFrame'])

    # Translate all coordinates into the reference space:
    if filename_avi in d_reference_coordinates:
        d_session_data['processed_DataFrame'] = normalize_coordinates(d_session_data['processed_DataFrame'])
    else:
        # NOTE(review): without normalization the *_norm_cm columns are presumably missing and
        # the following steps may fail - confirm
        print('No reference coordinates for: {}'.format(filename_avi))

    # Calculate speed and rolling speed
    d_session_data['processed_DataFrame'] = get_speed_and_rolling_speed(d_session_data['processed_DataFrame'])
    # Identify in which frame the individual bodyparts are immobile (moving with less speed than the immobility_threshold)
    d_session_data['processed_DataFrame'] = get_immobility(d_session_data['processed_DataFrame'])
    # Identify freezing bouts
    d_session_data['processed_DataFrame'] = get_freezing_bouts(d_session_data['processed_DataFrame'])
    # Get the direction in which the mouse is moving:
    d_session_data['processed_DataFrame'] = get_direction(d_session_data['processed_DataFrame'])
    # Identify gait disruption bouts:
    d_session_data['processed_DataFrame'] = get_gait_disruption_bouts(d_session_data['processed_DataFrame'])
    # Calculate some session averages:
    d_session_data['processed_DataFrame'] = get_session_averages(d_session_data['processed_DataFrame'])

    # Now that all processing is done, create a DF with all relevant columns
    d_for_master_df = {}

    for key, col_in_processed_df in col_in_master_df:
        d_for_master_df[key] = d_session_data['processed_DataFrame'][col_in_processed_df].values

    master_df = pd.DataFrame(data=d_for_master_df)

    # One row per freezing bout / gait disruption bout
    df_freezing_bouts = _first_row_per_bout(master_df, 'freezing_bout_count')
    df_gait_disruption_bouts = _first_row_per_bout(master_df, 'gaitdisruption_bout_count')

    # Count the frames and average the rolling speed of the CenterOfGravity for each 0.5 cm bin
    l_frame_count = []
    l_avg_speed = []

    for lower_border in l_bin_borders:
        is_in_bin = ((master_df['CenterOfGravity_y_norm_cm'] >= lower_border) &
                     (master_df['CenterOfGravity_y_norm_cm'] < lower_border + 0.5))
        l_frame_count.append(master_df.loc[is_in_bin].shape[0])
        l_avg_speed.append(master_df.loc[is_in_bin, 'CenterOfGravity_rolling_speed_px_per_s'].mean())

    d_location_df = {'mouse_ID': [mouse_ID]*n_bins,
                     'group_ID': [groupID]*n_bins,
                     'mazetype': [mazetype]*n_bins,
                     'trialnumber': [trial]*n_bins,
                     'total_frames': [master_df.shape[0]]*n_bins,
                     'distance_from_start_cm': l_bin_borders,
                     'frame_count': l_frame_count,
                     'average_speed': l_avg_speed}

    df_location = pd.DataFrame(d_location_df)

    l_freezing_bout_dfs.append(df_freezing_bouts)
    l_gait_disruption_bout_dfs.append(df_gait_disruption_bouts)
    l_location_dfs.append(df_location)

    # Save all results. The files of all groups of this cohort carry the prefix 'parkinsonian'.
    file_path_dicts = os.getcwd() + '/processed_dicts/'
    with open('{}{}_{}_{}_{}_processed_dict.p'.format(file_path_dicts, 'parkinsonian', mouse_ID, mazetype, trial), 'wb') as io:
        pickle.dump(d_session_data, io, protocol=pickle.HIGHEST_PROTOCOL)

    file_path_dfs = os.getcwd() + '/master_dfs/'
    master_df.to_csv('{}{}_{}_{}_{}_master_df.csv'.format(file_path_dfs, 'parkinsonian', mouse_ID, mazetype, trial))

    print('Done with {}'.format(filename))
    
# Stack the per-session tables collected in the loop into one table per result type,
# renumber the rows from 0 and write each table to the current working directory.
df_all_freezing_bouts = pd.concat(l_freezing_bout_dfs).reset_index(drop=True)
df_all_freezing_bouts.to_csv('master_freezing_bouts_df_{}.csv'.format('parkinsonian'))

df_all_gait_disruption_bouts = pd.concat(l_gait_disruption_bout_dfs).reset_index(drop=True)
df_all_gait_disruption_bouts.to_csv('master_gait_disruption_bouts_df_{}.csv'.format('parkinsonian'))

df_all_locations = pd.concat(l_location_dfs).reset_index(drop=True)
df_all_locations.to_csv('master_location_df_{}.csv'.format('parkinsonian'))

# __Plotting & statistics__

In [None]:
# Import all dependencies
%matplotlib inline

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import seaborn as sns

## Load all files:

In [None]:
# Location tables of both cohorts; the first column of each file is the saved row index
df_all_locations_bl6 = pd.read_csv('master_location_df_bl6.csv', index_col=0)
# Every session of the parkinsonian cohort is labelled as trial 1
df_all_locations = pd.read_csv('master_location_df_parkinsonian.csv', index_col=0).assign(trialnumber=1)
# Parkinsonian rows first, bl6 rows second
df_all_locations_all_mice = pd.concat([df_all_locations, df_all_locations_bl6], axis=0)
df_all_locations_all_mice.head()

In [None]:
# Freezing bout tables of both cohorts; the first column of each file is the saved row index
df_all_freezing_bouts_bl6 = pd.read_csv('master_freezing_bouts_df_bl6.csv', index_col=0)
# Every session of the parkinsonian cohort is labelled as trial 1
df_all_freezing_bouts = pd.read_csv('master_freezing_bouts_df_parkinsonian.csv', index_col=0).assign(trialnumber=1)
# Parkinsonian rows first, bl6 rows second
df_all_freezing_bouts_all_mice = pd.concat([df_all_freezing_bouts, df_all_freezing_bouts_bl6], axis=0)
df_all_freezing_bouts_all_mice.head()

In [None]:
# Gait disruption bout tables of both cohorts; the first column of each file is the saved row index
df_all_gait_disruption_bouts_bl6 = pd.read_csv('master_gait_disruption_bouts_df_bl6.csv', index_col=0)
# Every session of the parkinsonian cohort is labelled as trial 1
df_all_gait_disruption_bouts = pd.read_csv('master_gait_disruption_bouts_df_parkinsonian.csv', index_col=0).assign(trialnumber=1)
# Parkinsonian rows first, bl6 rows second
df_all_gait_disruption_bouts_all_mice = pd.concat([df_all_gait_disruption_bouts, df_all_gait_disruption_bouts_bl6], axis=0)
df_all_gait_disruption_bouts_all_mice.head()

In [None]:
# Split the gait disruption bouts by their direction flag: False -> inwards, True -> outwards.
# Rows whose flag is neither True nor False end up in neither table.
direction_flag = df_all_gait_disruption_bouts_all_mice['gaitdisruption_bout_direction_bool']
df_inwards_gait_disruption_bouts_all_mice = df_all_gait_disruption_bouts_all_mice[direction_flag == False].copy()
df_outwards_gait_disruption_bouts_all_mice = df_all_gait_disruption_bouts_all_mice[direction_flag == True].copy()

In [None]:
def get_total_bount_count(uniques):
    """Return how many entries of the numeric array `uniques` are not NaN.

    input: array of unique bout indices (NaN marks "no bout")
    output: number of non-NaN entries as int
    """
    is_valid = np.logical_not(np.isnan(uniques))
    valid_entries = uniques[is_valid]
    return valid_entries.shape[0]

def get_fuzziness(df_tmp, wall_end_position, half_window_size):
    """Describe how the rows of `df_tmp` are distributed around the end of the wall.

    Only rows whose 'CenterOfGravity_y_norm_cm' lies within
    [wall_end_position - half_window_size, wall_end_position + half_window_size]
    (both borders included) are taken into account.

    input:
        df_tmp: DataFrame with a column 'CenterOfGravity_y_norm_cm'
        wall_end_position: position of the wall end on the normalized y-axis
        half_window_size: half width of the window around wall_end_position
    output: tuple (mean_pos, median_pos, std_dev, bout_count)
        mean_pos / std_dev: mean and standard deviation of the positions inside the window,
            NaN if fewer than 3 rows fall into the window
        median_pos: median of the positions inside the window (NaN only if the window is empty)
        bout_count: number of rows inside the window
    """
    # Select the window once. The window size was hard-coded to 10 before, which silently
    # ignored the half_window_size argument (all existing calls pass 10).
    positions = df_tmp['CenterOfGravity_y_norm_cm']
    is_in_window = ((positions >= wall_end_position - half_window_size) &
                    (positions <= wall_end_position + half_window_size))
    positions_in_window = df_tmp.loc[is_in_window, 'CenterOfGravity_y_norm_cm']

    mean_pos = positions_in_window.mean()
    median_pos = positions_in_window.median()
    std_dev = positions_in_window.std()
    bout_count = positions_in_window.shape[0]

    # Mean and standard deviation are not reported for fewer than 3 rows.
    # NOTE(review): the median is deliberately left untouched to keep the previous behavior - confirm
    if bout_count < 3:
        # np.nan instead of np.NaN: the upper-case alias was removed in NumPy 2.0
        mean_pos = np.nan
        std_dev = np.nan

    return mean_pos, median_pos, std_dev, bout_count


# Build one summary row per session (mouse x maze type x trial).
# The sessions are taken from the table of all gait disruption bouts.
df_temp = df_all_gait_disruption_bouts_all_mice.copy()


def _select_session(df, mouse_id, mazetype, trialnumber):
    """Return a copy of all rows of `df` that belong to the given mouse, maze type and trial."""
    return df.loc[(df['mouse_ID'] == mouse_id) &
                  (df['mazetype'] == mazetype) &
                  (df['trialnumber'] == trialnumber)].copy()


# Column name -> list with one value per session (filled in the loop below)
d_data = {}

for mouse_id in df_temp['mouse_ID'].unique():
    for mazetype in df_temp.loc[df_temp['mouse_ID'] == mouse_id, 'mazetype'].unique():
        for trialnumber in df_temp.loc[(df_temp['mouse_ID'] == mouse_id) & (df_temp['mazetype'] == mazetype), 'trialnumber'].unique():
            df_temp_all = _select_session(df_all_gait_disruption_bouts_all_mice, mouse_id, mazetype, trialnumber)
            df_temp_in = _select_session(df_inwards_gait_disruption_bouts_all_mice, mouse_id, mazetype, trialnumber)
            df_temp_out = _select_session(df_outwards_gait_disruption_bouts_all_mice, mouse_id, mazetype, trialnumber)
            df_temp_freezing = _select_session(df_all_freezing_bouts_all_mice, mouse_id, mazetype, trialnumber)

            # Position of the wall end on the normalized y-axis
            if mazetype == 'shortened_rectangle':
                wall_end_position = 27.8
            else:
                wall_end_position = 35

            # Distribution of the bouts within +/- 10 around the wall end
            mean_pos_freezing, median_pos_freezing, std_dev_freezing, bout_count_freezing = get_fuzziness(df_temp_freezing, wall_end_position, 10)
            mean_pos_gait_all, median_pos_gait_all, std_dev_gait_all, bout_count_gait_all = get_fuzziness(df_temp_all, wall_end_position, 10)
            mean_pos_gait_in, median_pos_gait_in, std_dev_gait_in, bout_count_gait_in = get_fuzziness(df_temp_in, wall_end_position, 10)
            mean_pos_gait_out, median_pos_gait_out, std_dev_gait_out, bout_count_gait_out = get_fuzziness(df_temp_out, wall_end_position, 10)

            # The insertion order of this dict defines the column order of df_total_bout_counts
            d_row = {
                'mouse_ID': mouse_id,
                'group_ID': df_temp_all['group_ID'].unique()[0],
                'mazetype': mazetype,
                'trialnumber': trialnumber,
                'count_freezing_bouts': get_total_bount_count(df_temp_freezing['freezing_bout_count'].unique()),
                'count_gait_disruption_bouts_all': get_total_bount_count(df_temp_all['gaitdisruption_bout_count'].unique()),
                'count_gait_disruption_bouts_in': get_total_bount_count(df_temp_in['gaitdisruption_bout_count'].unique()),
                'count_gait_disruption_bouts_out': get_total_bount_count(df_temp_out['gaitdisruption_bout_count'].unique()),
                'mean_freezing_bout_duration': df_temp_freezing['freezing_bout_duration'].mean(),
                'mean_gait_disruption_bout_duration_all': df_temp_all['gaitdisruption_bout_duration'].mean(),
                'mean_gait_disruption_bout_duration_in': df_temp_in['gaitdisruption_bout_duration'].mean(),
                'mean_gait_disruption_bout_duration_out': df_temp_out['gaitdisruption_bout_duration'].mean(),
                # The session-wide percentages are identical in every row of a session
                'percentage_of_time_spent_freezing': df_temp_freezing['Percentage_time_spent_freezing_session'].unique()[0],
                'percentage_of_time_spent_gait_disrupted_all': df_temp_all['Percentage_time_spent_gaitdisrupted_session'].unique()[0],
                'mean_freezing_bouts_y_position': df_temp_freezing['freezing_bout_mean_y_norm_cm'].mean(),
                'mean_gait_disruption_bouts_y_position_all': df_temp_all['gaitdisruption_bout_mean_y_norm_cm'].mean(),
                'mean_gait_disruption_bouts_y_position_in': df_temp_in['gaitdisruption_bout_mean_y_norm_cm'].mean(),
                'mean_gait_disruption_bouts_y_position_out': df_temp_out['gaitdisruption_bout_mean_y_norm_cm'].mean(),
                # Fixed: this column was filled with the mean instead of the median
                'median_freezing_bouts_y_position': df_temp_freezing['freezing_bout_mean_y_norm_cm'].median(),
                'median_gait_disruption_bouts_y_position_all': df_temp_all['gaitdisruption_bout_mean_y_norm_cm'].median(),
                'median_gait_disruption_bouts_y_position_in': df_temp_in['gaitdisruption_bout_mean_y_norm_cm'].median(),
                'median_gait_disruption_bouts_y_position_out': df_temp_out['gaitdisruption_bout_mean_y_norm_cm'].median(),
                'wall_endzone_mean_freezing_bouts_y_position': mean_pos_freezing,
                'wall_endzone_mean_gait_disruption_bouts_y_position_all': mean_pos_gait_all,
                'wall_endzone_mean_gait_disruption_bouts_y_position_in': mean_pos_gait_in,
                'wall_endzone_mean_gait_disruption_bouts_y_position_out': mean_pos_gait_out,
                'wall_endzone_median_freezing_bouts_y_position': median_pos_freezing,
                'wall_endzone_median_gait_disruption_bouts_y_position_all': median_pos_gait_all,
                'wall_endzone_median_gait_disruption_bouts_y_position_in': median_pos_gait_in,
                'wall_endzone_median_gait_disruption_bouts_y_position_out': median_pos_gait_out,
                'wall_endzone_stddev_freezing_bouts_y_position': std_dev_freezing,
                'wall_endzone_stddev_gait_disruption_bouts_y_position_all': std_dev_gait_all,
                'wall_endzone_stddev_gait_disruption_bouts_y_position_in': std_dev_gait_in,
                'wall_endzone_stddev_gait_disruption_bouts_y_position_out': std_dev_gait_out,
                'wall_endzone_count_freezing_bouts': bout_count_freezing,
                'wall_endzone_count_gait_disruption_bouts_all': bout_count_gait_all,
                'wall_endzone_count_gait_disruption_bouts_in': bout_count_gait_in,
                'wall_endzone_count_gait_disruption_bouts_out': bout_count_gait_out,
            }

            for column, value in d_row.items():
                d_data.setdefault(column, []).append(value)


df_total_bout_counts = pd.DataFrame(data=d_data)
df_total_bout_counts.head()

In [None]:
# Keep only the session summaries of the bl6 animals (as an independent copy)
is_bl6_session = df_total_bout_counts['group_ID'] == 'bl6'
df_total_bout_counts_bl6 = df_total_bout_counts[is_bl6_session].copy()
df_total_bout_counts_bl6.head()

# Habituation effects:

In [None]:
# Session summary columns for which a habituation plot is created (one plot per column and maze type)
l_data_cols = [
    # Number of bouts
    'count_freezing_bouts',
    'count_gait_disruption_bouts_all',
    'count_gait_disruption_bouts_in',
    'count_gait_disruption_bouts_out',
    # Mean bout duration
    'mean_freezing_bout_duration',
    'mean_gait_disruption_bout_duration_all',
    'mean_gait_disruption_bout_duration_in',
    'mean_gait_disruption_bout_duration_out',
    # Percentage of the session spent in bouts
    'percentage_of_time_spent_freezing',
    'percentage_of_time_spent_gait_disrupted_all',
    # Mean y position of the bouts
    'mean_freezing_bouts_y_position',
    'mean_gait_disruption_bouts_y_position_all',
    'mean_gait_disruption_bouts_y_position_in',
    'mean_gait_disruption_bouts_y_position_out',
    # Median y position of the bouts
    'median_freezing_bouts_y_position',
    'median_gait_disruption_bouts_y_position_all',
    'median_gait_disruption_bouts_y_position_in',
    'median_gait_disruption_bouts_y_position_out',
    # Wall end zone: mean y position
    'wall_endzone_mean_freezing_bouts_y_position',
    'wall_endzone_mean_gait_disruption_bouts_y_position_all',
    'wall_endzone_mean_gait_disruption_bouts_y_position_in',
    'wall_endzone_mean_gait_disruption_bouts_y_position_out',
    # Wall end zone: median y position
    'wall_endzone_median_freezing_bouts_y_position',
    'wall_endzone_median_gait_disruption_bouts_y_position_all',
    'wall_endzone_median_gait_disruption_bouts_y_position_in',
    'wall_endzone_median_gait_disruption_bouts_y_position_out',
    # Wall end zone: standard deviation of the y position
    'wall_endzone_stddev_freezing_bouts_y_position',
    'wall_endzone_stddev_gait_disruption_bouts_y_position_all',
    'wall_endzone_stddev_gait_disruption_bouts_y_position_in',
    'wall_endzone_stddev_gait_disruption_bouts_y_position_out',
    # Wall end zone: number of bouts
    'wall_endzone_count_freezing_bouts',
    'wall_endzone_count_gait_disruption_bouts_all',
    'wall_endzone_count_gait_disruption_bouts_in',
    'wall_endzone_count_gait_disruption_bouts_out',
]

# Identifier columns that are exported together with each data column
l_columns = ['trialnumber', 'mouse_ID', 'mazetype']

# Create one boxplot (trial number vs. data column) per maze type and data column for the
# bl6 animals and export the plotted data next to the figure.
d_trial_labels = {1: 'trial_1', 2: 'trial_2', 3: 'trial_3'}

for mazetype in df_total_bout_counts_bl6['mazetype'].unique():
    for data_col in l_data_cols:
        df_temp = df_total_bout_counts_bl6.loc[df_total_bout_counts_bl6['mazetype'] == mazetype, [data_col] + l_columns].copy()
        df_temp.reset_index(inplace=True, drop=True)

        path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Habituation_effects/{}/'.format(data_col)

        # makedirs also creates missing parent folders and does not fail if the folder exists
        os.makedirs(path, exist_ok=True)

        figname = '{}_{}.png'.format(data_col, mazetype)
        plt.figure(figsize=(7,4), facecolor='white')
        # fliersize=0 hides the outlier markers of the boxplot; all data points are drawn by the stripplot
        sns.boxplot(data=df_temp, x='trialnumber', y=data_col, fliersize=0)
        sns.stripplot(data=df_temp, x='trialnumber', y=data_col, color='k')
        # Let the y-axis start at 0
        plt.ylim(0)
        plt.title(mazetype)
        plt.savefig(path + figname, dpi=300)
        plt.close()

        csv_name = figname.replace('png', 'csv')
        # Replace the trial numbers 1-3 by text labels for the export; other values are kept.
        # The column is rebuilt as a whole instead of writing strings into the integer column via .loc.
        df_temp['trialnumber'] = df_temp['trialnumber'].map(lambda trial: d_trial_labels.get(trial, trial))
        df_temp.to_csv(path + csv_name)

In [None]:
# Plot the y positions of all freezing bouts of the bl6 animals per trial, once with and once
# without the individual bouts (stripplot), and export the plotted data.
l_columns = ['trialnumber', 'mouse_ID', 'mazetype']


data_col = 'freezing_bout_mean_y_norm_cm'

for mazetype in df_all_freezing_bouts_bl6['mazetype'].unique():
    df_temp = df_all_freezing_bouts_bl6.loc[df_all_freezing_bouts_bl6['mazetype'] == mazetype, [data_col] + l_columns].copy()
    df_temp.reset_index(inplace=True, drop=True)

    path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Habituation_effects/{}/'.format('all_freezing_bout_positions')

    # makedirs also creates missing parent folders and does not fail if the folder exists
    os.makedirs(path, exist_ok=True)

    # Position of the wall end, drawn as a dashed line
    if mazetype == 'shortened_rectangle':
        wall_end_position = 27.8
    else:
        wall_end_position = 35

    l_figures = [('all_freezing_bout_positions_{}.png'.format(mazetype), True),
                 ('all_freezing_bout_positions_{}_no_stripplot.png'.format(mazetype), False)]

    for figname, with_stripplot in l_figures:
        plt.figure(figsize=(7,4), facecolor='white')
        # fliersize is a boxplot option and was dropped here: older seaborn versions ignore it
        # for violinplots, newer ones forward unknown keyword arguments to matplotlib
        sns.violinplot(data=df_temp, y='trialnumber', x=data_col, orient='h')
        if with_stripplot:
            sns.stripplot(data=df_temp, y='trialnumber', x=data_col, hue='mouse_ID', orient='h', alpha=0.3)

        # ymin / ymax span the rows of three trials
        plt.vlines(x=wall_end_position, ymin=-0.5, ymax=2.5, color='magenta', linestyle='dashed')

        plt.xlim(0, 60)
        plt.title(mazetype)
        plt.savefig(path + figname, dpi=300)
        plt.close()

    # The exported data carries the name of the figure with stripplot
    csv_name = l_figures[0][0].replace('png', 'csv')
    df_temp.to_csv(path + csv_name)

In [None]:
# Restrict the inwards / outwards gait disruption bouts to the bl6 animals
is_bl6_inwards = df_inwards_gait_disruption_bouts_all_mice['group_ID'] == 'bl6'
df_inwards_gait_disruption_bouts_bl6 = df_inwards_gait_disruption_bouts_all_mice[is_bl6_inwards]

is_bl6_outwards = df_outwards_gait_disruption_bouts_all_mice['group_ID'] == 'bl6'
df_outwards_gait_disruption_bouts_bl6 = df_outwards_gait_disruption_bouts_all_mice[is_bl6_outwards]

In [None]:
# Plot the y positions of all gait disruption bouts of the bl6 animals per trial, once with and
# once without the individual bouts (stripplot), and export the plotted data.
l_columns = ['trialnumber', 'mouse_ID', 'mazetype']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

for mazetype in df_all_gait_disruption_bouts_bl6['mazetype'].unique():
    df_temp = df_all_gait_disruption_bouts_bl6.loc[df_all_gait_disruption_bouts_bl6['mazetype'] == mazetype, [data_col] + l_columns].copy()
    df_temp.reset_index(inplace=True, drop=True)

    path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Habituation_effects/{}/'.format('all_gait_disruption_bout_positions')

    # makedirs also creates missing parent folders and does not fail if the folder exists
    os.makedirs(path, exist_ok=True)

    # Position of the wall end, drawn as a dashed line
    if mazetype == 'shortened_rectangle':
        wall_end_position = 27.8
    else:
        wall_end_position = 35

    l_figures = [('all_gait_disruption_bout_positions_{}.png'.format(mazetype), True),
                 ('all_gait_disruption_bout_positions_{}_no_stripplot.png'.format(mazetype), False)]

    for figname, with_stripplot in l_figures:
        plt.figure(figsize=(7,4), facecolor='white')
        # fliersize is a boxplot option and was dropped here: older seaborn versions ignore it
        # for violinplots, newer ones forward unknown keyword arguments to matplotlib
        sns.violinplot(data=df_temp, y='trialnumber', x=data_col, orient='h')
        if with_stripplot:
            sns.stripplot(data=df_temp, y='trialnumber', x=data_col, hue='mouse_ID', orient='h', alpha=0.3)

        # ymin / ymax span the rows of three trials
        plt.vlines(x=wall_end_position, ymin=-0.5, ymax=2.5, color='magenta', linestyle='dashed')

        plt.xlim(0, 60)
        plt.title(mazetype)
        plt.savefig(path + figname, dpi=300)
        plt.close()

    # The exported data carries the name of the figure with stripplot
    csv_name = l_figures[0][0].replace('png', 'csv')
    df_temp.to_csv(path + csv_name)

In [None]:
# Habituation effects, inwards bouts only: distribution of the gait-disruption
# bout positions per trial number. For every maze type two figures are written
# (with and without the per-mouse strip plot) plus one csv with the plotted data.
l_columns = ['trialnumber', 'mouse_ID', 'mazetype']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Prepared once because it is identical for all maze types. makedirs creates
# missing parent folders as well and tolerates an already existing folder.
path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Habituation_effects/{}/'.format('inwards_gait_disruption_bout_positions')
os.makedirs(path, exist_ok=True)

for mazetype in df_inwards_gait_disruption_bouts_bl6['mazetype'].unique():
    df_temp = df_inwards_gait_disruption_bouts_bl6.loc[df_inwards_gait_disruption_bouts_bl6['mazetype'] == mazetype, [data_col] + l_columns].copy()
    df_temp.reset_index(inplace=True, drop=True)

    # Dashed reference line: 27.8 for the shortened maze, 35 for all other mazes.
    # NOTE(review): presumably a maze landmark in cm - confirm.
    reference_x = 27.8 if mazetype == 'shortened_rectangle' else 35

    # Strip-plot version first, plain violin plot second.
    for with_stripplot in (True, False):
        suffix = '' if with_stripplot else '_no_stripplot'
        figname = 'inwards_gait_disruption_bout_positions_{}{}.png'.format(mazetype, suffix)

        plt.figure(figsize=(7,4), facecolor='white')
        sns.violinplot(data=df_temp, y='trialnumber', x=data_col, fliersize=0, orient='h')
        if with_stripplot:
            sns.stripplot(data=df_temp, y='trialnumber', x=data_col, hue='mouse_ID', orient='h', alpha=0.3)

        # The line spans the categorical rows 0-2 (three trial numbers assumed - TODO confirm).
        plt.vlines(x=reference_x, ymin=-0.5, ymax=2.5, color='magenta', linestyle='dashed')

        plt.xlim(0, 60)
        plt.title(mazetype + ' - inwards only')
        plt.savefig(path + figname, dpi=300)
        plt.close()

        if with_stripplot:
            # Swap only the extension instead of replacing every 'png' substring.
            csv_name = os.path.splitext(figname)[0] + '.csv'
            df_temp.to_csv(path + csv_name)

In [None]:
# Habituation effects, outwards bouts only: distribution of the gait-disruption
# bout positions per trial number. For every maze type two figures are written
# (with and without the per-mouse strip plot) plus one csv with the plotted data.
# The strip-plot version is additionally displayed.
l_columns = ['trialnumber', 'mouse_ID', 'mazetype']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Identical for all maze types, therefore created once up front. makedirs also
# builds missing parent folders and accepts an already existing folder.
path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Habituation_effects/{}/'.format('outwards_gait_disruption_bout_positions')
os.makedirs(path, exist_ok=True)

for mazetype in df_outwards_gait_disruption_bouts_bl6['mazetype'].unique():
    df_temp = df_outwards_gait_disruption_bouts_bl6.loc[df_outwards_gait_disruption_bouts_bl6['mazetype'] == mazetype, [data_col] + l_columns].copy()
    df_temp.reset_index(inplace=True, drop=True)

    # Dashed reference line: 27.8 for the shortened maze, 35 for all other mazes.
    # NOTE(review): presumably a maze landmark in cm - confirm.
    reference_x = 27.8 if mazetype == 'shortened_rectangle' else 35

    # Strip-plot version first, plain violin plot second.
    for with_stripplot in (True, False):
        suffix = '' if with_stripplot else '_no_stripplot'
        figname = 'outwards_gait_disruption_bout_positions_{}{}.png'.format(mazetype, suffix)

        plt.figure(figsize=(7,4), facecolor='white')
        sns.violinplot(data=df_temp, y='trialnumber', x=data_col, fliersize=0, orient='h')
        if with_stripplot:
            sns.stripplot(data=df_temp, y='trialnumber', x=data_col, hue='mouse_ID', orient='h', alpha=0.3)

        # The line spans the categorical rows 0-2 (three trial numbers assumed - TODO confirm).
        plt.vlines(x=reference_x, ymin=-0.5, ymax=2.5, color='magenta', linestyle='dashed')

        plt.xlim(0, 60)
        plt.title(mazetype + ' - outwards only')
        plt.savefig(path + figname, dpi=300)
        if with_stripplot:
            plt.show()
        # Close explicitly so that figures cannot pile up over the iterations when
        # the active backend does not close them on show().
        plt.close()

        if with_stripplot:
            # Swap only the extension instead of replacing every 'png' substring.
            csv_name = os.path.splitext(figname)[0] + '.csv'
            df_temp.to_csv(path + csv_name)

# Effects per mazetype

In [None]:
# Independent copy of the bl6 bout counts restricted to trialnumber 1.
df_total_bout_counts_bl6_only_trial1 = df_total_bout_counts_bl6.loc[
    df_total_bout_counts_bl6['trialnumber'].eq(1)
].copy()
# Preview of the first rows as cell output.
df_total_bout_counts_bl6_only_trial1.head()

In [None]:
# Mazetype effects (trial 1 only): one box plot with overlaid strip plot per
# summary column, each saved as png together with a csv of the plotted data.
l_data_cols = ['count_freezing_bouts', 'count_gait_disruption_bouts_all', 'count_gait_disruption_bouts_in', 'count_gait_disruption_bouts_out',
               'mean_freezing_bout_duration', 'mean_gait_disruption_bout_duration_all', 'mean_gait_disruption_bout_duration_in', 'mean_gait_disruption_bout_duration_out',
               'percentage_of_time_spent_freezing', 'percentage_of_time_spent_gait_disrupted_all',
               'mean_freezing_bouts_y_position', 'mean_gait_disruption_bouts_y_position_all', 'mean_gait_disruption_bouts_y_position_in', 'mean_gait_disruption_bouts_y_position_out',
               'median_freezing_bouts_y_position', 'median_gait_disruption_bouts_y_position_all', 'median_gait_disruption_bouts_y_position_in', 'median_gait_disruption_bouts_y_position_out',

               'wall_endzone_mean_freezing_bouts_y_position', 'wall_endzone_mean_gait_disruption_bouts_y_position_all',
               'wall_endzone_mean_gait_disruption_bouts_y_position_in', 'wall_endzone_mean_gait_disruption_bouts_y_position_out',

               'wall_endzone_median_freezing_bouts_y_position', 'wall_endzone_median_gait_disruption_bouts_y_position_all',
               'wall_endzone_median_gait_disruption_bouts_y_position_in', 'wall_endzone_median_gait_disruption_bouts_y_position_out',

               'wall_endzone_stddev_freezing_bouts_y_position', 'wall_endzone_stddev_gait_disruption_bouts_y_position_all',
               'wall_endzone_stddev_gait_disruption_bouts_y_position_in', 'wall_endzone_stddev_gait_disruption_bouts_y_position_out',

               'wall_endzone_count_freezing_bouts', 'wall_endzone_count_gait_disruption_bouts_all',
               'wall_endzone_count_gait_disruption_bouts_in', 'wall_endzone_count_gait_disruption_bouts_out'
              ]

l_columns = ['mazetype', 'mouse_ID', 'trialnumber']

# Fixed category order on the x axis, shared by box plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

for data_col in l_data_cols:
    df_temp = df_total_bout_counts_bl6_only_trial1[[data_col] + l_columns].copy()
    df_temp.reset_index(inplace=True, drop=True)

    # One output folder per data column. makedirs also creates missing parent
    # folders and does not fail when the folder already exists.
    path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Mazetype_effects/{}/'.format(data_col)
    os.makedirs(path, exist_ok=True)

    figname = '{}.png'.format(data_col)
    plt.figure(figsize=(7,4), facecolor='white')
    sns.boxplot(data=df_temp, x='mazetype', y=data_col, fliersize=0, order=l_mazetype_order)
    sns.stripplot(data=df_temp, x='mazetype', y=data_col, color='k', order=l_mazetype_order)
    # Pin the lower y limit to 0, the upper limit stays automatic.
    plt.ylim(0)
    plt.title('trial 1 data only')
    plt.savefig(path + figname, dpi=300)
    plt.close()

    # Swap only the extension; str.replace('png', 'csv') would also rewrite a
    # 'png' that is part of a column name.
    csv_name = os.path.splitext(figname)[0] + '.csv'
    df_temp.to_csv(path + csv_name)

In [None]:
# Mazetype effects (trial 1 only): positions of all freezing bouts per maze type,
# as violin plot with the individual bouts overlaid and colored by mouse.
l_columns = ['mazetype', 'mouse_ID', 'trialnumber']

data_col = 'freezing_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

df_temp = df_all_freezing_bouts_bl6.loc[df_all_freezing_bouts_bl6['trialnumber'] == 1, [data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Mazetype_effects/{}/'.format('all_freezing_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'all_freezing_bout_positions.png'
plt.figure(figsize=(7,4), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, hue='mouse_ID', orient='h', alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 60)
plt.title('trial 1 data only')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    

In [None]:
# Rows flagged as gait disruption that belong to trialnumber 1.
df_all_gait_disruption_bouts_bl6_trial1 = df_all_gait_disruption_bouts_bl6.loc[
    df_all_gait_disruption_bouts_bl6['gaitdisruption'].eq(True)
    & df_all_gait_disruption_bouts_bl6['trialnumber'].eq(1)
].copy()

# Split by the direction flag: False goes to the inwards frame, True to the outwards frame.
df_inwards_gait_disruption_bouts_bl6_trial1 = df_all_gait_disruption_bouts_bl6_trial1.loc[
    df_all_gait_disruption_bouts_bl6_trial1['gaitdisruption_bout_direction_bool'].eq(False)
].copy()
df_outwards_gait_disruption_bouts_bl6_trial1 = df_all_gait_disruption_bouts_bl6_trial1.loc[
    df_all_gait_disruption_bouts_bl6_trial1['gaitdisruption_bout_direction_bool'].eq(True)
].copy()

In [None]:
# Sanity check shown as cell output: the inwards and outwards subsets together
# must contain exactly as many rows as the combined trial-1 frame.
len(df_all_gait_disruption_bouts_bl6_trial1) == len(df_inwards_gait_disruption_bouts_bl6_trial1) + len(df_outwards_gait_disruption_bouts_bl6_trial1)

In [None]:
# Mazetype effects (trial 1 only): positions of all gait-disruption bouts per
# maze type, as violin plot with the individual bouts overlaid and colored by mouse.
l_columns = ['mazetype', 'mouse_ID', 'trialnumber']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

df_temp = df_all_gait_disruption_bouts_bl6_trial1[[data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Mazetype_effects/{}/'.format('all_gait_disruption_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'all_gait_disruption_bout_positions.png'
plt.figure(figsize=(7,4), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, hue='mouse_ID', orient='h', alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 60)
plt.title('trial 1 data only - all directions')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    

In [None]:
# Mazetype effects (trial 1 only): positions of the inwards gait-disruption bouts
# per maze type, as violin plot with the individual bouts overlaid and colored by mouse.
l_columns = ['mazetype', 'mouse_ID', 'trialnumber']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

df_temp = df_inwards_gait_disruption_bouts_bl6_trial1[[data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Mazetype_effects/{}/'.format('inwards_gait_disruption_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'inwards_gait_disruption_bout_positions.png'
plt.figure(figsize=(7,4), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, hue='mouse_ID', orient='h', alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 60)
plt.title('trial 1 data only - inwards moving only')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    

In [None]:
# Mazetype effects (trial 1 only): positions of the outwards gait-disruption bouts
# per maze type, as violin plot with the individual bouts overlaid and colored by mouse.
l_columns = ['mazetype', 'mouse_ID', 'trialnumber']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

df_temp = df_outwards_gait_disruption_bouts_bl6_trial1[[data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Mazetype_effects/{}/'.format('outwards_gait_disruption_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'outwards_gait_disruption_bout_positions.png'
plt.figure(figsize=(7,4), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, hue='mouse_ID', orient='h', alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 60)
plt.title('trial 1 data only - outwards moving only')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    

# Effects between groups:

In [None]:
# Independent copy of the bout counts restricted to trialnumber 1 (no group filter applied here).
df_total_bout_counts_trial1_all_mice = df_total_bout_counts.loc[
    df_total_bout_counts['trialnumber'].eq(1)
].copy()
# Preview of the first rows as cell output.
df_total_bout_counts_trial1_all_mice.head()

In [None]:
# Group effects (trial 1 only): one box plot with overlaid strip plot per summary
# column, split by group_ID within each maze type. Each figure is saved as png
# together with a csv of the plotted data.
l_data_cols = ['count_freezing_bouts', 'count_gait_disruption_bouts_all', 'count_gait_disruption_bouts_in', 'count_gait_disruption_bouts_out',
               'mean_freezing_bout_duration', 'mean_gait_disruption_bout_duration_all', 'mean_gait_disruption_bout_duration_in', 'mean_gait_disruption_bout_duration_out',
               'percentage_of_time_spent_freezing', 'percentage_of_time_spent_gait_disrupted_all',
               'mean_freezing_bouts_y_position', 'mean_gait_disruption_bouts_y_position_all', 'mean_gait_disruption_bouts_y_position_in', 'mean_gait_disruption_bouts_y_position_out',
               'median_freezing_bouts_y_position', 'median_gait_disruption_bouts_y_position_all', 'median_gait_disruption_bouts_y_position_in', 'median_gait_disruption_bouts_y_position_out',

               'wall_endzone_mean_freezing_bouts_y_position', 'wall_endzone_mean_gait_disruption_bouts_y_position_all',
               'wall_endzone_mean_gait_disruption_bouts_y_position_in', 'wall_endzone_mean_gait_disruption_bouts_y_position_out',

               'wall_endzone_median_freezing_bouts_y_position', 'wall_endzone_median_gait_disruption_bouts_y_position_all',
               'wall_endzone_median_gait_disruption_bouts_y_position_in', 'wall_endzone_median_gait_disruption_bouts_y_position_out',

               'wall_endzone_stddev_freezing_bouts_y_position', 'wall_endzone_stddev_gait_disruption_bouts_y_position_all',
               'wall_endzone_stddev_gait_disruption_bouts_y_position_in', 'wall_endzone_stddev_gait_disruption_bouts_y_position_out',

               'wall_endzone_count_freezing_bouts', 'wall_endzone_count_gait_disruption_bouts_all',
               'wall_endzone_count_gait_disruption_bouts_in', 'wall_endzone_count_gait_disruption_bouts_out'
              ]

l_columns = ['group_ID', 'mouse_ID', 'mazetype']

# Fixed category order on the x axis, shared by box plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

for data_col in l_data_cols:
    df_temp = df_total_bout_counts_trial1_all_mice[[data_col] + l_columns].copy()
    df_temp.reset_index(inplace=True, drop=True)

    # One output folder per data column. makedirs also creates missing parent
    # folders and does not fail when the folder already exists.
    path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Group_effects/{}/'.format(data_col)
    os.makedirs(path, exist_ok=True)

    figname = '{}.png'.format(data_col)
    plt.figure(figsize=(7,4), facecolor='white')
    sns.boxplot(data=df_temp, x='mazetype', y=data_col, hue='group_ID', fliersize=0, order=l_mazetype_order)
    sns.stripplot(data=df_temp, x='mazetype', y=data_col, hue='group_ID', dodge=True, color='k', order=l_mazetype_order)
    # Pin the lower y limit to 0, the upper limit stays automatic.
    plt.ylim(0)
    # The x range extends beyond the four categories (positions 0-3).
    # NOTE(review): presumably to leave room for the legend - confirm.
    plt.xlim(-0.5,5.5)
    plt.title('trial 1 data only')
    plt.savefig(path + figname, dpi=300)
    plt.close()

    # Swap only the extension; str.replace('png', 'csv') would also rewrite a
    # 'png' that is part of a column name.
    csv_name = os.path.splitext(figname)[0] + '.csv'
    df_temp.to_csv(path + csv_name)

In [None]:
# Freezing
# Group effects (trial 1 only): positions of all freezing bouts per maze type,
# split by group_ID, as violin plot with the individual bouts overlaid.
l_columns = ['group_ID', 'mouse_ID', 'mazetype']

data_col = 'freezing_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

# Row and column selection in a single .loc call instead of chained indexing.
df_temp = df_all_freezing_bouts_all_mice.loc[df_all_freezing_bouts_all_mice['trialnumber'] == 1, [data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Group_effects/{}/'.format('all_freezing_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'all_freezing_bout_positions.png'
plt.figure(figsize=(7,9), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', hue='group_ID', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, orient='h', color='k', hue='group_ID', dodge=True, alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 75)
plt.legend(loc='center right')
plt.title('trial 1 data only')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    

In [None]:
# Group effects (trial 1 only): positions of all gait-disruption bouts per maze
# type, split by group_ID, as violin plot with the individual bouts overlaid.
l_columns = ['group_ID', 'mouse_ID', 'mazetype']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

# Row and column selection in a single .loc call instead of chained indexing.
df_temp = df_all_gait_disruption_bouts_all_mice.loc[df_all_gait_disruption_bouts_all_mice['trialnumber'] == 1, [data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Group_effects/{}/'.format('all_gait_disruption_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'all_gait_disruption_bout_positions.png'
plt.figure(figsize=(7,9), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', hue='group_ID', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, orient='h', color='k', hue='group_ID', dodge=True, alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 75)
plt.legend(loc='center right')
plt.title('trial 1 data only - all directions')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    

In [None]:
# Inwards
# Group effects (trial 1 only): positions of the inwards gait-disruption bouts per
# maze type, split by group_ID, as violin plot with the individual bouts overlaid.
l_columns = ['group_ID', 'mouse_ID', 'mazetype']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

# Row and column selection in a single .loc call instead of chained indexing.
df_temp = df_inwards_gait_disruption_bouts_all_mice.loc[df_inwards_gait_disruption_bouts_all_mice['trialnumber'] == 1, [data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Group_effects/{}/'.format('inwards_gait_disruption_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'inwards_gait_disruption_bout_positions.png'
plt.figure(figsize=(7,9), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', hue='group_ID', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, orient='h', color='k', hue='group_ID', dodge=True, alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 75)
plt.legend(loc='center right')
plt.title('trial 1 data only - inwards directions')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    

In [None]:
# Outwards
# Group effects (trial 1 only): positions of the outwards gait-disruption bouts per
# maze type, split by group_ID, as violin plot with the individual bouts overlaid.
l_columns = ['group_ID', 'mouse_ID', 'mazetype']

data_col = 'gaitdisruption_bout_mean_y_norm_cm'

# Fixed row order, shared by violin plot and strip plot.
l_mazetype_order = ['shortened_rectangle', 'rectangle', 'triangle', 'exponential']

# Row and column selection in a single .loc call instead of chained indexing.
df_temp = df_outwards_gait_disruption_bouts_all_mice.loc[df_outwards_gait_disruption_bouts_all_mice['trialnumber'] == 1, [data_col] + l_columns].copy()
df_temp.reset_index(inplace=True, drop=True)

path = '/home/ds/DCL/M_Schellenberger/BSc_Thesis/Group_effects/{}/'.format('outwards_gait_disruption_bout_positions')
# makedirs also creates missing parent folders and tolerates an existing folder.
os.makedirs(path, exist_ok=True)

figname = 'outwards_gait_disruption_bout_positions.png'
plt.figure(figsize=(7,9), facecolor='white')
sns.violinplot(data=df_temp, y='mazetype', x=data_col, fliersize=0, orient='h', hue='group_ID', order=l_mazetype_order)
sns.stripplot(data=df_temp, y='mazetype', x=data_col, orient='h', color='k', hue='group_ID', dodge=True, alpha=0.3, order=l_mazetype_order)

# Dashed reference lines: x=27.8 across the first row (shortened_rectangle),
# x=35 across the remaining three rows.
plt.vlines(x=27.8, ymin=-0.5, ymax=0.5, color='magenta', linestyle='dashed')
plt.vlines(x=35, ymin=0.5, ymax=3.5, color='magenta', linestyle='dashed')

plt.xlim(0, 75)
plt.legend(loc='center right')
plt.title('trial 1 data only - outwards directions')
plt.savefig(path + figname, dpi=300)
plt.show()

# Store the plotted data next to the figure; only the extension is swapped.
csv_name = os.path.splitext(figname)[0] + '.csv'
df_temp.to_csv(path + csv_name)
        
    