# Elevated-Plus Maze Analysis

In [1]:
from analysis import analysis_utils as au
from IPython.core.interactiveshell import InteractiveShell
from multiprocessing import Process
from multiprocessing import Queue
import numpy as np
import os
import pandas as pd
import random
from scipy import stats
import seaborn as sns
import SigProc
import sys

In [2]:
# Render matplotlib figures directly in the notebook output area.
%matplotlib inline
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Apply seaborn's "darkgrid" theme to the plots drawn below.
sns.set_style("darkgrid")

In [4]:
# Root folder that is searched (recursively) for experiment CSV files.
mouse_directory = os.path.expanduser("~") + "/Hen_Lab/Mice/EPM"

# Only warn when the folder is missing: os.walk() below then yields nothing
# and raw_files simply stays empty.
if not os.path.exists(mouse_directory):
    print("The mouse directory does not exist", file=sys.stderr)

# Full path of every .csv file found anywhere below mouse_directory.
raw_files = list()
for dir_name, subdir_list, file_list in os.walk(mouse_directory):
    for file_name in file_list:
        if not file_name.endswith(".csv"):
            continue
        full_path = dir_name + "/" + file_name
        print("Full path of: {} is: {}".format(file_name, full_path))
        raw_files.append(full_path)

Full path of: activity_drd73.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd73_experiments/activity_drd73.csv
Full path of: behavior_drd73.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd73_experiments/behavior_drd73.csv
Full path of: behavior_drd87.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd87_experiments/behavior_drd87.csv
Full path of: Raw_EPM_drd87.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd87_experiments/Raw_EPM_drd87.csv
Full path of: activity_drd87.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd87_experiments/activity_drd87.csv
Full path of: activity_drd77.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd77_experiments/activity_drd77.csv
Full path of: behavior_drd77.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd77_experiments/behavior_drd77.csv
Full path of: behavior_drd46.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd46_experiments/behavior_drd46.csv
Full path of: Raw_EPM_drd46.csv is: /Users/saveliyyusufov/Hen_Lab/Mice/EPM/drd46_experiments/Raw_E

In [5]:
# au.run_epm_analysis(raw_files)

In [6]:
# Raw recording file for mouse drd87, located relative to `mouse_directory`
# (defined in the file-discovery cell) instead of a user-specific absolute path.
RAW_DRD87 = os.path.join(mouse_directory, "drd87_experiments", "Raw_EPM_drd87.csv")
data = pd.read_csv(RAW_DRD87, header=None)
# NOTE(review): the meaning of the numeric arguments (2, 0.5, 0.2, 10) is defined
# inside SigProc.detect_ca_transients_mossy -- confirm there before tuning them.
# The first returned value is intentionally discarded.
_, AUC_dataframe, cell_transients_dataframe = SigProc.detect_ca_transients_mossy(data, 2, 0.5, 0.2, 10)

In [7]:
# Pass the AUC DataFrame and five hand-picked neuron column names to the plotting helper.
# NOTE(review): presumably draws one line per listed neuron -- confirm in analysis_utils.
au.neuron_line_plot(AUC_dataframe, "neuron6", "neuron9", "neuron52", "neuron38", "neuron45")

## TODO: Discuss and streamline the below functionality, turn it into a function, make sure the function is sound, and move it to `analysis_utils.py`

In [9]:
# Column labels assigned, in file order, to the header-less behavior CSV.
behavior_column_names = ['Trial_time', 'Recording_time', 'X_center', 'Y_center', 'Area', 'Areachange', 
                         'Elongation', 'Distance_moved', 'Velocity', 'Arena_centerpoint',
                         'Open1_centerpoint', 'Open2_centerpoint',
                         'Closed1_centerpoint', 'Closed2_centerpoint',
                         'OpenArms_centerpoint', 'ClosedArms_centerpoint', 'Result_1']

# Locate the behavior file relative to `mouse_directory` (defined in the
# file-discovery cell) instead of a user-specific absolute path.
BEHAVIOR_DRD87 = os.path.join(mouse_directory, "drd87_experiments", "behavior_drd87.csv")
behavior_df = pd.read_csv(BEHAVIOR_DRD87, header=None)
behavior_df.columns = behavior_column_names
# NOTE(review): presumably reduces the behavior rows by a factor of 3 so they line up
# with the neuron frames -- confirm against au.downsample_dataframe.
behavior_df = au.downsample_dataframe(behavior_df, 3)

# Define what constitutes as a running frame
VELOCITY_CUTOFF = 4

# Adds "Running_frames" column to the end of the behavior Dataframe:
# 1 where Velocity is strictly above the cutoff, 0 otherwise.
behavior_df["Running_frames"] = np.where(behavior_df["Velocity"] > VELOCITY_CUTOFF, 1, 0)

In [10]:
# Left join on the row index: every row of AUC_dataframe is kept and the behavior
# columns are attached wherever behavior_df has the same index label.
neuron_concated_behavior = AUC_dataframe.join(behavior_df, how="left")

## TODO: Make sure the implementation of `compute_diff_rate()` is sound, write up its documentation, and move it to `analysis_utils.py`

In [11]:
def compute_diff_rate(dataframe, neuron_activity_df, *behaviors, frame_rate=10):
    """Computes the difference between mean neuron activity in two sets of frames

    With one behavior, the frames where that behavior column is non-zero are
    compared against the frames where it is zero. With two behaviors, the
    frames where the first column is non-zero are compared against the frames
    where the second column is non-zero.

    Args:
        dataframe: the concatenated pandas DataFrame of an animal's neuron
        activity and corresponding behavior
        neuron_activity_df: the neuron activity dataframe for a given animal;
        only its column names are used, to select the neuron columns
        *behaviors: a single or ordered pair of behavior column names to
        compute the difference rate for, e.g. "Running", or
        "ClosedArms", "OpenArms"
        frame_rate: the framerate associated with the given data; default is 10

    Returns:
        a numpy array with one entry per neuron column:
        frame_rate * (mean over the behavior frames - mean over the
        comparison frames)

    Raises:
        ValueError: if zero or more than two behaviors are given
    """
    if len(behaviors) == 1:
        in_behavior = dataframe[behaviors[0]] != 0
        in_comparison = dataframe[behaviors[0]] == 0
    elif len(behaviors) == 2:
        in_behavior = dataframe[behaviors[0]] != 0
        in_comparison = dataframe[behaviors[1]] != 0
    else:
        # Previously this case fell through and returned None without any hint.
        raise ValueError(
            "compute_diff_rate() expects one or two behaviors, got {}".format(len(behaviors))
        )

    neuron_columns = neuron_activity_df.columns
    behavior_mean = dataframe.loc[in_behavior, neuron_columns].values.mean(axis=0)
    comparison_mean = dataframe.loc[in_comparison, neuron_columns].values.mean(axis=0)
    return frame_rate * (behavior_mean - comparison_mean)

In [12]:
# Single-behavior form: frames with non-zero OpenArms_centerpoint versus frames where it is zero.
compute_diff_rate(neuron_concated_behavior, AUC_dataframe, "OpenArms_centerpoint")

array([-3.22913678e+00,  6.88127687e+00,  4.24303720e+00,  1.20377830e+01,
        5.60818177e+00, -6.28605466e-01,  1.31964119e+00, -2.75311179e+00,
       -1.93265625e+00,  8.08019113e-01, -2.54826552e+00,  2.82709094e-01,
        4.42529433e+00, -9.05077747e-01,  9.08735029e+00,  8.14527496e+00,
        2.13868593e+00, -2.83460255e+00, -1.69612088e+00, -2.63730957e+00,
       -4.77937726e-01,  3.42384581e+00,  2.08987768e+00, -8.69478785e-03,
        1.05448072e+01,  5.92345201e+00,  1.74614244e+00,  2.76959775e+00,
        3.01001871e+00, -1.09778478e+00,  1.46886462e+01,  4.85506879e+00,
       -2.63793914e+00,  1.19241197e+01, -1.55864945e-01,  1.05100133e+00,
       -1.46994728e+00, -3.02143841e+00,  3.62990349e-01,  2.44122728e+00,
       -1.77564654e+00, -5.58717494e-02,  2.69177468e+00, -3.05466015e+00,
       -2.11434173e+00,  2.98918014e+00,  4.20102116e+00, -1.58146664e+00,
        9.09082457e+00,  8.42395378e+00, -1.72391904e+00,  1.10573261e-01,
        1.21397142e+00,  

## TODO: Once all neuron (cell) selectivity code is proven to be sound, write up documentation for `set_real_diff_df()` and move it to `analysis_utils.py`

In [13]:
def set_real_diff_df(dataframe, neuron_activity_df, behavior):
    """Build the one-row table of observed difference rates for every neuron

    Args:
        dataframe: the neuron activity DataFrame joined with the matching
        behavior DataFrame, for a given animal
        neuron_activity_df: the neuron activity DataFrame for that animal;
        its columns become the columns of the result
        behavior: the behavior column to compute the difference rate for

    Returns:
        real_df: a pandas DataFrame with a single row labelled "d" holding
        the value compute_diff_rate() produced for each neuron
    """
    observed_rates = compute_diff_rate(dataframe, neuron_activity_df, behavior)
    real_df = pd.DataFrame(index=["d"], columns=neuron_activity_df.columns)
    real_df.loc["d"] = observed_rates
    return real_df

In [14]:
# Observed (unshuffled) difference rates for OpenArms_centerpoint, one column per neuron.
real_diff_df = set_real_diff_df(neuron_concated_behavior, AUC_dataframe, "OpenArms_centerpoint")
real_diff_df

Unnamed: 0,neuron1,neuron2,neuron3,neuron4,neuron5,neuron6,neuron7,neuron8,neuron9,neuron10,...,neuron60,neuron61,neuron62,neuron63,neuron64,neuron65,neuron66,neuron67,neuron68,neuron69
d,-3.22914,6.88128,4.24304,12.0378,5.60818,-0.628605,1.31964,-2.75311,-1.93266,0.808019,...,9.17461,-1.65544,6.08556,0.810677,4.41841,8.53101,5.52316,1.23241,-1.93298,12.4688


# Bootstrap demonstration

In [15]:
# Toy frame with three identical columns, used below to show what each shuffling approach does.
df = pd.DataFrame({"neuron1":[1,2,3,4,5,6], "neuron2":[1,2,3,4,5,6], "neuron3":[1,2,3,4,5,6]})
df

Unnamed: 0,neuron1,neuron2,neuron3
0,1,1,1
1,2,2,2
2,3,3,3
3,4,4,4
4,5,5,5
5,6,6,6


## Prior:

In [16]:
import sklearn

# Time the previous approach. %timeit discards the value returned by
# sklearn.utils.shuffle, so `df` is not reassigned here and the frame
# displayed below is still in its original order.
%timeit sklearn.utils.shuffle(df)
df.reset_index(inplace=True, drop=True)
df

216 µs ± 4.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


Unnamed: 0,neuron1,neuron2,neuron3
0,1,1,1
1,2,2,2
2,3,3,3
3,4,4,4
4,5,5,5
5,6,6,6


## Current implementation:

In [17]:
# Permute neuron1 and neuron2 independently of each other (one permutation per column);
# neuron3 is outside the label slice and keeps its original order.
df.loc[:, "neuron1":"neuron2"] = df.loc[:, "neuron1":"neuron2"].apply(lambda x: np.random.permutation(x))
df

Unnamed: 0,neuron1,neuron2,neuron3
0,4,4,1
1,3,1,2
2,5,2,3
3,6,6,4
4,2,5,5
5,1,3,6


In [18]:
def shuffle_worker(q, num_of_experiments, neuron_activity_df, neuron_and_behavior_df, behavior):
    """Helper function for shuffle()

    Simulates the requested number of experiments and puts one DataFrame on
    the provided queue, with one row per simulated experiment. For each
    experiment every neuron column is permuted independently while the
    behavior columns stay in place, and compute_diff_rate() is evaluated on
    the result.
    Note: This function is meant to only be used as a helper function
    for the shuffle() function

    Args:
        q: the blocking queue to which the resulting dataframe will be added
        num_of_experiments: the number of experiments that will be simulated
        and stored, as observation rows, in the dataframe put on the queue
        neuron_activity_df: the neuron activity dataframe for a given mouse;
        its column names select which columns get permuted
        neuron_and_behavior_df: the concatenated neuron activity and behavior
        dataframes for a given mouse; it is copied, never modified
        behavior: the specific behavior to simulate the experiments on
    """
    neuron_columns = neuron_activity_df.columns

    # Private generator seeded from fresh OS entropy. Forked worker processes
    # inherit a copy of the parent's global NumPy random state, so using
    # np.random.* directly would make every worker produce the same shuffles.
    rng = np.random.RandomState()

    # Shuffle a private copy so the caller's DataFrame keeps its real ordering.
    working_df = neuron_and_behavior_df.copy()

    # Collect the rows first and build the DataFrame once, instead of
    # enlarging a DataFrame one row at a time.
    simulated_rows = []
    for _ in range(num_of_experiments):
        for column in neuron_columns:
            working_df[column] = rng.permutation(working_df[column].values)
        simulated_rows.append(compute_diff_rate(working_df, neuron_activity_df, behavior))

    q.put(pd.DataFrame(simulated_rows, columns=neuron_columns))

In [19]:
def shuffle(total_experiments, neuron_and_behavior_df, neuron_activity_df, behavior):
    """Homebrewed resampling function for EPM Analysis

    Resampling function that gives the capability to "simulate"
    experiments using random shuffling of the observations for each
    pandas dataframe. The work is split across one shuffle_worker process
    per available CPU, and exactly total_experiments rows are produced.

    Args:
        total_experiments: the total amount of experiments to simulate
        neuron_and_behavior_df: the concatenated neuron activity and behavior
        dataframes for a given animal
        neuron_activity_df: the neuron activity dataframe for a given animal
        behavior: the specific behavior to simulate the experiments on

    Returns: a (vertically) concatenated pandas DataFrame of all the shuffled
    DataFrames that the shuffle_worker processes produced; an empty DataFrame
    with the neuron columns when total_experiments is not positive
    """
    # os.cpu_count() may return None when the count cannot be determined.
    worker_count = os.cpu_count() or 1

    # Spread the remainder over the first workers so that no requested
    # experiment is dropped by the integer division.
    base_share, leftover = divmod(total_experiments, worker_count)
    workloads = [base_share + (1 if worker_index < leftover else 0)
                 for worker_index in range(worker_count)]
    workloads = [workload for workload in workloads if workload > 0]
    if not workloads:
        return pd.DataFrame(columns=neuron_activity_df.columns)

    q = Queue()
    processes = []
    for workload in workloads:
        p = Process(target=shuffle_worker, args=(q, workload, neuron_activity_df, neuron_and_behavior_df, behavior))
        processes.append(p)
        p.start()

    # Drain the queue before joining: a process that has put data on a queue
    # does not terminate until that data has been consumed.
    rets = [q.get() for _ in processes]  # each get() blocks until a worker delivers
    for p in processes:
        p.join()

    return pd.concat(rets, ignore_index=True)

In [None]:
# Simulate 20000 shuffled experiments for OpenArms_centerpoint; shuffle() spreads the
# work over one process per CPU.
bootstrapped = shuffle(20000, neuron_concated_behavior, AUC_dataframe, "OpenArms_centerpoint")

In [None]:
# One row per simulated experiment, one column per neuron.
bootstrapped

In [None]:
import scipy
# Distribution of the z-scored shuffled difference rates for neuron6, with a
# normal curve fitted on top (fit=stats.norm) for comparison.
sns.distplot(scipy.stats.zscore(bootstrapped["neuron6"]), color='m', fit=stats.norm)

## TODO: for `is_neuron_selective()`, make sure implementation is sound, write-up documentation, and move to analysis_utils.py

In [None]:
def is_neuron_selective(resampled_df, real_d_df, neuron, behavior_name, high_tail, low_tail):
    """Labels one neuron as selective, anti-selective or non-selective

    Compares the neuron's observed difference value against two percentiles
    of its resampled distribution (a custom two-tailed test). Can be used on
    its own to label a single neuron of an animal as a <behavior> neuron, a
    "Non-"<behavior> neuron, or a "Non-selective" neuron.

    Args:
        resampled_df: a resampled pandas DataFrame with one column per neuron
        real_d_df: a pandas DataFrame with a row labelled 'd' holding the
        real difference-of-means value of each neuron, for a given animal
        and behavior
        neuron: the column name of the neuron to classify
        behavior_name: the label returned for a selective neuron,
        e.g. "Running"
        high_tail: the percentile (0-100) marking the upper-tail cutoff
        low_tail: the percentile (0-100) marking the lower-tail cutoff

    Returns:
        behavior_name when the observed value is at or above the upper
        cutoff, "Non-" + behavior_name when it is at or below the lower
        cutoff, and "Non-selective" otherwise
    """
    observed = real_d_df[neuron]['d']
    resampled_values = resampled_df[neuron]

    # The upper tail is checked first, so it wins if both conditions hold.
    if observed >= np.percentile(resampled_values, high_tail):
        return behavior_name
    if observed <= np.percentile(resampled_values, low_tail):
        return "Non-" + behavior_name
    return "Non-selective"

In [None]:
# Classify neuron50 using the 87.5th and 5th percentiles as cutoffs.
# NOTE(review): `bootstrapped` and `real_diff_df` were computed for "OpenArms_centerpoint",
# but the label passed here is "ClosedArms" -- confirm this is intended.
is_neuron_selective(bootstrapped, real_diff_df, "neuron50", "ClosedArms", 87.5, 5)

In [None]:
def classify_neurons_for_beh(resampled_df, real_diff_df, behavior_name, high_tail, low_tail):
    """Classifies every neuron of an animal for one behavior

    Runs is_neuron_selective() once for each column of the resampled
    DataFrame.

    Args:
        resampled_df: a resampled pandas DataFrame; its columns are the
        neurons that get classified
        real_diff_df: a pandas DataFrame with one row that has the real
        difference of means for each neuron
        behavior_name: the label given to selective neurons, e.g. "Running"
        high_tail: the cutoff for the upper-tail of the distribution
        low_tail: the cutoff for the lower-tail of the distribution

    Returns:
        neurons_dict: a dictionary mapping each neuron name to the
        classification is_neuron_selective() returned for it
    """
    return {
        neuron: is_neuron_selective(resampled_df, real_diff_df, neuron, behavior_name, high_tail, low_tail)
        for neuron in resampled_df.columns
    }

In [None]:
# Classify every neuron for OpenArms_centerpoint.
# NOTE(review): the cutoffs are asymmetric (87.5th percentile above, 5th below) -- confirm this is intended.
classify_neurons_for_beh(bootstrapped, real_diff_df, "OpenArms_centerpoint", 87.5, 5)

In [20]:
def _behavior_row_mask(concated_df, behavior):
    """Return the boolean row mask for one behavior expression, or None if it is not recognised."""
    if behavior in concated_df.columns:
        return concated_df[behavior] != 0
    # '&' is looked for before '|', so an expression containing both is split on '&'.
    for symbol in ('&', '|'):
        if symbol in behavior:
            operand_masks = [concated_df[name] != 0 for name in behavior.split(symbol)]
            combined = operand_masks[0]
            for operand_mask in operand_masks[1:]:
                combined = (combined & operand_mask) if symbol == '&' else (combined | operand_mask)
            return combined
    return None


def activity_by_neurons(concated_df, neuron_names, *behaviors, frame_rate=10):
    """Computes the neuron activity rates for given behaviors

    For every behavior the mean of each neuron column is taken over the
    frames selected by that behavior, and multiplied by frame_rate.

    Args:
        concated_df: a concatenated pandas DataFrame of the neuron activity and
        the corresponding behavior, for a given animal.
        neuron_names: the names of the neurons whose rates are to be computed.
        behaviors: the behaviors for which to compute the activity rates. Each
        one is either a column name (frames where the column is non-zero), or
        several column names joined by '&' (frames where all are non-zero) or
        by '|' (frames where any is non-zero),
        e.g. "ClosedArms_centerpoint&Running_frames".
        frame_rate: the framerate to multiply the rate by, default is 10.

    Returns: a pandas DataFrame of the neuron activity rates, with one row per
    neuron and one column per behavior. A behavior that is neither a column
    nor a '&'/'|' expression yields a column without values.

    Raises:
        KeyError: if an operand of a '&'/'|' expression is not a column of
        concated_df.
    """
    # Build every column first and create the DataFrame once, so the neuron
    # index comes from the computed Series rather than from assigning into an
    # empty frame.
    rates = {}
    for behavior in behaviors:
        selected_rows = _behavior_row_mask(concated_df, behavior)
        if selected_rows is not None:
            rates[behavior] = frame_rate * concated_df.loc[selected_rows, neuron_names].mean()

    return pd.DataFrame(rates, columns=list(behaviors))

In [21]:
# Rates while in the closed arms AND running, and while in the closed arms OR running.
activity_by_neurons(neuron_concated_behavior, cell_transients_dataframe.columns, "ClosedArms_centerpoint&Running_frames", "ClosedArms_centerpoint|Running_frames")

Unnamed: 0,ClosedArms_centerpoint&Running_frames,ClosedArms_centerpoint|Running_frames
neuron1,7.062691,3.807243
neuron2,1.664502,1.770149
neuron3,1.202086,2.214553
neuron4,0.000000,1.618548
neuron5,0.000000,1.132691
neuron6,5.565445,1.233872
neuron7,5.120013,1.503125
neuron8,1.001087,1.641840
neuron9,11.975758,3.848747
neuron10,0.519312,0.902017


In [22]:
# x = pd.to_timedelta('0.1s')
# neuron_concated_behavior.loc[:, "TIME"] = pd.Series(x*i for i in (neuron_concated_behavior.index))
# grouped = neuron_concated_behavior.set_index("TIME").groupby(pd.Grouper(freq="1Min"))

In [40]:
# Show only the frames where ClosedArms_centerpoint is non-zero.
neuron_concated_behavior.loc[neuron_concated_behavior["ClosedArms_centerpoint"] != 0] #loc[0:61, "Trial_time"]

Unnamed: 0,neuron1,neuron2,neuron3,neuron4,neuron5,neuron6,neuron7,neuron8,neuron9,neuron10,...,Velocity,Arena_centerpoint,Open1_centerpoint,Open2_centerpoint,Closed1_centerpoint,Closed2_centerpoint,OpenArms_centerpoint,ClosedArms_centerpoint,Result_1,Running_frames
154,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,1.234700,1,0,0,1,0,0,1,1,0
155,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,2.097000,1,0,0,1,0,0,1,1,0
419,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,11.427000,1,0,0,1,0,0,1,1,1
420,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,3.131800,1,0,0,1,0,0,1,1,0
421,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,2.477000,1,0,0,1,0,0,1,1,0
422,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,4.639000,1,0,0,1,0,0,1,1,1
423,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,2.733600,1,0,0,1,0,0,1,1,0
424,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,1.047200,1,0,0,1,0,0,1,1,0
425,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,3.180700,1,0,0,1,0,0,1,1,0
426,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,...,3.537300,1,0,0,1,0,0,1,1,0
