In [1]:
# load tidy dataset

import pandas as pd
import os.path
import numpy as np
from itertools import combinations
import glob

#import numpy as np
#import loadTidyCSV.py

def loadTidy(tidyData): 
    assert os.path.isfile(tidyData), "desired file does not exist" 
    df = pd.read_csv(tidyData, header = None, names = ["date", "animal", "session", "trial", "stimulus", "neuronID", "timePt", "CaSignal"])
    return df 

# helper function to return the number of neurons in the provided data frame
getNumNeurons = lambda df: len(np.unique(df['neuronID'].tolist()))

# pass in pre-filtered data set containing data for only one animal and the same session (ie SAME NEURONS!)
def getListsOfTrialIDs(df_animalSession):
    ## get trials for both stimuli
    df_anmlSessStimA = df_animalSession[df_animalSession['stimulus'] == stimA]
    df_anmlSessStimB = df_animalSession[df_animalSession['stimulus'] == stimB]
    print(stimA,stimB)

    ## get lists of trial numbers of each stimuli's presentations 
    trials_stimA = np.unique(df_anmlSessStimA['trial'].tolist())
    trials_stimB = np.unique(df_anmlSessStimB['trial'].tolist())
    print("trial IDs for each stimulus type",trials_stimA,trials_stimB)
    print(trials_stimA,trials_stimB)
    return (trials_stimA,trials_stimB)

## pass in a data frame with only a single animal and session    
def getNumTimePtsPerTrial(df_animalSession, trials_stimA, trials_stimB):

    #### get number of timePts in each trial selected above 
    ## (1 to 3 presentations of the same stimuli exist per session in Prabhat's data)
    numTimePtsPerTrial = np.empty((2,max(len(trials_stimA),len(trials_stimB))))
    numTimePtsPerTrial[:] = np.nan
    stimInd = 0;
    for thisStimTypeTrialNums in [trials_stimA, trials_stimB]:
        trialInd = 0
        for trial in thisStimTypeTrialNums:
            inds_thisTrial = (df_animalSession['trial']==trial)
            tmp_df_thisTrial = df_animalSession[inds_thisTrial] # gives all time points for all neurons
            numNeurons = getNumNeurons(tmp_df_thisTrial) 
            numTimePtsPerTrial[stimInd,trialInd] = np.sum(inds_thisTrial)/numNeurons 
            trialInd += 1
        stimInd += 1
    print(numTimePtsPerTrial) # rows are for stimuli type; cols are presentation of that stimulus
    return numTimePtsPerTrial

## test candidate comparisons based on whether the number of trials per session and approximate number of timePts match
def areNumTrialsPerStimulusEqual(numTimePtsPerTrial):
    
    ## no trials of either type --> discard this comparison for this animal/session   
    if np.all(np.isnan(numTimePtsPerTrial)):
        print("DISCARDED: neither stimulus type were found for this animal and session")
        return False  # skip to next session (WORK: handle this)
        
    ## different numbers of trials per stimuli/session --> discard this comparison for this animal/session 
    elif np.any(np.isnan(numTimePtsPerTrial)): 
        print("DISCARDED: mismatching numbers of trials per stimulus type for this animal/session")
        return False # skip to next session (WORK: handle this)

    ## FULFILLED here: condition that allows analysis to proceed to attempted data
    elif not np.any(np.isnan(numTimePtsPerTrial)): 
        print("trial numbers match")
    else:
        raise RuntimeError('unexpected trial comparison occurred')
        return False
    
    print("checking approx num of time points")

## input argument generated from getNumTimePtsPerTrial
def areNumTimePtsPerTrialSimilar(numTimePtsPerTrial):
    minTPs, maxTPs, meanTPs, stdTPs = timePtStats(numTimePtsPerTrial)             
    if (np.abs(minTPs-meanTPs) > (threshTPs_stdFromMean * np.abs(meanTPs-stdTPs))) \
        or (np.abs(maxTPs-meanTPs) > (threshTPs_stdFromMean * np.abs(meanTPs-stdTPs))): 
        print("DISCARDED: variance in trial length is above the user's threshold")
        return False # skip to next session (WORK: handle this)

    ### passed all criteria if it made it this far
    return True

## input argument created by getNumTimePtsPerTrial function
def timePtStats(numTimePtsPerTrial):
    minTPs = int(np.amin(numTimePtsPerTrial))
    maxTPs = int(np.amax(numTimePtsPerTrial))
    meanTPs = np.mean(numTimePtsPerTrial)
    stdTPs = np.std(numTimePtsPerTrial)
    print('min', minTPs)
    print('max', maxTPs)
    print('std', stdTPs)
    print('mean',meanTPs)
    print('|min-mean|=',np.abs(minTPs-meanTPs))
    print('|max-mean|=',np.abs(maxTPs-meanTPs))
    print('|mean-std|=',np.abs(meanTPs-stdTPs))
    print('thresh * |mean-std|=',(threshTPs_stdFromMean * np.abs(meanTPs-stdTPs)))
    return minTPs, maxTPs, meanTPs, stdTPs

In [2]:
################ concatenate all .csv files exported from matlab into single pandas dataframe df

# tidy csv file and dir (use makeTidy_Anderson.m to convert Ann's structure to csv)
tidyDataDir = '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/'
tidyDataFileTemplate = 'mouse'
tidyDataFileExt = '.csv'

# get all input files you want to add to the same dataset
dataFiles = np.sort(glob.glob(\
    "/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse*.csv"))
print("data files: \n", dataFiles)


print("\n loading and appending to prior pandas data frame")
dataLst = []
for file in dataFiles:
    print(file)
    dataLst.append(loadTidy(file))
df = pd.concat(dataLst)

print('finished loading')

############# ALL DATA STORED HERE IN DF
    

data files: 
 [ '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse1.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse2.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse3.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse4.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse5.csv']

 loading and appending to prior pandas data frame
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse1.csv
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse2.csv
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse3.csv
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAn

In [3]:
## survey/search data to prepare for split operation
metaStrs = [['dates','date'],['animals','animal'],['sessions','session'],['maxTrials','trial'],['stimuli','stimulus']]

print("searching over entire data set to get range of various IDs for data (used in subsequent loops)") 

## this dictionary holds useful info regarding the range of inputs to loop/search over subsequently
metaDct = {}
for a,b in metaStrs:
    print(a,b)
    metaDct[a] = np.unique(df[b].tolist())
print(metaDct)


searching over entire data set to get range of various IDs for data (used in subsequent loops)
dates date
animals animal
sessions session
maxTrials trial
stimuli stimulus
{'dates': array(['2017_05_00'], 
      dtype='<U10'), 'animals': array([1, 3, 4, 5, 7]), 'sessions': array([1, 2, 3]), 'maxTrials': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21]), 'stimuli': array(['USS', 'baseline', 'female', 'male', 'mineral oil odor',
       'peanut odor', 'pred odor', 'rat', 'tone', 'toy'], 
      dtype='<U16')}


In [4]:
######### USER PARAMETERS #########
threshTPs_stdFromMean = 1

In [None]:
# get all pairs of stimuli
stimCmbTpl = tuple(combinations(metaDct['stimuli'],2)) 

######### MAIN LOOP ##########
totalNumComparisons = 0;
for (stimA, stimB) in stimCmbTpl:
    print((stimA,stimB))
    
    ## get all data for both trial types
    indsBoth = (df['stimulus']==stimA) | (df['stimulus']==stimB)
    df_bothStimuli = df[indsBoth]

    #### select data by animals and sessions
    for animal in metaDct['animals']:
        print('animal: ', animal)
        for session in metaDct['sessions']:
            print('session:', session)
            
            ## return subselection of data where the same neurons were recorded
            inds_animalSession = (df_bothStimuli['animal'] == animal) & (df_bothStimuli['session'] == session)
            df_animalSession = df_bothStimuli[inds_animalSession]
            
            # get trial IDs matching 
            trials_stimA, trials_stimB = getListsOfTrialIDs(df_animalSession)
            
            #### skip this comparison if the data don't match in number of trials or approx number of timePts
            numTimePtsPerTrial = getNumTimePtsPerTrial(df_animalSession,trials_stimA,trials_stimB)
            if areNumTrialsPerStimulusEqual(numTimePtsPerTrial)==False:
                break            
            
            ## truncate longer trials to shortest trial and save to new df
            minTPs, maxTPs, meanTPs, stdTPs = timePtStats(numTimePtsPerTrial)
            truncLst = []
            for trial in np.concatenate((trials_stimA,trials_stimB)):
                tmp_inds_trunc = (df_animalSession['trial']==trial) & (df_animalSession['timePt'] < minTPs)
                tmp_df_trunc = df_animalSession[tmp_inds_trunc]
                truncLst.append(tmp_df_trunc)
            df_trunc = pd.concat(truncLst)
                    
            ## concatenate same cells 
            NUM_ROWS_INITIALIZE = 15 # this value shouldn't matter since it should expand if needed 
            neuronArr_anmlSess = np.empty((NUM_ROWS_INITIALIZE,minTPs-1)) # -1 for 0 indexing
            trials = np.unique(df_trunc['trial'].tolist())
            for trial in trials:
                neurons = np.unique(df_trunc['neuronID'].tolist())
                print("\n appending same neurons in trial: ", trial)
                print("num neurons in this trial: ",len(neurons))
                
                tmp_sameStimNeuronsArr = np.empty((len(neurons),minTPs-1)) # for 0 indexing
                for neuron in neurons:
                    tmp_neuronInds = (df_trunc['trial']==trial) & (df_trunc['neuronID']==neuron)
                    tmp_neuronSeries = df_trunc.loc[tmp_neuronInds,'CaSignal']
                    tmp_neuronVec = tmp_neuronSeries.as_matrix()
                    
                    tmp_sameStimNeuronsArr[neuron-1,:] = tmp_neuronVec 
   
            totalNumComparisons += 1    
            print('\n')
        print('########\n')

print('total number of comparisons: ', totalNumComparisons)

('USS', 'baseline')
animal:  1
session: 1
USS baseline
trial IDs for each stimulus type [ 5 10 16] [ 1  8 15]
[ 5 10 16] [ 1  8 15]
[[ 1117.  1116.  1117.]
 [  370.   363.  1476.]]
trial numbers match
checking approx num of time points
min 363
max 1476
std 415.865663406
mean 926.5
|min-mean|= 563.5
|max-mean|= 549.5
|mean-std|= 510.634336594
thresh * |mean-std|= 510.634336594
trial:  1

 appending same neurons in trial:  1
num neurons in this trial:  24
trial:  5

 appending same neurons in trial:  5
num neurons in this trial:  24
trial:  8

 appending same neurons in trial:  8
num neurons in this trial:  24
trial:  10

 appending same neurons in trial:  10
num neurons in this trial:  24
trial:  15

 appending same neurons in trial:  15
num neurons in this trial:  24
trial:  16

 appending same neurons in trial:  16
num neurons in this trial:  24


session: 2
USS baseline
trial IDs for each stimulus type [ 4 11 21] [ 1  7 15]
[ 4 11 21] [ 1  7 15]
[[ 1015.  1329.  1214.]
 [  689.   681

[[ 1267.  1127.  1123.]
 [ 1114.  1116.  1115.]]
trial numbers match
checking approx num of time points
min 1114
max 1267
std 55.3524063514
mean 1143.66666667
|min-mean|= 29.6666666667
|max-mean|= 123.333333333
|mean-std|= 1088.31426032
thresh * |mean-std|= 1088.31426032
trial:  4

 appending same neurons in trial:  4
num neurons in this trial:  30
trial:  7

 appending same neurons in trial:  7
num neurons in this trial:  30
trial:  9

 appending same neurons in trial:  9
num neurons in this trial:  30
trial:  11

 appending same neurons in trial:  11
num neurons in this trial:  30
trial:  17

 appending same neurons in trial:  17
num neurons in this trial:  30
trial:  19

 appending same neurons in trial:  19
num neurons in this trial:  30


session: 2
USS female
trial IDs for each stimulus type [ 6 12 16] [ 3 13 17]
[ 6 12 16] [ 3 13 17]
[[  918.   894.  1068.]
 [  893.   941.  1015.]]
trial numbers match
checking approx num of time points
min 893
max 1068
std 65.1905327142
mean 954



########

animal:  4
session: 1
USS male
trial IDs for each stimulus type [ 4 11] [ 6 14]
[ 4 11] [ 6 14]
[[ 1137.  1113.]
 [ 1120.  1116.]]
trial numbers match
checking approx num of time points
min 1113
max 1137
std 9.2870878105
mean 1121.5
|min-mean|= 8.5
|max-mean|= 15.5
|mean-std|= 1112.21291219
thresh * |mean-std|= 1112.21291219
trial:  4

 appending same neurons in trial:  4
num neurons in this trial:  56
trial:  6

 appending same neurons in trial:  6
num neurons in this trial:  56
trial:  11

 appending same neurons in trial:  11
num neurons in this trial:  56
trial:  14

 appending same neurons in trial:  14
num neurons in this trial:  56


session: 2
USS male
trial IDs for each stimulus type [5 9] [ 3 11]
[5 9] [ 3 11]
[[ 1133.  1124.]
 [ 1125.  1114.]]
trial numbers match
checking approx num of time points
min 1114
max 1133
std 6.74536878162
mean 1124.0
|min-mean|= 10.0
|max-mean|= 9.0
|mean-std|= 1117.25463122
thresh * |mean-std|= 1117.25463122
trial:  3

 appending same

trial:  4

 appending same neurons in trial:  4
num neurons in this trial:  36
trial:  6

 appending same neurons in trial:  6
num neurons in this trial:  36
trial:  9

 appending same neurons in trial:  9
num neurons in this trial:  36
trial:  15

 appending same neurons in trial:  15
num neurons in this trial:  36


session: 2
USS mineral oil odor
trial IDs for each stimulus type [3] [2]
[3] [2]
[[ 1251.]
 [ 1121.]]
trial numbers match
checking approx num of time points
min 1121
max 1251
std 65.0
mean 1186.0
|min-mean|= 65.0
|max-mean|= 65.0
|mean-std|= 1121.0
thresh * |mean-std|= 1121.0
trial:  2

 appending same neurons in trial:  2
num neurons in this trial:  35
trial:  3

 appending same neurons in trial:  3
num neurons in this trial:  35


session: 3
USS mineral oil odor
trial IDs for each stimulus type [ 3 14] [ 4 13]
[ 3 14] [ 4 13]
[[ 1115.  1120.]
 [ 1115.  1113.]]
trial numbers match
checking approx num of time points
min 1113
max 1120
std 2.5860201082
mean 1115.75
|min-mea

trial:  3

 appending same neurons in trial:  3
num neurons in this trial:  54
trial:  11

 appending same neurons in trial:  11
num neurons in this trial:  54
trial:  12

 appending same neurons in trial:  12
num neurons in this trial:  54


########

('USS', 'pred odor')
animal:  1
session: 1
USS pred odor
trial IDs for each stimulus type [ 5 10 16] []
[ 5 10 16] []
[[ 1117.  1116.  1117.]
 [   nan    nan    nan]]
DISCARDED: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  3
session: 1
USS pred odor
trial IDs for each stimulus type [ 4 11 17] []
[ 4 11 17] []
[[ 1267.  1127.  1123.]
 [   nan    nan    nan]]
DISCARDED: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  4
session: 1
USS pred odor
trial IDs for each stimulus type [ 4 11] [ 5 15]
[ 4 11] [ 5 15]
[[ 1137.  1113.]
 [ 1118.  1112.]]
trial numbers match
checking approx num of time points
min 1112
max 1137
std 10.0747208398
mean 1120.0
|min-mean

trial:  4

 appending same neurons in trial:  4
num neurons in this trial:  30
trial:  11

 appending same neurons in trial:  11
num neurons in this trial:  30
trial:  14

 appending same neurons in trial:  14
num neurons in this trial:  30
trial:  17

 appending same neurons in trial:  17
num neurons in this trial:  30
trial:  18

 appending same neurons in trial:  18
num neurons in this trial:  30


session: 2
USS rat
trial IDs for each stimulus type [ 6 12 16] [ 4  9 18]
[ 6 12 16] [ 4  9 18]
[[  918.   894.  1068.]
 [ 1005.  1111.  1102.]]
trial numbers match
checking approx num of time points
min 894
max 1111
std 85.3749898324
mean 1016.33333333
|min-mean|= 122.333333333
|max-mean|= 94.6666666667
|mean-std|= 930.958343501
thresh * |mean-std|= 930.958343501
trial:  4

 appending same neurons in trial:  4
num neurons in this trial:  35
trial:  6

 appending same neurons in trial:  6
num neurons in this trial:  35
trial:  9

 appending same neurons in trial:  9
num neurons in this tr

trial:  14

 appending same neurons in trial:  14
num neurons in this trial:  25
trial:  17

 appending same neurons in trial:  17
num neurons in this trial:  25
trial:  21

 appending same neurons in trial:  21
num neurons in this trial:  25


session: 3
USS tone
trial IDs for each stimulus type [ 6 13 16] [ 5  9 18]
[ 6 13 16] [ 5  9 18]
[[ 1139.  1140.  1232.]
 [ 1118.  1135.  1225.]]
trial numbers match
checking approx num of time points
min 1118
max 1232
std 45.6414163769
mean 1164.83333333
|min-mean|= 46.8333333333
|max-mean|= 67.1666666667
|mean-std|= 1119.19191696
thresh * |mean-std|= 1119.19191696
trial:  5

 appending same neurons in trial:  5
num neurons in this trial:  21
trial:  6

 appending same neurons in trial:  6
num neurons in this trial:  21
trial:  9

 appending same neurons in trial:  9
num neurons in this trial:  21
trial:  13

 appending same neurons in trial:  13
num neurons in this trial:  21
trial:  16

 appending same neurons in trial:  16
num neurons in thi

trial:  12

 appending same neurons in trial:  12
num neurons in this trial:  40
trial:  15

 appending same neurons in trial:  15
num neurons in this trial:  40


session: 3
USS tone
trial IDs for each stimulus type [ 1 12] [ 6 10]
[ 1 12] [ 6 10]
[[ 1015.  1053.]
 [ 1015.  1017.]]
trial numbers match
checking approx num of time points
min 1015
max 1053
std 16.1864140562
mean 1025.0
|min-mean|= 10.0
|max-mean|= 28.0
|mean-std|= 1008.81358594
thresh * |mean-std|= 1008.81358594
trial:  1

 appending same neurons in trial:  1
num neurons in this trial:  54
trial:  6

 appending same neurons in trial:  6
num neurons in this trial:  54
trial:  10

 appending same neurons in trial:  10
num neurons in this trial:  54
trial:  12

 appending same neurons in trial:  12
num neurons in this trial:  54


########

('USS', 'toy')
animal:  1
session: 1
USS toy
trial IDs for each stimulus type [ 5 10 16] [ 7  9 21]
[ 5 10 16] [ 7  9 21]
[[ 1117.  1116.  1117.]
 [ 1123.  1122.  1122.]]
trial numbers m



session: 2
USS toy
trial IDs for each stimulus type [3] [4]
[3] [4]
[[ 1251.]
 [ 1113.]]
trial numbers match
checking approx num of time points
min 1113
max 1251
std 69.0
mean 1182.0
|min-mean|= 69.0
|max-mean|= 69.0
|mean-std|= 1113.0
thresh * |mean-std|= 1113.0
trial:  3

 appending same neurons in trial:  3
num neurons in this trial:  35
trial:  4

 appending same neurons in trial:  4
num neurons in this trial:  35


session: 3
USS toy
trial IDs for each stimulus type [ 3 14] [ 6 12]
[ 3 14] [ 6 12]
[[ 1115.  1120.]
 [ 1115.  1118.]]
trial numbers match
checking approx num of time points
min 1115
max 1120
std 2.12132034356
mean 1117.0
|min-mean|= 2.0
|max-mean|= 3.0
|mean-std|= 1114.87867966
thresh * |mean-std|= 1114.87867966
trial:  3

 appending same neurons in trial:  3
num neurons in this trial:  20
trial:  6

 appending same neurons in trial:  6
num neurons in this trial:  20
trial:  12

 appending same neurons in trial:  12
num neurons in this trial:  20
trial:  14

 appendi

animal:  1
session: 1
baseline male
trial IDs for each stimulus type [ 1  8 15] [ 6 13 17]
[ 1  8 15] [ 6 13 17]
[[  370.   363.  1476.]
 [ 1118.  1125.  1116.]]
trial numbers match
checking approx num of time points
min 363
max 1476
std 416.562520318
mean 928.0
|min-mean|= 565.0
|max-mean|= 548.0
|mean-std|= 511.437479682
thresh * |mean-std|= 511.437479682
trial:  1

 appending same neurons in trial:  1
num neurons in this trial:  24
trial:  6

 appending same neurons in trial:  6
num neurons in this trial:  24
trial:  8

 appending same neurons in trial:  8
num neurons in this trial:  24
trial:  13

 appending same neurons in trial:  13
num neurons in this trial:  24
trial:  15

 appending same neurons in trial:  15
num neurons in this trial:  24
trial:  17

 appending same neurons in trial:  17
num neurons in this trial:  24


session: 2
baseline male
trial IDs for each stimulus type [ 1  7 15] [ 2 13 18]
[ 1  7 15] [ 2 13 18]
[[  689.   681.   685.]
 [ 1166.  1113.  1167.]]
trial n

In [18]:
test = [1,2,3,4]
print(len(test))

4


In [None]:
#                     tmp_trialInds = (data['trial']==trial) 
#                     print(np.sum(tmp_trialInds))
#                     tmp_mat = data[tmp_trialInds].as_matrix()
#                     print(tmp_mat)
#                     print(type(tmp_neuronConcatArr))
#                     np.append(tmp_neuronConcatArr,tmp_mat)
#                     print(tmp_neuronConcatArr)

In [None]:
testInds = (df['animal']==1) & (df['session']==1) & (df['stimulus']=='USS') & (df['trial']==5)
print(np.sum(testInds))

In [None]:
np.unique(df_anmlSessStimA[df_anmlSessStimA['trial'] == trialA].tolist())

In [None]:

# gb = df.groupby(['stimulusType']).get_group('rat')

stimGrouped = df.groupby('stimulusType')
ratGrpd = stimGrouped.get_group('rat')

print(ratGrpd)

print('reached end of file')

gb = df.groupby('stimulusType')

df_means = gb.apply(np.mean)
df_means

for stimType in gb

In [None]:
ratData = df.loc[df['stimulusType'] == 'rat',:]
ussData = df.loc[df['stimulusType'] == 'USS',:]

print(ussData)

# slicing
df_big_force = df.loc[df['impact force (mN)'] > 1000, :]

df = pd.concat((df_low, df_high), axis=1)

# Specify indices we want (note parentheses holding each Boolean)
inds = (df['food density'] == 'high') & (df['cross-sectional area (sq micron)'] > 2000)

# Pull out areas
df.loc[inds, 'cross-sectional area (sq micron)']

df.corr()


# Rename the impact force column
df = df.rename(columns={'impact force (mN)': 'impf'})



# Write out DataFrame
df.to_csv('xa_combined.csv', index=False)