In [2]:
# load tidy dataset

import pandas as pd
import os.path
import numpy as np
from itertools import combinations
import glob

#import numpy as np
#import loadTidyCSV.py

def loadTidy(tidyData): 
    assert os.path.isfile(tidyData), "desired file does not exist" 
    df = pd.read_csv(tidyData, header = None, names = ["date", "animal", "session", "trial", "stimulus", "neuronID", "timePt", "CaSignal"])
    return df 

# tidy csv file and dir (use makeTidy_Anderson.m to convert Ann's structure to csv)
tidyDataDir = '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/'
tidyDataFileTemplate = 'mouse'
tidyDataFileExt = '.csv'

# get all input files you want to add to the same dataset
dataFiles = np.sort(glob.glob(\
    "/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse*.csv"))
print("data files: \n", dataFiles)


print("\n loading and appending to prior pandas data frame")
dataLst = []
for file in dataFiles:
    print(file)
    dataLst.append(loadTidy(file))
df = pd.concat(dataLst)

print('finished loading')

############# ALL DATA STORED HERE IN DF
    

data files: 
 [ '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse1.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse2.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse3.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse4.csv'
 '/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse5.csv']

 loading and appending to prior pandas data frame
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse1.csv
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse2.csv
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAnalysis/data/2_tidyFormat/mouse3.csv
/home/orthogonull/a_MHR/a_research/a_gitResearch/git_ignored/imagingAn

In [3]:
## survey/search data to prepare for split operation
metaStrs = [['dates','date'],['animals','animal'],['sessions','session'],['maxTrials','trial'],['stimuli','stimulus']]

## this dictionary holds useful info regarding the range of inputs to loop/search over subsequently
metaDct = {}
for a,b in metaStrs:
    print(a,b)
    metaDct[a] = np.unique(df[b].tolist())
print(metaDct)


dates date
animals animal
sessions session
maxTrials trial
stimuli stimulus
{'dates': array(['2017_05_00'], 
      dtype='<U10'), 'animals': array([1, 3, 4, 5, 7]), 'sessions': array([1, 2, 3]), 'maxTrials': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21]), 'stimuli': array(['USS', 'baseline', 'female', 'male', 'mineral oil odor',
       'peanut odor', 'pred odor', 'rat', 'tone', 'toy'], 
      dtype='<U16')}


In [24]:
######### PARAMETERS #########
threshTPs_stdFromMean = 1

In [44]:
### get all pairs of stimuli
stimCmbTpl = tuple(combinations(metaDct['stimuli'],2)) 

######### MAIN LOOP ##########
totalNumComparisons = 0;
for (stimA, stimB) in stimCmbTpl:
    
    ## get all data for both trial types
    indsBoth = (df['stimulus']==stimA) | (df['stimulus']==stimB)
    df_bothStimuli = df[indsBoth]

    
    #### select data by animals and sessions
    for animal in metaDct['animals']:
        print('animal: ', animal)
        for session in metaDct['sessions']:
            print('session:', session)
            inds_animalSession = (df_bothStimuli['animal'] == animal) & (df_bothStimuli['session'] == session)
            df_animalSession = df_bothStimuli[inds_animalSession]
            
            ## get both stim
            df_anmlSessStimA = df_animalSession[df_animalSession['stimulus'] == stimA]
            df_anmlSessStimB = df_animalSession[df_animalSession['stimulus'] == stimB]
            print(stimA,stimB)
            
            ## get lists of trial numbers of each stimuli's presentations 
            trials_stimA = np.unique(df_anmlSessStimA['trial'].tolist())
            trials_stimB = np.unique(df_anmlSessStimB['trial'].tolist())
            print(trials_stimA,trials_stimB)
        
            #### get number of timePts in each trial selected above 
            ## (1 to 3 presentations of the same stimuli exist per session in Prabhat's data)
            numTimePtsPerTrial = np.empty((2,max(len(trials_stimA),len(trials_stimB))))
            numTimePtsPerTrial[:] = np.nan
            stimInd = 0;
            for thisStimTypeTrialNums in [trials_stimA, trials_stimB]:
                trialInd = 0
                for trial in thisStimTypeTrialNums:
                    inds_thisTrial = (df_animalSession['trial']==trial)
                    numTimePtsPerTrial[stimInd,trialInd] = np.sum(inds_thisTrial)
                    trialInd += 1
                stimInd += 1
            print(numTimePtsPerTrial) # rows are for stimuli type; cols are presentation of that stimulus            
            
            #### test and sort candidate comparisons based on whether the number of trials per session 
            ##      and approximate number of timePts match
            
            ## no trials of either type --> discard this comparison for this animal/session   
            if np.all(np.isnan(numTimePtsPerTrial)):
                print("discarded: neither stimulus type were found for this animal and session")
                break # skip to next session (WORK: handle this)
                
            ## different numbers of trials per stimuli/session --> discard this comparison for this animal/session 
            elif np.any(np.isnan(numTimePtsPerTrial)): 
                print("discarded: mismatching numbers of trials per stimulus type for this animal/session")
                break # skip to next session (WORK: handle this)
            
            ## FULFILLED here: condition that allows analysis to proceed to attempted data
            elif not np.any(np.isnan(numTimePtsPerTrial)): 
                print("trial numbers match")
            else:
                raise RuntimeError('unexpected trial comparison occurred')
            
            print("checking approx num of time points")
            
            #### discard this comparison for this animal/session if number of time points are too dissimilar
            minTPs = np.min(numTimePtsPerTrial) 
            maxTPs = np.max(numTimePtsPerTrial)
            meanTPs = np.mean(numTimePtsPerTrial)
            stdTPs = np.std(numTimePtsPerTrial)
            print('min', minTPs)
            print('max', maxTPs)
            print('std', stdTPs)
            print('mean',meanTPs)
            if (np.abs(minTPs-meanTPs) > (threshTPs_stdFromMean * np.abs(meanTPs-stdTPs))) \
                or (np.abs(maxTPs-meanTPs) > (threshTPs_stdFromMean * np.abs(meanTPs-stdTPs))): 
                print("discarded this comparison because variance in trial length is above the user's threshold")
                break # skip to next session (WORK: handle this)
            
            ########## data that makes it this far will be truncated to shortest trial prepared for svm
            
            ## truncate longer trials to shortest trial and save to new df
            truncLst = []
            for trial in np.concatenate((trials_stimA,trials_stimB)):
                tmp_inds_trunc = (df_animalSession['trial']==trial) & (df_animalSession['timePt'] < minTPs)
                tmp_df_trunc = df_animalSession[tmp_inds_trunc]
                truncLst.append(tmp_df_trunc)
            df_trunc = pd.concat(truncLst)
                    
            ## concatenate same cells 
            for data in [df_anmlSessStimA, df_anmlSessStimB]:
                numNeurons = np.unique(data['neuronID'].tolist())
                print(numNeurons)
                
            
            totalNumComparisons += 1    
            print('\n')
        print('########\n')

print('total number of comparisons: ', totalNumComparisons)

animal:  1
session: 1
USS baseline
[ 5 10 16] [ 1  8 15]
[[ 26808.  26784.  26808.]
 [  8880.   8712.  35424.]]
trial numbers match
checking approx num of time points
min 8712.0
max 35424.0
std 9980.77592174
mean 22236.0
discarded this comparison because variance in trial length is above the user's threshold
########

animal:  3
session: 1
USS baseline
[ 4 11 17] [ 1  8 15]
[[ 38010.  33810.  33690.]
 [ 20220.  20460.  20340.]]
trial numbers match
checking approx num of time points
min 20220.0
max 38010.0
std 7550.14072717
mean 27755.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30]


session: 2
USS baseline
[ 6 12 16] [ 1  8 15]
[[ 32130.  31290.  37380.]
 [ 23835.  25620.  23870.]]
trial numbers match
checking approx num of time points
min 23835.0
max 37380.0
std 4994.68585654
mean 29020.8333333
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 1

USS male
[6 9] [ 8 11]
[[ 41544.  40140.]
 [ 40284.  40716.]]
trial numbers match
checking approx num of time points
min 40140.0
max 41544.0
std 546.782406447
mean 40671.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36]


session: 2
USS male
[3] [7]
[[ 43785.]
 [ 38815.]]
trial numbers match
checking approx num of time points
min 38815.0
max 43785.0
std 2485.0
mean 41300.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35]


session: 3
USS male
[ 3 14] [ 1 16]
[[ 22300.  22400.]
 [ 22380.  22300.]]
trial numbers match
checking approx num of time points
min 22300.0
max 22400.0
std 45.5521678957
mean 22345.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 1

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]


session: 3
USS peanut odor
[ 6 10] [ 8 13]
[[ 52734.  52452.]
 [ 58233.  53251.]]
trial numbers match
checking approx num of time points
min 52452.0
max 58233.0
std 2364.64188621
mean 54167.5
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]


########

animal:  5
session: 1
USS peanut odor
[6 9] [ 7 13]
[[ 41544.  40140.]
 [ 40572.  41184.]]
trial numbers match
checking approx num of time points
min 40140.0
max 41544.0
std 541.797009959
mean 40860.0
[ 1  

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35]


session: 3
USS rat
[ 3 14 17] [ 5 10 21]
[[ 34646.  36958.  39304.]
 [ 37332.  38896.  34884.]]
trial numbers match
checking approx num of time points
min 34646.0
max 39304.0
std 1781.10103276
mean 37003.3333333
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34]


########

animal:  4
session: 1
USS rat
[ 4 11] [ 2 16]
[[ 63672.  62328.]
 [ 69048.  63448.]]
trial numbers match
checking approx num of time points
min 62328.0
max 69048.0
std 2604.45157375
mean 64624.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
 

USS tone
[ 3 14] [7 9]
[[ 22300.  22400.]
 [ 22380.  22540.]]
trial numbers match
checking approx num of time points
min 22300.0
max 22540.0
std 86.458082329
mean 22405.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]


########

animal:  7
session: 1
USS tone
[ 2 14] [ 6 11]
[[ 58710.  59223.]
 [ 57513.  57912.]]
trial numbers match
checking approx num of time points
min 57513.0
max 59223.0
std 667.77634729
mean 58339.5
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
 51 52 53 54 55 56 57]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
 51 52 53 54 55 56 57]


session: 2
USS tone
[ 8 12] [ 2 15]
[[ 40880.  40720.]
 [ 40920.  36880.]]
trial numbers match
checking approx num of time points
min 36880.0
max 



########

animal:  1
session: 1
baseline female
[ 1  8 15] [ 4 11 20]
[[  8880.   8712.  35424.]
 [ 27024.  29880.  26688.]]
trial numbers match
checking approx num of time points
min 8712.0
max 35424.0
std 10284.9606708
mean 22768.0
discarded this comparison because variance in trial length is above the user's threshold
########

animal:  3
session: 1
baseline female
[ 1  8 15] [ 7  9 19]
[[ 20220.  20460.  20340.]
 [ 33420.  33480.  33450.]]
trial numbers match
checking approx num of time points
min 20220.0
max 33480.0
std 6555.38900448
mean 26895.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30]


session: 2
baseline female
[ 1  8 15] [ 3 13 17]
[[ 23835.  25620.  23870.]
 [ 31255.  32935.  35525.]]
trial numbers match
checking approx num of time points
min 23835.0
max 35525.0
std 4608.14134621
mean 28840.0
[ 1  2  3  4  5  6  7  8  9 10 11 12 1

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34]


########

animal:  4
session: 1
baseline rat
[] [ 2 16]
[[    nan     nan]
 [ 69048.  63448.]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  5
session: 1
baseline rat
[] [ 2 16]
[[    nan     nan]
 [ 42156.  40356.]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  7
session: 1
baseline rat
[] [ 4 15]
[[    nan     nan]
 [ 58653.  58197.]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  1
session: 1
baseline tone
[ 1  8 15] [ 2 12 19]
[[  8880.   8712.  35424.]
 [ 27360.  26928.  27168.]]
trial numbers match
checking approx num of time points
min 8712.0
max 35424.0
std 10063.2471897
mean 22412.0
discarded this compari

female mineral oil odor
[] [ 3 16]
[[    nan     nan]
 [ 57684.  58083.]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  1
session: 1
female peanut odor
[ 4 11 20] []
[[ 27024.  29880.  26688.]
 [    nan     nan     nan]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  3
session: 1
female peanut odor
[ 7  9 19] []
[[ 33420.  33480.  33450.]
 [    nan     nan     nan]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  4
session: 1
female peanut odor
[] [7 9]
[[    nan     nan]
 [ 65576.  64512.]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  5
session: 1
female peanut odor
[] [ 7 13]
[[    nan     nan]
 [ 40572.  41184.]]
discarded: mismatching numbers of trials per stimulus type for this animal/session
########

animal:  7
session: 1
female peanut odor
[] [ 8 13]
[[    nan

KeyboardInterrupt: 

[ 6 10  5 15]


In [19]:
testInds = (df['animal']==1) & (df['session']==1) & (df['stimulus']=='USS') & (df['trial']==5)
print(np.sum(testInds))

26808


In [None]:
np.unique(df_anmlSessStimA[df_anmlSessStimA['trial'] == trialA].tolist())

In [17]:

# gb = df.groupby(['stimulusType']).get_group('rat')

stimGrouped = df.groupby('stimulusType')
ratGrpd = stimGrouped.get_group('rat')

print(ratGrpd)

print('reached end of file')

gb = df.groupby('stimulusType')

df_means = gb.apply(np.mean)
df_means

for stimType in gb

               date  animalNum  sessionNum  trialNum stimulusType  neuronID  \
36240    2017_05_00          1           1         3          rat         1   
36241    2017_05_00          1           1         3          rat         1   
36242    2017_05_00          1           1         3          rat         1   
36243    2017_05_00          1           1         3          rat         1   
36244    2017_05_00          1           1         3          rat         1   
36245    2017_05_00          1           1         3          rat         1   
36246    2017_05_00          1           1         3          rat         1   
36247    2017_05_00          1           1         3          rat         1   
36248    2017_05_00          1           1         3          rat         1   
36249    2017_05_00          1           1         3          rat         1   
36250    2017_05_00          1           1         3          rat         1   
36251    2017_05_00          1           1         3

In [28]:
ratData = df.loc[df['stimulusType'] == 'rat',:]
ussData = df.loc[df['stimulusType'] == 'USS',:]

print(ussData)

# slicing
df_big_force = df.loc[df['impact force (mN)'] > 1000, :]

df = pd.concat((df_low, df_high), axis=1)

# Specify indices we want (note parentheses holding each Boolean)
inds = (df['food density'] == 'high') & (df['cross-sectional area (sq micron)'] > 2000)

# Pull out areas
df.loc[inds, 'cross-sectional area (sq micron)']

df.corr()


# Rename the impact force column
df = df.rename(columns={'impact force (mN)': 'impf'})



# Write out DataFrame
df.to_csv('xa_combined.csv', index=False)

KeyError: 'stimulusType'