# Summary analysis: Skype vs fixed trajectory
## 4 experiments, 2 animals each responding to black disc moving on various paths

Link to the [trajectories](../trajectories%20for%20closed%20loop%20vs%20open%20loop_01.ipynb) used for stimulus movement.

5 episodes, 10 minutes each:

1. closed loop (skype) interaction (not feeding coordinates)
2. open loop replay of previously recorded animal path
3. (2) but moving at constant speed (re-sampled path)
4. open loop circular path, constant speed
5. (4) but speed values taken from real animal path

generate each episode separately and then stitch together

The pattern is repeated 5-20 times per animal

**Result: Animals are significantly more attracted to a disk moving at realistic speed distribution compared to constant speed.**

In [None]:
%config InteractiveShellApp.pylab_import_all = False
%matplotlib inline
%pylab inline
%reload_ext autoreload
%autoreload 2

import sys
import os
import fnmatch

import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
import glob
from datetime import datetime
from scipy import stats

# Remember the directory the notebook was started from. The guard keeps the
# original value when this cell is re-run, because the cell changes the
# working directory further down (os.chdir).
if 'startDirMaster' not in locals():
    startDirMaster=os.getcwd()

# props.csv is read as a whitespace-delimited, header-less key/value table;
# the first column becomes the key, the result is squeezed and turned into a dict.
propsFn=startDirMaster+'\\props.csv'

# NOTE(review): `squeeze=` and `delim_whitespace=` are deprecated/removed in
# recent pandas releases - confirm the pandas version this notebook is pinned to.
props=pd.read_csv(propsFn, header=None, index_col=0, squeeze=True,delim_whitespace=True).to_dict()

base=props['BaseDir']
expFile=props['allExpFn']

# All three directories are built relative to BaseDir and carry a trailing
# backslash, because later cells append file names by plain string concatenation.
RawDataDir = os.path.join(base,props['RawDataDir'])+'\\'
ProcessingDir = os.path.join(base,props['ProcessingDir'])+'\\Fig2B_boutPilot\\'
outputDir = os.path.join(base,props['outputDir'])+'\\'

# Create the processing and output directories on first use.
if not os.path.isdir(ProcessingDir):
    os.makedirs(ProcessingDir)
if not os.path.isdir(outputDir):
    os.makedirs(outputDir)

# Move one directory up before importing the project packages
# (presumably `functions` and `models` live in the parent directory - TODO confirm).
os.chdir('..\\')
import functions.matrixUtilities_joh as mu
import matplotlib.pyplot as plt
import models.experiment as xp
import models.experiment_set as es
import functions.paperFigureProps as pfp
pfp.paper()
# Centimetres per inch; figure sizes below are written as cm/inToCm.
inToCm=2.54


In [None]:
# Load the table of all experiments and keep only the rows of the
# 'pilot' stimulus protocol; the last line displays the selection.
info=pd.read_csv(expFile, sep=',')
info=info[info.stimulusProtocol=='pilot']
info

In [None]:

# Collect meta information for every position file of the selected
# experiments and save it to a new csv file for batch processing
# (the csv written at the end is what es.experiment_set is given later on).

posPath=[]      # full path of every *PositionTxt_*.txt file found
PLPath=[]       # first PL*.txt in the same directory, or [] when there is none
expTimeAll=[]   # time stamp parsed from the position file name
bdGroupAll=[]   # 'bdGroup' value of the originating experiment row
bgAll=[]        # 'bd' value of the originating experiment row
setAll=[]       # running index of the originating experiment row
inDishAll=[]    # rounded minutes since the first file of the row, plus 10

for i,(index,row) in enumerate(info.iterrows()):
    startDir=RawDataDir+row.path+'\\'

    setStart=None   # time stamp of the first position file found for this row
    for root, dirnames, filenames in os.walk(startDir):
        for filename in fnmatch.filter(filenames, '*PositionTxt_*.txt'):
            posPath.append(os.path.join(root, filename))

            # os.walk only yields a trailing separator for the top directory,
            # so the pattern has to be joined (plain `root+'PL*.txt'` never
            # matched inside sub-directories and silently produced []).
            plMatches=glob.glob(os.path.join(root,'PL*.txt'))
            PLPath.append(plMatches[0] if plMatches else [])

            bdGroupAll.append(row.bdGroup)
            bgAll.append(row.bd)
            # The 19 characters before the extension hold the time stamp.
            currTime=datetime.strptime(filename[-23:-4], '%Y-%m-%dT%H_%M_%S')

            if setStart is None:
                setStart=currTime
                print('newSet')

            # total_seconds() instead of .seconds: .seconds drops whole days,
            # so it wraps around for files recorded >= 24 h after the first one.
            indish=((currTime-setStart).total_seconds()/60. ) + 10
            inDishAll.append(np.round(indish))
            expTimeAll.append(currTime)
            setAll.append(i)


info=pd.DataFrame({'txtPath': posPath})
info['epiDur']=10        # duration of individual episodes (default: 5 minutes)
info['bd']=bgAll
info['bdGroup']=bdGroupAll
info['pairList']=PLPath

info['episodes'] = -1   # number of episodes to process: -1 to load all episodes (default: -1)
info['inDish'] = inDishAll     # time in dish before experiments started (default: 10)
info['arenaDiameter_mm'] = 100 # arena diameter (default: 100 mm)
info['minShift'] = 60 # minimum number of seconds to shift for control IAD
info['episodePLcode'] = 0 # flag if first two characters of episode name encode animal pair matrix (default: 0)
info['recomputeAnimalSize'] = 0 # flag to compute animals size from avi file (takes time, default: 1)
info['SaveNeighborhoodMaps'] = 0 # flag to save neighborhood maps for subsequent analysis (takes time, default: 1)
info['computeLeadership'] = 0 # flag to compute leadership index (takes time, default: 1)
info['ComputeBouts'] = 0 # flag to compute swim bout frequency (takes time, default: 1)
info['set'] = setAll   # experiment set: can label groups of experiments (default: 0)
info['ProcessingDir']=ProcessingDir
info['outputDir']=outputDir
info['allowEpisodeSwitch']=1
info['expTime']=expTimeAll

# Set-dependent overrides: sets 0-3 get a fixed pixel-per-mm value, sets >= 4
# use the episode-name pair code and 5 minute episodes.
info['pxPmm']=0
info.loc[info['set']<4,'pxPmm']=8
info.loc[info['set']>=4,'episodePLcode']=1
info.loc[info['set']>=4,'epiDur']=5

csvFile=os.path.join(ProcessingDir,'Fig2bPilot.csv')
info.to_csv(csvFile,encoding='utf-8')

info

In [None]:
def readExperiment(keepData=False):
    """Build the experiment set described by the module-level `csvFile`.

    The set is always constructed. With `keepData=True` the resulting
    object is returned; otherwise it is discarded and the integer 1 is
    returned instead.
    """
    loadedSet = es.experiment_set(csvFile=csvFile)
    return loadedSet if keepData else 1

expSet=readExperiment(keepData=True)

In [None]:
# Locate the per-recording '*siSummary*.csv' written to ProcessingDir.
# The lookup key is the position file name without its 4-character extension;
# glob(...)[0] raises IndexError if a summary file is missing.
csvPath = []
for f in [mu.splitall(x)[-1][:-4] for x in info.txtPath]:
    csvPath.append(glob.glob(ProcessingDir+f+'*siSummary*.csv')[0])


# Concatenate all summaries into one table `df`, adding continuous ids.
df=pd.DataFrame()
i=0
for fn in csvPath:
    print(fn)
    tmp=pd.read_csv(fn,index_col=0,sep=',')
    currSet=tmp.animalSet.values[0]
    # i counts summary files, so animalSetCont is a per-recording index.
    tmp['animalSetCont']=i
    # Offsets of 15 per set / per recording keep animal ids from colliding
    # (assumes at most 15 animals per recording - TODO confirm).
    tmp['animalIndexCont']=tmp.animalIndex+(currSet*15)
    tmp['animalIndexCont2']=tmp.animalIndex+(i*15)
    
    # Sets 0-3: keep only animalIndex 0 of each file and re-number it so that
    # even/odd files of a set end up as two different animals.
    # NOTE(review): the assignment below writes to a filtered frame and may
    # trigger pandas' SettingWithCopyWarning.
    if tmp.animalSet.values[0]<4:
        tmp=tmp[tmp['animalIndex']==0]
        tmp.animalIndex=tmp['animalIndexCont']+np.mod(i,2)
    #    tmp.animalIndex=np.mod(i,2)
        
        
    df=pd.concat([df,tmp])
    i+=1



#df['episode']='skype'
print('df shape',df.shape)

# Express every time stamp relative to 2017-01-01 00:00 after forcing it onto
# January 1st: t2 is the shifted time stamp, t3 the offset in hours
# (equals time of day in hours when the recording year is 2017).
d=df.time
r=datetime(2017,1,1)
t2=[pd.to_datetime(x).replace(day=1,month=1)for x in df.time]

t3=[(x-r)/pd.Timedelta('1 hour') for x in t2]
df['t2']=t2
df['t3']=t3

df.head()

In [None]:
# Build IADsArray[animal, episode, k], pre-filled with NaN:
#   k = 0..8 : attraction computed against each of the first 9 IAD rows
#   k = 9    : attraction computed against the mean of all rows but the last
# The first axis is sized 2 x (number of unique animalIndexCont values);
# unused entries stay NaN.
anList=df.animalIndexCont.unique()
IADsArray=np.zeros((2*anList.shape[0],df.epiNr.unique().shape[0],10))*np.nan
expList=df.animalSetCont.unique()
#expList=np.arange(8)
ai=0   # running row index into IADsArray (one row per experiment/animal combination)
for e in expList:
    dfTmp=df[df.animalSetCont==e]
    anList=dfTmp.animalIndexCont.unique()
    

    for a in anList:
        idx3=dfTmp.animalIndexCont==a
        # Positional indices of this animal's rows within dfTmp; they are used
        # directly as indices into expSet.experiments[e].pair.
        pl=np.where(idx3)[0]
        print('computing: ',e,a,idx3.sum())
        ep=0
        for p in pl:
            # presumably rows 0..n-2 of IADs() are time-shifted control
            # distances and the last row is the un-shifted one - TODO confirm
            # against models.experiment.
            expIAD=np.array(expSet.experiments[e].pair[p].IADs())
            expIADm=np.nanmean(expIAD,axis=1)

            # Relative difference between row i and the last row.
            for i in range(9):
                ShiftAttract=(expIADm[i]-expIADm[-1])/expIADm[i]
                IADsArray[ai,ep,i]=ShiftAttract
            # Same ratio, using the mean over all rows except the last.
            ShiftAttract=(expIADm[:-1].mean()-expIADm[-1])/expIADm[:-1].mean()
            IADsArray[ai,ep,9]=ShiftAttract
            ep+=1
        ai+=1
                


In [None]:
# Separate the last slot (index 9, value based on the mean over rows) from the
# nine per-row values.
# NOTE: this cell is not idempotent - after one run IADsArray has only 9
# slots, so running it again raises IndexError on index 9.
IADsMean=IADsArray[:,:,9]
IADsArray=IADsArray[:,:,:9]
IADsArray.shape

In [None]:
# Number of episodes with a finite value, per row of IADsArray.
np.isfinite(np.nanmean(IADsArray,axis=2)).sum(axis=1)

In [None]:
# Histogram of the control attraction values in IADsMean, with the mean
# (red) and a 95% t-interval (black) marked as vertical lines.
pfp.paper()
sns.set_palette('viridis',3)

fig, ax = plt.subplots(figsize=(6/inToCm,4.5/inToCm))

# IADsMean is NaN-padded; only finite entries are real samples.
rav=IADsMean.ravel()
ravFinite=rav[np.isfinite(rav)]

h=ax.hist(ravFinite,bins=20)
ax.set_xlabel('Control attraction in shifted pairs \n 29 minutes of data each')
ax.set_ylabel('Count')
ax.set_xticks([-.4,0,.4])
ax.set_xlim([-.5,.5])

# np.mean() on the NaN-padded array is NaN, so no line was ever drawn;
# use the NaN-aware mean of the data shown in the histogram.
ax.axvline(np.nanmean(IADsMean),linestyle=':',color='r')

# Degrees of freedom follow the number of finite samples that enter the
# mean/std below (previously the size of the NaN-padded IADsArray was used).
l,u=stats.t.interval(0.95, ravFinite.size-1, loc=np.nanmean(IADsMean), scale=np.nanstd(IADsMean))
ax.axvline(u,linestyle=':',color='k')
ax.axvline(l,linestyle=':',color='k')
print(np.nanmean(IADsMean),np.nanstd(IADsMean),l,u)
sns.despine()

In [None]:
# Spread of the control attraction as a function of how many episodes are
# averaged: entry i uses the first i+1 episodes (slice [:i+1]).
#   CI   : from the nine per-row values (plain mean over that axis, so an
#          animal with any NaN there drops out of the nanstd)
#   CIan : from IADsMean
# Slices beyond the available number of episodes simply return all episodes.
CI=np.zeros(100)
CIan=np.ones(100)
for i in range(100):
    CI[i]=np.nanstd(np.nanmean(IADsArray[:,:i+1,:],axis=1).mean(axis=1))
    CIan[i]=np.nanstd(np.nanmean(IADsMean[:,:i+1],axis=1))
# Both curves are plotted as 2 x standard deviation.
plt.plot(CI*2,'.')
plt.plot(2*CIan,'.')

In [None]:
# Compare the measured 2 x std of the control attraction (first 10 entries of
# CI) with the 1/sqrt(n) decay expected from the overall spread.
pfp.paper()
sns.set_palette('viridis',3)
fig, ax = plt.subplots(figsize=(6/inToCm,4.5/inToCm))

# x counts episodes; it is multiplied by 5 to get the x axis in minutes.
x=np.arange(1,400/5.)
ax.plot(x*5,
        2*np.nanstd(np.nanmean(IADsArray,axis=2))/np.sqrt(x),
        'r',
        label='Normal distribution',
        linewidth=2)

# `markersize` is the Line2D property name; the previous spelling
# `markerSize` is not a valid keyword for ax.plot.
ax.plot(x[:10]*5,
        CI[:10]*2,
        '.k-',
        label='ShiftPair data',
        markersize=5,
        alpha=1)

ax.legend()
ax.set_xlabel('Recording duration (Minutes)')
ax.set_ylabel('Attraction CI95 +/-')
ax.set_xlim([0,240])
ax.set_ylim([0,.3])
sns.despine()
ax.set_xticks(np.arange(0,250,60));


In [None]:
# Display the distinct values of the 'age' column.
df.age.unique()

In [None]:
# 'si' against t3 (hours relative to 2017-01-01 00:00), one trace per animalSet,
# with animalIndexCont2 as the repeated-measures unit.
# NOTE(review): sns.tsplot is deprecated/removed in newer seaborn releases -
# confirm the seaborn version this notebook requires.
sns.tsplot(data=df, time="t3",value="si",unit="animalIndexCont2",condition='animalSet',estimator=np.nanmean,interpolate=False,err_style="ci_bars");
plt.xlim([8,22])
plt.ylim([0,.7])

In [None]:
# Same plot restricted to episodes whose name contains 'skype', with
# inDishTime on the x axis.
# NOTE(review): sns.tsplot is deprecated/removed in newer seaborn releases -
# confirm the seaborn version this notebook requires.
sns.tsplot(data=df[df['episode'].str.contains('skype')], time="inDishTime",value="si",unit="animalIndexCont2",condition='animalSet',estimator=np.nanmean,interpolate=False,err_style="ci_bars");
plt.ylim([0,.7])

In [None]:
# epiNames: sorted unique episode names; epiNrOriginal: index of each row's
# episode name within epiNames.
epiNames,df['epiNrOriginal']=np.unique(df.episode,return_inverse=True)
print(epiNames)
# Look-up tables that map an original episode index to the index of the
# corrected name, for animalSet 5 (epiSort1) and animalSet 4 (epiSort0).
# They differ only in the last two entries (0,1 vs 1,0).
# presumably the value 10 is a placeholder for episode names that do not occur
# in these two sets - TODO confirm.
epiSort1=np.array([10,10,10,10,10,4,3,2,0,1])
epiSort0=np.array([10,10,10,10,10,4,3,2,1,0])
# Default: corrected name equals the original name; only sets 4 and 5 are remapped.
df['epiCorrect']=df.episode
df.loc[df.animalSet==4,'epiCorrect']=epiNames[epiSort0[df.loc[df.animalSet==4,'epiNrOriginal']]]
df.loc[df.animalSet==5,'epiCorrect']=epiNames[epiSort1[df.loc[df.animalSet==5,'epiNrOriginal']]]

In [None]:
# Animal sets 0-3, rows with 60 < inDishTime < 350
# (presumably minutes in the dish - TODO confirm unit of inDishTime).
idx=(df['inDishTime']<350) & (df['inDishTime']>60)  &(df.animalSet<4)
episodeNames=df['epiCorrect'].unique()
dfDR=df[idx]
# Mean 'si' per (episode, animal): one data point per animal and episode.
tmp=dfDR.groupby(['epiCorrect','animalIndex'],sort=True)['si']
tmp2=tmp.mean().reset_index()

fig, ax = plt.subplots(figsize=(10,10))

# Notched box plot with the individual animal means overlaid.
plt.axhline(0)
sns.boxplot(x=tmp2['epiCorrect'],y=tmp2['si'],notch=True)
sns.swarmplot(x=tmp2['epiCorrect'],y=tmp2['si'],linewidth=1,edgecolor='gray')
plt.ylim([-.1,.6])

In [None]:
# Animal set 4 only, rows with 60 < inDishTime < 240 and animalIndex < 14
# (presumably minutes in the dish - TODO confirm unit of inDishTime).
idx=(df['inDishTime']<240) & (df['inDishTime']>60) & (df.animalIndex<14) &(df.animalSet==4)
episodeNames=df['epiCorrect'].unique()
dfDR=df[idx]
# Mean 'si' per (episode, animal): one data point per animal and episode.
tmp=dfDR.groupby(['epiCorrect','animalIndex'],sort=True)['si']
tmp2=tmp.mean().reset_index()

fig, ax = plt.subplots(figsize=(10,10))

# Notched box plot with the individual animal means overlaid.
plt.axhline(0)
sns.boxplot(x=tmp2['epiCorrect'],y=tmp2['si'],notch=True)
sns.swarmplot(x=tmp2['epiCorrect'],y=tmp2['si'],linewidth=1,edgecolor='gray')
plt.ylim([-.1,.6])

In [None]:
# Animal set 5 only, rows with 60 < inDishTime < 240 and animalIndex < 14
# (presumably minutes in the dish - TODO confirm unit of inDishTime).
idx=(df['inDishTime']<240) & (df['inDishTime']>60) & (df.animalIndex<14) &(df.animalSet==5)
episodeNames=df['epiCorrect'].unique()
dfDR=df[idx]
# Mean 'si' per (episode, animal): one data point per animal and episode.
tmp=dfDR.groupby(['epiCorrect','animalIndex'],sort=True)['si']
tmp2=tmp.mean().reset_index()

fig, ax = plt.subplots(figsize=(10,10))

# Notched box plot with the individual animal means overlaid.
plt.axhline(0)
sns.boxplot(x=tmp2['epiCorrect'],y=tmp2['si'],notch=True)
sns.swarmplot(x=tmp2['epiCorrect'],y=tmp2['si'],linewidth=1,edgecolor='gray')
plt.ylim([-.1,.6])

Attraction begins in the second episode and degrades after around 10 hours. The degradation could be due to habituation, fatigue, or oxygen deprivation (these experiments have a lid). Limit subsequent analysis to a window of 50-550 minutes in the arena.

## Mean attraction for each episode by day

## Average over all trials per episode (inflated n)

In [None]:
# Mean +/- SD of 'si' per episode over all trials inside the analysis window.
# `idx` is reused by the following cell.
# NOTE(review): the window here is 50 < inDishTime < 300, while the text above
# speaks of 50-550 minutes - confirm which one is intended.
idx=(df['inDishTime']<60*5) & (df['inDishTime']>50)

# Group once and derive both statistics from the same selection.
siByEpisode=df[idx].groupby('epiCorrect',sort=False)['si']
mn=siByEpisode.mean()
sd=siByEpisode.std()

mn.plot.bar()
# Bars sit at positions 0..n-1; derive n from the data instead of assuming
# exactly 5 episodes.
plt.errorbar(np.arange(len(mn)), mn,yerr=sd,fmt='o',color='black')
lims = plt.ylim()
plt.ylim([0, lims[1]])
plt.ylabel('shoaling index +/- SD')
plt.title('Attraction to black disk, closed loop skype vs. open loop \n open loop: real vs. constant speed \n n=8 animals x 5-10 trials each')

## Average all trials per animal per episode (now n = animals)

In [None]:
# Mean 'si' per (episode, animal set, animal) inside the window selected by
# `idx` (defined in the previous cell).
tmp=df[idx].groupby(['epiCorrect','animalSet','animalIndex'],sort=False)['si'].mean()
tmp=tmp.reset_index()

# Animal id string: animalSet and animalIndex concatenated without a separator
# (e.g. set 1 / index 12 and set 11 / index 2 would both give '112').
tmp["anID"] = tmp["animalSet"].map(str) + tmp["animalIndex"].map(str)
tmp.head()


In [None]:
# Order in which the entries of episodeNames are drawn on the x axis
# (swaps the last two).
plotOrder=[0,1,2,4,3]
# factorize numbers the episode names in order of first appearance in tmp, so
# the mapping to newLevels below depends on the row order of tmp.
labels, levels = pd.factorize(tmp.epiCorrect)

# Short labels, assigned by factorize code. Going by the tick labels of the
# final figure: nP/cP = natural/synthetic path, nB/cS = natural bouts/constant speed.
newLevels=['pair','nPnB','nPcS','cPcS','cPnB']
tmp['newEpi']=[newLevels[x] for x in labels]
tmp['epiNr']=labels
episodeNames=tmp['newEpi'].unique()

tmp.head()

## statistical analysis of differences between episode means

Statistics were computed in PRISM, using the pivoted table tmp2 below.

Note that the skype condition effectively has half as much data as the other stimuli because of pair reciprocity.

Therefore, limit ANOVA and post-hoc tests to passive stimuli.

Using a non-parametric ANOVA (Friedman test) and Dunn's post-hoc comparisons.

PRISM Results:

Table Analyzed	Data 1
	
Friedman test	
  P value	<0.0001
  Exact or approximate P value?	Approximate
  P value summary	****
  Are means signif. different? (P < 0.05)	Yes
  Number of groups	4
  Friedman statistic	67.07
	
Data summary	
  Number of treatments (columns)	4
  Number of subjects (rows)	38


Number of families	1				
Number of comparisons per family	6				
Alpha	0.05				
					
Dunn's multiple comparisons test	Rank sum diff.	Significant?	Summary	Adjusted P Value	
					
  fix_real_rSpeed vs. fix_real_cSpeed	64	Yes	****	<0.0001	B-C
  fix_real_rSpeed vs. fix_circ_rSpeed	22	No	ns	0.3037	B-D
  fix_real_rSpeed vs. fix_circ_cSpeed	82	Yes	****	<0.0001	B-E
  fix_real_cSpeed vs. fix_circ_rSpeed	-42	Yes	**	0.0011	C-D
  fix_real_cSpeed vs. fix_circ_cSpeed	18	No	ns	0.6585	C-E
  fix_circ_rSpeed vs. fix_circ_cSpeed	60	Yes	****	<0.0001	D-E
					
					
Test details	Rank sum 1	Rank sum 2	Rank sum diff.	n1	n2
					
  fix_real_rSpeed vs. fix_real_cSpeed	137	73	64	38	38
  fix_real_rSpeed vs. fix_circ_rSpeed	137	115	22	38	38
  fix_real_rSpeed vs. fix_circ_cSpeed	137	55	82	38	38
  fix_real_cSpeed vs. fix_circ_rSpeed	73	115	-42	38	38
  fix_real_cSpeed vs. fix_circ_cSpeed	73	55	18	38	38
  fix_circ_rSpeed vs. fix_circ_cSpeed	115	55	60	38	38

In [None]:
# Wide table for the external statistics: one row per animal (anID), one
# column per episode, cell value = 'si' (pivot_table averages duplicates).
tmp2=tmp[['epiCorrect','si','anID']]
tmp2=tmp2.pivot_table(columns='epiCorrect',index='anID',values='si')
print(tmp2.shape)
tmp2.reset_index()

In [None]:
# Write the pivoted table to the output directory (outputDir already ends
# with a path separator).
tmp2.reset_index().to_csv(outputDir+'Fig2B.csv')


In [None]:
LowestConditionCount=5 # lowest number of repeats (in the first experiment, other experiments have more)
# CI[k] was computed from the first k+1 episodes (slice [:k+1]), so the value
# for N repeats sits at index N-1. Indexing with N itself returned the value
# for N+1 repeats.
CI95=CI[LowestConditionCount-1]*2
print(LowestConditionCount, ' episode repeats per animal')
print('CI95:',CI95)

In [None]:
# Final figure: attraction ('si') per stimulus condition, one point per animal,
# thin gray lines connecting the conditions of each animal, and a horizontal
# bar marking the mean of each condition. Saved as svg in outputDir.

# --- data selection ---------------------------------------------------------
# The episode of the first row of tmp is the one labelled 'pair' by newLevels.
pairEpisode=tmp.epiCorrect.values[0]
isOddAnimal=tmp.animalIndex.isin(np.arange(1,100,2))
# Drop animalIndex 14 everywhere, and drop odd animal indices in the pair
# episode (presumably to keep one animal per reciprocal pair - TODO confirm).
tmp2=tmp[(tmp.animalIndex!=14) & ~((tmp.epiCorrect==pairEpisode) & isOddAnimal)]
# Build the mask on tmp2 itself: a mask computed on tmp does not share tmp2's
# index and only works through pandas' implicit re-indexing.
isPairRow=tmp2.epiCorrect==pairEpisode

sns.set_palette('Dark2',8)
co=sns.color_palette("Dark2", 8)

# One colour per x position: gray for the pair condition, then two colours
# alternating between the two speed conditions.
col=['gray',co[0],co[1],co[0],co[1]]

pfp.paper()
inToCm=2.54
fig, ax = plt.subplots(figsize=(9/inToCm,4/inToCm))

# --- per-animal connecting lines (open-loop conditions) -----------------------
sns.pointplot(x='newEpi',
              y='si',
              hue='anID',
              data=tmp2[~isPairRow],
              scale=0.2,
              palette=['gray'],
              order=episodeNames[plotOrder],
              ax=ax,
              zorder=0,
              legend=False,
              alpha=0.5)

# --- individual animals -------------------------------------------------------
sns.stripplot(x='newEpi',
              y='si',
              data=tmp2,
              palette=col,
              order=episodeNames[plotOrder],
              ax=ax,
              s=3,
              zorder=2,
              alpha=0.5)

# --- per-animal points (pair condition) ---------------------------------------
sns.pointplot(x='newEpi',
              y='si',
              hue='anID',
              data=tmp2[isPairRow],
              scale=0.1,
              palette=['gray'],
              order=episodeNames[plotOrder],
              ax=ax,
              zorder=1,
              legend=False,
              alpha=0.5)

# --- condition means, drawn as wide horizontal markers ------------------------
sns.pointplot(x='newEpi',
              y='si',
              data=tmp2,
              order=episodeNames[plotOrder],
              palette=col,
              ax=ax,
              ci =None,
              zorder=1000,
              edgecolor='k',
              linewidth=1,
              markers=['_'],
              scale=4,
              legend=False,
              estimator=np.nanmean,
              alpha=0.8)

plt.ylabel('Attraction')
plt.axhline(0,ls='-',color='k',linewidth=0.5)
# +/- CI95 band of the shifted-pair control.
plt.axhline(CI95,ls='--',color='gray',linewidth=0.5)
plt.axhline(-CI95,ls='--',color='gray',linewidth=0.5)
plt.xlabel('')

# --- statistical annotation ---------------------------------------------------
# Brackets over columns 1-2 and 3-4, placed just above the largest value.
# A separate name is used for the colour so the palette list `col` survives.
l=0.025
y, h, annotCol = tmp.si.max() + l, l, 'k'
for x1, x2 in ((1, 2), (3, 4)):
    plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=annotCol)
    plt.text((x1+x2)*.5, y+h, "***", ha='center', va='bottom', color=annotCol,size=10)

ax.set_xticklabels(['Mutual \n interaction',
                    'Natural path \n Natural bouts',
                    'Natural path \n Constant speed',
                    'Synthetic path \n Natural bouts',
                    'Synthetic path \n Constant speed'
                   ])

# `label1` is the primary tick label; the `label` alias is deprecated in
# recent matplotlib releases.
for tick in ax.xaxis.get_major_ticks():
    tick.label1.set_fontsize(10)
    tick.label1.set_rotation(45)

ax.tick_params(axis='x', which='major', pad=-5)
# There is no legend object when every plotting call suppressed it.
if ax.legend_ is not None:
    ax.legend_.remove()
sns.despine()
ax.set_yticks([0,.2,.4,.6]);

figPath=outputDir+'\\2B_PathAndSpeedPilot.svg'
plt.savefig(figPath,bbox_inches='tight')

from shutil import copy2

def splitall(path):
    """Split *path* into a list of all of its components.

    os.path.split is applied repeatedly, peeling off the last component
    each time, until it no longer changes the remaining path. For an
    absolute path the first element is the root; a trailing separator
    yields an empty string as the last element.
    """
    pieces = []
    while True:
        head, tail = os.path.split(path)
        if head == path:
            # Nothing left to strip: reached the root of an absolute path.
            pieces.append(head)
            break
        if tail == path:
            # Nothing left to strip: reached the first relative component.
            pieces.append(tail)
            break
        pieces.append(tail)
        path = head
    # Components were gathered from last to first.
    pieces.reverse()
    return pieces



# Copy the raw files that belong to every recording listed in `info` into the
# publication data folder, mirroring the source directory structure.
for i,row in info.iterrows():
    fn=row.txtPath
    head, tail = os.path.split(fn)

    # All position files of the directory, plus the first match of each
    # optional companion file.
    copyList=[]
    copyList.extend(glob.glob(head+'\\PositionTxt_*.txt'))
    # Each pattern is checked on its own: previously one missing file raised
    # inside a shared try/except and silently skipped all later patterns.
    for pattern in ('PL*.txt','*anSize.csv','ROI*.csv'):
        matches=glob.glob(head+'\\'+pattern)
        if matches:
            copyList.append(matches[0])

    for f in copyList:
        print(f)
        # Number of leading path components to drop before re-rooting the
        # path; it depends on the source drive letter.
        if f[0]=='E':
            keepSlash=3
        else:
            keepSlash=4
        # NOTE(review): destination root is hard-coded.
        toDirectory = "e:\\b\\LarschAndBaier2018\\RawData\\" + os.path.join(*splitall(f)[keepSlash:-1])+"\\"
        if not os.path.isdir(toDirectory):
            os.makedirs(toDirectory)

        copy2(f, toDirectory)
