# Summary analysis: Kinetic parameters 4B

## Jump Distance on Knot trajectory

### speed = 0.7 px / frame



In [None]:
%config InteractiveShellApp.pylab_import_all = False
%matplotlib inline
%pylab inline
%reload_ext autoreload
%autoreload 2

import sys
import os
import fnmatch

import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
import glob
from datetime import datetime
from scipy import stats


# Remember the directory the notebook was started from exactly once, so that
# re-running this cell (after the working directory has been changed below)
# still resolves the same props.csv.
if 'startDirMaster' not in locals():
    startDirMaster=os.getcwd()

# props.csv is read as whitespace-delimited key/value pairs into a plain dict
propsFn=startDirMaster+'\\props.csv'
props=pd.read_csv(propsFn, header=None, index_col=0, squeeze=True,delim_whitespace=True).to_dict()

base=props['BaseDir']
expFile=props['allExpFn']

# Directory strings carry a trailing backslash because paths are later built
# by plain string concatenation.
RawDataDir = os.path.join(base,props['RawDataDir'])+'\\'
ProcessingDir = os.path.join(base,props['ProcessingDir'])+'\\Fig2FG\\'
outputDir = os.path.join(base,props['outputDir'])+'\\'

if not os.path.isdir(ProcessingDir):
    os.makedirs(ProcessingDir)
if not os.path.isdir(outputDir):
    os.makedirs(outputDir)

# Change to the parent of the start directory before importing the project
# packages. Anchoring on startDirMaster instead of a relative '..' keeps this
# cell idempotent: running it twice no longer climbs two levels up.
os.chdir(os.path.join(startDirMaster,'..'))
import functions.matrixUtilities_joh as mu
import matplotlib.pyplot as plt
import models.experiment as xp
import models.experiment_set as es
import functions.paperFigureProps as pfp
pfp.paper()
inToCm=2.54  # centimetres per inch, used to give figure sizes in cm


In [None]:
# Load the experiment list and keep only rows of stimulus protocol '4b'.
# (alternative reader: pd.read_csv(expFile,quotechar='"', sep=',', converters={'bdGroup':ast.literal_eval}))
info=pd.read_csv(expFile, sep=',')
# .copy() makes `info` an independent frame, so adding columns to it does not
# raise pandas' SettingWithCopyWarning.
info=info[info.stimulusProtocol=='4b'].copy()
info

In [None]:
# collect meta information and save to new csv file for batch processing

def _firstMatch(pattern):
    """Return the first path matching glob `pattern`; raise IOError naming the pattern if none exists."""
    matches=glob.glob(pattern)
    if not matches:
        raise IOError('no file matches '+pattern)
    return matches[0]

aviPath=[]  # never populated in this cell
posPath=[]
PLPath=[]
expTime = []

for index,row in info.iterrows():
    startDir=RawDataDir+row.path+'\\'

    posPath.append(_firstMatch(startDir+'PositionTxt*.txt'))
    PLPath.append(_firstMatch(startDir+'PL*.txt'))

    # the 19 characters in front of the 4-character extension are parsed as the time stamp
    tail = os.path.basename(posPath[-1])
    currTime=datetime.strptime(tail[-23:-4], '%Y-%m-%dT%H_%M_%S')
    expTime.append(currTime)

info['txtPath']=posPath
info['pairList']=PLPath

info['epiDur'] = 5      # duration of individual episodes (default: 5 minutes)
info['episodes'] = -1   # number of episodes to process: -1 to load all episodes (default: -1)
info['inDish'] = 10#np.arange(len(posPath))*120     # time in dish before experiments started (default: 10)
info['arenaDiameter_mm'] = 100 # arena diameter (default: 100 mm)
info['minShift'] = 60 # minimum number of seconds to shift for control IAD
info['episodePLcode'] = 0 # flag if first two characters of episode name encode animal pair matrix (default: 0)
info['recomputeAnimalSize'] = 0 # flag to compute animals size from avi file (takes time, default: 1)
info['SaveNeighborhoodMaps'] = 0 # flag to save neighborhood maps for subsequent analysis (takes time, default: 1)
info['computeLeadership'] = 0 # flag to compute leadership index (takes time, default: 1)
info['ComputeBouts'] = 1 # flag to compute swim bout frequency (takes time, default: 1)
info['set'] = np.arange(len(posPath))   # experiment set: can label groups of experiments (default: 0)
info['ProcessingDir']=ProcessingDir
info['outputDir']=outputDir

info['expTime']=expTime

# write the table that es.experiment_set(csvFile=...) reads for batch processing
csvFile=os.path.join(ProcessingDir,'Fig2_FG.csv')
info.to_csv(csvFile,encoding='utf-8')
info

In [None]:
def readExperiment(keepData=False):
    """Build the experiment set described by `csvFile`.

    Returns the experiment_set object when `keepData` is True; otherwise the
    object is discarded and 1 is returned.
    """
    experiments=es.experiment_set(csvFile=csvFile)
    return experiments if keepData else 1

expSet=readExperiment(keepData=True)

In [None]:
# Find the siSummary csv in ProcessingDir for every experiment; the file name
# starts with the position file's name without its 4-character extension.
csvPath = []
for f in [mu.splitall(x)[-1][:-4] for x in info.txtPath]:
    matches=glob.glob(ProcessingDir+f+'*siSummary*.csv')
    if not matches:
        raise IOError('no siSummary csv found for '+f)
    csvPath.append(matches[0])

# animalIndex of experiment i is shifted by i*15 so indices stay unique across
# experiments (presumably 15 animals per experiment -- TODO confirm)
animalIndexOffset=15

frames=[]
for i,fn in enumerate(csvPath):
    print(fn)
    tmp=pd.read_csv(fn,index_col=0,sep=',')
    # bracket assignment always writes a column; attribute assignment would only
    # set a Python attribute if the column did not exist yet
    tmp['animalSet']=i
    tmp['animalIndex']=tmp['animalIndex']+i*animalIndexOffset
    frames.append(tmp)

# single concat instead of growing the frame inside the loop
df=pd.concat(frames)
df['episode']=[x.strip().replace('_','') for x in df['episode']]

print('df shape',df.shape)

In [None]:
# Add two time columns:
#   t2 - the time stamp with month and day replaced by 1 (year and clock time kept)
#   t3 - t2 expressed in hours since 2017-01-01 00:00
d=df.time
r=datetime(2017,1,1)
oneHour=pd.Timedelta('1 hour')
t2=[]
t3=[]
for stamp in df.time:
    shifted=pd.to_datetime(stamp).replace(day=1,month=1)
    t2.append(shifted)
    t3.append((shifted-r)/oneHour)
df['t2']=t2
df['t3']=t3
df

## Habituation or Fatigue within 20 hours?

Plot shoaling index during closed loop skype episodes over time.

In [None]:
# Mean 'si' over inDishTime, one trace per episode type, animals as units, CI drawn as bars
sns.tsplot(data=df, time="inDishTime",value="si",unit="animalIndex",condition="episode",estimator=np.nanmean,interpolate=False,err_style="ci_bars");
# restrict the x-axis to inDishTime 0..480
plt.xlim([0,8*60])

# Mean response over all stimuli per animal

In [None]:
# one viridis colour per animal (105 colours)
sns.set_palette('viridis',105)
co=sns.color_palette("viridis", 105)
# keep only rows with 45 < inDishTime < 400
idx=(df['inDishTime']<400) & (df['inDishTime']>45)
dfDR=df[idx]
# average all columns per (episode, animal)
dfEpiAn=dfDR.groupby(['episode','animalIndex'],sort=True).mean().reset_index()
# individual animals as points and thin gray lines, then the across-animal mean on top
sns.stripplot(data=dfEpiAn,x='episode',y='si',zorder=-1,hue='animalIndex')
sns.pointplot(data=dfEpiAn,x='episode',y='si',hue='animalIndex',zorder=100,scale=0.2,palette=['gray'])
sns.pointplot(data=dfEpiAn,x='episode',y='si',join=False,zorder=100)
ax=plt.gca()
# drop the per-animal legend
ax.legend_.remove()

# Group animals by age

In [None]:
# 8-colour viridis palette for the age groups
sns.set_palette('viridis',8)
co=sns.color_palette("viridis", 8)
fig, axes = plt.subplots(figsize=(5, 5))
# mean 'si' per episode, one line per value of 'age'
sns.pointplot(data=dfEpiAn,x='episode',y='si',hue='age',zorder=100,scale=1)
sns.despine()

axes.set_ylabel('attraction index')
# dotted reference line at zero
axes.axhline(0,ls=':',color='k')
axes.set_title('Frequency tuning per age group');
plt.legend(title='age')

# Combine age-groups into 3 groups to compare across panels

In [None]:
# 'ag' bins age into three groups: 0 for age <= 16, 1 for 17..21, 2 for age > 21
dfEpiAn['ag']=0
dfEpiAn.loc[(dfEpiAn.age>16),'ag']=1
dfEpiAn.loc[(dfEpiAn.age>21),'ag']=2

sns.set_palette('viridis',3)
co=sns.color_palette("viridis", 3)
fig, axes = plt.subplots(figsize=(5, 5))
# mean 'si' per episode, one line per age group
sns.pointplot(data=dfEpiAn,x='episode',y='si',hue='ag',zorder=100,scale=1)
sns.despine()

axes.set_ylabel('attraction index')
# dotted reference line at zero
axes.axhline(0,ls=':',color='k')
axes.set_title('Frequency tuning per age group');
plt.legend(title='age')

### To plot data with numerical X-axis, pre-calculate group means and STD

In [None]:
# preview the per-(episode, animal) means
dfEpiAn.head()

In [None]:
pfp.paper()
sns.set_palette('viridis',3)
co=sns.color_palette("viridis", 3)

# numerical x-axis: one value per episode within a panel
xax=np.array([1/4., 1/2., 1., 1.5, 2., 3., 4.])*5.7
nSpeeds=len(xax)

# mean and STD of 'si' per (episode, age group), computed once
g_epiAg=dfEpiAn.groupby(['episode','ag'],sort=True)[['si']]
siMeanAll=g_epiAg.mean().unstack().reset_index()   # rows: episode, columns: age group
siStdAll=g_epiAg.std().unstack().values.T          # rows: age group, columns: episode

# the first nSpeeds episodes go to the left panel, the remaining ones to the right;
# explicit copies so that adding 'xax' does not write into a slice
var_group_a=siStdAll[:,:nSpeeds]
si_group_a=siMeanAll[:nSpeeds].copy()
si_group_a['xax']=xax

var_group_b=siStdAll[:,nSpeeds:]
si_group_b=siMeanAll[nSpeeds:].copy()
si_group_b['xax']=xax


def plotSpeedTuningPanel(ax, siGroup, varGroup):
    """Draw one panel: mean 'si' per age group against 'xax' (log x) plus STD error bars.

    ax       -- matplotlib axes to draw into
    siGroup  -- frame with an 'xax' column and one 'si' column per age group (0, 1, 2)
    varGroup -- array of STD values, one row per age group
    """
    siGroup.plot(kind='line',
                 marker='o',
                 y='si',
                 x='xax',
                 ax=ax,
                 ms=5,
                 markeredgecolor='k',
                 markeredgewidth=1,
                 logx=True,
                 legend=False)

    # error bars with a small per-group x-offset so they do not overlap
    # (not ideal because of the log x-axis)
    for jj,j in enumerate([0,1,2]):
        y=siGroup.si.loc[:,j].values
        x=xax+(jj-1)*0.1
        e=varGroup[jj]
        c=np.array(co[jj])
        ax.errorbar(x,y,e,fmt='none',color=c,alpha=0.5)


fig, axes = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True,figsize=(9/inToCm,4.5/inToCm))

plotSpeedTuningPanel(axes[0], si_group_a, var_group_a)   # intermittent motion data & STD
plotSpeedTuningPanel(axes[1], si_group_b, var_group_b)   # constant speed data & STD

# pretty axes and Labels
xTicks=[1,2,5,10,20]
axes[0].set_xticks(xTicks);
axes[0].set_xticklabels([str(tick) for tick in xTicks]);
axes[0].set_yticks([0,.2,.4]);
axes[0].set_xlim([1,28])
axes[0].set_ylim([-.15,.5]);
axes[0].set_xlabel('Speed (mm/sec)')
axes[1].set_xlabel('Speed (mm/sec)')
axes[0].set_title('Intermittent motion')
axes[1].set_title('Constant motion')
axes[0].set_ylabel('Attraction')

sns.despine()
# leave room above the panels
fig.subplots_adjust(top=0.75)

L=plt.legend(bbox_to_anchor=(.5, 1), loc=2, borderaxespad=0.,handletextpad=.2,title="Age (dpf)")

# replace the numeric age-group labels by the age ranges they stand for
L.get_texts()[0].set_text('<17')
L.get_texts()[1].set_text('17-21')
L.get_texts()[2].set_text('>21')

figPath=outputDir+'\\2F_speedBoutVsConstan.svg'
plt.savefig(figPath)

# Analyze preferred dot speed over age
## Test on group averages
Compare simply picking the max for each curve to interpolating with a polynomial

Only look at intermittent motion data

In [None]:
sns.set_palette('viridis',8)
co=sns.color_palette("viridis", 8)

# mean and STD of 'si' per (episode, age); only the first 5 episodes are kept
g_epiAge=dfEpiAn.groupby(['episode','age'],sort=True)[['si']]
var_age=g_epiAge.std().unstack().values.T[:,:5]
si_age=g_epiAge.mean().unstack().reset_index()[:5]

# numerical x-axis for those 5 episodes
si_age['xax']=xax[:5]

fig, axes = plt.subplots(figsize=(3, 3))

# one dotted line per age; yerr=0 draws no visible error bars
axes=si_age.plot(x='xax',
                   y='si',
                   kind='line',
                   marker='o',
                   yerr=0,
                   linestyle=':',
                   ax=axes,
                   legend=True)

axes.set_ylabel('attraction index')
# x-range from 0 to 10% beyond the largest value of the full xax
plt.xlim([0,xax.max()+xax.max()*0.1])
axes.axhline(0,ls=':',color='k')
axes.set_title('group frequency tuning');

In [None]:
# For every age column of si_age: fit a 2nd-order polynomial (visualisation only)
# and record the x position of the largest measured value in maxPos
# (NaN if no value is above zero).
maxPos=[]
# number of curves taken from si_age itself, so the loop can never index past
# the available columns
nAges=si_age.si.shape[1]
fig, ax = plt.subplots(nrows=nAges,
                       ncols=1,
                       sharex=True,
                       sharey=True,
                       figsize=(4.5/inToCm,15/inToCm),
                       squeeze=False)   # always an array, also for a single age
ax=ax.ravel()

x=xax[:5]
# evaluation grid for the fitted curve; deliberately not named `xp`, which is
# the alias of the models.experiment module
xFit = np.linspace(0, 15, 1000)
for i in range(nAges):
    y=si_age.si.values[:,i]
    z=np.polyfit(x,y,2)
    p = np.poly1d(z)
    putativeMax=x[np.argmax(y)]
    if (y.max()>0):
        maxPos.append(putativeMax)
        ax[i].axvline(maxPos[-1])
    else:
        maxPos.append(np.nan)

    ax[i].plot(x, y, '.', xFit, p(xFit), '-')
    ax[i].set_ylim([-.10,.4])

In [None]:
import scipy.stats
pfp.paper()
inToCm=2.54
plt.figure(figsize=(4.5/inToCm,4.5/inToCm))
ax = plt.gca()

# maxPos holds one entry per column of si_age.si, in column order
ys=np.array(maxPos)
notNan=np.isfinite(ys)
ys=ys[notNan]
# Take the ages from those same columns and apply the same NaN mask, so every
# y value is paired with the age it was computed for and both arrays have the
# same length.
xs=np.asarray(si_age.si.columns.values)[:len(maxPos)][notNan]

# linear regression of preferred speed on age
s,i,r,p,std=scipy.stats.linregress(xs,ys)
t=np.linspace(10,30,100)
l=i+s*t
ax.plot(t,l,':',xs,ys,'.')

ax.text(12,8,'R: {:.1f}'.format(r),color='k')
ax.text(12,7,"p = {:.3f}".format(p),color='k')



plt.ylim([1,8])
ax.set_ylabel('Vmax [mm/sec]')
ax.set_xlabel('age (dpf)')
sns.despine()

No tuning in mean curves detectable by analyzing the literal max

# Individual animal analysis

Now, run analogous analysis on individual animals

In [None]:
sns.set_palette('viridis',105)
co=sns.color_palette("viridis", 105)

# one 'si' column per animal, rows are the first 5 episodes
g_epiAn=dfEpiAn.groupby(['episode','animalIndex'],sort=True)[['si']]
si_an=g_epiAn.mean().unstack().reset_index()[:5]
si_an['xax']=xax[:5]

#Manually adding zero attraction at zero speed to all animals to pin the fit
# (the new row is all zeros, i.e. every column including 'xax' is 0)
si_an.loc[-1]=np.zeros(si_an.shape[1])
si_an.index = si_an.index + 1  # shifting index
si_an = si_an.sort_index()  # sorting by index




fig, axes = plt.subplots(figsize=(3, 3))

# one dotted line per animal; yerr=0 draws no visible error bars
axes=si_an.plot(x='xax',
                   y='si',
                   kind='line',
                   marker='.',
                   yerr=0,
                   linestyle=':',
                   ax=axes,
                   legend=True)

axes.set_ylabel('attraction index')
plt.xlim([0,xax[:5].max()+xax[:5].max()*0.1])
axes.axhline(0,ls=':',color='k')
axes.set_title('group speed tuning');

# per-animal legend is removed again
axes.legend_.remove()

# Find preferred frequency for each animal

Polynomial interpolation was very sensitive to the degree of the polynomial, and I could not find convincing criteria to exclude bad fits.
I therefore went back to reading off the literal maximum of each animal's curve.

The interpolation is still shown for visualization but is not used!

In [None]:
# One small panel per animal: measured points, 2nd-order polynomial fit
# (visualisation only) and the position of the literal maximum.
fig, ax = plt.subplots(nrows=7, ncols=15, sharex=True, sharey=True,figsize=(30/inToCm,25/inToCm))
ax=ax.ravel()
col1=['gray','k']

maxPosAllRawMax=[]     # x of the largest measured value, every animal
maxPosAllRawMaxTr=[]   # same, but NaN for animals whose maximum is not above 0.05
ageAll=[]
# plain array, so that np.argmax positions index it positionally
x=np.asarray(si_an.xax).ravel()
siValues=si_an.si.values
animalIds=si_an.si.columns
# evaluation grid for the fitted curve; deliberately not named `xp`, which is
# the alias of the models.experiment module
xFit = np.linspace(0, 2*5.7, 1000)

for i in range(siValues.shape[1]):
    y=siValues[:,i]
    z=np.polyfit(x,y,2)
    p = np.poly1d(z)
    fitVals=p(xFit)
    ax[i].plot(x, y, '.', xFit, fitVals, '-')
    ax[i].set_ylim([-.20,.6])
    ax[i].axis('off')
    ax[i].set_title('Animal '+str(i))
    putativeMax=x[np.argmax(y)]
    maxPosAllRawMax.append(putativeMax) # all animals
    if (y.max()>0.05): #only animals above threshold
        maxPosAllRawMaxTr.append(putativeMax)
        # read the x position of the fit maximum straight off the grid
        interpolated=xFit[np.argmax(fitVals)]
        ax[i].axvline(interpolated)
        ax[i].axvline(maxPosAllRawMaxTr[-1],color='r')
    else:
        maxPosAllRawMaxTr.append(np.nan)
    # age of this animal, looked up from its first row in df
    ageAll.append(df[df.animalIndex==animalIds[i]].age.values[0])

mpa=pd.DataFrame({'age':ageAll,'mp':maxPosAllRawMaxTr}) #mpa: max per animal, only animals above threshold
maxPosIndMn=mpa.groupby(['age']).mean().mp
maxPosIndSTD=mpa.groupby(['age']).std().mp
print([maxPosIndMn,maxPosIndSTD])

mpaAll=pd.DataFrame({'age':ageAll,'mp':maxPosAllRawMax}) #mpa: max per animal, all animals
maxPosIndMnAll=mpaAll.groupby(['age']).mean().mp
maxPosIndSTDAll=mpaAll.groupby(['age']).std().mp
print([maxPosIndMnAll,maxPosIndSTDAll])

In [None]:
# Visualize the per-animal maxima over age. Note the discrete levels of the maxima.

sns.jointplot(mpa.age,mpa.mp,alpha=0.2)

# Figure 2G
## plot the mean best speed for each age and fit a line through the means
## draw standard errors over the animals per age group

For the final figure, plot the means of only the animals that responded above threshold.

Comparison for all animals below

In [None]:
# Figure 2G: mean preferred stimulus speed per age (animals above threshold only)
# and mean own swim speed per age, each with a linear fit.
import scipy.stats
pfp.paper()
inToCm=2.54
plt.figure(figsize=(4.5/inToCm,4.5/inToCm))
ax = plt.gca()
imaxCol='gray'

# regression through the per-age means of the preferred speed
xs=maxPosIndMn.index.values
ys=maxPosIndMn.values
s,i,r,p,std=scipy.stats.linregress(xs,ys)
t=np.linspace(10,30,100)
l=i+s*t

#plot preferred speed and linear fit
ax.plot(t,l,'--',xs,ys,'.',color=imaxCol,markersize=20)
# NOTE(review): the error bars show maxPosIndSTD (standard deviation), while the
# heading of this section speaks of standard errors -- confirm which is intended
(_, caps, _)=ax.errorbar(xs,ys,maxPosIndSTD.values,ls='',color=imaxCol,alpha=0.5)

for cap in caps:
    cap.set_markeredgewidth(1)

# R is shown as the first 4 characters of its string representation
ax.text(20,2,'R: '+str(r)[:4],color=imaxCol,fontsize=10)
ax.text(20,1.2,"p = {:.1e}".format(p),color=imaxCol,fontsize=10)


#plot own speed and linear fit

#bidx=(0<np.ones(dfDR.shape[0]))#&(dfDR.episode=='01js1o4s')
#swimmSpeed=dfDR[bidx].groupby('age').mean()['avgSpeed_smooth'].reset_index()
swimmSpeed=dfDR.groupby('age').mean()['avgSpeed_smooth'].reset_index()

x=swimmSpeed.age.values
y=swimmSpeed.avgSpeed_smooth.values
so,io,ro,po,stdo=scipy.stats.linregress(x,y)
l2=io+so*t
ax.plot(t,l2,'--',color='k')
ax.plot(x,y,'.',color='k')
ax.text(11,7.5,'R: '+str(ro)[:4],color='k',fontsize=10)
ax.text(11,6.7,"p = {:.1e}".format(po),color='k',fontsize=10)


# these two labels sit at y=9.5 and y=8.7, i.e. above the y-limit of 8 set below
ax.text(11,9.5,'Preferred stimulus speed',color=imaxCol,fontsize=10)
ax.text(11,8.7,'Own swim speed',color='k',fontsize=10)

ax.set_ylabel('Speed (mm/sec)')
ax.set_xlabel('Age (dpf)')
plt.ylim([1,8])
sns.despine()
figPath=outputDir+'\\2G_Speed_corr.svg'
plt.savefig(figPath)

### for comparison, plot the fit through all animals

In [None]:
# Same figure as 2G, but the preferred speed is averaged over all animals
# (maxPosIndMnAll / maxPosIndSTDAll); the figure is not saved.
import scipy.stats
pfp.paper()
inToCm=2.54
plt.figure(figsize=(4.5/inToCm,4.5/inToCm))
ax = plt.gca()
imaxCol='gray'

# regression through the per-age means of the preferred speed
xs=maxPosIndMnAll.index.values
ys=maxPosIndMnAll.values
s,i,r,p,std=scipy.stats.linregress(xs,ys)
t=np.linspace(10,30,100)
l=i+s*t

#plot preferred speed and linear fit
ax.plot(t,l,'--',xs,ys,'.',color=imaxCol,markersize=20)
# error bars show the standard deviation per age
(_, caps, _)=ax.errorbar(xs,ys,maxPosIndSTDAll.values,ls='',color=imaxCol,alpha=0.5)

for cap in caps:
    cap.set_markeredgewidth(1)

# R is shown as the first 4 characters of its string representation
ax.text(20,2,'R: '+str(r)[:4],color=imaxCol,fontsize=10)
ax.text(20,1.2,"p = {:.1e}".format(p),color=imaxCol,fontsize=10)


#plot own speed and linear fit

#bidx=(0<np.ones(dfDR.shape[0]))#&(dfDR.episode=='01js1o4s')
#swimmSpeed=dfDR[bidx].groupby('age').mean()['avgSpeed_smooth'].reset_index()
swimmSpeed=dfDR.groupby('age').mean()['avgSpeed_smooth'].reset_index()

x=swimmSpeed.age.values
y=swimmSpeed.avgSpeed_smooth.values
so,io,ro,po,stdo=scipy.stats.linregress(x,y)
l2=io+so*t
ax.plot(t,l2,'--',color='k')
ax.plot(x,y,'.',color='k')
ax.text(11,7.5,'R: '+str(ro)[:4],color='k',fontsize=10)
ax.text(11,6.7,"p = {:.1e}".format(po),color='k',fontsize=10)


# these two labels sit at y=9.5 and y=8.7, i.e. above the y-limit of 8 set below
ax.text(11,9.5,'Preferred stimulus speed',color=imaxCol,fontsize=10)
ax.text(11,8.7,'Own swim speed',color='k',fontsize=10)

ax.set_ylabel('Speed (mm/sec)')
ax.set_xlabel('Age (dpf)')
plt.ylim([1,8])
sns.despine()


from shutil import copy2

def splitall(path):
    """Split `path` into the list of all its components.

    os.path.split is applied repeatedly from the right. The loop stops when
    splitting no longer shortens the path: either the head equals the path
    (root of an absolute path) or the tail equals the path (first component
    of a relative path).
    """
    components = []
    remaining = path
    while True:
        head, tail = os.path.split(remaining)
        if head == remaining:
            # absolute path: nothing left but the root
            components.append(head)
            break
        if tail == remaining:
            # relative path: reached the leftmost component
            components.append(tail)
            break
        components.append(tail)
        remaining = head
    # components were collected right-to-left
    components.reverse()
    return components



# Copy the ROI, position, pair-list and animal-size files of every experiment
# into the destination tree, keeping the trailing part of the source directory.
destRoot = "e:\\b\\LarschAndBaier2018\\RawData\\"
copyPatterns = ['\\ROI*.csv', '\\PositionTxt*.txt', '\\PL*.txt', '\\*anSize.csv']

for i,row in info.iterrows():
    head, tail = os.path.split(row.txtPath)

    # first match of each pattern inside the experiment's directory
    copyList=[glob.glob(head+pattern)[0] for pattern in copyPatterns]

    for f in copyList:
        print(f)
        # number of leading path components to drop: 3 when the path starts with 'E', else 4
        keepSlash = 3 if f[0]=='E' else 4
        toDirectory = destRoot + os.path.join(*splitall(f)[keepSlash:-1])+"\\"
        if not os.path.isdir(toDirectory):
            os.makedirs(toDirectory)

        copy2(f, toDirectory)
