In [1]:
import numpy as np
import pandas as pd
import os


%matplotlib qt
import matplotlib.pyplot as plt
import matplotlib as mp

import matplotlib.cm as cm
from matplotlib.colors import Normalize

from m_gncspline import global_natural_spline
from fill_and_shape import reshape_and_interp


In [2]:
import dill
with open('smooth_pos_v8.pkl', 'rb') as var:
    sm_pos = dill.load(var)
    
with open('raw_dictionaries_v9.pkl', 'rb') as var:
    all_imported = dill.load(var)

In [3]:
#collect corrected marker spacings.
marks_s = [np.array(all_imported[i]['cms'])/100.0 for i in np.arange(len(all_imported))] #marker spacings are in cm; change to an array and divide by 100 to get meters.


#create a table of trial metadata
#build gap sizes, snake number, trial number lists
gs_m = []
gs_svl = []
sn_id = []
trial_ns = []
    
for i in np.arange(len(all_imported)):
    
    d = all_imported[i]
    gs_m.append(d['gs_m'])
    gs_svl.append(d['gs_%'])
    sn_id.append(d['ID'])
    trial_ns.append(d['tn'])
    
#build SVL list
svls = [each['svl'] for each in all_imported]

#build binned gap sizes list
gs_bin = [5 * round(i/5) for i in gs_svl]

bpd = pd.DataFrame(list(zip(trial_ns,gs_svl,gs_bin,sn_id,svls)),columns = ['tn','gsr','gs_bin','ID','svl'])
bpd.head()

Unnamed: 0,tn,gsr,gs_bin,ID,svl
0,38,30.901693,30,94.0,85.0
1,40,36.103761,35,94.0,85.0
2,41,38.615497,40,94.0,85.0
3,42,40.722447,40,94.0,85.0
4,43,40.833542,40,94.0,85.0


# Define the functions

In [4]:
#define marker correcting function

def fix_single_frame(SP_snake, spaces):
    """
    NOTE: must be at least 2 markers present in the frame.
    purpose: corrects an array of snake position data from a single frame, 
    and corresponding list of marker spacings, for markers that are missing 
    because they weren't visible in that frame
    
    snake = n x 3 array containing marker position data in 3 dimensions for n markers, measured in meters
    mark_spaces = list of n-1 spacings in meters between n markers
    """

    del_list = []
    n_snake = np.copy(SP_snake)
    n_sp = np.array(spaces)

    for t in np.arange(len(n_sp)+1):
        mark = n_snake[t,0]
        if np.isnan(mark): #make a list of markers to delete - delete ones with nans
            del_list.append(t)

    for l in reversed(del_list):
        n_snake=np.delete(n_snake,l,0) #delete the rows in the snake array
        if l==len(n_sp): #if the index corresponds to the last marker in the spacing array, just delete it
            n_sp=np.delete(n_sp,l-1)
        else:
            n_sp[l]=n_sp[l-1]+n_sp[l] #if the index corresponds to some other marker, get the appropriate spacing
            n_sp=np.delete(n_sp,l-1) #and then delete.

    
    return n_snake, n_sp


def find_frames(snake,drop):
    """takes a position file and a transition frame and finds the low point and high point. 
    
    inputs:
    snake = n frames by m markers by 3 dimensions file (meters), origin marker is origin of coord system
    drop = frame index of transition frame. 
    
    outputs: 
    highpoint = max position of the head after the transition frame
    lowpoint = lowest non-head marker position between entry and highpoint"""
    
    ###Section 1: Identify highpoint - max head position. 
    head_z = snake[:,0,2]  #isolate the head of the snake
    maxpt = np.nanargmax(head_z[drop:]) #find the frame where the max occurs (AFTER drop point)
    highpoint = maxpt+drop
  
        
    ### Section 2: evaluation frame: the lowest body point before the global max of head position
    lows=[]
        
    for f in np.arange(highpoint+1): #go through each frame between start and the globalmax frame
        vals = snake[f,1:,2] #Z position of body marks in that frame, excluding the head
        lowval = np.nanmin(vals)
        lows.append(lowval) #list the lowest z position of all markers in that frame.
        
    lowpoint=np.argmin(lows) #find the frame with the lowest poisition of all.
    
    return(lowpoint,highpoint)
    
def snake_shape(snake, spaces, svl, frame):
    """takes a snake file, list of marker spacings, 
    and a frame to evaluate at. Returns arc height and loop depth.
    
    inputs:
    snake = n frames by m markers by 3 dimensions file (meters), origin marker is origin of coord system
    marks = list of m marker spacings; includes distance from nosetip to first marker as first entry (meters)
    svl = snake snout vent length (meters)
    frame = whether to evaluate AH and LD at the lowframe or the highframe. Can be 0 (lowframe) or 1 (highframe) 
    
    outputs:
    AH = distance from head to the lowest point at the lowpoint
    LD = distance from origin end to the lowest body point at the lowpoint
    relative versions are in terms of %svl, regular are in meters.
    """
    ### Section 1: Create a spline across the markers in the frame.
    SP_snake = np.copy(snake[frame,:,:])
    sp = spaces[1:]

    n_snake,n_sp = fix_single_frame(SP_snake,sp) #correct for any missing markers in the frame.

    #now make the spline
    out = global_natural_spline(n_snake,np.array(n_sp),1000) #fit a spline
    r, dr, ddr, dddr, ts, ss, seg_lens, lengths_total, idx_pts = out    

    ###Section 2: Calculate AH and LD using the spline

    head_z = r[0,2]
    low_z = r[1:,2].min()
    orig_z = 0

    AH = head_z-low_z
    LD = orig_z-low_z

    rel_AH = AH/svl*100
    rel_LD = LD/svl*100

    
    return AH, LD, rel_AH, rel_LD

In [5]:
#test the lowpoint finder to verify all lowpoints are for markers in the gap (as opposed to on the branch.)
lows = []
trials = []

for ind in np.arange(len(sm_pos)):
    snake = sm_pos[ind]/1000.0 #change mm to m
    trial = all_imported[ind]['tn']
    
    frames = list(all_imported[ind]['fn'])
    dp = all_imported[ind]['dp']
    
    if not np.isnan(dp):
        dpi = frames.index(int(dp))
        BC = 1
        low, high = find_frames(snake,dpi)
        lows.append(low)
        trials.append(trial)
    
fig,ax = plt.subplots(10,13)

for i in np.arange(10):
    for t in np.arange(13):
        n = 10*i+t
        tnum = trials[n]
        p = trial_ns.index(tnum)
        
        snake = sm_pos[p][lows[n],:,:] #appropriate snake in appropriate frame
        ax[i,t].scatter(snake[:,0],snake[:,2])

In [6]:
#based on visual inspection, trials 125, 220, and 247-9 the low point is for a marker still on the branch. 
#I will therefore identify the lowpoint for these trials manually by inspecting the X and Z positions of 
#all markers side by side. 

fig,ax = plt.subplots(2,5,sharex='col')
trial_set = [125, 220, 247, 248, 249]
ax[0,0].set_ylabel('X position')
ax[1,0].set_ylabel('Z position')

for i in np.arange(5):
    tind = trial_ns.index(trial_set[i])
    snake = sm_pos[tind]

    ax[0,i].plot(snake[:,:,0])
    ax[0,i].axhline(0)
    ax[0,i].set_title(trial_set[i])

    
    ax[1,i].plot(snake[:,:,2])
    
#for trial 125, lowpoint is the lowest position of marker 3, after 800 frames.
#for trial 220, lowpoint is the lowest position of marker 2, around 11600 frames.
#for trial 247, lowpoint is the lowest position of marker 4, around 450 frames.
#for trial 248, lowpoint is the lowest position of marker 4, around 250 frames.
#for trial 249, lowpoint is the lowest position of marker 1, around 190 frames.

In [7]:
#identify lowest value for each of the specified markers AFTER the marker has entered the gap (i.e. on branch values don't count)
trial_set = [125, 220, 247, 248, 249]
marker_set = [3,2,4,4,1]

new_lows = []

for i in np.arange(5):
    tind = trial_ns.index(trial_set[i])
    m = marker_set[i]
    snake = sm_pos[tind]
    a,b,c = np.shape(snake)
    
    gapvals = []
    
    for f in np.arange(a):
        xval = snake[f,m,0]
        if xval > 0: #making sure the marker is in the gap. .
            gapvals.append(snake[f,m,2])
        else:
            gapvals.append(np.nan)
    
    low = np.nanargmin(gapvals) #finding lowpoint.
    new_lows.append(low)

new_lows

[909, 11616, 473, 265, 178]

In [8]:
#check whether it found the right frames

fig,ax = plt.subplots(2,5,sharex='col')
trial_set = [125, 220, 247, 248, 249]
marker_set = [4,3,5,5,2]
ax[0,0].set_ylabel('X position')
ax[1,0].set_ylabel('Z position')

#check1
for i in np.arange(5):
    tind = trial_ns.index(trial_set[i])
    m = marker_set[i]
    snake = sm_pos[tind]

    ax[0,i].plot(snake[:,:,0])
    ax[1,i].plot(snake[:,:,2])
    
    ax[0,i].axhline(0)
    ax[0,i].set_title(trial_set[i])
    ax[0,i].axvline(new_lows[i],color='r')
    ax[1,i].axvline(new_lows[i],color='r')

#check 2
fig1, ax1 = plt.subplots(5)
for i in np.arange(5):
    tind = trial_ns.index(trial_set[i])
    snake = sm_pos[tind]

    ax1[i].scatter(snake[new_lows[i],:,0],snake[new_lows[i],:,2])
    ax1[i].set_title(trial_set[i])

    
#looks good! manually set these lowpoints below. 

# Categorize 
## Categorization, manual assignment of special cases


In [30]:
#categorization process
low_AH = []
low_LD = []
high_AH = []
high_LD = []
bcodes = []
low_points = []
high_points = []

for ind in np.arange(len(sm_pos)):
    snake = sm_pos[ind]/1000.0 #change mm to m
    spaces = marks_s[ind]
    svl = all_imported[ind]['svl']/100.0 #change cm to m
    
    frames = list(all_imported[ind]['fn'])
    dp = all_imported[ind]['dp']
    tnum = all_imported[ind]['tn']
    
    if not np.isnan(dp):
        dpi = frames.index(int(dp))
        BC = 1
        low, high = find_frames(snake,dpi)
        
        #for five trials, the automated method picks out a the wrong frame. Manually set low points.
        off_list = [125,220,247,248,29]
        new_lows = [909, 11616, 473, 265, 178]
        
        if tnum in off_list:
            el = off_list.index(tnum)
            low = new_lows[el]
        
        AH, LD, rAH, rLD = snake_shape(snake,spaces,svl,low)
        AH1, LD1, rAH1, rLD1 = snake_shape(snake,spaces,svl,high)
    
    else:
        BC = 0
        AH = np.nan
        LD = np.nan
        rAH = np.nan
        rLD = np.nan
        AH1 = np.nan
        LD1 = np.nan
        rAH1 = np.nan
        rLD1 = np.nan
        low = np.nan
        high = np.nan
    
    bcodes.append(BC)    
    low_AH.append(rAH)
    low_LD.append(rLD)
    high_AH.append(rAH1)
    high_LD.append(rLD1)
    low_points.append(low)
    high_points.append(high)

In [31]:
#edit bcodes for "recovery" trials so they can be differentiated in graphs.
trial_ns = [bpd['tn'][i] for i in np.arange(len(sm_pos))]
rec_tns = [67,68,70,71,72,73,74]
rec_inds = [trial_ns.index(i) for i in rec_tns]
for val in rec_inds:
    bcodes[val] = 2
    
#edit bcodes for non-cantilevers that don't have transition frames listed.
no_dfs = [275,276,277,281,282,283,289]
ndf_inds = [trial_ns.index(i) for i in no_dfs]
for val in ndf_inds:
    bcodes[val] = 1

#remove LD for trials with missing origin markers.
missing_o = [trial_ns.index(i) for i in [94,93,85]] #trials with missing origin markers
for each in missing_o:
    low_LD[each] = np.nan
    high_LD[each] = np.nan

In [33]:
bpd['beh_c'] = bcodes #used for coloring markers in graphs, so that recovery trials are noted separately from other non-cantilevers

behs = []
for i in np.arange(len(bcodes)):
    if bcodes[i] == 2:
        behs.append(1)
    else:
        behs.append(bcodes[i])

bpd['beh'] = behs #same as original bcodes except recovery trials are listed with the non-cantilevers (as 1s)
bpd['ldl'] = low_LD
bpd['ldh'] = high_LD
bpd['ahh'] = high_AH
bpd['ahl'] = low_AH

## Straight line variation

How much does the snake's body deviate from a straight line?

In [33]:
max_c = []

for t in range(0,len(sm_pos)):
    snake = sm_pos[t]/1000 #mm to meters
    spaces = marks_s[t][1:] #already in meters; cut out the space from nosetip. 
    dev = []
    
    for frame in np.arange(len(snake)):
        
        #correct for missing markers
        n_snake, n_sp = fix_single_frame(snake[frame,:,:], spaces)
        
        #find straight line distance between first and last visible marker. 
        diff = n_snake[0,:] - n_snake[-1,:]
        straight_length = np.linalg.norm(diff) 
        
        #sum the marker spacings for the total curved length between the first and last visible marker. 
        curve_length = np.sum(n_sp)
        
        #difference
        dev.append(curve_length-straight_length)
        
         
    max_c.append(np.max(dev))

In [46]:
#exclude recovery trials - they are more curvy than normal non-cantilevers (when the snake is falling)
rec_tns = [67,68,70,71,72,73,74]
rec_inds = [trial_ns.index(i) for i in rec_tns]

for val in rec_inds:
    max_c[val] = np.nan

In [47]:
bpd['maxC'] = max_c
bpd['rel_maxC'] = bpd['maxC']/(bpd['svl']/100)*100 #convert svl to M from cm, then *100 for percentage.

## Determine overall positional deviation
What is the overall variation in position, averaged over each marker, along the y and z axes? 

In [37]:
#generate average data
avg_ydevs = []
avg_zdevs = []

for each in sm_pos:
    things = []
    for m in np.arange(np.shape(each)[1]):
        y = np.nanmax(each[:,m,1])-np.nanmin(each[:,m,1])
        things.append(y)
    yresult = np.nanmean(things)
    avg_ydevs.append(yresult)
    
for each in sm_pos:
    things = []
    for m in np.arange(np.shape(each)[1]):
        z = np.nanmax(each[:,m,2])-np.nanmin(each[:,m,2])
        things.append(z)
    zresult = np.nanmean(things)
    avg_zdevs.append(zresult)
    
#add to dataframe
bpd['avg_ydev'] = avg_ydevs #in m
bpd['avg_zdev'] = avg_zdevs #in m

# Relationship with distance travelled

In [38]:
dist_res = []
#define: total distance travelled by the head from low point to landing. 

for i in np.arange(len(sm_pos)):
 
    if not bcodes[i]==0 and not i in ndf_inds:
        snake = sm_pos[i]/1000.0 #to m
        lp = low_points[i]
        svl = all_imported[i]['svl']/100.0 #to m
        
        dist = snake[-1,0,:]-snake[lp,0,:] #how far the snake traveled from low point to landing.
        res_d = np.linalg.norm(dist)
        res_d = res_d/svl*100 #%svl
        dist_res.append(res_d)
        
    else:
        dist_res.append(np.nan)

In [39]:
bpd['dist'] = dist_res

## Amount of overshoot

In [40]:
overshoot = []
gap_size = []
dt = []
#define: amount that the snake overshoots the branch when jumping

for i in np.arange(len(sm_pos)):
    if bpd['beh'][i]==1:
        svl = bpd['svl'][i]
        la = np.linalg.norm(sm_pos[i][-1,0,:])/10.0 #distance of the head from the origin marker at landing, in cm
        gs = all_imported[i]['gs_m']*100 #gap size in cm
        diff = la-gs #overshoot
        diff = diff/svl
        overshoot.append(diff)
        gap_size.append(gs/svl*100)
        dt.append(la)
    else:
        overshoot.append(np.nan)
        gap_size.append(np.nan)

In [44]:
bpd['over'] = overshoot

In [45]:
bpd.head()

Unnamed: 0,tn,gsr,ID,svl,beh_c,beh,ldl,ldh,ahh,ahl,...,meanC,rel_minC,rel_maxC,rel_meanC,hydev,hzdev,avg_ydev,avg_zdev,dist,over
0,38,30.901693,94.0,85.0,0,0,,,,,...,0.045896,2.469912,12.205313,5.399588,0.039948,0.056896,54.007663,19.791069,,
1,40,36.103761,94.0,85.0,0,0,,,,,...,0.034534,1.591998,5.659286,4.062786,0.024263,0.019973,41.809502,16.172113,,
2,41,38.615497,94.0,85.0,0,0,,,,,...,0.024302,1.589866,13.813496,2.859043,0.022832,0.012152,49.16831,20.005959,,
3,42,40.722447,94.0,85.0,0,0,,,,,...,0.014696,0.782665,6.411876,1.728893,0.038437,0.019935,55.43357,21.442753,,
4,43,40.833542,94.0,85.0,0,0,,,,,...,0.023598,0.710341,8.148959,2.776216,0.023422,0.015038,50.197478,22.362443,,


# Save the data

In [46]:
cwd = os.getcwd()
bpd.to_csv(cwd+'/R files/Summary Datasets/bdata.csv',index=False,na_rep='NaN')

## Statistics for Curviness

Using scipy to get initial fit; then use R to create a model that includes random factors. 

In [53]:
from scipy.optimize import curve_fit
from scipy.stats.distributions import  t

def sigmoid(x, L, x0, k, b):
    y = L / (1 + np.exp(-k*(x-x0)))+b
    return y
def residuals(x,y,p):
    L, x0, k, b = p
    return y - sigmoid(x,L,x0,k,b)

xdata = bpd.dropna()['gsr']
ydata = bpd.dropna()['rel_maxC']

plt.figure()
plt.scatter(xdata,ydata)

p0 = [max(ydata), np.median(xdata),1,min(ydata)] # this is an mandatory initial guess

pars, pcov = curve_fit(sigmoid, xdata, ydata, p0, method='dogbox')
alpha = 0.05 # 95% confidence interval = 100*(1-alpha)

n = len(ydata)    # number of data points
pn = len(pars) # number of parameters

dof = max(0, n - pn) # number of degrees of freedom

# student-t value for the dof and confidence level
tval = t.ppf(1.0-alpha/2., dof) 

par_names = ['L','x0','k','b']
print('fn: y = L / (1 + e^(-k*(x-x0)))+b')
for i, p, var in zip(range(n), pars, np.diag(pcov)):
    sigma = var**0.5
    low = p - sigma*tval
    high = p+sigma*tval
    print( '''\
    {n} = {est} [{low},  {high}]
    '''.format(n=par_names[i],est=p,low=low,high=high))

xp = np.linspace(20, 120, 500)
pxp = sigmoid(xp,pars[0],pars[1],pars[2],pars[3])

# Plot the results
plt.plot(xp, pxp, '-')
plt.xlabel('Gap Size (%SVL)')
plt.ylabel('Max Curavture (%SVL)') 
plt.grid(True)

print(np.mean(np.abs(residuals(xdata,ydata,pars))))

fn: y = L / (1 + e^(-k*(x-x0)))+b
    L = 59.438025001046256 [47.67883411862928,  71.19721588346323]
    
    x0 = 60.55142319435201 [57.04255463828075,  64.06029175042327]
    
    k = 0.118802440119745 [0.07017561694031285,  0.16742926329917715]
    
    b = 9.921649650155436 [1.7348394504020046,  18.10845984990887]
    
9.441999571180572
