In [4]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat

### a. Load data

In [5]:
# Session folder that holds the preprocessed recordings read and written below.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
folder = '/Users/daiyiluo/Downloads/KQ095_210205' #KQ086_210110' #

# load behavior data
# np.load on an .npz archive returns a lazily-reading NpzFile that keeps the
# file open; the context manager closes it deterministically once the three
# arrays (speed, position and licking, judging by their later use) are read.
with np.load(f'{folder}/S.npz') as data:
    spd = data['spd']
    dis = data['dis']
    lk = data['lk']

del data

spd.shape, dis.shape, lk.shape

((96593,), (96593,), (96593,))

In [13]:
# load neural activity data
# Read 'spks' inside a context manager so the .npz archive is closed explicitly
# instead of relying on garbage collection of a temporary NpzFile.
with np.load(f'{folder}/spk.npz') as spk_file:
    spks = spk_file['spks']
del spk_file
# Scale by 50 and round to whole numbers.
# NOTE(review): the factor 50 is undocumented here -- presumably it converts the
# stored values into spike counts; confirm against the code that wrote spk.npz.
spks = np.round(spks*50)

# Load the raw calcium traces; the dictionary is consumed by the next cell.
data = loadmat(f'{folder}/dFishQ_raw.mat')
data.keys() # check the name of the variable

dict_keys(['__header__', '__version__', '__globals__', 'B'])

In [7]:
# Name of the variable inside the loaded .mat dictionary that holds the calcium traces.
key = 'B' # 'B' for CA1 data, 'dFishQ_raw' for CA3 data
traces = data[key]
# Drop the loadmat dictionary once the traces are extracted. Because `data` is
# deleted here, re-running this cell requires re-running the loading cell first.
del data
print(f'Shape of calcium traces: {traces.shape}')

Shape of calcium traces: (96593, 219)


### b. Sort into time bins

In [8]:
# Width of one time bin in milliseconds (334 ms is roughly 3 Hz; 500 ms would be 2 Hz).
new_tb = 334
# Raw samples per time bin, truncated to a whole number.
# NOTE(review): the factor 30 is presumably the acquisition rate in samples per
# second -- confirm.
samples_per_bin = 30*new_tb/1000
ratio = int(samples_per_bin)
ratio

10

In [18]:
# Sample speed
# Moving average over `ratio` consecutive samples, then keep every `ratio`-th
# value, so each retained entry is the mean of one non-overlapping time bin.
avgspd = np.convolve(spd, np.ones(ratio)/ratio, mode='valid')
avgspd = avgspd[::ratio]

# Sample position
# Pick the sample round(ratio/2) into each time bin. The index vector is built
# from the number of speed bins instead of from len(dis): the former
# np.arange(start, len(dis)-start, ratio) could come out one element short for
# some recording lengths (e.g. a length that is an exact multiple of an even
# `ratio`), which made the later boolean masking of avgdis by avgspd fail.
idx_dis = round(ratio/2) + ratio*np.arange(len(avgspd))
avgdis = dis[idx_dis]

# Every time bin must have exactly one speed value and one position value.
assert avgspd.shape == avgdis.shape, 'speed and position time bins are misaligned'

avgspd.shape, avgdis.shape

((9659,), (9659,))

---------- Trace -----------

In [11]:
# Sample traces
# Average every trace column over non-overlapping windows of `ratio` samples:
# moving average first, then keep every `ratio`-th point.
kernel = np.ones(ratio)/ratio
avgtrc = np.vstack([
    np.convolve(traces[:, col], kernel, mode='valid')[::ratio]
    for col in range(traces.shape[1])
]).T  # transpose back to (time bins, columns of `traces`)

avgtrc.shape

(9659, 219)

---------- Spike -----------

In [14]:
# Sample spks
# Same windowing as for the traces, but the window mean is multiplied by
# `ratio` again, which turns it into the per-bin sum of each column.
kernel = np.ones(ratio)/ratio
avgspk = np.vstack([
    np.convolve(spks[:, col], kernel, mode='valid')[::ratio]*ratio
    for col in range(spks.shape[1])
]).T  # transpose back to (time bins, columns of `spks`)

avgspk.shape

(9659, 219)

### c. Exclude immobile time bins 

In [15]:
spd_thr = 0.02 # 0.02 for 1s tb, 0.03 for 200/500 ms
pass_thr = np.where(avgspd>spd_thr)[0]
%matplotlib
plt.plot(avgspd)
plt.plot(pass_thr,avgspd[pass_thr],'*')
plt.plot([0,len(avgspd)], [spd_thr,spd_thr])

Using matplotlib backend: MacOSX


[<matplotlib.lines.Line2D at 0x7fbb837645e0>]

In [19]:
# Keep only the time bins whose average speed exceeds the threshold.
moving = avgspd > spd_thr
Distance = avgdis[moving]
# Raw-sample index of every kept bin; needed for the licking data because it
# is not sorted into time bins.
idx = idx_dis[moving]
Distance.shape, idx.shape

((2780,), (2780,))

---------- Trace -----------

In [20]:
# Rows of the binned traces whose time bin passes the speed threshold.
moving_bins = avgspd > spd_thr
Trace = avgtrc[moving_bins]
Trace.shape

(2780, 219)

---------- Spike -----------

In [21]:
# Rows of the binned spike data whose time bin passes the speed threshold.
moving_bins = avgspd > spd_thr
Spike = avgspk[moving_bins]
Spike.shape

(2780, 219)

In [None]:
# Show all spike data
# One line per column of Spike: each column is scaled by its own maximum and
# shifted up by its column index so the lines stack instead of overlapping.

for i in range(Spike.shape[1]):
    column = Spike[:, i]
    peak = np.max(column)
    # A column without any spikes has a maximum of 0; dividing by it would give
    # NaN (plus a RuntimeWarning) and the line would silently disappear, so a
    # flat baseline is drawn for it instead.
    scaled = column/peak if peak > 0 else np.zeros(len(column))
    plt.plot(scaled + i)

### d. Sort into spatial bins and locate laps

In [22]:
nbin = 50 # expected number of spatial bins
# Positions are snapped to multiples of 1/n, where n is the whole number of
# such steps per unit of distance derived from the largest observed position.
bin_width = avgdis.max()/nbin
n = np.floor(1/bin_width)
Distance0 = np.round(Distance*n)/n # round the distance
print(f'Number of spatial bins: {np.unique(Distance0).shape}\nLength of data: {Distance0.shape}')

Number of spatial bins: (50,)
Length of data: (2780,)


In [23]:
Distance = Distance0

# A lap end is a position where the rounded distance drops by more than 1.6
# from one kept time bin to the next.
d = np.diff(Distance)
lap_end = np.where(d<-1.6)[0] # Locate the point where the laps end
if lap_end.size == 0:
    raise ValueError('No lap end found: the distance never drops by more than 1.6 between time bins')
f=lap_end[0]
Distance = Distance[f:]

# Re-derive idx from the untouched upstream arrays and align it to the tail of
# the speed-filtered bins. Slicing the previous `idx` in place trimmed it again
# on every re-run of this cell while `Distance` was rebuilt from Distance0, so
# the two arrays drifted apart. All trimming in this notebook removes leading
# bins only, hence the last len(Distance) entries are the matching ones.
idx_moving = idx_dis[avgspd>spd_thr]
assert len(idx_moving) >= len(Distance), 'more distance bins than speed-filtered time bins'
idx = idx_moving[len(idx_moving)-len(Distance):]
print(f'Length of data after excluded the incomplete first lap: {Distance.shape}\nWhere the first complete lap starts: {f}')

Length of data after excluded the incomplete first lap: (2716,)
Where the first complete lap starts: 64


---------- Trace -----------

In [24]:
# Drop the leading bins of the incomplete first lap. The rows are re-derived
# from avgtrc and aligned to the tail of the speed-filtered bins, because
# slicing the previous `Trace` in place removed further rows on every re-run.
# All trimming removes leading bins only, so the last len(Distance) rows match.
moving_trc = avgtrc[avgspd>spd_thr]
Trace = moving_trc[len(moving_trc)-len(Distance):]
# Express the lap ends relative to the first one (which becomes 0).
lap_end = lap_end-lap_end[0]
# Refuse to write a file whose arrays no longer describe the same time bins.
assert Trace.shape[0] == Distance.shape[0] == idx.shape[0], 'Trace, Distance and idx are out of sync'
np.savez(f'{folder}/timebin_{new_tb}.npz', Trace=Trace, Distance=Distance, lap_end=lap_end, idx=idx)

Trace.shape, Distance.shape, idx.shape

((2716, 219), (2716,), (2716,))

---------- Spike -----------

In [None]:
# Drop the leading bins of the incomplete first lap. The rows are re-derived
# from avgspk and aligned to the tail of the speed-filtered bins, because
# slicing the previous `Spike` in place removed further rows on every re-run.
# All trimming removes leading bins only, so the last len(Distance) rows match.
moving_spk = avgspk[avgspd>spd_thr]
Spike = moving_spk[len(moving_spk)-len(Distance):]
# Refuse to write spike data that no longer lines up with the distance bins.
assert Spike.shape[0] == Distance.shape[0], 'Spike and Distance are out of sync'
np.save(f'{folder}/timebin_{new_tb}_spks.npy', Spike)

Spike.shape, Distance.shape, idx.shape