## Load necessary libraries and paths

In [None]:
import os
import sys
sys.path.append('../../swdb_2019_tools')
import spikeutils
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.ndimage.filters import gaussian_filter1d
from IPython.core.display import display, HTML
# Inject a CSS rule so the notebook's .container element uses 90% of the page width.
display(HTML("<style>.container { width:90% !important; }</style>"))

from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
from allensdk.brain_observatory.ecephys import ecephys_session
%matplotlib inline

# fix slow autocomplete
%config Completer.use_jedi = False

import platform
platstring = platform.platform()

# Where the workshop data lives on each recognised platform, keyed by a
# substring of the platform string. Entries are tried in order; first hit wins.
knownRoots = [
    ('Darwin', "/Volumes/Brain2019/"),  # OS X
    ('Windows', "E:/"),                 # Windows (replace with the drive letter of USB drive)
    ('amzn1', "/data/"),                # AWS
]

# Fallback when nothing above matches: your own linux platform.
# EDIT location where you mounted hard drive
data_root = "/media/$USERNAME/Brain2019/"
for marker, root in knownRoots:
    if marker in platstring:
        data_root = root
        break

# Open the project cache from the manifest under data_root and list its sessions.
manifest_path = os.path.join(
    data_root,
    "dynamic-brain-workshop/visual_coding_neuropixels/2019/manifest.json",
)
cache = EcephysProjectCache.fixed(manifest=manifest_path)
sessions = cache.get_sessions()

from scipy.interpolate import RegularGridInterpolator

## Choose a brain region. This will acquire all sessions with the given brain region
> ### Brain Region Acronyms: CA, DG, MB, TH, VISal, VISam, VISl, VISp, VISpm, VISrl, None

In [None]:
# Choose all the sessions that have the given brain region
brainRegion = 'MB'

# np.isin on a scalar returns a 0-d array, so a plain list of those results is
# not a list of real booleans. Collapse each result with bool() and build an
# explicit 1-D boolean array so the DataFrame lookup below is unambiguously a
# row mask (one entry per session) rather than a list of column labels.
hasRegion = np.array(
    [bool(np.isin(brainRegion, acronyms)) for acronyms in sessions.structure_acronyms],
    dtype=bool,
)
sessionsToUse = sessions[hasRegion]
sessionsToUse

## Get sample session from above sessions and bin/smooth all cells from specified brain region to see output

In [None]:
# Get sample session to try binning and smoothing on: the first session that
# contains brainRegion.
session = cache.get_session_data(sessionsToUse.index[0])
units = session.units
regionUnits = units[units.structure_acronym == brainRegion]
spike_times = session.spike_times

# Keep only the spike trains of units located in the chosen region.
regionSpikes = {unit: spike_times[unit] for unit in regionUnits.index}
unitNames = np.asarray(list(regionSpikes.keys()))
toBin = list(regionSpikes.values())

startTime = 0
endTime = np.max([np.max(x) for x in spike_times.values()])  # Last spike across all cells in this session
binsize = 1000
# NOTE(review): the 'startime' keyword spelling is kept exactly as this notebook
# calls the helper; confirm against spikeutils.spiketimes_to_2D_rates.
binned = spikeutils.spiketimes_to_2D_rates(toBin, startime=startTime, stoptime=endTime, binsize=binsize)

# Gaussian-smooth each unit's binned trace independently (one row per unit).
sigma = 1
smoothed = np.zeros(binned.shape)
for cell in range(binned.shape[0]):
    smoothed[cell, :] = gaussian_filter1d(binned[cell, :], sigma=sigma)  # Smoothed firing rate signal

# Plot before and after smoothing
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(18, 10))
ax[0].set_title('Before Smoothing', fontsize=20)
ax[1].set_title('After Smoothing', fontsize=20)

# Show at most five units; regions with fewer units would otherwise raise an
# IndexError when indexing rows that do not exist.
numToPlot = min(5, binned.shape[0])
if numToPlot > 0:
    # Shared vertical offset so stacked traces do not overlap in either panel.
    maxVal = np.max([smoothed.max(), binned.max()])
    for i in range(numToPlot):
        ax[0].plot(binned[i, :] + i*maxVal)
        ax[1].plot(smoothed[i, :] + i*maxVal)

## Batch process spike times for all the sessions with the given brain region and create firing rates
> ### Specify whether you would like the tracesToSave to be 'binned', 'smoothed', or 'normalized'
- #### *binned*: binned based on binsize
- #### *smoothed*: binned as above and further smoothed based on sigma
- #### *normalized*: binned, smoothed, and then normalized to a maximum of 1

In [None]:
# Which trace to write out in the batch cell: 'binned', 'smoothed', or 'normalized'
tracesToSave = 'binned'

# Binning window. endTime is the latest spike found in the spike_times currently
# held in the notebook namespace, and is then reused for every session in the
# batch cells below.
startTime = 0
lastSpikePerUnit = [np.max(unitSpikes) for unitSpikes in spike_times.values()]
endTime = np.max(lastSpikePerUnit)

# Bin width and Gaussian smoothing width passed to the batch cells
binsize, sigma = 1000, 1

In [None]:
# Batch export of firing rates: for every selected session, bin the spike times
# of the units in brainRegion, smooth and normalize them, and write the variant
# named by tracesToSave to a CSV whose first column is the RSU/FSU cell type.
#
# startTime, endTime, binsize and sigma are read from the notebook namespace and
# are the same for every session processed here.

# Fail fast on a bad tracesToSave value instead of loading a whole session first.
if tracesToSave not in ('binned', 'smoothed', 'normalized'):
    raise Exception('Did not appropriately specify which data to save in tracesToSave. This value should be binned, smoothed, or normalized as a string.')

# to_csv does not create missing folders, so make sure the target exists.
outputDir = './buzsaki_data/'
os.makedirs(outputDir, exist_ok=True)

for sesh in sessionsToUse.index:
    session = cache.get_session_data(sesh)
    units = session.units
    regionUnits = units[units.structure_acronym == brainRegion]
    spike_times = session.spike_times
    spike_times = {unit: spike_times[unit] for unit in regionUnits.index}
    unitNames = np.asarray(list(spike_times.keys()))
    toBin = list(spike_times.values())

    # Create RSU / FSU split: waveform duration below 0.4 is labelled 'FSU',
    # everything else 'RSU'. unitNames follows regionUnits.index order, so the
    # whole column can be classified in one vectorized step.
    cellType = np.where(regionUnits.waveform_duration.values < 0.4, 'FSU', 'RSU')
    cellTypeDF = pd.DataFrame(data={'CellType': cellType}, index=unitNames.astype(int))

    # Bin spike times to create firing rates
    binned = spikeutils.spiketimes_to_2D_rates(toBin, startime=startTime,
                                               stoptime=endTime, binsize=binsize)

    # Smooth firing rates, one unit (row) at a time
    smoothed = np.zeros(binned.shape)
    for cell in range(binned.shape[0]):
        smoothed[cell, :] = gaussian_filter1d(binned[cell, :], sigma=sigma)

    # Normalize firing rates to max of 1; rows whose max is 0 are left untouched
    # to avoid dividing by zero.
    normalized = smoothed.copy()
    rowMax = smoothed.max(axis=1) if smoothed.shape[0] else np.zeros(0)
    scalable = rowMax != 0
    normalized[scalable] = smoothed[scalable] / rowMax[scalable, np.newaxis]

    # Save: pick the requested trace and the filename suffix that describes it.
    traces = {'binned': binned, 'smoothed': smoothed, 'normalized': normalized}
    suffixes = {
        'binned': '',
        'smoothed': '_GaussianSmoothed_sigma' + str(sigma),
        'normalized': '_GaussianSmoothed_sigma' + str(sigma) + 'normalized',
    }
    fileName = (outputDir + brainRegion + '_firingrates' + '_session_' + str(sesh) + "_binned" +
                str(startTime) + "to" + str(endTime) + "Secs" + str(binsize) + "binsize" +
                suffixes[tracesToSave] + '.csv')
    unitsToOutput = pd.concat(
        [cellTypeDF, pd.DataFrame(traces[tracesToSave], index=unitNames.astype(int))],
        axis=1)
    unitsToOutput.to_csv(fileName)

## Batch process stimulus epochs and running times
> ### Saves labels (stimulus epochs as ints), labelNames (stimulus epochs as strings), and runningSpeed (vector of floats)

In [None]:
# Batch export of per-bin stimulus labels and running speed for every selected
# session. Three CSVs are written per session: integer labels, label names, and
# the running-speed trace.
#
# startTime, endTime and binsize are read from the notebook namespace and are the
# same for every session processed here.

# np.savetxt does not create missing folders, so make sure the target exists.
outputDir = './buzsaki_data/'
os.makedirs(outputDir, exist_ok=True)

# Epoch times are converted to bin indices as time*1000/binsize, so one bin
# spans binsize/1000 in the units of those times.
binSeconds = binsize / 1000
numBins = int(endTime*1000/binsize)

for sesh in sessionsToUse.index:
    session = cache.get_session_data(sesh)
    stim_epochs = session.get_stimulus_epochs()

    # Integer code per epoch plus the unique stimulus names the codes index into.
    epochlabels, epochuniques = pd.factorize(stim_epochs.stimulus_name)
    epochmapping = {key: value for key, value in zip(np.arange(len(epochuniques)), epochuniques)}

    # assign labels
    # NOTE(review): bins not covered by any epoch keep the initial value 0, which
    # is also the code of the first stimulus name — confirm this is intended.
    labels = np.zeros(numBins, dtype=int)
    epochBounds = zip(stim_epochs.start_time.values, stim_epochs.stop_time.values, epochlabels)
    for epochTimeStart, epochTimeStop, epochCode in epochBounds:
        epochstart = (epochTimeStart*1000)/binsize
        epochend = (epochTimeStop*1000)/binsize
        labels[int(epochstart):int(epochend)] = epochCode
    labels_asNames = [epochmapping[i] for i in labels]

    # Running speed: interpolate velocity at the midpoint of each sample interval
    # onto a regular grid with one sample per bin.
    running_speed = session.running_speed
    intervalStarts = running_speed.start_time.values
    runDiffTimes = (running_speed.end_time.values - intervalStarts)/2 + intervalStarts
    interpolator = RegularGridInterpolator([runDiffTimes],
                                           running_speed.velocity.values)
    firstSample = np.amin(runDiffTimes)
    lastSample = np.amax(runDiffTimes)
    # Step by the bin width (previously a fixed step of 1) so the trace matches the
    # binsize used for the labels and named in the output file. Clip guards
    # against float round-off pushing the last grid point past the interpolation
    # range, which the interpolator would reject.
    sampleTimes = np.clip(np.arange(firstSample, lastSample, binSeconds), firstSample, lastSample)
    # Pad the bins before the first running sample with NaN.
    leadingPad = np.full(int(firstSample / binSeconds), np.nan)
    runTimeSeries = np.append(leadingPad, interpolator(sampleTimes))

    # All three outputs share the same session/binning suffix.
    fileSuffix = ('_session_' + str(sesh) + "_binned" + str(startTime) + "to" + str(endTime) +
                  "Secs" + str(binsize) + "binsize" + '.csv')
    np.savetxt(outputDir + brainRegion + '_labels' + fileSuffix,
               labels, delimiter=',', fmt='%s')
    np.savetxt(outputDir + brainRegion + '_labelNames' + fileSuffix,
               labels_asNames, delimiter=',', fmt='%s')
    np.savetxt(outputDir + brainRegion + '_runningSpeed' + fileSuffix,
               runTimeSeries, delimiter=',', fmt='%s')