In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import expipe
import pathlib
import numpy as np
import spatial_maps.stats as stats
import septum_mec.analysis.data_processing as dp
import head_direction.head as head
import spatial_maps as sp
import pnnmec.registration
import speed_cells.speed as spd
import re
import joblib
import multiprocessing
import shutil
import psutil
import pandas as pd
import matplotlib.pyplot as plt
import pnnmec
import scipy.ndimage.measurements
import quantities as pq
import exdir
from tqdm import tqdm_notebook as tqdm

11:38:51 [I] klustakwik KlustaKwik2 version 0.2.6


In [3]:
# Speed limits handed to the speed-score calculation.
max_speed = 1 # m/s only used for speed score
min_speed = 0.02 # m/s only used for speed score
# Pre-processing parameters handed to the tracking / head-direction loaders.
position_sampling_rate = 100 # for interpolation
position_low_pass_frequency = 6 # for low pass filtering of position

# Rate-map parameters handed to the spatial_maps helpers.
# NOTE(review): presumably all three are lengths in metres — TODO confirm
box_size = 1.0
bin_size=0.02
smoothing = 0.05

In [4]:
# Ask the data-processing helper module where the project lives.
project_path = dp.project_path()

# Open the expipe project and keep its actions around for later lookups.
project = expipe.get_project(project_path)
actions = project.actions

In [5]:
# The 'identify-neurons' action provides the session and unit tables as CSV files.
identify_neurons = actions['identify-neurons']
sessions = pd.read_csv(identify_neurons.data_path('sessions'))
units = pd.read_csv(identify_neurons.data_path('units'))
# Join the two tables on their shared 'action' column (pandas' default inner join).
session_units = pd.merge(sessions, units, on='action')

In [6]:
# First merged row, kept for a single-unit trial run of process().
first_row = session_units.iloc[0]

In [7]:
# Local output directory; the Exdir file inside it gets one group per unit under "units".
output = pathlib.Path('output/shuffling')
output.mkdir(parents=True, exist_ok=True)
output_exdir = exdir.File(output / "shuffling.exdir")
output_units = output_exdir.require_group("units")

In [8]:
from elephant.spike_train_surrogates import dither_spike_train

def process(row, sample_count=1000, shift_seconds=30):
    """Compute the statistics of one unit and of its dithered surrogates.

    Gridness, border score, spatial information rate, speed score and the
    head-direction mean angle / mean vector length are calculated for the
    recorded spike train and then for ``sample_count`` surrogate trains
    produced by ``dither_spike_train``. Everything is written to the unit's
    group in ``output_units`` once all calculations have finished.

    Parameters
    ----------
    row : pandas.Series
        Row of ``session_units``; must contain 'action', 'channel_group'
        and 'unit_name'. All of its entries are copied to the group attributes.
    sample_count : int, optional
        Number of surrogate spike trains to generate.
    shift_seconds : float, optional
        Dither shift in seconds handed to ``dither_spike_train``.

    Returns
    -------
    None
        Also returns early (without writing) when less than 2 GB of memory
        is available or when the unit has already been completed.
    """
    memory_in_gb = psutil.virtual_memory().available / 1024 / 1024 / 1024
    if memory_in_gb < 2:
        print("Running out of memory! Restart your kernel.")
        return

    action_id = row['action']
    channel_id = int(row['channel_group'])
    unit_id = int(row['unit_name'])

    cell_name = "{}_{}_{}".format(action_id, channel_id, unit_id)

    # 'shuffled_rate_map_example' is the last item written below, so its
    # presence marks a finished unit. Testing only for the group would also
    # skip units whose earlier run stopped half-way and left partial data.
    if cell_name in output_units and 'shuffled_rate_map_example' in output_units[cell_name]:
        print("Skipping existing", cell_name)
        return

    print("Processing", cell_name)

    data_path = pathlib.Path(project_path) / "actions" / action_id / "data" / "main.exdir"

    # common values for all units == faster calculations
    x, y, t, speed = dp.load_tracking(data_path, position_sampling_rate, position_low_pass_frequency)
    ang, ang_t = dp.load_head_direction(data_path, position_sampling_rate, position_low_pass_frequency)
    sptr = dp.load_spike_train(data_path, channel_id, unit_id, t[-1])

    box_size_, bin_size_ = sp.maps._adjust_bin_size(box_size=box_size, bin_size=bin_size)
    xbins, ybins = sp.maps._make_bins(box_size_, bin_size_)
    occupancy_map = sp.maps._occupancy_map(x, y, t, xbins, ybins)

    smooth_occupancy_map = sp.maps.smooth_map(occupancy_map, bin_size=bin_size_, smoothing=smoothing)

    # Depends only on the tracking data, so it is computed once here instead
    # of once per surrogate spike train.
    px = stats.prob_dist(x, y, xbins)

    def calculate(spike_times):
        """Return the rate map and the dictionary of scores for one spike train."""
        # common
        spike_map = sp.maps._spike_map(x, y, t, spike_times, xbins, ybins)
        smooth_spike_map = sp.maps.smooth_map(spike_map, bin_size=bin_size_, smoothing=smoothing)
        rate_map = smooth_spike_map / smooth_occupancy_map

        # gridness
        gridness = sp.gridness(rate_map)

        # border score
        fields = sp.separate_fields_by_laplace(rate_map)
        border_score = sp.border_score(rate_map, fields)

        # spatial information
        information_rate = sp.information_rate(rate_map, px)

        # speed
        speed_score = spd.speed_correlation(
            speed, t, spike_times, min_speed=min_speed, max_speed=max_speed)

        # head direction
        ang_bin, ang_rate = head.head_direction_rate(spike_times, ang, ang_t)
        head_mean_ang, head_mean_vec_len = head.head_direction_score(ang_bin, ang_rate)

        statistics = {
            "gridness": gridness,
            "border_score": border_score,
            # np.asscalar is gone from current NumPy; .item() is its replacement.
            "information_rate": np.asarray(information_rate).item(),
            "speed_score": speed_score,
            "head_mean_ang": head_mean_ang,
            "head_mean_vec_len": head_mean_vec_len
        }

        return rate_map, statistics

    # Calculate for cell first
    rate_map, statistics = calculate(sptr)

    # Copy attrs from row, converting NumPy scalars to plain Python values
    attributes = row.to_dict()
    attributes.update(statistics)
    attributes = {
        key: value.item() if isinstance(value, (np.generic, np.ndarray)) else value
        for key, value in attributes.items()
    }

    # Calculate shuffled
    # NOTE(review): no random seed is set here; presumably dither_spike_train
    # draws from NumPy's global RNG, so runs are not reproducible — confirm.
    spike_trains = dither_spike_train(
        sptr, shift=shift_seconds * pq.s, n=sample_count, edges=True)

    # Falls back to the real rate map when no surrogate is generated.
    shuffled_rate_map = rate_map
    shuffling_records = []
    for spike_times in spike_trains:
        shuffled_rate_map, shuffled_statistics = calculate(spike_times)
        shuffling_records.append(shuffled_statistics)

    shuffling_data = pd.DataFrame(shuffling_records)
    quantiles = shuffling_data.quantile(0.95, axis=0)

    # All calculations succeeded: only now touch the output file, so that a
    # failure above cannot leave a half-written group behind.
    output_group = output_units.require_group(cell_name)
    output_group.attrs = attributes
    output_group['rate_map'] = rate_map

    # TODO make it easier to create raw data in Exdir
    raw_path = output_group.root_directory / output_group.relative_path / "results"
    raw_path.mkdir(exist_ok=True)

    shuffling_data.to_csv(raw_path / "shuffling_data.csv", index=False)
    quantiles.to_csv(raw_path / "quantiles.csv")

    # Written last: this dataset doubles as the completion marker tested above.
    output_group['shuffled_rate_map_example'] = shuffled_rate_map


In [9]:
# process(first_row)

# Shuffle a random sample

In [10]:
# args = []
# for index, row in session_units.sample(100, random_state=1).iterrows():
#     args.append(row)

# with multiprocessing.Pool(processes=4) as pool:
#     result = pool.map(process, args)

# Shuffle all

In [11]:
# Every merged session/unit row becomes one task for a pool of four workers.
args = [row for _, row in session_units.iterrows()]

with multiprocessing.Pool(processes=4) as pool:
    result = pool.map(process, args)

Skipping existing 1849-060319-3_1_104
Skipping existing 1834-120319-4_6_55
Skipping existing 1849-060319-3_1_108
Skipping existing 1849-060319-3_1_85
Skipping existing 1849-060319-3_1_94
Skipping existing 1834-120319-4_6_71
Skipping existing 1849-060319-3_1_98
Skipping existing 1849-060319-3_2_114
Skipping existing 1849-060319-3_2_130
Skipping existing 1849-060319-3_3_100
Skipping existing 1849-060319-3_3_110
Skipping existing 1834-220319-2_7_41
Skipping existing 1849-060319-3_4_120
Skipping existing 1849-220319-2_0_93
Skipping existing 1849-060319-3_5_112
Skipping existing 1834-120319-4_7_22
Skipping existing 1849-220319-2_1_99
Skipping existing 1834-120319-4_7_52
Skipping existing 1849-060319-3_7_118
Skipping existing 1849-220319-2_2_91
Skipping existing 1834-150319-4_7_23
Skipping existing 1849-060319-3_7_92
Skipping existing 1849-280219-4_0_62
Skipping existing 1849-220319-2_2_95
Skipping existing 1839-120619-4_1_104
Skipping existing 1849-280219-4_0_64
Skipping existing 1849-22031

Skipping existing 1834-150319-3_6_65
Skipping existing 1833-050619-1_4_25
Skipping existing 1834-150319-1_7_92
Skipping existing 1834-150319-3_7_27
Skipping existing 1833-050619-1_5_73
Skipping existing 1833-010719-1_3_171
Skipping existing 1839-120619-1_5_158
Skipping existing 1833-050619-1_6_111
Skipping existing 1834-150319-3_7_28
Skipping existing 1839-120619-1_6_143
Skipping existing 1833-010719-1_3_198
Skipping existing 1833-050619-1_6_60
Skipping existing 1834-150319-3_7_30
Skipping existing 1833-010719-1_3_216
Skipping existing 1833-050619-1_6_64
Skipping existing 1839-120619-1_7_152
Skipping existing 1834-150319-3_7_44
Skipping existing 1833-050619-1_6_91
Skipping existing 1833-010719-1_3_240
Skipping existing 1833-260619-2_0_174
Skipping existing 1834-150319-3_7_45
Skipping existing 1833-010719-1_5_134
Skipping existing 1833-060619-1_1_137
Skipping existing 1834-150319-3_7_49
Skipping existing 1833-010719-1_5_144
Skipping existing 1833-260619-2_0_2
Skipping existing 1833-0606

Skipping existing 1834-150319-2_7_29
Skipping existing 1839-200619-1_5_145
Skipping existing 1839-200619-1_6_139
Skipping existing 1834-110319-3_3_41
Skipping existing 1839-200619-1_7_135
Skipping existing 1834-110319-3_6_70
Skipping existing 1834-150319-2_7_30
Skipping existing 1834-110319-3_6_83
Skipping existing 1834-150319-2_7_31
Skipping existing 1834-150319-2_7_32
Skipping existing 1834-150319-2_7_37
Skipping existing 1834-110319-1_0_65
Skipping existing 1834-150319-2_7_42
Skipping existing 1834-110319-3_7_27
Skipping existing 1834-110319-1_0_0
Skipping existing 1849-110319-3_1_177
Skipping existing 1834-110319-1_4_11
Skipping existing 1834-110319-3_7_28
Skipping existing 1834-110319-1_5_14
Skipping existing 1849-110319-3_4_104
Skipping existing 1834-110319-3_7_29
Skipping existing 1849-110319-3_4_163
Skipping existing 1834-110319-1_5_16
Skipping existing 1849-110319-3_5_165
Skipping existing 1834-110319-3_7_30
Skipping existing 1839-290519-2_0_107
Skipping existing 1834-110319-1

  return self.magnitude < other


Skipping existing 1839-290519-2_7_144
Skipping existing 1833-290519-3_5_117
Skipping existing 1849-150319-2_1_44
Skipping existing 1839-060619-4_4_230


  return self.magnitude > other


Skipping existing 1839-060619-4_5_228
Skipping existing 1833-290519-3_5_139
Skipping existing 1839-060619-4_7_227
Skipping existing 1849-150319-2_1_64
Skipping existing 1839-060619-4_7_200
Skipping existing 1834-010319-5_3_11
Skipping existing 1849-150319-2_1_48
Skipping existing 1833-290519-3_5_143
Skipping existing 1849-150319-2_2_9


  return np.true_divide(other, self)


Skipping existing 1834-010319-5_3_28
Skipping existing 1833-290519-3_6_125
Skipping existing 1849-150319-2_2_62
Skipping existing 1834-010319-5_5_14


  s = (x.conj() * x).real


Skipping existing 1834-010319-5_5_15
Skipping existing 1849-150319-2_2_34
/media/storage/expipe/septum-mec/actions/1849-280219-3/data/main.exdir
-1.5894949112258199e+308 dimensionless 1.2647164869759367e+308 dimensionless
Skipping existing 1834-010319-5_6_20
Skipping existing 1849-150319-2_2_36


  return super(Quantity, self).__sub__(other)


Skipping existing 1834-010319-5_6_33
Skipping existing 1849-150319-2_3_32
Skipping existing 1834-010319-5_6_35
Skipping existing 1849-150319-2_3_38
Skipping existing 1834-010319-5_7_19
Skipping existing 1849-150319-2_4_17
Skipping existing 1849-150319-2_4_46
Skipping existing 1834-010319-5_7_21
Skipping existing 1849-150319-2_5_18
Skipping existing 1834-010319-5_7_23
Skipping existing 1849-150319-2_5_19
Skipping existing 1849-150319-2_5_22
Skipping existing 1834-010319-5_7_39
Skipping existing 1849-150319-2_5_67
Skipping existing 1833-290519-1_0_134
Skipping existing 1849-280219-2_0_88
Skipping existing 1833-290519-1_0_114
Skipping existing 1833-290519-1_0_116
Skipping existing 1834-010319-3_7_58
Skipping existing 1833-290519-1_0_132
Skipping existing 1849-280219-2_1_102
Skipping existing 1834-010319-3_7_60
Skipping existing 1849-280219-2_1_110
Skipping existing 1849-280219-2_7_42
Skipping existing 1833-290519-1_1_124
Skipping existing 1834-010319-3_7_61
Skipping existing 1833-290519-1

Skipping existing 1834-060319-1_0_71
Skipping existing 1833-120619-2_3_69
Skipping existing 1834-060319-3_7_35
Skipping existing 1849-060319-1_5_75
Skipping existing 1834-060319-3_7_37
Skipping existing 1849-060319-1_5_92
Skipping existing 1833-120619-2_4_170
Skipping existing 1834-060319-3_7_40
Skipping existing 1834-060319-1_0_72
Skipping existing 1834-060319-3_7_41
Skipping existing 1849-060319-1_6_44
Skipping existing 1833-120619-2_4_81
Skipping existing 1834-060319-1_0_73
Skipping existing 1849-060319-4_1_76
Skipping existing 1833-120619-2_5_100
Skipping existing 1849-060319-1_6_81
Skipping existing 1834-060319-1_0_74
Skipping existing 1849-060319-4_2_72
Skipping existing 1833-120619-2_5_123
Skipping existing 1849-060319-1_7_49
Skipping existing 1833-120619-2_5_133
Skipping existing 1849-060319-4_2_82
Skipping existing 1849-060319-1_7_90
Skipping existing 1849-060319-4_3_68
Skipping existing 1833-120619-2_5_145
Skipping existing 1834-060319-1_0_79
Skipping existing 1834-220319-1_0

Skipping existing 1833-260619-4_6_218
Skipping existing 1839-060619-5_6_142
Skipping existing 1833-290519-2_4_110
Skipping existing 1833-260619-4_6_80
Skipping existing 1839-060619-5_6_162
Skipping existing 1833-290519-2_4_125
Skipping existing 1833-260619-4_7_177
Skipping existing 1833-200619-2_0_283
Skipping existing 1833-200619-2_0_268
Skipping existing 1833-290519-2_4_133
Skipping existing 1839-060619-5_6_166
Skipping existing 1833-200619-2_1_278
Skipping existing 1833-290519-2_5_81
Skipping existing 1833-200619-2_2_325
Skipping existing 1833-290519-2_6_83
Skipping existing 1833-200619-2_3_257
Skipping existing 1839-060619-5_6_168
Skipping existing 1833-290519-2_6_85
Skipping existing 1833-200619-2_3_264
Skipping existing 1839-060619-5_7_183
Skipping existing 1833-290519-2_6_97
Skipping existing 1833-200619-2_4_254
Skipping existing 1839-060619-5_7_136
Skipping existing 1833-290519-2_7_111
Skipping existing 1833-200619-2_5_270
Skipping existing 1833-290519-2_7_113
Skipping existing

Skipping existing 1839-200619-2_6_89
Skipping existing 1849-280219-1_5_40
Skipping existing 1839-200619-2_6_92
Skipping existing 1839-200619-2_6_94
Skipping existing 1849-280219-1_6_106
Skipping existing 1839-200619-2_7_116
Skipping existing 1849-280219-1_6_108
Skipping existing 1839-200619-2_7_74
Skipping existing 1839-200619-2_7_90
Skipping existing 1849-280219-1_6_44
Skipping existing 1849-280219-1_6_45
Skipping existing 1833-260619-3_0_196
Skipping existing 1849-280219-1_6_96
Skipping existing 1833-260619-3_0_209
Skipping existing 1849-280219-1_6_98
Skipping existing 1833-260619-3_0_1
Skipping existing 1849-280219-1_7_52
Skipping existing 1833-260619-3_0_0
Skipping existing 1849-280219-1_7_53
Skipping existing 1833-260619-3_0_141
Skipping existing 1833-260619-3_0_180
Skipping existing 1849-280219-1_7_62
Skipping existing 1833-010719-2_0_239
Skipping existing 1833-010719-2_0_261
Skipping existing 1833-260619-3_0_194
Skipping existing 1833-260619-3_0_140
Skipping existing 1833-010719

  return self.magnitude < other
  return self.magnitude > other
  return np.true_divide(other, self)
  s = (x.conj() * x).real


/media/storage/expipe/septum-mec/actions/1833-200619-1/data/main.exdir
-1.1376529585788919e+308 dimensionless 1.6416469997615234e+308 dimensionless


  return super(Quantity, self).__sub__(other)


ValueError: Maximum allowed size exceeded

# Gather all results and statistics

In [None]:
# Directory that receives the gathered per-cell CSV tables.
cell_statistics_path = output / "statistics"
cell_statistics_path.mkdir(exist_ok=True)

In [None]:
# One attribute dictionary per unit group stored in the Exdir file.
all_statistics = [
    unit_group.attrs.to_dict()
    for _, unit_group in output_units.items()
]

In [None]:
# Tabulate the attribute dictionaries (one row per unit) and preview the table.
df_statistics = pd.DataFrame(all_statistics)
df_statistics.head()

In [None]:
# Persist the per-unit statistics table without the DataFrame index.
df_statistics.to_csv(cell_statistics_path / "cell_statistics.csv", index=False)

In [None]:
def _labelled_quantile(shuffling_data, q, attrs):
    """Return the ``q`` quantile of every shuffled statistic, tagged with the unit's identity.

    ``attrs`` is the unit group's attribute mapping. process() copies the
    row keys 'channel_group' and 'unit_name' into it, so those are the keys
    read here; they are exported under the column names 'channel' and 'unit'.
    """
    quantile = shuffling_data.quantile(q, axis=0)
    quantile['action'] = attrs['action']
    quantile['channel'] = attrs['channel_group']
    quantile['unit'] = attrs['unit_name']
    return quantile


quantiles_95 = []
quantiles_99 = []

for cell_id, cell in output_units.items():
    # The shuffled statistics of each unit live in its raw "results" directory.
    results = cell['results']
    shuffling_data_path = results.root_directory / results.relative_path / "shuffling_data.csv"
    shuffling_data = pd.read_csv(shuffling_data_path)

    quantiles_95.append(_labelled_quantile(shuffling_data, 0.95, cell.attrs))
    quantiles_99.append(_labelled_quantile(shuffling_data, 0.99, cell.attrs))

In [None]:
# One row per unit: the 95 % and 99 % quantiles of its shuffled statistics.
pd_quantiles_95 = pd.DataFrame(quantiles_95)
pd_quantiles_99 = pd.DataFrame(quantiles_99)

In [None]:
# Preview the 95 % quantile table.
pd_quantiles_95.head()

In [None]:
# Preview the 99 % quantile table.
pd_quantiles_99.head()

In [None]:
# Persist both quantile tables next to the per-cell statistics, without the index.
pd_quantiles_95.to_csv(cell_statistics_path / "cell_quantiles_95.csv", index=False)
pd_quantiles_99.to_csv(cell_statistics_path / "cell_quantiles_99.csv", index=False)

# Quick verification of results

In [None]:
from scipy.interpolate import interp1d
def summarize(row, value):
    """Plot one unit on the current axes, titled with its score and shuffling threshold.

    The kind of plot depends on ``value``: a head-direction tuning curve for
    'head_mean_vec_len', mean rate against speed for 'speed_score', and a
    rate map for every other statistic.

    Parameters
    ----------
    row : pandas.Series
        Row of ``df_statistics``; must contain 'action', 'channel_group',
        'unit_name' and the column named by ``value``.
    value : str
        Name of the statistic shown in the title; must also be a column of
        the unit's ``shuffling_data.csv``.

    Returns
    -------
    None
    """
    action_id = row['action']
    # Same keys and integer casts as process(), so the group name matches.
    channel_id = int(row['channel_group'])
    unit_id = int(row['unit_name'])

    cell_name = "{}_{}_{}".format(action_id, channel_id, unit_id)
    # `output` is a pathlib.Path and cannot be indexed; the unit groups are
    # reachable through `output_units`.
    results_group = output_units[cell_name]["results"]

    # TODO simplify this in Exdir
    shuffling_path = results_group.root_directory / results_group.relative_path / "shuffling_data.csv"
    shuffling_data = pd.read_csv(shuffling_path)
    quantiles = shuffling_data.quantile(0.95, axis=0)

    data_path = pathlib.Path(project_path) / "actions" / action_id / "data" / "main.exdir"

    # Loader calls mirror the ones in process() so the plot shows the same data
    # the statistics were computed from.
    x, y, t, speed = dp.load_tracking(data_path, position_sampling_rate, position_low_pass_frequency)
    spike_times = dp.load_spike_train(data_path, channel_id, unit_id, t[-1])

    title = "{}\n{}: {:.2f}\n(threshold: {:.2f})".format(cell_name, value, row[value], quantiles[value])

    if value == "head_mean_vec_len":
        ang, ang_t = dp.load_head_direction(data_path, position_sampling_rate, position_low_pass_frequency)
        ang_bin, ang_rate = head.head_direction_rate(spike_times, ang, ang_t)
        plt.plot(ang_bin, ang_rate)
        # Extra line keeps the title clear of the polar axes used for this plot.
        title = title + '\n'
    elif value == "speed_score":
        speed_bin_width = 0.02
        _, inst_speed, rate, _ = spd.speed_correlation(
            speed, t, spike_times, min_speed=min_speed, max_speed=max_speed, return_data=True)
        speed_bins = np.arange(min_speed, max_speed + speed_bin_width, speed_bin_width)
        bin_index = np.digitize(inst_speed, bins=speed_bins, right=True)
        mean_rate = np.zeros_like(speed_bins)
        for i in range(len(speed_bins)):
            # Bins without samples yield NaN, which the nan-aware limits below ignore.
            mean_rate[i] = np.mean(rate[bin_index == i])

        plt.plot(speed_bins, mean_rate)
        aspect = (max_speed - min_speed) / (np.nanmax(mean_rate) - np.nanmin(mean_rate))
        plt.gca().set_aspect(aspect)
    else:
        spatial_map = sp.SpatialMap(x, y, t, spike_times, box_size=box_size, bin_size=bin_size)
        # NOTE(review): smoothing is 0.03 here while process() uses the
        # module-level `smoothing` (0.05) — confirm the difference is intended.
        rate_map = spatial_map.rate_map(0.03)
        plt.imshow(rate_map)

    plt.title(title)

def top(value):
    """Draw the 30 units with the highest ``value`` on a 6 x 5 grid of subplots.

    Head-direction plots ('head_mean_vec_len') use polar axes; every other
    statistic uses the default projection. Each panel is drawn by summarize().
    """
    is_polar = value == 'head_mean_vec_len'
    plt.figure(figsize=(14,26))
    best_rows = df_statistics.sort_values(by=value, ascending=False).head(30)
    for position, (_, unit_row) in enumerate(best_rows.iterrows(), start=1):
        plt.subplot(6, 5, position, projection='polar' if is_polar else None)
        summarize(unit_row, value)

# Top gridness

In [None]:
# Rate maps of the 30 units with the highest gridness.
top("gridness")

# Top border score

In [None]:
# Rate maps of the 30 units with the highest border score.
top("border_score")

# Top spatial information

In [None]:
# Rate maps of the 30 units with the highest spatial information rate.
top("information_rate")

# Top head direction

In [None]:
# Polar tuning curves of the 30 units with the longest head-direction mean vector.
top("head_mean_vec_len")

# Top speed score

In [None]:
# Rate-versus-speed curves of the 30 units with the highest speed score.
top("speed_score")

# Store results in Expipe action

In [None]:
# Get the "shuffling" action (presumably created when missing — TODO confirm).
shuffling_action = project.require_action("shuffling")

In [None]:
# Register the Exdir file name under the "shuffling" key of the action's data.
shuffling_action.data["shuffling"] = "shuffling.exdir"

In [None]:
# Copy the whole local output directory into the action's data directory.
# NOTE(review): shutil.copytree raises if the destination already exists, so
# this cell fails on a re-run unless the action's data path is removed first.
shutil.copytree(output, shuffling_action.data_path())

In [None]:
# `import septum_mec.analysis.data_processing as dp` binds only the name `dp`,
# so the package name must be imported explicitly before it is used here;
# otherwise this cell raises NameError on a fresh kernel.
import septum_mec.registration

# Attach this notebook to the shuffling action.
septum_mec.registration.store_notebook(
    shuffling_action, "10-shuffling.ipynb")