In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline

In [4]:
import os
import expipe
import pathlib
import numpy as np
import spatial_maps.stats as stats
import pnnmec.data_processing as dp
import head_direction.head as head
import spatial_maps as sp
import pnnmec.registration
import speed_cells.speed as spd
import pnnmec.spikes as spikes
#import pnnmec.version_control as vc
import re
import joblib
import multiprocessing
import shutil
import psutil
import pandas as pd
import matplotlib.pyplot as plt
import pnnmec
import scipy.ndimage.measurements
# Progress bars for pandas
from tqdm import tqdm
tqdm.pandas()

In [5]:
# Analysis parameters.
# The speed limits must be plain numbers: a trailing comma after the value
# would silently turn each of them into a one-element tuple such as ``(100,)``.
# NOTE(review): the unit comments below are kept from the original; confirm
# that m/s (and not cm/s) is really intended for these values.
max_speed = 100  # m/s only used for speed score
min_speed = 2  # m/s only used for speed score
position_sampling_rate = 100  # for interpolation
position_low_pass_frequency = 6  # for low pass filtering of position

# Rate map geometry and smoothing widths.
box_size = 1.0
bin_size = 0.02
smoothing_low = 0.03
smoothing_high = 0.05

Make sure to set `CHARLOTTE_PNN_MEC_DATA` in your environment before running `jupyter notebook`:

```
export CHARLOTTE_PNN_MEC_DATA=/path/to/data
jupyter notebook
```

Then you can continue running the following cell:

In [None]:
# Resolve the project directory through the pnnmec helper.
# NOTE(review): presumably this reads the CHARLOTTE_PNN_MEC_DATA environment
# variable mentioned above - confirm in pnnmec.data_processing.
project_path = dp.project_path()

# Open the expipe project located at that path.
project = expipe.get_project(project_path)

# Create action for this notebook


In [None]:
# Action that will hold the results and the copies of this notebook.
# NOTE(review): `require_action` presumably returns the existing action or
# creates it when missing - confirm against the expipe documentation.
field_variability = project.require_action("field_variability")

# Load related sessions action

In [None]:
# Fetch the action that relates sessions to each other and display the data
# entry registered under 'same-day-familiar'.
relate_sessions = project.actions['relate_sessions']
relate_sessions.data['same-day-familiar']

In [None]:
relate_sessions = project.actions['relate_sessions']

# TODO create helper function that does this
# Location of the 'same-day-familiar' file inside the action's data directory.
data_path = pathlib.Path(project_path).joinpath(
    "actions",
    relate_sessions.id,
    "data",
    relate_sessions.data['same-day-familiar'],
)
data_path

In [None]:
# Read the related-sessions CSV into a DataFrame and preview its first rows.
table = pd.read_csv(data_path)
table.head()

In [None]:
# Keep only the rows whose first session id does not contain "z:".
table = table.loc[~table['session_1'].str.contains("z:")]
table.head()

In [None]:
# Notebook-wide cache of loaded data; `load_data` stores and looks up its
# results here. Re-running this cell empties the cache.
data_cache = {}

In [None]:
def load_data(action_id, channel_id, unit_id):
    """Load position tracking and one unit's spike train for an action.

    Results are memoised in the module-level ``data_cache`` dict so that each
    (action, channel group, unit) combination is read from disk only once.

    Parameters
    ----------
    action_id : str
        Name of the action directory below ``<project_path>/actions``.
    channel_id
        Channel group number, formatted into the exdir unit path.
    unit_id
        Unit identifier inside that channel group's ``UnitTimes`` group.

    Returns
    -------
    tuple
        ``(x, y, t, speed, sptr)``: the filtered position samples and their
        timestamps (cut off at the tracking ``stop_time``), the speed derived
        from them, and the unit's spike train as a ``neo.SpikeTrain``.

    Notes
    -----
    NOTE(review): ``exdir``, ``tr``, ``par``, ``pq`` and ``neo`` are used here
    but are not imported or defined in the visible part of the notebook; they
    must be in scope before this function is called.
    """
    # Key the cache on the identifying tuple itself rather than on its hash():
    # two different tuples can share a hash value, which would silently hand
    # back another unit's data.
    cache_key = (action_id, channel_id, unit_id)

    data_path = pathlib.Path(project_path) / "actions" / action_id / "data" / "main.exdir"
    unit_path = "/processing/electrophysiology/channel_group_{}/UnitTimes/{}".format(
        channel_id,
        unit_id
    )
    print("Loading", data_path, channel_id, unit_id)
    if cache_key in data_cache:
        print("Using result from cache")
        return data_cache[cache_key]

    root_group = exdir.File(data_path, plugins=[exdir.plugins.quantities,
                                                exdir.plugins.git_lfs.Plugin(verbose=True)])
    unit_group = root_group[unit_path]

    # tracking data
    position_group = root_group['processing']['tracking']['camera_0']['Position']
    stop_time = position_group.attrs.to_dict()["stop_time"]

    # Combine the two LED tracks into one trajectory, then interpolate/filter it.
    x1, y1, t1 = tr.get_raw_position(position_group['led_0'])
    x2, y2, t2 = tr.get_raw_position(position_group['led_1'])
    x, y, t = tr.select_best_position(x1, y1, t1, x2, y2, t2)
    x, y, t = tr.interp_filt_position(x, y, t, pos_fs=par['pos_fs'], f_cut=par['f_cut'])

    # Discard samples recorded after the tracking stop time.
    mask = t <= stop_time
    x = x[mask]
    y = y[mask]
    t = t[mask]

    # Speed is the norm of the finite-difference velocity, using the mean
    # sampling interval as the time step.
    dt = np.mean(np.diff(t))
    vel = np.gradient([x, y], axis=1) / dt
    speed = np.linalg.norm(vel, axis=0)

    # spiketrain data
    sptr_group = unit_group
    metadata = {}
    times = pq.Quantity(sptr_group['times'].data,
                        sptr_group['times'].attrs['unit'])
    t_stop = sptr_group.parent.attrs['stop_time']
    t_start = sptr_group.parent.attrs['start_time']
    # The attributes of the 'times' dataset plus the file location are passed
    # on to the spike train as keyword annotations.
    metadata.update(sptr_group['times'].attrs.to_dict())
    metadata.update({'exdir_path': str(data_path)})
    sptr = neo.SpikeTrain(times=times,
                          t_stop=t_stop,
                          t_start=t_start,
                          waveforms=None,
                          sampling_rate=None,
                          **metadata)

    data_cache[cache_key] = (x, y, t, speed, sptr)
    return x, y, t, speed, sptr

# Alternative: simply take the first row with `table.iloc[0]`.
# Select the example unit with ONE combined boolean mask. Chaining several
# `table[...]` lookups applies masks built from the full table to an already
# filtered frame, which makes pandas re-align the mask and emit a warning.
first_row = table.loc[
    (table['session_1'] == "1528-140115-01")
    & (table['group'] == 3)
    & (table['unit'] == 4)
].iloc[0]

x, y, t, speed, sptr = load_data(first_row['session_1'], first_row['group'], first_row['unit'])

# Trajectory of the animal during the example session.
plt.plot(x, y)

In [None]:
def calculate_rate_map(x, y, t, speed, sptr):
    """Compute a rate map with `tr.spatial_rate_map` (older code path).

    `speed` is accepted but not used here.

    NOTE(review): `tr` and `par` are not imported or defined in the visible
    part of this notebook, and a later cell defines `calculate_rate_map`
    again, which replaces this version once that cell has run.
    """
    rate_map = tr.spatial_rate_map(x, y, t, sptr, 
                                   binsize=par['spat_binsize'],
                                   smoothing=0.06,
                                   mask_unvisited=False, 
                                   convolve = True)
    
    #rate_map = sp.rate_map(x, y, t, sptr, box_size=1.0, bin_size=0.03, smoothing=5)
    
    return rate_map
    
# Rate map of the example unit, displayed transposed with the origin in the
# lower-left corner.
rate_map = calculate_rate_map(x, y, t, speed, sptr)
plt.imshow(rate_map.T, origin='lower')

In [None]:
def calculate_rate_map(x, y, t, speed, sptr, box_size=1.0, bin_size=0.02, smoothing=0.05):
    """Compute a smoothed rate map with `spatial_maps.SpatialMap`.

    Parameters
    ----------
    x, y : array-like
        Position samples.
    t : array-like
        Timestamps of the position samples. If the object exposes a
        ``magnitude`` attribute (as unit-carrying arrays do) the plain values
        are used; otherwise `t` is passed on unchanged.
    speed
        Accepted so that all rate-map helpers share one signature; unused.
    sptr
        Spike train of the unit.
    box_size : float, optional
        Box size handed to ``SpatialMap`` (default 1.0, as before).
    bin_size : float, optional
        Bin size handed to ``SpatialMap`` (default 0.02, as before).
    smoothing : float, optional
        Smoothing handed to ``SpatialMap.rate_map`` (default 0.05, as before).

    Returns
    -------
    The rate map returned by ``SpatialMap.rate_map``.
    """
    # Strip units when present, but do not fail on an already plain array.
    times = getattr(t, "magnitude", t)
    spatial_map = sp.SpatialMap(x, y, times, sptr, box_size=box_size, bin_size=bin_size)
    return spatial_map.rate_map(smoothing=smoothing)
    
# Recompute the example unit's rate map with the definition from this cell
# and display it transposed with the origin in the lower-left corner.
rate_map = calculate_rate_map(x, y, t, speed, sptr)
plt.imshow(rate_map.T, origin='lower')

## Calculate autocorrelation

In [None]:
def calculate_autocorrelation(rate_map):
    """Correlate `rate_map` with itself (``mode='full'``, ``normalize=True``).

    NOTE(review): `exana` is not imported in the visible part of the notebook.
    """
    correlate = exana.misc.tools.fftcorrelate2d
    return correlate(rate_map, rate_map, mode='full', normalize=True)

# Autocorrelation of the example rate map, shown with the same orientation
# as the rate map plots.
autocorrelation = calculate_autocorrelation(rate_map)

plt.imshow(autocorrelation.T, origin='lower')

## Find autocorrelation maxima and place field radius

In [None]:
import scipy.ndimage as ndimage
import scipy.ndimage.filters as filters

def find_maxima(image):
    """Locate the local maxima of an N-dimensional array.

    A pixel counts as a maximum when it equals the largest value in the
    3-wide neighbourhood centred on it. Connected pixels that satisfy this
    (plateaus) are merged into one region and reported once.

    Parameters
    ----------
    image : array-like
        The array to search.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_maxima, image.ndim)`` holding the index
        of each maximum. The shape is ``(0, image.ndim)`` when no maximum is
        found, so the result can always be indexed as ``maxima[:, k]``.
    """
    image = np.asarray(image)
    # `ndimage.maximum_filter` is the supported name; the
    # `scipy.ndimage.filters` namespace is deprecated.
    neighbourhood_max = ndimage.maximum_filter(image, 3)
    is_maxima = (image == neighbourhood_max)
    labels, num_objects = ndimage.label(is_maxima)
    if num_objects == 0:
        return np.empty((0, image.ndim), dtype=int)
    indices = np.arange(1, num_objects + 1)
    maxima = ndimage.maximum_position(image, labels=labels, index=indices)
    return np.array(maxima, dtype=int).reshape(-1, image.ndim)

# Mark the autocorrelation maxima of the example unit on top of the
# autocorrelation image.
maxima = find_maxima(autocorrelation)

plt.imshow(autocorrelation.T, origin="lower")
plt.scatter(maxima[:, 0], maxima[:, 1], color="red")
plt.show()

In [None]:
def place_field_radius(auto_correlation, maxima, scale=0.7):
    """Estimate the place field radius from the autocorrelation maxima.

    The radius is ``scale`` times half the distance between the centre of
    the autocorrelation and the second-closest maximum (the closest one is
    taken to be the central peak itself).

    Parameters
    ----------
    auto_correlation : numpy.ndarray
        Autocorrelation map; only its shape is used.
    maxima : array-like, shape (n_maxima, ndim)
        Indices of the autocorrelation maxima.
    scale : float, optional
        Fraction of the half-distance to use. Defaults to 0.7 because that
        is what Ismakov et al. used.

    Returns
    -------
    float
        The estimated radius, in bins.

    Raises
    ------
    IndexError
        If fewer than two maxima are given.
    """
    maxima = np.asarray(maxima)
    if maxima.shape[0] < 2:
        raise IndexError(
            "place_field_radius needs at least two autocorrelation maxima, got {}".format(
                maxima.shape[0]
            )
        )
    map_size = np.array(auto_correlation.shape)
    # Indices run from 0 to size - 1, so the central index is (size - 1) / 2;
    # size / 2 would be half a bin off along every axis.
    center = (map_size - 1) / 2
    distances = np.sort(np.linalg.norm(maxima - center, axis=1))
    min_distance = distances[1]  # the first one is basically the center
    return scale * min_distance / 2

# Place field radius of the example unit, displayed as the cell output.
radius = place_field_radius(autocorrelation, maxima)
radius

## Find rate map maxima

In [None]:
def plot_fields(rate_map, rate_map_maxima, radius):
    """Draw the rate map and overlay every field on the current axes.

    Each point in `rate_map_maxima` gets a translucent red circle of the
    given `radius` plus a small solid red marker (radius 0.6) at its centre.
    """
    plt.imshow(rate_map.T, origin="lower")
    axes = plt.gca()

    for peak in rate_map_maxima:
        field_outline = plt.Circle(peak, radius, edgecolor="r", facecolor="#ff000022")
        peak_marker = plt.Circle(peak, 0.6, color="r")
        axes.add_artist(field_outline)
        axes.add_artist(peak_marker)

# Detect all local maxima of the example rate map and show them as fields
# with the radius estimated from the autocorrelation.
rate_map_maxima = find_maxima(rate_map)

plot_fields(rate_map, rate_map_maxima, radius)
plt.show()

## Remove fields that are too close to each other

We take the distance between each field pair.
When encountering a pair where the distance is smaller than twice the `place_field_radius` (i.e. the two field circles overlap), we add it to a list of pairs to investigate.
For each such pair, we remove the field with the lowest firing rate.
This will leave the highest one when there are three or more fields close to each other.
This is not ideal and should be addressed, but we leave it as is to reproduce the method of Ismakov et al.

In [None]:
# Scratch check: Euclidean distance between two example bin coordinates.
np.linalg.norm(np.array([40, 23]) - np.array([46, 17]))

In [None]:
import scipy.spatial as spatial

# TODO verify this for an example where there are fields too close
def too_close_removed(rate_map, rate_map_maxima, place_field_radius):
    """Drop the weaker field of every pair of fields that lie too close.

    Two fields are too close when the distance between their peaks is
    smaller than ``2 * place_field_radius``. For each such pair the peak
    with the lower rate is rejected. A rejected peak still takes part in
    its other pairs, so in a chain of close fields only the highest one may
    survive (kept on purpose to follow the original method).

    Parameters
    ----------
    rate_map : numpy.ndarray
        The rate map the peaks were detected in.
    rate_map_maxima : array-like, shape (n_maxima, 2)
        Integer indices of the peaks in `rate_map`.
    place_field_radius : float
        Field radius in bins.

    Returns
    -------
    numpy.ndarray
        The accepted rows of `rate_map_maxima`, in their original order. The
        second dimension is preserved even when no row is left.
    """
    rate_map_maxima = np.asarray(rate_map_maxima)
    if len(rate_map_maxima) == 0:
        # Nothing to compare; avoid indexing the rate map with an empty tuple.
        return rate_map_maxima

    peak_values = rate_map[tuple(rate_map_maxima.T)]
    distances = spatial.distance.cdist(rate_map_maxima, rate_map_maxima)
    accepted = np.ones(len(rate_map_maxima), dtype=bool)

    # Only the upper triangle is inspected so that every unordered pair is
    # visited exactly once. Visiting both (i, j) and (j, i) would reject BOTH
    # fields whenever their peak values are equal.
    close_pairs = np.triu(distances < place_field_radius * 2, k=1)
    for i, j in zip(*np.where(close_pairs)):
        if peak_values[i] >= peak_values[j]:
            accepted[j] = False  # on a tie the earlier peak is kept
        else:
            accepted[i] = False

    return rate_map_maxima[accepted]

# Remove the too-close fields of the example unit and plot what is left.
rate_map_maxima_filtered = too_close_removed(rate_map, rate_map_maxima, radius)

# NOTE(review): plot_fields already calls plt.imshow on the rate map, so the
# explicit imshow below draws the same image a second time.
plt.imshow(rate_map.T, origin="lower")
plot_fields(rate_map, rate_map_maxima_filtered, radius)
plt.show()

## Remove fields whose peak value is too small in comparison to the mean peak value

NOTE: This is not necessarily used.

In [None]:
def too_small_removed(rate_map, maxima, threshold=0.5):
    """Keep only the maxima whose rate exceeds a fraction of the mean peak rate.

    A maximum is kept when its value in `rate_map` is strictly greater than
    ``threshold`` times the mean value over all given maxima.
    """
    peak_values = rate_map[tuple(maxima.T)]
    cutoff = peak_values.mean() * threshold
    is_large_enough = peak_values > cutoff
    return maxima[is_large_enough]

# Additionally drop the low-rate fields of the example unit and plot the rest.
rate_map_maxima_filtered_2 = too_small_removed(rate_map, rate_map_maxima_filtered)

# NOTE(review): plot_fields already calls plt.imshow on the rate map, so the
# explicit imshow below draws the same image a second time.
plt.imshow(rate_map.T, origin="lower")
plot_fields(rate_map, rate_map_maxima_filtered_2, radius)
plt.show()


## Find the peak rates at the remaining indices

In [None]:
# Show all detected rate-map maxima (the unfiltered array).
rate_map_maxima

In [None]:
# Look up the rate at every remaining (too-close filtered) peak: the
# transposed index array becomes one index sequence per axis.
rate_map_maxima_value = rate_map[tuple(rate_map_maxima_filtered.T)]  # picks values in points
rate_map_maxima_value

## Calculate the CV

In [None]:
def find_cv(in_array):
    """Return the coefficient of variation of `in_array`.

    This is the standard deviation (``np.std``, population form) divided by
    the mean of the values.
    """
    return np.std(in_array) / np.mean(in_array)

In [None]:
# Coefficient of variation of the peak rates of the example unit.
field_cv = find_cv(rate_map_maxima_value)
field_cv

## Verify rate maps

To manually verify that the rate maps look reasonable, we plot each one:

In [None]:
def plot_maxima(row, session_id):
    """Plot rate map, accepted fields and autocorrelation for one table row.

    Parameters
    ----------
    row : mapping
        Table row providing the action id under `session_id` as well as the
        'group' and 'unit' entries.
    session_id : str
        Name of the column that holds the action id, e.g. 'session_1'.

    The function prints the estimated field radius and how many of the
    detected maxima were removed by the too-close filter, then shows a figure
    with three panels: rate map, rate map with fields, autocorrelation.
    """
    action_id = row[session_id]
    group = row['group']
    unit = row['unit']
    x, y, t, speed, sptr = load_data(action_id, group, unit)
    rate_map = calculate_rate_map(x, y, t, speed, sptr)
    autocorrelation = calculate_autocorrelation(rate_map)
    autocorrelation_maxima = find_maxima(autocorrelation)
    radius = place_field_radius(autocorrelation, autocorrelation_maxima)
    rate_map_maxima = find_maxima(rate_map)
    rate_map_maxima_filtered = too_close_removed(rate_map, rate_map_maxima, radius)

    maxima_count = len(rate_map_maxima)
    removed_count = maxima_count - len(rate_map_maxima_filtered)

    print("Radius is {}".format(radius))

    # "X of Y": Y must be the number of maxima found, not the number left.
    print("Filtered {} of {} maxima".format(removed_count, maxima_count))

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(rate_map.T, origin="lower")

    plt.subplot(1, 3, 2)
    plot_fields(rate_map, rate_map_maxima_filtered, radius)

    plt.subplot(1, 3, 3)
    plt.imshow(autocorrelation.T, origin="lower")
    plt.show()

# Plot every unit of the table once per session. `apply` is used only for
# its side effect (the figures); its return value is discarded.
print("Session 1:")

table.apply(lambda row: plot_maxima(row, 'session_1'), axis=1)

print("Session 2:")

table.apply(lambda row: plot_maxima(row, 'session_2'), axis=1)

print("Done!")

In [None]:
def row_cv(row, session_id):
    """Return the coefficient of variation of the field peak rates of a row.

    The unit is identified by ``row[session_id]``, ``row['group']`` and
    ``row['unit']``. Its rate map is computed, the field radius is estimated
    from the autocorrelation, too-close maxima are removed and the CV of the
    rates at the remaining maxima is returned.
    """
    unit_data = load_data(row[session_id], row['group'], row['unit'])
    rate_map = calculate_rate_map(*unit_data)

    # Field radius from the autocorrelation of the rate map.
    autocorrelation = calculate_autocorrelation(rate_map)
    radius = place_field_radius(autocorrelation, find_maxima(autocorrelation))

    # Peaks of the rate map that survive the too-close filter.
    field_peaks = too_close_removed(rate_map, find_maxima(rate_map), radius)
    peak_rates = rate_map[tuple(field_peaks.T)]
    return find_cv(peak_rates)

# Field-rate CV of every unit, once for each of the two related sessions.
table['cv_1'] = table.apply(lambda row: row_cv(row, 'session_1'), axis=1)
table['cv_2'] = table.apply(lambda row: row_cv(row, 'session_2'), axis=1)

In [None]:
# Change in CV from the first to the second session (second minus first).
table['cv_diff'] = table['cv_2'] - table['cv_1']
table.head()

In [None]:
from scipy.stats import ttest_ind

# Split the units by the 'Control' column: 1 is used for the control group
# and 0 for the group named `chabc` here.
control = table[table['Control'] == 1]
chabc = table[table['Control'] == 0]

# Mean, standard deviation and independent two-sample t-test of the CV in
# the first session.
print(control['cv_1'].mean(), control['cv_1'].std())
print(chabc['cv_1'].mean(), chabc['cv_1'].std())
print(ttest_ind(control['cv_1'], chabc['cv_1']))

# The same statistics for the change in CV between the sessions.
print(control['cv_diff'].mean(), control['cv_diff'].std())
print(chabc['cv_diff'].mean(), chabc['cv_diff'].std())
print(ttest_ind(control['cv_diff'], chabc['cv_diff']))

## Calculating the coefficient of variation (CV) for the first and second half of one session

In [None]:
def cv_first_vs_second_half(row, session_id):
    """Return the field-rate CV of the first and of the second half of a session.

    The fields (peak positions) are detected on the rate map of the whole
    session. The rates at those positions are then read from two rate maps,
    one built from the first half of the position samples and one from the
    second half.

    Parameters
    ----------
    row : mapping
        Table row providing the action id under `session_id` as well as the
        'group' and 'unit' entries.
    session_id : str
        Name of the column that holds the action id, e.g. 'session_1'.

    Returns
    -------
    tuple
        ``(field_cv1, field_cv2)``: the CV for the first and second half.
    """
    action_id = row[session_id]
    group = row['group']
    unit = row['unit']
    x, y, t, speed, sptr = load_data(action_id, group, unit)

    # All position arrays are cut at the same sample index.
    midpoint = int(len(x) / 2)

    def split(arr):
        return arr[:midpoint], arr[midpoint:]

    x1, x2 = split(x)
    y1, y2 = split(y)
    t1, t2 = split(t)
    speed1, speed2 = split(speed)

    # The full spike train is passed for both halves; only the position
    # samples are split.
    rate_map = calculate_rate_map(x, y, t, speed, sptr)
    rate_map1 = calculate_rate_map(x1, y1, t1, speed1, sptr)
    rate_map2 = calculate_rate_map(x2, y2, t2, speed2, sptr)

    # NOTE: Ismakov et al. uses only the first half to find the "zone mat"
    autocorrelation = calculate_autocorrelation(rate_map)
    autocorrelation_maxima = find_maxima(autocorrelation)
    radius = place_field_radius(autocorrelation, autocorrelation_maxima)
    rate_map_maxima = find_maxima(rate_map)
    rate_map_maxima_filtered = too_close_removed(rate_map, rate_map_maxima, radius)

    maxima_count = len(rate_map_maxima)
    removed_count = maxima_count - len(rate_map_maxima_filtered)

    print("Radius is {}".format(radius))

    # "X of Y": Y must be the number of maxima found, not the number left.
    print("Filtered {} of {} maxima".format(removed_count, maxima_count))

    rate_map_maxima_value1 = rate_map1[tuple(rate_map_maxima_filtered.T)]
    rate_map_maxima_value2 = rate_map2[tuple(rate_map_maxima_filtered.T)]
    field_cv1 = find_cv(rate_map_maxima_value1)
    field_cv2 = find_cv(rate_map_maxima_value2)

    return field_cv1, field_cv2
    
# Try the function on the example unit; `list` turns the returned pair into
# a list for display.
list(cv_first_vs_second_half(first_row, "session_1"))

In [None]:
# Wrapping the returned pair in a pd.Series makes `apply` produce a frame
# with columns 0 (first half) and 1 (second half).
first_vs_second = table.apply(lambda row: pd.Series(cv_first_vs_second_half(row, 'session_1')), axis=1)
table['cv_1_first'], table['cv_1_second'] = first_vs_second[0], first_vs_second[1]

In [None]:
# Change in CV within session 1 (second half minus first half).
table["cv_1_first_second_diff"] = table["cv_1_second"] - table["cv_1_first"]

# Re-split the groups so that they contain the new column.
control = table[table['Control'] == 1]
chabc = table[table['Control'] == 0]

# Mean, standard deviation and independent two-sample t-test per group.
print(control['cv_1_first_second_diff'].mean(), control['cv_1_first_second_diff'].std())
print(chabc['cv_1_first_second_diff'].mean(), chabc['cv_1_first_second_diff'].std())
print(ttest_ind(control['cv_1_first_second_diff'], chabc['cv_1_first_second_diff']))

# Plot the firing rates against each other and calculate r 

In [None]:
def r_first_vs_second_half(row, session_id):
    """Correlate the field rates of the first half of a session with the second.

    The fields (peak positions) are detected on the rate map of the whole
    session. The rates at those positions are read from the rate maps of the
    first and of the second half of the position samples, plotted against
    each other together with a fitted straight line, and correlated.

    Parameters
    ----------
    row : mapping
        Table row providing the action id under `session_id` as well as the
        'group' and 'unit' entries.
    session_id : str
        Name of the column that holds the action id, e.g. 'session_1'.

    Returns
    -------
    float
        Pearson correlation coefficient r between the field rates of the two
        halves (NaN when it is undefined, e.g. for constant rates).
    """
    action_id = row[session_id]
    group = row['group']
    unit = row['unit']
    x, y, t, speed, sptr = load_data(action_id, group, unit)

    # All position arrays are cut at the same sample index.
    midpoint = int(len(x) / 2)

    def split(arr):
        return arr[:midpoint], arr[midpoint:]

    x1, x2 = split(x)
    y1, y2 = split(y)
    t1, t2 = split(t)
    speed1, speed2 = split(speed)

    # The full spike train is passed for both halves; only the position
    # samples are split.
    rate_map = calculate_rate_map(x, y, t, speed, sptr)
    rate_map1 = calculate_rate_map(x1, y1, t1, speed1, sptr)
    rate_map2 = calculate_rate_map(x2, y2, t2, speed2, sptr)

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 3, 1)
    plt.title("All")
    plt.imshow(rate_map.T, origin="lower")
    plt.subplot(1, 3, 2)
    plt.title("First half")
    plt.imshow(rate_map1.T, origin="lower")
    plt.subplot(1, 3, 3)
    plt.title("Second half")
    plt.imshow(rate_map2.T, origin="lower")

    # NOTE: Ismakov et al. uses only the first half to find the "zone mat"
    autocorrelation = calculate_autocorrelation(rate_map)
    autocorrelation_maxima = find_maxima(autocorrelation)
    radius = place_field_radius(autocorrelation, autocorrelation_maxima)
    rate_map_maxima = find_maxima(rate_map)
    rate_map_maxima_filtered = too_close_removed(rate_map, rate_map_maxima, radius)
    #rate_map_maxima_filtered = too_small_removed(rate_map, rate_map_maxima_filtered)

    maxima_count = len(rate_map_maxima)
    removed_count = maxima_count - len(rate_map_maxima_filtered)

    print("Radius is {}".format(radius))

    # "X of Y": Y must be the number of maxima found, not the number left.
    print("Filtered {} of {} maxima".format(removed_count, maxima_count))

    rate_map_maxima_value1 = rate_map1[tuple(rate_map_maxima_filtered.T)]
    rate_map_maxima_value2 = rate_map2[tuple(rate_map_maxima_filtered.T)]

    # The straight-line fit is only used for drawing.
    reg = np.polyfit(rate_map_maxima_value1, rate_map_maxima_value2, 1)
    p = np.poly1d(reg)

    fit_x = np.linspace(0, rate_map_maxima_value1.max(), 10)

    plt.figure()
    plt.scatter(rate_map_maxima_value1, rate_map_maxima_value2)
    plt.plot(fit_x, p(fit_x))
    plt.show()

    # `p[1]` is the SLOPE of the fitted line, which depends on the scale of
    # the rates; the statistic analysed afterwards is the correlation
    # coefficient r, so compute that explicitly.
    return np.corrcoef(rate_map_maxima_value1, rate_map_maxima_value2)[0, 1]
    
# Try the function on the example unit; the returned value is displayed.
r_first_vs_second_half(first_row, "session_1")

In [None]:
# First-half versus second-half value of every unit, for each of the two
# related sessions. Every call also draws its figures.
table['r_first_vs_second_1'] = table.apply(lambda row: r_first_vs_second_half(row, 'session_1'), axis=1)
table['r_first_vs_second_2'] = table.apply(lambda row: r_first_vs_second_half(row, 'session_2'), axis=1)

## Is the r-value different from zero

If yes, then there is a stable pattern in the firing rate of the fields.

In [None]:
# Re-split the groups so that they contain the new columns.
control = table[table['Control'] == 1]
chabc = table[table['Control'] == 0]

# Combine data from all sessions
# TODO this is cumbersome because we use relate_sessions - we could make this cleaner
# The values of both sessions are stacked into one Series per group.
control_r = pd.concat([control["r_first_vs_second_1"],
                      control["r_first_vs_second_2"]])

chabc_r = pd.concat([chabc["r_first_vs_second_1"],
                    chabc["r_first_vs_second_2"]])

In [None]:
# `ttest_1samp` is not imported anywhere else in this notebook, so import it
# here to keep the cell runnable on a fresh kernel.
from scipy.stats import ttest_1samp

# One-sample t-tests: is the mean of each group's values different from zero?
print(ttest_1samp(control_r, 0.0))
print(ttest_1samp(chabc_r, 0.0))

## Are the r-values for the groups significantly different?

In [None]:
# Group means of the pooled values ...
print("r_first_vs_second control mean", control_r.mean())
print("r_first_vs_second chabc mean", chabc_r.mean())

# ... and an independent two-sample t-test between the groups.
print(ttest_ind(control_r, chabc_r))

# Calculate r across sessions

In [None]:
def r_two_sessions(row, session_id_1, session_id_2):
    """Correlate the field rates of one unit between two sessions.

    The fields (peak positions) are detected on the rate map of the first
    session. The rates at those positions are read from the rate maps of
    both sessions, plotted against each other together with a fitted
    straight line, and correlated.

    Parameters
    ----------
    row : mapping
        Table row providing the two action ids as well as the 'group' and
        'unit' entries.
    session_id_1, session_id_2 : str
        Names of the columns that hold the two action ids.

    Returns
    -------
    float
        Pearson correlation coefficient r between the field rates of the two
        sessions (NaN when it is undefined, e.g. for constant rates).
    """
    action_id_1 = row[session_id_1]
    action_id_2 = row[session_id_2]
    group = row['group']
    unit = row['unit']

    x1, y1, t1, speed1, sptr1 = load_data(action_id_1, group, unit)
    x2, y2, t2, speed2, sptr2 = load_data(action_id_2, group, unit)

    rate_map1 = calculate_rate_map(x1, y1, t1, speed1, sptr1)
    rate_map2 = calculate_rate_map(x2, y2, t2, speed2, sptr2)

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 3, 1)
    plt.title("First session")
    plt.imshow(rate_map1.T, origin="lower")
    plt.subplot(1, 3, 2)
    plt.title("Second session")
    plt.imshow(rate_map2.T, origin="lower")

    # Field positions and radius come from the first session only.
    autocorrelation = calculate_autocorrelation(rate_map1)
    autocorrelation_maxima = find_maxima(autocorrelation)
    radius = place_field_radius(autocorrelation, autocorrelation_maxima)
    rate_map_maxima = find_maxima(rate_map1)
    rate_map_maxima_filtered = too_close_removed(rate_map1, rate_map_maxima, radius)
    #rate_map_maxima_filtered = too_small_removed(rate_map1, rate_map_maxima_filtered)

    maxima_count = len(rate_map_maxima)
    removed_count = maxima_count - len(rate_map_maxima_filtered)

    print("Radius is {}".format(radius))

    # "X of Y": Y must be the number of maxima found, not the number left.
    print("Filtered {} of {} maxima".format(removed_count, maxima_count))

    rate_map_maxima_value1 = rate_map1[tuple(rate_map_maxima_filtered.T)]
    rate_map_maxima_value2 = rate_map2[tuple(rate_map_maxima_filtered.T)]

    # The straight-line fit is only used for drawing.
    reg = np.polyfit(rate_map_maxima_value1, rate_map_maxima_value2, 1)
    p = np.poly1d(reg)

    fit_x = np.linspace(0, rate_map_maxima_value1.max(), 10)

    plt.figure()
    plt.scatter(rate_map_maxima_value1, rate_map_maxima_value2)
    plt.plot(fit_x, p(fit_x))
    plt.show()

    # `p[1]` is the SLOPE of the fitted line, which depends on the scale of
    # the rates; the statistic analysed afterwards is the correlation
    # coefficient r, so compute that explicitly.
    return np.corrcoef(rate_map_maxima_value1, rate_map_maxima_value2)[0, 1]
    
# Try the function on the example unit; the returned value is displayed.
r_two_sessions(first_row, "session_1", "session_2")

In [None]:
# Session-1 versus session-2 value of every unit. Every call also draws its
# figures.
table['r_session_1_vs_session_2'] = table.apply(lambda row: r_two_sessions(row, 'session_1', "session_2"), axis=1)

# Are the r-values different?

In [None]:
# Re-split the groups so that they contain the new column.
control = table[table['Control'] == 1]
chabc = table[table['Control'] == 0]

# Group means ...
print("r_session_1_vs_session_2 control mean", control['r_session_1_vs_session_2'].mean())
print("r_session_1_vs_session_2 chabc mean", chabc['r_session_1_vs_session_2'].mean())

# ... and an independent two-sample t-test between the groups.
print(ttest_ind(control['r_session_1_vs_session_2'], chabc['r_session_1_vs_session_2']))

# Store results from table as CSV

In [None]:

# TODO make this an expipe function
# Build the data directory from the action's own id (as is done for the
# relate_sessions action) instead of repeating the action name as a literal,
# so the path cannot drift from the action that registers the files.
output_path = pathlib.Path(project_path) / "actions" / field_variability.id / "data"
output_path.mkdir(parents=True, exist_ok=True)

# Register the file name on the action, then write the table next to it.
field_variability.data["results"] = "results.csv"
table.to_csv(output_path / "results.csv")

# Store this notebook to action

In [None]:
# Register the notebook file name on the action and copy the notebook from
# the current working directory into the action's data directory.
field_variability.data["notebook"] = "field_variability.ipynb"
shutil.copy("field_variability.ipynb", output_path / "field_variability.ipynb")

In [None]:
# As HTML
import subprocess

# Run nbconvert without a shell and with check=True, so that a failed
# conversion raises CalledProcessError here instead of being ignored and
# surfacing later as a missing (or stale) HTML file.
subprocess.run(
    ["jupyter", "nbconvert", "--to", "html", "field_variability.ipynb"],
    check=True,
)
field_variability.data["html"] = "field_variability.html"
shutil.copy("field_variability.html", output_path / "field_variability.html")