This script takes an `xarray` Dataset that was generated from the analysis of calcium data and writes it to disk in such a way that it can be read and post-processed by `R` scripts. Those scripts test the statistical significance of the difference in activity between two or more groups, taking into account the ID of the mouse that generated the data. These functions currently don't exist in Python, which is why it had to be done in R.

Both the R scripts and this one are currently tailored to Amit's FMR-WT data, but that should be easy to change.

In [None]:
import pathlib
import itertools
import pickle

import pandas as pd
import numpy as np
import xarray as xr
import seaborn as sns
import matplotlib.pyplot as plt

from calcium_bflow_analysis.single_fov_analysis import filter_da
from calcium_bflow_analysis.dff_analysis_and_plotting import dff_analysis
%load_ext autoreload
%autoreload 2

In [20]:
# Locate the dataset to analyse: the first `*.nc` file that the glob yields
# inside the data folder.
foldername = pathlib.Path('/data/Amit_QNAP/Calcium_FXS/')
fname_glob = '*.nc'
# The pattern is stored in `fname_glob`, and the cell that opens the dataset
# reads `full_fname`, so those are the names used here.
# NOTE: glob order is not guaranteed; with several matching files the pick
# is whichever one the filesystem returns first.
full_fname = next(foldername.glob(fname_glob), None)
if full_fname is None:
    # Fail with a readable message instead of a bare StopIteration.
    raise FileNotFoundError(f'No file matching {fname_glob!r} found in {foldername}')
full_fname

PosixPath('/data/Amit_QNAP/Calcium_FXS/data_of_day_-1.nc')

In [19]:
# Open the dataset stored at `full_fname` with xarray and display its summary.
data = xr.open_dataset(full_fname)
data

In [201]:
# Collect per-mouse summary statistics, organised as
# fxs_wt[genotype][epoch][mouse_id] -> {'mean_dff': ..., 'mean_spike_rate': ...}.
epochs = ('spont', 'stim', 'all')
# Build the empty per-epoch containers from `epochs` so the dictionary layout
# and the loop below can never drift apart.
fxs_wt = {genotype: {epoch: {} for epoch in epochs} for genotype in ('FXS', 'WT')}

for mouse_id, ds in data.groupby('mouse_id'):
    # The genotype label and the frame rate do not depend on the epoch, so read
    # them once per mouse. The first `condition` entry is used as the label for
    # the whole mouse (assumes a single genotype per mouse -- TODO confirm).
    condition = str(ds.condition[0].values)
    fps = ds.attrs['fps']
    for epoch in epochs:
        dff = filter_da(ds, epoch)
        mean_dff = dff_analysis.calc_mean_dff(dff)
        mean_spike_rate = dff_analysis.calc_mean_spike_num(dff, fps=fps, thresh=0.65)
        fxs_wt[condition][epoch][mouse_id] = {
            'mean_dff': mean_dff,
            'mean_spike_rate': mean_spike_rate,
        }


In [175]:
# Base path for the exported files. In the cells of this notebook it is only
# used as a template: `with_suffix('.p')` for the pickle and `with_name(...)`
# for the per-epoch CSVs, so the `.npz` file itself is not written here.
fname = pathlib.Path('/data/Amit_QNAP/Calcium_FXS/fxs_wt.npz')

In [202]:
# Display the nested genotype -> epoch -> mouse ID results dictionary.
fxs_wt

{'FXS': {'spont': {'609': {'mean_dff': array([1.1984913 , 1.27814072, 2.35301033, 1.72591068, 1.42942517,
           1.63253425, 1.88135976, 1.14319508, 1.42789927, 1.45005213,
           1.25828045, 1.88869637, 1.4163228 , 1.47051668, 1.68349042,
           1.6345608 , 1.62031123, 2.24337438, 1.6122621 , 1.15474793,
           0.55756302, 0.94166517, 1.24411082, 3.38995815, 1.9030005 ,
           2.37197476, 1.39155752, 2.00735523, 1.28306155, 0.79262038,
           0.7843376 , 0.8197387 , 0.78912537, 0.83194438, 1.12152467,
           0.93664673, 0.56781512, 0.8078844 , 0.71269489, 0.79474506,
           0.72242878, 0.70052377, 0.90677545, 0.71624239, 0.74587396,
           0.58790062, 0.84314036, 0.84684278, 1.05191455, 1.05450189,
           0.83557566, 0.77336061, 0.63553169, 0.78055313]),
    'mean_spike_rate': array([0.00211111, 0.00022222, 0.00088889, 0.00011111, 0.00011111,
           0.00011111, 0.00011111, 0.00011111, 0.00011111, 0.00011111,
           0.00011111, 0.00011111

In [209]:
# Persist the nested results dictionary as a pickle stored next to `fname`,
# with the suffix swapped to `.p`.
pickle_path = fname.with_suffix('.p')
with pickle_path.open('wb') as pickle_file:
    pickle.dump(fxs_wt, pickle_file)

In [265]:
def _long_format_frames(results):
    """Yield one long-format frame per (genotype, epoch, mouse, measure).

    ``results`` is the nested ``genotype -> epoch -> mouse ID -> measure``
    dictionary. Each yielded frame repeats the four labels for every value of
    the corresponding measurement.
    """
    for genotype, per_epoch in results.items():
        for epoch_name, per_mouse in per_epoch.items():
            for mouse, per_measure in per_mouse.items():
                for measure_name, values in per_measure.items():
                    yield pd.DataFrame(
                        {
                            'Epoch': epoch_name,
                            'Genotype': genotype,
                            'MouseID': mouse,
                            'Measure': measure_name,
                            'Value': values,
                        }
                    )


# Flatten the nested dictionary into a single tidy (long-format) table.
df_list = list(_long_format_frames(fxs_wt))
df = pd.concat(df_list, ignore_index=True)

In [250]:
# Display only the 'WT' branch of the results dictionary.
fxs_wt['WT']

{'spont': {'615': {'mean_dff': array([0.86652432, 1.04746501, 0.65891063, 0.75204882, 0.63740786,
          0.54145751, 0.53046212, 0.76553018, 0.68381389, 0.68396236,
          0.731301  , 0.69493505, 0.77479107, 0.68954809, 0.80602615,
          0.81781175, 0.79308861, 0.7324556 , 0.92064897, 0.82828401,
          0.77740947, 0.62790808, 0.79771544, 0.73353635, 0.58647289,
          0.36444643, 0.61682842, 0.4453686 , 0.52496925, 0.50119385,
          0.50399313, 0.38458234, 0.78378521, 0.5845329 , 0.60334416,
          0.56846633, 0.50157263, 0.61165265, 0.56713114, 0.57348103,
          0.49243984, 0.56992646, 0.53514223, 0.59630779, 0.66820845,
          0.69612597, 0.5889852 , 0.53966793, 0.5651649 , 0.58664563,
          0.51976493, 0.76213265, 0.59286933, 0.70394421, 0.40481861,
          0.4772209 , 0.4795844 , 0.56324443, 0.47365911, 0.357445  ,
          0.51440733, 0.60996927, 0.64220129, 0.48684115, 0.35983359,
          0.3299205 , 0.96814664, 1.01623291, 0.94199342, 1.26

In [267]:
# Write one CSV per (epoch, measure) pair, named
# `epoch_<epoch>_measure_<measure>.csv` and placed next to `fname`. Each file
# keeps only the Genotype, MouseID and Value columns.
measures = ('mean_dff', 'mean_spike_rate')

for epoch, measure in itertools.product(epochs, measures):
    # Use a dedicated name for the filtered rows: binding them to `data` would
    # overwrite the xarray Dataset of the same name that the per-mouse loop
    # groups by, breaking any re-run of that cell.
    subset = df.query(f'Epoch == "{epoch}" and Measure == "{measure}"')
    csv_path = fname.with_name(f'epoch_{epoch}_measure_{measure}.csv')
    subset.loc[:, ['Genotype', 'MouseID', 'Value']].to_csv(csv_path, index=False)

In [268]:
# Display the combined long-format table.
df

Unnamed: 0,Epoch,Genotype,MouseID,Measure,Value
0,spont,FXS,609,mean_dff,1.198491
1,spont,FXS,609,mean_dff,1.278141
2,spont,FXS,609,mean_dff,2.353010
3,spont,FXS,609,mean_dff,1.725911
4,spont,FXS,609,mean_dff,1.429425
...,...,...,...,...,...
3241,all,WT,674,mean_spike_rate,0.003000
3242,all,WT,674,mean_spike_rate,0.000444
3243,all,WT,674,mean_spike_rate,0.000222
3244,all,WT,674,mean_spike_rate,0.000111
