# Export of Data Around Drug Injection

- Export sets of two datasets, one for the acute and chronic experiments 
- Each dataset contains activity for each neuron 10 minutes prior to and 20 minutes following drug challenge
- One dataset contains a timeseries of neuronal activity sampled every second. This includes the raw firing rate as well as zscored firing rate (using the 10 minutes prior to drug admin to map zscores to other time periods) 
- The other dataset contains zscores binned into 3 10-minute bins


Data exported:
- **acute_challenge.csv**
- **acute_challenge_binned.csv**
- **chronic_challenge.csv**
- **chronic_challenge_binned.csv**

In [1]:
from ephys_queries import select_ifr
from ephys_queries import db_setup_core
import dotenv

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path 

In [3]:
from spiketimes.df import zscore_normalise_by_neuron
from spiketimes.df import bin_df

In [4]:
dotenv.load_dotenv()
engine, metadata = db_setup_core()

In [5]:
data_dir = Path(".").absolute().parent / "data"
df_clusters = pd.read_csv(data_dir / "baseline.csv")

## Acute Data

#### Select Raw Data

In [6]:
group_names = [
    "acute_citalopram", 
    "acute_saline", 
    "acute_cit", 
    "acute_sal"
]

block_name = "chal"
t_before = 600

ifr_acute = select_ifr(engine, metadata, 
                 block_name=block_name, 
                 group_names=group_names, 
                 align_to_block=True, 
                 t_before=t_before)

#### Zscore IFR to 10 mins Pre-Challenge

In [7]:
dfz_acute = zscore_normalise_by_neuron(ifr_acute, 
                                 timepoint_cutoff_max=0, 
                                 col_to_act_on="ifr", 
                                 timepoint_colname="timepoint_s")


#### Bin Data into 10-Minute Bins

In [8]:
bins = np.arange(-600, 1201, 599)

df_binned_acute = (
    bin_df(dfz_acute, colname="timepoint_s", bins=bins, bin_val_name="bin")
    .groupby(["neuron_id", "bin"])
    .apply(lambda x: np.nanmean(x["zscore"]))
    .unstack()
    .reset_index()
    .melt(id_vars="neuron_id", value_name="zscore")
    .sort_values(by=["neuron_id", "bin"])
    .reset_index(drop=True)
)
df_binned_acute.head()

  


Unnamed: 0,neuron_id,bin,zscore
0,1011,-600,0.002392
1,1011,-1,-0.218167
2,1011,598,-1.148725
3,1011,1197,-4.909319
4,1013,-600,-0.003044


#### Inspect for Systematic Missing Values

In [9]:
df_binned_wide_acute = df_binned_acute.pivot(
                    columns="bin", 
                    index="neuron_id", 
                    values="zscore"
                    ).reset_index()
df_binned_wide_acute.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 386 entries, 0 to 385
Data columns (total 5 columns):
neuron_id    386 non-null int64
-600         384 non-null float64
-1           384 non-null float64
598          384 non-null float64
1197         384 non-null float64
dtypes: float64(4), int64(1)
memory usage: 15.2 KB


#### Merge with Cluster Labels 

In [10]:
df_binned_acute = pd.merge(df_clusters[["neuron_id", "cluster", "group_name"]], df_binned_acute)
dfz_acute = pd.merge(df_clusters[["neuron_id", "cluster"]], dfz_acute)

#### Save Data

In [11]:
df_binned_acute.to_csv(data_dir / "acute_challenge_binned.csv", index=False)
dfz_acute.to_csv(data_dir / "acute_challenge_ts.csv", index=False)

## Chronic Data

#### Select Raw Data

In [12]:
group_names = ["citalopram_continuation", "chronic_saline", "citalopram_discontinuation", 
               "chronic_citalopram", "chronic_saline_"]

ifr_chronic = select_ifr(engine, metadata, 
                 block_name=block_name, 
                 group_names=group_names, 
                 align_to_block=True, 
                 t_before=t_before)

#### Zscore IFR to 10 mins Pre-Challenge

In [13]:
dfz_chronic = zscore_normalise_by_neuron(ifr_chronic, 
                                 timepoint_cutoff_max=0, 
                                 col_to_act_on="ifr", 
                                 timepoint_colname="timepoint_s")

#### Bin Data into 10-Minute Bins

In [14]:
df_binned_chronic = (
    bin_df(dfz_chronic, colname="timepoint_s", bins=bins, bin_val_name="bin")
    .groupby(["neuron_id", "bin"])
    .apply(lambda x: np.nanmean(x["zscore"]))
    .unstack()
    .reset_index()
    .melt(id_vars="neuron_id", value_name="zscore")
    .sort_values(by=["neuron_id", "bin"])
    .reset_index(drop=True)
)
df_binned_chronic.head()

  after removing the cwd from sys.path.


Unnamed: 0,neuron_id,bin,zscore
0,1069,-600,-0.009013
1,1069,-1,14.218447
2,1069,598,-0.502549
3,1069,1197,26.965464
4,1070,-600,-0.001864


#### Inspect for Systematic Missing Values

In [15]:
df_binned_wide_chronic = df_binned_chronic.pivot(
                    columns="bin", 
                    index="neuron_id", 
                    values="zscore"
                    ).reset_index()
df_binned_wide_chronic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660 entries, 0 to 659
Data columns (total 5 columns):
neuron_id    660 non-null int64
-600         656 non-null float64
-1           658 non-null float64
598          658 non-null float64
1197         658 non-null float64
dtypes: float64(4), int64(1)
memory usage: 25.9 KB


#### Merge with Cluster Labels 

In [16]:
df_binned_chronic = pd.merge(df_clusters[["neuron_id", "cluster", "group_name"]], df_binned_chronic)
dfz_chronic = pd.merge(df_clusters[["neuron_id", "cluster"]], dfz_chronic)

#### Save Data

In [17]:
df_binned_chronic.to_csv(data_dir / "chronic_challenge_binned.csv", index=False)
dfz_chronic.to_csv(data_dir / "chronic_challenge_ts.csv", index=False)