In [None]:
%load_ext autoreload
%autoreload 2

# Dewan Lab Neuron Pseudopopulation Analysis
## Import Dependencies

In [None]:
import os
os.environ['ISX'] = '0'

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

from dewan_calcium.helpers.project_folder import ProjectFolder
from dewan_calcium import classifiers
pd.options.mode.copy_on_write = "warn"

print('Finished importing required libraries!')

## Load Data from Project Folder

In [None]:
# Build the ProjectFolder object that gathers and holds every file path used below
project_folder = ProjectFolder(
    'ODOR',
    project_dir=r'/mnt/r/2_Inscopix/1_DTT/5_Combined/ID/VGLUT',
    combined=True,
)

In [None]:
# First-run setup only: once the project folder has been created for the first time,
# move the files into the appropriate directories and then run this cell.
# On later runs this cell can be skipped.
project_folder.get_data()

## Check that the data file exists

In [None]:
cell_class = 'vglut'


def _first_path_for_class(paths, wanted_class, description):
    """Return the first entry of `paths` whose string form contains `wanted_class`.

    The comparison is case-insensitive and is made against each candidate
    individually, so the entry that is returned is the one that matched.

    Parameters
    ----------
    paths : collection of path-like objects, possibly empty or None
    wanted_class : str
        Cell class that must appear in the path (e.g. 'vglut').
    description : str
        Human-readable name of the file kind, used in the error message.

    Raises
    ------
    FileNotFoundError
        If `paths` is empty or no entry contains `wanted_class`. Previously a
        non-empty collection without a match raised nothing and left an empty
        list behind, which only failed later when the file was read.
    """
    needle = wanted_class.lower()
    for candidate in paths or []:
        if needle in str(candidate).lower():
            return candidate
    raise FileNotFoundError(f'No {description} with class {wanted_class} exists!')


data_file = _first_path_for_class(
    project_folder.raw_data_dir.combined_data_path, cell_class, 'data file'
)
sig_table = _first_path_for_class(
    project_folder.raw_data_dir.combined_sig_table_path, cell_class, 'significance table'
)

## Load and Z-Score Data

In [None]:
def z_score_data(df):
    """Z-score every column of `df` after selecting rows by their level-0 labels.

    The unique values of index level 0 are collected, the rows carrying those
    labels are selected with `.loc`, and `scipy.stats.zscore` is then applied
    column by column (the default axis of `DataFrame.apply`).
    """
    level0_labels = df.index.get_level_values(0).unique()
    selected_rows = df.loc[level0_labels]
    return selected_rows.apply(stats.zscore)

In [None]:
# Read the combined data from the xz-compressed pickle selected above
combined_data = pd.read_pickle(data_file, compression={'method': 'xz'})

# Transpose so the cell/odor labels form the index, group on index level 0 (cell names),
# run z_score_data on each group, then transpose back to the original orientation
grouped_by_cell = combined_data.T.groupby(level=0, group_keys=False)
z_scored_combined_data = grouped_by_cell.apply(z_score_data).T

# Unique level-0 column labels (cell names)
cell_labels = combined_data.columns.get_level_values(0)
cells = np.unique(cell_labels.values)

## SVM Classifier


### Sliding Window Decoding

In [None]:
WINDOW = 5
# Run the sliding-window ensemble decoder on the z-scored data. It returns the mean scores,
# a splits-vs-repeat table, the confusion matrices and a (true, predicted) label pair.
(
    mean_svm_scores,
    splits_v_repeat_df,
    all_confusion_mats,
    (true_labels, pred_labels),
) = classifiers.sliding_window_ensemble_decoding(z_scored_combined_data, window_size=WINDOW, num_splits=1000)

# NOTE(review): the shuffled-decoding cell writes under output_dir.subdir(cell_class) while
# this cell writes directly under output_dir.path -- confirm which location is intended.
output_dir = project_folder.analysis_dir.output_dir.path

mean_score_df = pd.DataFrame(mean_svm_scores, np.arange(len(mean_svm_scores)))
mean_score_df.insert(0, column='num_cells', value=len(cells))

if WINDOW:
    mean_score_df.insert(0, column='window_size', value=WINDOW)
    output_dir = output_dir.joinpath(f'Window-{WINDOW}')
    # parents=True creates missing parent folders; exist_ok=True makes re-runs a no-op
    # without the separate (racy) exists() check
    output_dir.mkdir(parents=True, exist_ok=True)

# Spreadsheet copy of the mean scores, then pickles of every raw decoder output
mean_score_df.to_excel(output_dir.joinpath('mean_svm_scores.xlsx'))
mean_scores_path = output_dir.joinpath('mean_svm_scores.pickle')
pd.to_pickle(mean_svm_scores, mean_scores_path)
splits_path = output_dir.joinpath('splits_v_repeat_df.pickle')
pd.to_pickle(splits_v_repeat_df, splits_path)
all_confusion_mat_path = output_dir.joinpath('all_confusion_mat.pickle')
pd.to_pickle(all_confusion_mats, all_confusion_mat_path)
labels_path = output_dir.joinpath('labels.pickle')
pd.to_pickle((true_labels, pred_labels), labels_path)

### Shuffled Sliding Window Decoding

In [None]:
# NOTE(review): no random seed is set in this notebook; if shuffle_data draws from a global
# RNG the shuffled scores will differ between runs -- confirm whether that is acceptable.
shuffled_data = classifiers.shuffle_data(z_scored_combined_data)

# Same decoder call as for the real data, run on the shuffled data
(
    shuffled_mean_svm_scores,
    shuffled_splits_v_repeat_df,
    shuffled_all_confusion_mats,
    (shuffled_true_labels, shuffled_pred_labels),
) = classifiers.sliding_window_ensemble_decoding(shuffled_data, window_size=WINDOW, num_splits=1000)
output_dir = project_folder.analysis_dir.output_dir.subdir(cell_class)

shuffled_mean_score_df = pd.DataFrame(shuffled_mean_svm_scores, np.arange(len(shuffled_mean_svm_scores)))
shuffled_mean_score_df.insert(0, column='num_cells', value=len(cells))

if WINDOW:
    shuffled_mean_score_df.insert(0, column='window_size', value=WINDOW)
    output_dir = output_dir.joinpath(f'Window-{WINDOW}')
    # parents=True creates missing parent folders; exist_ok=True makes re-runs a no-op
    # without the separate (racy) exists() check
    output_dir.mkdir(parents=True, exist_ok=True)

# Spreadsheet copy of the shuffled mean scores, then pickles of every raw decoder output
shuffled_mean_score_df.to_excel(output_dir.joinpath('shuffle_mean_svm_scores.xlsx'))
shuffled_mean_scores_path = output_dir.joinpath('shuffle_mean_svm_scores.pickle')
pd.to_pickle(shuffled_mean_svm_scores, shuffled_mean_scores_path)
shuffled_splits_path = output_dir.joinpath('shuffle_splits_v_repeat_df.pickle')
pd.to_pickle(shuffled_splits_v_repeat_df, shuffled_splits_path)
shuffled_all_confusion_mat_path = output_dir.joinpath('shuffle_all_confusion_mat.pickle')
pd.to_pickle(shuffled_all_confusion_mats, shuffled_all_confusion_mat_path)
shuffled_labels_path = output_dir.joinpath('shuffle_labels.pickle')
pd.to_pickle((shuffled_true_labels, shuffled_pred_labels), shuffled_labels_path)

### Plot SVM Performance

In [None]:
# Pull the scores out in key order for the real and the shuffled decoder
mean_performance = [mean_svm_scores[window] for window in mean_svm_scores]
shuffle_mean_performance = [shuffled_mean_svm_scores[window] for window in shuffled_mean_svm_scores]

# One x position per score: evenly spaced over [-2, 3.5], then shifted by 0.5
x_vals = 0.5 + np.linspace(-2, 3.5, len(mean_performance), endpoint=True)

fig, ax = plt.subplots()

# Real decoder: thick line with markers; shuffled decoder: thin line
ax.plot(x_vals, mean_performance, color='#04BBC9', linewidth=3, marker='o')
ax.plot(x_vals, shuffle_mean_performance, color='#C500FF', linewidth=1)
ax.set_xticks(x_vals)

# Reference lines: vertical at x=0, horizontal at y=0.15
# NOTE(review): 0.15 is presumably the chance level -- confirm
ax.vlines(x=0, ymin=-1, ymax=1, color='r')
ax.hlines(y=0.15, xmin=-4, xmax=10, color='#FFEC00')

ax.set_ylim([-0.05, 1])
ax.set_xlim([-1.6, 4.1])
ax.set_ylabel('Classifier Performance', fontsize=12)
ax.set_xlabel('Time Relative to Odor Onset (s)', fontsize=12)
fig.suptitle(f'vGLUT Conc SVM Classifier n={len(cells)}', fontsize=18, fontweight='bold')

# Saved into whichever output_dir the most recently run decoding cell left behind
fig.savefig(output_dir.joinpath('vGLUT_Classifier.pdf'), dpi=600)

## Population and Lifetime Sparseness


### Calculate Cell-Odor Means

In [None]:
# Pop Sparseness -> per odor
# Lifetime Sparseness -> per cell
BASELINE_FRAMES = 20  # leading frames averaged as the baseline
ODOR_FRAMES = 20  # frames right after the baseline, averaged as the odor response
POST_FRAMES = 20  # not used in this cell
transposed_data = z_scored_combined_data.T

# Odor labels (index level 1) in order of first appearance; used as the row order below
odors = transposed_data.index.get_level_values(1).unique()

# Per-row (one row per cell/trial) baseline mean and odor-window mean
baseline_means = transposed_data.iloc[:, :BASELINE_FRAMES].mean(axis=1)
odor_window_means = transposed_data.iloc[:, BASELINE_FRAMES: BASELINE_FRAMES + ODOR_FRAMES].mean(axis=1)

# Baseline-subtracted response per row, and the same with negative values set to 0
evoked = odor_window_means.subtract(baseline_means)
rectified = evoked.clip(lower=0)


def _as_odor_by_cell(per_cell_odor_stat):
    """Pivot a (Cells, Trials)-indexed Series into an odor-by-cell DataFrame.

    Rows are put in the order of `odors`. Values are aligned by odor label rather
    than by position, so the table stays correct when the data are not sorted by
    odor; a cell that lacks an odor gets NaN instead of breaking the construction.
    """
    return per_cell_odor_stat.unstack('Cells').reindex(odors).rename_axis(columns=None)


rectified_by_cell_odor = rectified.groupby(level=['Cells', 'Trials'])
cell_means = _as_odor_by_cell(rectified_by_cell_odor.mean())
cell_medians = _as_odor_by_cell(rectified_by_cell_odor.median())
# Medians of the un-rectified responses (negative values kept)
nonzero_cell_medians = _as_odor_by_cell(evoked.groupby(level=['Cells', 'Trials']).median())

## Population Sparseness

In [None]:
# NOTE(review): the other imports in this notebook use `dewan_calcium.<module>`;
# confirm that the doubled `dewan_calcium.dewan_calcium` package path is intended.
from dewan_calcium.dewan_calcium.stats import sparseness

# Population sparseness: one value per odor. Each row of cell_medians holds one odor's
# values across all cells; sparseness() receives the number of cells and those values.
# (The former `num_odors` line measured the still-empty result dict, so it was always 0
# and never read; it has been dropped.)
pop_sparseness_values = {
    odor_name: sparseness(odor_row.shape[0], odor_row.values)
    for odor_name, odor_row in cell_medians.iterrows()
}

# Single-row table with one column per odor
pop_sparseness_values = pd.DataFrame(pop_sparseness_values, index=[0])

## Lifetime Sparseness

In [None]:
# Lifetime sparseness: one value per cell. Each column of cell_medians holds one cell's
# values across all odors; sparseness() receives the number of odors and those values.
# The result is a single-row table with one column per cell.
lifetime_sparseness_values = pd.DataFrame(
    {
        cell_name: sparseness(cell_column.shape[0], cell_column.values)
        for cell_name, cell_column in cell_medians.items()
    },
    index=[0],
)

## Write to File

In [None]:
sparseness_path = project_folder.analysis_dir.output_dir.path.joinpath('sparseness.xlsx')

# One workbook, one sheet per table; the context manager saves and closes the file
with pd.ExcelWriter(sparseness_path, engine='xlsxwriter') as writer:
    # `index` is a boolean flag (write the row labels or not); the previous value `[0]`
    # only worked because a non-empty list is truthy
    lifetime_sparseness_values.to_excel(writer, sheet_name='Lifetime Sparseness', index=True)
    pop_sparseness_values.to_excel(writer, sheet_name='Population Sparseness')
    cell_means.to_excel(writer, sheet_name='Cell Means')
    cell_medians.to_excel(writer, sheet_name='Cell Medians')

## Correlations

In [None]:
import itertools

# Unique labels from index level 1 of the transposed data, sorted
odors = np.sort(transposed_data.index.get_level_values(1).unique())
# Every ordered pair of two different positions in `odors`
perms = [*itertools.permutations(odors, 2)]

In [None]:
def shuffle_df(df, rng=None):
    """Return the rows of `df` in a random order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose rows are reordered; it is not modified.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global legacy RNG is used
        (the original behaviour, controllable with `np.random.seed`). Passing a
        seeded generator makes the shuffle reproducible without touching global
        state.

    Returns
    -------
    pandas.DataFrame
        The same rows as `df`, selected positionally in shuffled order.
    """
    row_order = np.arange(df.shape[0])
    if rng is None:
        np.random.shuffle(row_order)
    else:
        rng.shuffle(row_order)
    return df.iloc[row_order]

In [None]:
from scipy.stats import pearsonr

# Group the rows of the transposed data by 'Trials'
odor_data = transposed_data.groupby('Trials')

# NOTE(review): exploratory cell -- the unconditional `break` at the bottom of the loop
# means only the first pair in `perms` is ever processed.
for odor1, odor2 in perms:
    odor_1_data = odor_data.get_group(odor1)
    odor_2_data = odor_data.get_group(odor2)

    # Replace each group's index with its level-1 values only
    odor_1_index = odor_1_data.index.get_level_values(1)
    odor_1_data.index = odor_1_index
    odor_2_index = odor_2_data.index.get_level_values(1)
    odor_2_data.index = odor_2_index

    # Shuffle the row order of each group independently (uses the global NumPy RNG)
    odor_1_data = shuffle_df(odor_1_data)
    odor_2_data = shuffle_df(odor_2_data)
    shapes = (odor_1_data.shape[0], odor_2_data.shape[0])
    smallest_shape = min(shapes)

    # Truncate both groups to the row count of the smaller one
    odor_1_data = odor_1_data[:smallest_shape]
    odor_2_data = odor_2_data[:smallest_shape]

    print(odor_1_data.shape, odor_2_data.shape)

    # NOTE(review): both arguments are 2-D DataFrames; how pearsonr treats 2-D input
    # depends on the installed SciPy version -- confirm this call does what is intended
    pearson_result = pearsonr(odor_1_data, odor_2_data)
    print(pearson_result.statistic, pearson_result.pvalue)

    break

In [None]:
# Display the per-odor, per-cell medians that were taken before negative values were zeroed
nonzero_cell_medians

In [None]:
sorted_odors = np.sort(odors)

# Odor-by-odor table of Pearson correlations; every entry starts as NaN
correlations = pd.DataFrame(dtype=float, index=sorted_odors, columns=sorted_odors)  # Explicitly set dtype to float

# Off-diagonal entries: correlate the two odors' per-cell median responses
for odor1, odor2 in perms:
    odor1_medians = cell_medians.loc[odor1]
    odor2_medians = cell_medians.loc[odor2]

    pearson_result = pearsonr(odor1_medians, odor2_medians)

    correlations.loc[odor1, odor2] = pearson_result.statistic

# Only the diagonal (an odor against itself) is set to 1.0. A blanket fillna(1.0) would
# also turn every undefined off-diagonal correlation (NaN returned by pearsonr) into a
# perfect correlation, so those entries are left as NaN.
for odor in sorted_odors:
    correlations.loc[odor, odor] = 1.0

In [None]:
# Relabel each odor as '<first character>-<rest>' and order the labels by the part after
# the first character (plain string comparison, so '10' sorts before '2').
# The labels are derived from `odors` rather than from `sorted_odors`, which this cell
# overwrites: re-running the cell therefore gives the same result instead of inserting a
# second hyphen. Splitting by position (not on '-') also keeps odor names that already
# contain a hyphen intact, and sorted() is stable, so ties keep their alphabetical order.
sorted_odors = [
    f'{odor[0]}-{odor[1:]}'
    for odor in sorted(np.sort(odors), key=lambda odor: odor[1:])
]

In [None]:
# Display the relabelled, re-ordered odor names produced by the previous cell
sorted_odors