# Dewan Lab Neuron Pseudopopulation Analysis
## Import Dependencies

In [None]:
import itertools
import os
os.environ['ISX'] = '0'

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats, signal
from sklearn import metrics

from dewan_calcium import classifiers, plotting
from dewan_calcium import stats as dewan_stats
from dewan_calcium.helpers import IO
from dewan_calcium.helpers.project_folder import ProjectFolder

pd.options.mode.copy_on_write = "warn"

print('Finished importing required libraries!')

## Load Data from Project Folder

In [None]:
# Create Project Folder to Gather and Hold all the File Paths
# NOTE(review): project_dir is a hard-coded absolute path; point it at your own copy of the combined data before running.
project_folder = ProjectFolder('ODOR', project_dir='/mnt/r2d2/2_Inscopix/1_DTT/5_Combined/ID/VGLUT_Comb', combined=True)

In [None]:
# First run only: after the project folder has been created, move the input files into the
# appropriate directories and then run this cell. On later runs this cell can be skipped.
project_folder.get_data()

## Configs


In [None]:
# Variable the analysis is organised around; checked against VARS in the validation cell
ANALYSIS_VARIABLE = 'odor' # Either 'odor', 'class' or 'block'

# Cell population to analyze; matched case-insensitively against the combined data / significance table paths
CELL_CLASS = 'vglut' # 'vglut', 'vgat' or 'oxtr' (see CLASSES)

WINDOW = 2 # Window size for the moving-window SVM decoder; set to None for no window and to consider all data at once
NUM_SVM_SPLITS = 20  # Number of random test-train splits to run and average per SVM run

# Values used in the combine-data.py standalone script to define the sizes of the different data periods
BASELINE_FRAMES = 20  # Also the number of leading frames averaged as the baseline (f0) in the dF/F step
ODOR_FRAMES = 20
POST_FRAMES = 20

# When False, the per-odor SVM loop closes each figure after plotting instead of calling plt.show()
SHOW_FIGURES = False

## Constants

In [None]:
## ============================ CONSTANTS ============================ ##
# Accepted values for the ANALYSIS_VARIABLE and CELL_CLASS settings
VARS = ['odor', 'class', 'block']
CLASSES = ['vglut', 'vgat', 'oxtr']

# Odor classes, plus the dilution suffixes that are appended to '5<CLASS>' to form each odor label
ODOR_BASE = ['ATE', 'AL', 'AMINE', 'OL', 'ONE']
_DILUTIONS = ('1', '10', '100', '1000')

# Odor label -> odor class (e.g. '5ATE10' -> 'ATE'), ordered class by class following ODOR_BASE
CONC_ORDER = {
    f'5{odor_class}{dilution}': odor_class
    for odor_class in ODOR_BASE
    for dilution in _DILUTIONS
}

# Odor class -> its odor labels in ascending dilution-suffix order
SEPARATE_CONC_ORDER = {
    odor_class: [f'5{odor_class}{dilution}' for dilution in _DILUTIONS]
    for odor_class in ('AL', 'ATE', 'AMINE', 'OL', 'ONE')
}

# Pair of column levels associated with each analysis variable. Per the original notes these are
# the label levels to drop so that only the chosen variable's labels remain.
# NOTE(review): not referenced anywhere in this section; confirm the level numbering where it is used.
LEVEL_MAP = dict([
    ('class', (1, 2)),
    ('odor', (2, 3)),
    ('block', (1, 3)),
])

# Named (start, stop) ranges used when averaging confusion matrices.
# NOTE(review): the bounds look like frame indices matching the 20/20/20 period sizes in the
# config cell, but they are literals here, so confirm they still agree if those sizes change.
CM_WINDOWS = dict(
    Baseline=(0, 20),
    Odor_Period=(20, 40),
    Latent_Period=(40, 60),
    Odor_and_Latent=(20, 60),
)

## VALIDATE INPUTS
# Check each user-editable setting against its accepted type(s) and values/range before any
# data is loaded. IO.verify_input is a project helper; presumably it reports or raises on a
# bad value (TODO confirm).

IO.verify_input('ANALYSIS_VARIABLE', ANALYSIS_VARIABLE, [str], allowed_values=VARS)
IO.verify_input('CELL_CLASS', CELL_CLASS, [str], allowed_values=CLASSES)
# WINDOW = None is the documented "no window" setting (and later cells test `if WINDOW:`),
# so only a real window size is type- and range-checked.
if WINDOW is not None:
    IO.verify_input('WINDOW', WINDOW, [int], allowed_range=(1, 20))
IO.verify_input('NUM_SVM_SPLITS', NUM_SVM_SPLITS, [int], allowed_range=(1, 100), inclusive=True)

## Select Labels

# Later cells look up every odor's class through `_classes` and each class's odor labels
# through `_separate_labels` no matter which analysis variable is selected, so bind both
# unconditionally instead of only in the 'odor' branch (which left them undefined otherwise).
_classes = CONC_ORDER
_separate_labels = SEPARATE_CONC_ORDER

# `_exp_type` is a descriptor passed to the plotting helpers; `_labels` are the decoder's class labels.
if ANALYSIS_VARIABLE == 'odor':
    _exp_type = 'CONC'
    _labels = list(CONC_ORDER.keys())
elif ANALYSIS_VARIABLE == 'class':
    _exp_type = 'Conc-Class'
    _labels = ODOR_BASE
else:  # Blocks
    _exp_type = 'Conc-Blocks'
    _labels = [1, 2, 3]


def _select_class_file(candidate_paths, cell_class, description):
    """Return the first candidate whose path text contains ``cell_class`` (case-insensitive).

    Parameters:
        candidate_paths: sequence of paths (or None/empty when nothing was found).
        cell_class: cell-class name to look for in each path, e.g. 'vglut'.
        description: human-readable name of the file kind, used in the error message.

    Raises:
        FileNotFoundError: when there are no candidates or none of them matches. Previously a
            non-matching list left the variable as an empty list, which only failed later
            with an unrelated error when the file was read.
    """
    needle = cell_class.lower()
    for candidate in candidate_paths or []:
        if needle in str(candidate).lower():
            return candidate
    raise FileNotFoundError(f'No {description} with class {cell_class} exists!')


# Combined data and significance table for the configured cell class
data_file = _select_class_file(project_folder.raw_data_dir.combined_data_path, CELL_CLASS, 'data file')
sig_table = _select_class_file(project_folder.raw_data_dir.combined_sig_table_path, CELL_CLASS, 'significance table')

## Load and Normalize Data

In [None]:
# Load the xz-compressed pickled DataFrame selected above.
# NOTE(review): unpickling can execute arbitrary code; only load files produced by a trusted pipeline.
combined_data = pd.read_pickle(data_file, compression={'method': 'xz'})

In [None]:
# Inner column labels under the first top-level column key; dff() below uses their count.
# Presumably these are the odor labels shared by every cell (TODO confirm).
odors = combined_data[combined_data.columns.get_level_values(0).unique()[0]].columns
def _calc_dff(trial_series):
    """Return ``trial_series`` as a fractional change from its baseline: (F - F0) / F0.

    F0 is the mean of the first BASELINE_FRAMES samples, taken positionally.
    """
    baseline = np.mean(trial_series.iloc[:BASELINE_FRAMES])
    return (trial_series - baseline) / baseline

def dff(cell_df):
    """Apply ``_calc_dff`` to every row of one group's frame and return the transposed result.

    The rows of ``cell_df`` are relabelled with a (cell, position) MultiIndex, where the
    position simply counts the rows 0..n-1, and each row is then passed to ``_calc_dff``.

    Parameters:
        cell_df: frame for a single group as handed over by ``groupby(...).apply``; level 0
            of its index holds the group's key(s).

    Returns:
        DataFrame holding the ``_calc_dff`` output, one column per original row of ``cell_df``.
    """
    cell = cell_df.index.get_level_values(0).unique()
    # Size the new index from the frame itself rather than from the notebook-global `odors`,
    # which a later cell rebinds; the lengths are identical whenever the original code worked.
    new_index = pd.MultiIndex.from_product([cell, np.arange(len(cell_df))], names=['cell', 'odor'])
    # groupby.apply does not support functions that mutate the object they receive, so relabel a
    # new frame instead of assigning to cell_df.index in place.
    relabelled = cell_df.set_axis(new_index, axis=0)
    return relabelled.T.apply(_calc_dff)

# Transpose so the column keys become the row index, run dff() on each level-0 group, then transpose the result.
# NOTE(review): dff_combined is not referenced again in this section; confirm whether later steps should use it.
dff_combined = combined_data.T.groupby(level=0, group_keys=False).apply(dff).T

In [None]:
# # # Transform our dataframe to put the cell/odor/block as the index, group by level=2 (experiment block), apply stats.zscore to each group, transform back
# NOTE(review): `z_scored_combined_data` is read below but is never assigned anywhere in this section, so the
# z-scoring step described above appears to be missing; unless the variable is defined elsewhere this cell
# raises NameError. Restore that step before relying on this notebook.

# Unique level-0 column labels of the loaded data (one entry per cell)
cells = np.unique(combined_data.columns.get_level_values(0).values)

# Extend every column tuple with the class of its odor (looked up in `_classes`) and install the
# result as a three-level column index named Cells / Odor / Class.
new_columns = []
# Rebinds the notebook-level name `odors` to the level-1 column labels
odors = z_scored_combined_data.columns.get_level_values(1)
for i, orig_tuple in enumerate(z_scored_combined_data.columns.values):
    odor_class = _classes[odors[i]]
    new_columns.append(orig_tuple + tuple([odor_class]))
new_index = pd.MultiIndex.from_tuples(new_columns, names=['Cells', 'Odor', 'Class'])
z_scored_combined_data.columns = new_index
# Keep a handle on the labelled columns so the reset cell below can restore them
original_columns = z_scored_combined_data.columns

In [None]:
# Run this cell to reset the dataframe to its original configuration
# z_scored_combined_data.columns = original_columns

## SVM Classifier

### Sliding Window Decoding Directories

In [None]:
## Load/Create output directories

# Root SVM folders: one for saved data, one for figures
svm_output_dir = project_folder.analysis_dir.output_dir.subdir('SVM')
svm_fig_dir = project_folder.analysis_dir.figures_dir.subdir('SVM')

# Nest by window size (only when a window is configured) and then by analysis variable
_subfolders = ([f'Window-{WINDOW}'] if WINDOW else []) + [ANALYSIS_VARIABLE]
for _subfolder in _subfolders:
    svm_output_dir = svm_output_dir.joinpath(_subfolder)
    svm_fig_dir = svm_fig_dir.joinpath(_subfolder)

# Confusion-matrix data and figures live in a 'CM' folder beneath each
cm_data_save_dir = svm_output_dir.joinpath('CM')
cm_figure_save_dir = svm_fig_dir.joinpath('CM')

# Create everything on disk; folders that already exist are left alone
for _directory in (svm_output_dir, svm_fig_dir, cm_data_save_dir, cm_figure_save_dir):
    _directory.mkdir(parents=True, exist_ok=True)

#### While similar to 'class' mode, this will determine the classifier's ability to discriminate between each version of an odor instead of CLASS A v. CLASS B v. CLASS C, etc.


In [None]:
# Run the sliding-window SVM decoder separately within each odor class, on the real data and on a
# shuffled copy, then save and plot the scores and confusion matrices for every class.
# Several names assigned inside the loop (mean_svm_scores, all_confusion_mats, _labels,
# odor_CM_data_dir, odor_CM_fig_dir, ...) are read again by the next cell, where they hold the
# values from the final iteration.
num_cells = len(cells)

# new_columns = z_scored_combined_data.columns.droplevel(2) # Drop block labels
# z_scored_combined_data.columns = new_columns

grouped_by_odor = z_scored_combined_data.T.groupby('Class') # Group by class so each odor is separated

# Per-class result stores, pre-seeded with an empty dict for every class in ODOR_BASE
odor_segmented_svm_results = {odor: {} for odor in ODOR_BASE}
odor_segmented_shuffled_svm_results = {odor: {} for odor in ODOR_BASE}

# `odor` is the group key from the 'Class' level, i.e. an odor class name
for odor, odor_df in grouped_by_odor:
    # Per-class output folders for scores, score figures, CM data and CM figures
    odor_output_dir = svm_output_dir.joinpath(odor)
    odor_svm_fig_dir = svm_fig_dir.joinpath(odor)
    odor_CM_data_dir = cm_data_save_dir.joinpath(odor)
    odor_CM_fig_dir = cm_figure_save_dir.joinpath(odor)
    odor_output_dir.mkdir(parents=True, exist_ok=True)
    odor_svm_fig_dir.mkdir(parents=True, exist_ok=True)
    odor_CM_data_dir.mkdir(parents=True, exist_ok=True)
    odor_CM_fig_dir.mkdir(parents=True, exist_ok=True)

    # Undo the transpose used for grouping so the labels are on the columns again
    odor_df = odor_df.T
    odor_df.columns = odor_df.columns.droplevel(2) # No longer need class names, so drop them
    # Overwrites the notebook-level `_labels` with this class's odor labels
    _labels = _separate_labels[odor]

    # Decode the real data
    mean_svm_scores, splits_v_repeat_df, all_confusion_mats, (true_labels, pred_labels) = classifiers.sliding_window_ensemble_decoding(odor_df, window_size=WINDOW, num_splits=NUM_SVM_SPLITS, class_labels=_labels)
    mean_score_df = classifiers.postprocess(mean_svm_scores, num_cells, window=WINDOW)
    IO.save_SVM_output(odor_output_dir, mean_score_df, mean_svm_scores, splits_v_repeat_df, all_confusion_mats, true_labels, pred_labels)

    # Repeat on shuffled data for comparison; the trailing True passed to save_SVM_output
    # presumably marks the files as shuffled output (TODO confirm against IO.save_SVM_output)
    shuffled_data = classifiers.shuffle_data(odor_df)
    shuffled_mean_svm_scores, shuffled_splits_v_repeat_df, shuffled_all_confusion_mats, (shuffled_true_labels, shuffled_pred_labels) = classifiers.sliding_window_ensemble_decoding(shuffled_data, window_size=WINDOW, num_splits=NUM_SVM_SPLITS, class_labels=_labels)
    shuffled_mean_score_df = classifiers.postprocess(shuffled_mean_svm_scores, num_cells, window=WINDOW)
    IO.save_SVM_output(odor_output_dir, shuffled_mean_score_df, shuffled_mean_svm_scores, shuffled_splits_v_repeat_df, shuffled_all_confusion_mats, shuffled_true_labels, shuffled_pred_labels, True)

    # Preprocess SVM Data
    mean_performance, CI_min, CI_max = classifiers.preprocess_for_plotting(mean_svm_scores, splits_v_repeat_df)
    shuffle_mean_performance, shuffle_CI_min, shuffle_CI_max = classifiers.preprocess_for_plotting(shuffled_mean_svm_scores, shuffled_splits_v_repeat_df)
    CI = (CI_min, CI_max)
    shuffle_CI = (shuffle_CI_min, shuffle_CI_max)

    # Save SVM Performance Data
    # The score keys are treated as two-element items and converted to pairs of plain ints for the saved index
    _index = [(int(item[0]), int(item[1])) for item in list(mean_svm_scores.keys())]
    classifiers.save_svm_data(mean_performance, shuffle_mean_performance, _index, CI, shuffle_CI, odor_output_dir)

    # Keep this class's raw decoder outputs for the across-class averaging cell
    odor_segmented_svm_results[odor] = {
        'mean_svm_scores': mean_svm_scores,
        'splits_v_repeat_df': splits_v_repeat_df,
        'all_confusion_mats': all_confusion_mats,
        'true_labels': true_labels,
        'pred_labels': pred_labels,
    }

    odor_segmented_shuffled_svm_results[odor] = {
        'mean_svm_scores': shuffled_mean_svm_scores,
        'splits_v_repeat_df': shuffled_splits_v_repeat_df,
        'all_confusion_mats': shuffled_all_confusion_mats,
        'true_labels': shuffled_true_labels,
        'pred_labels': shuffled_pred_labels,
    }


    # Plot SVM Performance
    descriptors = (_exp_type, CELL_CLASS, ANALYSIS_VARIABLE, len(cells))
    svm_fig = plotting.plot_svm_performance(mean_performance, shuffle_mean_performance, CI, shuffle_CI, descriptors, odor_svm_fig_dir)

    # Either display the figure or close it
    if SHOW_FIGURES:
        plt.show()
    else:
        plt.close()

    windows = list(all_confusion_mats.keys())
    window_averaged_cms = classifiers.average_CM(all_confusion_mats, windows)

    # Plot and Save average confusion matrices per window
    for window_name in CM_WINDOWS:
        cm_window = CM_WINDOWS[window_name]

        _fig = classifiers.save_and_plot_CM(window_averaged_cms, cm_window, window_name, windows, _labels, odor_CM_data_dir, odor_CM_fig_dir)

        # Figures are only closed here; nothing in this loop calls plt.show()
        if not SHOW_FIGURES:
            plt.close()

In [None]:
# Average the per-class decoding results from the previous cell into one across-class
# performance curve, save it under 'separated', and plot it.
svm_means = []
shuffled_svm_means = []
all_vals = []
shuffled_vals = []
window_keys = None  # Score keys of the first class; reused below as the saved index

avg_svm_output_dir = svm_output_dir.joinpath('separated')
avg_svm_fig_dir = svm_fig_dir.joinpath('separated')
avg_svm_output_dir.mkdir(parents=True, exist_ok=True)
avg_svm_fig_dir.mkdir(parents=True, exist_ok=True)

for odor in odor_segmented_svm_results:
    mean_scores = odor_segmented_svm_results[odor]['mean_svm_scores']
    shuffled_mean_scores = odor_segmented_shuffled_svm_results[odor]['mean_svm_scores']
    # Walk each class's own score dict. The original iterated the keys of `mean_svm_scores`,
    # a variable left over from the final iteration of the previous cell.
    svm_means.append(list(mean_scores.values()))
    shuffled_svm_means.append(list(shuffled_mean_scores.values()))
    if window_keys is None:
        window_keys = list(mean_scores)

    splits_v_repeat_df = odor_segmented_svm_results[odor]['splits_v_repeat_df']
    shuffled_splits_v_repeat_df = odor_segmented_shuffled_svm_results[odor]['splits_v_repeat_df']
    all_vals.append(splits_v_repeat_df)
    shuffled_vals.append(shuffled_splits_v_repeat_df)


# Element-wise mean across classes (axis 0 indexes the classes)
average_svm_performance = np.mean(svm_means, axis=0)
average_shuffled_svm_performance = np.mean(shuffled_svm_means, axis=0)
average_svm_splits = pd.DataFrame(np.mean(all_vals, axis=0))
average_shuffled_splits = pd.DataFrame(np.mean(shuffled_vals, axis=0))

# preprocess_for_plotting is given dicts, so re-key the averaged arrays by position
average_svm_performance_dict = dict(enumerate(average_svm_performance))
average_shuffled_svm_performance_dict = dict(enumerate(average_shuffled_svm_performance))

mean_performance, CI_min, CI_max = classifiers.preprocess_for_plotting(average_svm_performance_dict, average_svm_splits)
shuffle_mean_performance, shuffle_CI_min, shuffle_CI_max = classifiers.preprocess_for_plotting(average_shuffled_svm_performance_dict, average_shuffled_splits)

CI = (CI_min, CI_max)
shuffle_CI = (shuffle_CI_min, shuffle_CI_max)

# Save SVM Performance Data
_index = [(int(item[0]), int(item[1])) for item in window_keys]
classifiers.save_svm_data(mean_performance, shuffle_mean_performance, _index, CI, shuffle_CI, avg_svm_output_dir)

# Plot SVM Performance
descriptors = (_exp_type, CELL_CLASS, ANALYSIS_VARIABLE, len(cells))

svm_fig = plotting.plot_svm_performance(mean_performance, shuffle_mean_performance, CI, shuffle_CI, descriptors, avg_svm_fig_dir)

# Honor SHOW_FIGURES the same way the per-class loop does
if SHOW_FIGURES:
    plt.show()
else:
    plt.close()

# NOTE(review): everything below reuses `all_confusion_mats`, `_labels`, `odor_CM_data_dir` and
# `odor_CM_fig_dir` exactly as the previous cell's final iteration left them, so it re-plots and
# re-saves that last class's confusion matrices rather than an across-class average. It is kept
# as-is; confirm whether this pass is intended or copy-paste residue.
windows = list(all_confusion_mats.keys())
window_averaged_cms = classifiers.average_CM(all_confusion_mats, windows)

# Plot and Save average confusion matrices per window
for window_name, cm_window in CM_WINDOWS.items():
    _fig = classifiers.save_and_plot_CM(window_averaged_cms, cm_window, window_name, windows, _labels, odor_CM_data_dir, odor_CM_fig_dir)

    if not SHOW_FIGURES:
        plt.close()
