# Analysis notebook for sparsely labeled cells.

Based on `sparse_prototype.ipynb`. Prototyping was done with this dataset: `"/data2/gergely/invivo_DATA/sleep/dock13b1/8_8/TSeries-08082024-1650-001/TSeries-08082024-1650-001.sima/"`, cell # `1029`

Currently it works with fluorescence or spike data. Both are Z-scored. 

* setup 11/12/2024

In [None]:
import os
from os.path import join
import json

import matplotlib.pyplot as plt
import numpy as np
# import scipy.stats as stats
from scipy.stats import zscore
# import statsmodels.api as sm

import pandas as pd
# import seaborn as sns

from src.classes.suite2p_class import Suite2p as s2p
import sparse_helpers as sp

# Plot styling: render text with Matplotlib's built-in engine (no external
# LaTeX) and keep text as text objects in exported SVG files.
plt.rcParams.update({"text.usetex": False, "svg.fonttype": "none"})

In [None]:
# Recording to analyse: path to the session's .sima folder.
sima_folder = "/data2/gergely/invivo_DATA/sleep/dock13b3/8_4/TSeries-08042024-1156-001/TSeries-08042024-1156-001.sima/"

# ROI number of the cell to plot, as shown in the suite2p GUI.
cell_num = 900

In [None]:
# Per-cell folder (<sima_folder>/cell<cell_num>); it is handed to the
# sparse_helpers calls later in this notebook.
cell_folder = join(sima_folder, f"cell{cell_num}")
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(cell_folder, exist_ok=True)

## Data prep prerequisites:
* have fluorescence or spike data in a `suite2p` folder with segmented cells.
* have mobility data `mobility_immobility.json`
* have brain state data: `velo_eeg.csv`

In [None]:
data = {}
ids = {}
# suite2p plane index -> label used as the key/prefix for that plane's ROIs.
planes = {0: "top_plane_d", 1: "mid_plane_d", 2: "soma_plane_d"}

# The loader depends only on the folder, so build it once rather than once
# per plane.
s2p_data = s2p(join(sima_folder, "suite2p"))

for plane, plane_name in planes.items():
    signals = s2p_data.get_cells(plane=plane)
    roi_ids = s2p_data.get_iscell_indices(plane=plane)

    # Z-score along axis 1 of `signals`.
    # NOTE(review): assumes rows are ROIs and columns are frames — confirm.
    zscored_signal = zscore(signals, axis=1)

    # Plane names are unique, so every key is written exactly once. The value
    # stays a one-element list because later cells index it with [0] and
    # stack it with np.vstack.
    data[plane_name] = [zscored_signal]
    ids[plane_name] = roi_ids

# Load the mobility data (kept in a list; a later cell flattens it with
# np.hstack).
mobility_data = []
mob_data = join(sima_folder, "behavior", "mobility_immobility.json")
with open(mob_data, "r", encoding="utf-8") as f:
    mobility_data.append(np.array(json.load(f)))

# Load the brain states
brain_states = pd.read_csv(join(sima_folder, "eeg", "velo_eeg.csv"))

# Display the shapes so the lengths can be compared by eye; nothing is
# enforced here.
data["top_plane_d"][0].shape, mobility_data[0].shape, brain_states.shape


### Creating a dictionary for the ROI IDs.

In [None]:
# Map "<plane_name>_<position>" -> ROI id (as a plain int) for every plane
# that has at least one ROI id.
id_dict = {}
for plane_name, plane_roi_ids in ids.items():
    # len() rather than truthiness: the values may be numpy arrays.
    if len(plane_roi_ids) == 0:
        continue
    flat_ids = np.hstack(plane_roi_ids)
    id_dict.update(
        (f"{plane_name}_{position}", int(roi))
        for position, roi in enumerate(flat_ids)
    )

### Selecting the soma ROI

In [None]:
# Find the id_dict key whose ROI id equals cell_num (first match in insertion
# order).
# NOTE(review): every plane is searched, not only "soma_plane_d"; if ROI ids
# are numbered per plane this could pick a ROI from another plane — confirm.
soma_id = next(
    (key for key, value in id_dict.items() if value == cell_num),
    None,
)
# Fail here with a clear message: a None soma_id would make the later column
# rename a silent no-op, leaving no "soma" column in the DataFrame.
if soma_id is None:
    raise ValueError(
        f"cell_num {cell_num} was not found among the loaded ROI ids; "
        "check the number against the suite2p GUI."
    )
soma_id

### Adding brain state data

In [None]:
# Flatten every plane's stacked traces into one "<plane_name>_<row index>"
# entry per row.
data_dict = {
    f"{plane_name}_{row_idx}": trace
    for plane_name, plane_arrays in data.items()
    for row_idx, trace in enumerate(np.vstack(plane_arrays))
}

# One DataFrame column per trace, then the mobility and brain-state columns.
data_df = pd.DataFrame(data_dict)
mobility = np.hstack(mobility_data)
data_df["mobility"] = mobility
data_df["mobile"] = data_df["mobility"].eq(1)
data_df["immobile"] = data_df["mobility"].eq(0)
state_columns = ["awake", "NREM", "REM", "other"]
data_df = pd.concat([data_df, brain_states[state_columns]], axis=1)

data_df.head()

### Renaming the column which contains the somatic data

In [None]:
# Give the selected ROI's column the fixed label "soma" (in place).
soma_column_map = {soma_id: "soma"}
data_df.rename(columns=soma_column_map, inplace=True)
data_df.head()

## Calculations and plotting

In [None]:
# Plot the traces held in data_df via the project helper (the helper's name is
# spelled "denrite" as called here).
# savefig=True presumably saves the figure under cell_folder — confirm in
# sparse_helpers.
sp.plot_soma_denrite_traces(data_df, cell_folder, cell_num, savefig=True)

In [None]:
# Run the helper's "pearson" calculation, passing the "awake" and "NREM"
# state names.
# save_data=True presumably writes the result under cell_folder — TODO confirm
# in sparse_helpers.
xcorr_data = sp.data_calculation(
    data_df, "awake", "NREM", cell_folder, cell_num, "pearson", save_data=True
)
xcorr_data

In [None]:
# Scatter plot of the "pearson" results for "awake" and "NREM".
# savefig=True presumably saves the figure under cell_folder — confirm in
# sparse_helpers.
sp.plot_xcorr_scatter(xcorr_data, "awake", "NREM", cell_folder, cell_num, savefig=True)

In [None]:
# Same helper as the "pearson" run, this time with the "stdev" method.
# save_data=True presumably writes the result under cell_folder — TODO confirm
# in sparse_helpers.
std_dev = sp.data_calculation(
    data_df, "awake", "NREM", cell_folder, cell_num, "stdev", save_data=True
)
std_dev

In [None]:
# Bar plot of the "stdev" results for "awake" and "NREM".
sp.plot_std_dev_bars(
    std_dev, "awake", "NREM", cell_folder, cell_num, savefig=True
)

In [None]:
# Heatmap of the "pearson" results for "awake" and "NREM".
# savefig=True presumably saves the figure under cell_folder — confirm in
# sparse_helpers.
sp.plot_correlation_heatmap(
    xcorr_data, "awake", "NREM", cell_folder, cell_num, savefig=True
)

In [None]:
# Mean correlations computed by the project helper from the "awake pearson"
# and "NREM pearson" entries of xcorr_data.
soma_dendrite_corr = sp.calculate_mean_correlations_triangle(
    xcorr_data,
    cell_folder,
    cell_num,
    "awake pearson",
    "NREM pearson",
    save_data=True,
)
soma_dendrite_corr

In [None]:
# Line plot of the mean correlations for "awake pearson" and "NREM pearson".
sp.plot_mean_correlations_line(
    soma_dendrite_corr,
    cell_folder,
    cell_num,
    "awake pearson",
    "NREM pearson",
    savefig=True,
)