# Correlation analysis between dendritic and somatic signals. 

* 10/31/2024 setup

In [None]:
from os.path import join
import json

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import zscore
from scipy.interpolate import interp1d

import pandas as pd

from src.classes.suite2p_class import Suite2p as s2p

In [None]:
# Recording directories (one `.sima` folder per imaging session) analysed in
# this notebook. Later cells read `<folder>/suite2p` and
# `<folder>/behavior/mobility_immobility.json` from each entry, and all
# per-recording lists built below follow this order.
sima_folders = [
    "/data2/gergely/invivo_DATA/sleep/dock11c5/8_2/TSeries-08022024-1036-001/TSeries-08022024-1036-001.sima/",
    "/data2/gergely/invivo_DATA/sleep/dock11c5/8_2/TSeries-08022024-1036-002/TSeries-08022024-1036-002.sima/",
    "/data2/gergely/invivo_DATA/sleep/dock11b2/6_17/TSeries-06172024-0946-001/TSeries-06172024-0946-001.sima/",
    "/data2/gergely/invivo_DATA/sleep/dock11b2/6_17/TSeries-06172024-0946-003/TSeries-06172024-0946-003.sima/",
    "/data2/gergely/invivo_DATA/sleep/dock11b1/6_17/TSeries-06172024-0946-001/TSeries-06172024-0946-001.sima/",
    "/data2/gergely/invivo_DATA/sleep/dock11b1/7_31/TSeries-07312024-1030-002/TSeries-07312024-1030-002.sima/",
    "/data2/gergely/invivo_DATA/sleep/dock11b1/8_3/TSeries-08022024-1036-001/TSeries-08022024-1036-001.sima/",
    "/data2/gergely/invivo_DATA/sleep/dock11b1/8_3/TSeries-08022024-1036-002/TSeries-08022024-1036-002.sima/",
]


In [None]:
# Suite2p plane index -> label used as the key in `spks`.
planes = {0: "dendrites", 1: "soma"}

# spks[label] collects one z-scored spike array per recording, appended in
# `sima_folders` order.
spks = {}
for folder in sima_folders:
    for plane_idx, label in planes.items():
        recording = s2p(join(folder, "suite2p"))
        raw_spikes = recording.get_spikes(plane=plane_idx)

        # Standardise along axis 1 (presumably one row per ROI, so each ROI is
        # scaled independently -- TODO confirm against Suite2p.get_spikes).
        standardized = zscore(raw_spikes, axis=1)

        spks.setdefault(label, []).append(standardized)

# One mobility/immobility vector per recording, in `sima_folders` order.
mob_immobs = []
for folder in sima_folders:
    with open(join(folder, "behavior", "mobility_immobility.json"), "r") as fh:
        mob_immobs.append(np.array(json.load(fh)))


In [None]:
def interval_length_calculator(
    data: pd.DataFrame, state_column: str, state_value: int
) -> pd.DataFrame:
    """
    Calculates the length, start, and stop indices of intervals for a
      specific state.

    An interval is a maximal run of consecutive rows whose ``state_column``
    value (cast to ``int``) equals ``state_value``. Start and stop are index
    labels and both are inclusive, so ``length == stop - start + 1``. An
    interval that is still open at the end of ``data`` stops at the last
    index label.

    The input frame is never modified. Index labels are assumed to be
    consecutive increasing integers (e.g. the default ``RangeIndex``), since
    the stop label is derived by subtracting 1 from the label of the first
    row after the run.

    Args:
        data (pd.DataFrame): The DataFrame containing interval data.
        state_column (str): Column name indicating the state for each interval.
        state_value (int): The value of the state to filter intervals by.

    Returns:
        pd.DataFrame: A DataFrame with columns for the interval number ('n'),
        the start index, the stop index, and the length of each interval of
        the specified state. All four columns are present even when no
        interval is found (the frame then has zero rows).
    """
    # Boolean mask of rows in the target state; derived without touching `data`.
    in_state = (data[state_column].astype(int) == state_value).to_numpy()

    # previous_in_state[i] == in_state[i - 1]; the first row has no
    # predecessor and is treated as "not in state" so that a run beginning at
    # row 0 is detected as a start.
    previous_in_state = np.zeros_like(in_state)
    previous_in_state[1:] = in_state[:-1]

    # Rising edges open an interval; a falling edge marks the first row AFTER
    # an interval, hence the -1 to make the stop inclusive.
    starts = data.index[in_state & ~previous_in_state].to_numpy()
    stops = data.index[~in_state & previous_in_state].to_numpy() - 1

    # Edges strictly alternate start/stop, so the k-th stop closes the k-th
    # start. The only possible imbalance is a final run that reaches the end
    # of the data, which is closed at the last index label.
    if len(stops) < len(starts):
        stops = np.append(stops, data.index[-1])

    return pd.DataFrame(
        {
            "n": np.arange(1, len(starts) + 1),
            "start": starts,
            "stop": stops,
            "length": stops - starts + 1,
        }
    )


In [None]:
# Function to apply interval_length_calculator to each mobility array
def calculate_intervals_for_mobility_data(mob_immob_data, state_value=1):
    """
    Run interval_length_calculator on every recording's mobility vector.

    Parameters:
    - mob_immob_data: List of numpy arrays containing mobility data (1 for moving, 0 for immobile)
    - state_value: State to filter intervals by (1 for moving, 0 for immobile)

    Returns:
    - A list with one DataFrame per input array, in input order. Each holds the
      intervals of the requested state plus a 'recording' column set to
      'folder_<k>', where k is the 1-based position of the array in the input.
    """

    def _intervals_for(recording_number, mobility_array):
        # interval_length_calculator works on a DataFrame, so wrap the vector
        # in a single-column frame first.
        frame = pd.DataFrame({"state": mobility_array})
        intervals = interval_length_calculator(
            frame, state_column="state", state_value=state_value
        )
        # Tag every row with the recording it came from.
        intervals["recording"] = f"folder_{recording_number}"
        return intervals

    return [
        _intervals_for(number, mobility_array)
        for number, mobility_array in enumerate(mob_immob_data, start=1)
    ]


In [None]:
# Calculate moving (state 1) and immobile (state 0) intervals.
# Each call returns one DataFrame per entry of `mob_immobs`, tagged with a
# 'recording' column of the form 'folder_<k>'.
moving_intervals = calculate_intervals_for_mobility_data(mob_immobs, state_value=1)
immobile_intervals = calculate_intervals_for_mobility_data(mob_immobs, state_value=0)

# Combine all interval results into one DataFrame for moving and one for immobile.
# ignore_index=True renumbers the rows, so the 'recording' column is what
# identifies which recording a row belongs to.
all_moving_intervals_df = pd.concat(moving_intervals, ignore_index=True)
all_immobile_intervals_df = pd.concat(immobile_intervals, ignore_index=True)


In [None]:
# Show the combined table of moving intervals as the cell output.
all_moving_intervals_df


In [None]:
# Split every recording's spike array into its "moving" and "immobile" time
# points, separately for each plane.
#
# Result layout:
#   moving_immobile_spikes[plane_name]["moving_spikes"][i]   -> columns of
#       recording i where the mobility vector equals 1
#   moving_immobile_spikes[plane_name]["immobile_spikes"][i] -> all other columns
#
# Both lists always get exactly one entry per processed recording, so index i
# refers to the same recording for every plane. A recording that fails with a
# ValueError is represented by a (0, 0) placeholder instead of being dropped;
# dropping it would shift later recordings and pair data from different
# recordings when the planes are zipped together downstream.
moving_immobile_spikes = {}

# Assuming `spks` is a dictionary with keys ('dendrites', 'soma') and values being lists of arrays
# Assuming `mob_immobs` is a list of mobility data arrays, one for each spike array in `spks`
for key, spk_list in spks.items():
    moving_spikes_list = []
    immobile_spikes_list = []

    for spk, mob_immob in zip(spk_list, mob_immobs):
        try:
            # Accept plain lists as well as arrays
            spk = np.asarray(spk)
            mob_immob = np.asarray(mob_immob)

            print(spk.shape, mob_immob.shape)

            # Check if shapes are mismatched
            if spk.shape[1] != mob_immob.shape[0]:
                print("shapes mismatched")

                # Resample the mobility vector onto the spike array's column
                # count: put both on a common [0, 1] axis, interpolate
                # linearly, then threshold back to a boolean state.
                x = np.linspace(0, 1, mob_immob.shape[0])
                f = interp1d(x, mob_immob, kind="linear")

                new_x = np.linspace(0, 1, spk.shape[1])
                mob_immob_interpolated = f(new_x)

                mob_immob_interpolated = (
                    mob_immob_interpolated >= 0.5
                )  # Adjust this threshold as needed
            else:
                mob_immob_interpolated = mob_immob

            # Split spikes into moving and immobile based on mobility data
            moving_spikes = spk[:, mob_immob_interpolated == 1]
            immobile_spikes = spk[:, mob_immob_interpolated != 1]

        except ValueError as e:  # Adjust exception type if needed
            print(f"Error processing spike and mobility data for key '{key}': {e}")
            # Keep the recording's slot with a zero-width placeholder so the
            # lists stay aligned across planes; consumers that check
            # `.shape[1] > 0` skip it.
            moving_spikes = np.empty((0, 0))
            immobile_spikes = np.empty((0, 0))

        # Exactly one entry per recording, successful or not
        moving_spikes_list.append(moving_spikes)
        immobile_spikes_list.append(immobile_spikes)

    # Store the results in the `moving_immobile_spikes` dictionary
    moving_immobile_spikes[key] = {
        "moving_spikes": moving_spikes_list,
        "immobile_spikes": immobile_spikes_list,
    }

# Now `moving_immobile_spikes` contains the keys from the input dictionary `spks`,
# with each key having a dictionary of both "moving_spikes" and "immobile_spikes" lists.


In [None]:
# Apply filter_long_intervals (with min_length=100) to every recording's
# mobility vector; the result keeps the order of `mob_immobs`.
# NOTE(review): filter_long_intervals is neither defined nor imported in the
# visible part of this notebook -- confirm where it comes from, otherwise this
# cell raises NameError on a fresh kernel.
filtered_mob_immobs = [
    filter_long_intervals(mob_immob, min_length=100) for mob_immob in mob_immobs
]

In [None]:
# Visual sanity check: overlay the raw and the filtered mobility trace of the
# recording at list index 5 (the sixth entry, following `mob_immobs` order).
plt.plot(mob_immobs[5])
plt.plot(filtered_mob_immobs[5])


In [None]:
# Initialize dictionaries to store correlation results.
# correlation_results["moving"] / ["immobile"] each collect one matrix per
# recording for which both planes have at least one column in that state;
# recordings that fail the check are skipped, so list positions do not
# necessarily line up with recording indices.
correlation_results = {"moving": [], "immobile": []}

# Loop over all the folders and calculate correlations.
# The four lists are walked in lockstep, so entry i of every list is assumed
# to belong to the same recording.
for moving_spikes_d, immobile_spikes_d, moving_spikes_s, immobile_spikes_s in zip(
    moving_immobile_spikes["dendrites"]["moving_spikes"],
    moving_immobile_spikes["dendrites"]["immobile_spikes"],
    moving_immobile_spikes["soma"]["moving_spikes"],
    moving_immobile_spikes["soma"]["immobile_spikes"],
):
    # NOTE(review): with rowvar=False, np.corrcoef treats every COLUMN as a
    # variable and every row as an observation. These arrays were produced by
    # masking columns with the mobility vector, so columns look like time
    # samples and rows like ROIs -- TODO confirm this orientation is intended.
    # As written, both inputs must have the same number of rows, and the
    # result is a square matrix over the columns of both inputs combined
    # rather than a dendrite-ROI x soma-ROI matrix.

    # Correlate for moving spikes
    if moving_spikes_d.shape[1] > 0 and moving_spikes_s.shape[1] > 0:
        # Calculate Pearson correlation across corresponding time points
        moving_correlation = np.corrcoef(moving_spikes_d, moving_spikes_s, rowvar=False)
        correlation_results["moving"].append(moving_correlation)

    # Correlate for immobile spikes
    if immobile_spikes_d.shape[1] > 0 and immobile_spikes_s.shape[1] > 0:
        # Calculate Pearson correlation across corresponding time points
        immobile_correlation = np.corrcoef(
            immobile_spikes_d, immobile_spikes_s, rowvar=False
        )
        correlation_results["immobile"].append(immobile_correlation)

# Now `correlation_results` contains the correlation matrices for moving and immobile periods.
# You can further process or visualize these results as needed.
