# Prototype code to analyze sparsely labeled somatodendritic data

* 11/2/2024 setup. Works for 1 cell. 

In [None]:
from os.path import join
import json

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from scipy.stats import zscore
import statsmodels.api as sm

from scipy.interpolate import interp1d

import pandas as pd
import seaborn as sns

from src.classes.suite2p_class import Suite2p as s2p

# Figure defaults: render text without LaTeX and keep SVG text as editable
# text elements rather than converting glyphs to paths.
plt.rcParams.update({"text.usetex": False, "svg.fonttype": "none"})


In [None]:
# Recording sessions to analyze, one ``.sima`` directory per session.
# The cells below read ``suite2p`` output and
# ``behavior/mobility_immobility.json`` from each folder and write all figures
# and CSV files into the first folder of this list.
sima_folders = [
    "/data2/gergely/invivo_DATA/sleep/dock13b3/8_4/TSeries-08042024-1156-001/TSeries-08042024-1156-001.sima/",
]
# Identifier of the cell under study. In the cells visible here it is only
# used to tag output file names and CSV rows; it does not select any data.
cell_num = 5

In [None]:
# Load the traces of every imaging plane and the per-frame mobility flags.
#
# ``spks`` maps a plane label to a list holding one z-scored array per
# recording folder; ``mob_immobs`` holds one mobility array per folder.
# NOTE(review): the arrays are assumed to be (ROIs, frames) because the
# z-score is taken along axis 1 and rows become DataFrame columns later --
# confirm against ``Suite2p.get_cells``.
planes = {0: "plane1_d", 1: "plane2_d", 2: "soma"}
spks = {}
mob_immobs = []
for folder in sima_folders:
    # The Suite2p wrapper depends only on the folder, so build it once and
    # reuse it for every plane instead of re-creating it per plane.
    s2p_data = s2p(join(folder, "suite2p"))
    for plane, plane_name in planes.items():
        spikes = s2p_data.get_cells(plane=plane)
        # Normalize each row to zero mean / unit variance along axis 1.
        spks.setdefault(plane_name, []).append(zscore(spikes, axis=1))

    # Mobility flags are stored next to the imaging data of the same folder.
    mobility_path = join(folder, "behavior", "mobility_immobility.json")
    with open(mobility_path, "r", encoding="utf-8") as f:
        mob_immobs.append(np.array(json.load(f)))

# Quick sanity check of the loaded shapes (shown as the cell output).
spks["plane1_d"][0].shape, mob_immobs[0].shape


In [None]:
# Build one table with a column per ROI trace plus the mobility flag.
# Columns are named "<plane label>_<row index>", e.g. "soma_0".
data_dict = {}
for plane_name, plane_arrays in spks.items():
    # Stack the per-folder arrays row-wise; every row becomes one column.
    stacked = np.vstack(plane_arrays)
    data_dict.update(
        {f"{plane_name}_{row}": trace for row, trace in enumerate(stacked)}
    )

data_df = pd.DataFrame(data_dict)
# Mobility flags of all folders, concatenated along the frame axis.
data_df["mobility"] = np.hstack(mob_immobs)

data_df


In [None]:
# Compare pairwise correlations among the "plane" columns between immobile and
# mobile frames: one scatter point per unordered pair of columns.
spks_df = data_df.copy()

# Explicit comparisons (rather than ``~``) so that 0/1-coded flags select the
# same frames as True/False ones.
immobile_spks_df = spks_df[spks_df["mobility"].eq(False)].drop(columns=["mobility"])
mobile_spks_df = spks_df[spks_df["mobility"].eq(True)].drop(columns=["mobility"])

# Pearson correlation matrices for both conditions
immobile_corr = immobile_spks_df.corr()
mobile_corr = mobile_spks_df.corr()

# Restrict both matrices to the columns whose name contains "plane"
plane_columns = [col for col in immobile_corr.columns if "plane" in col]
immobile_plane_corr = immobile_corr.loc[plane_columns, plane_columns]
mobile_plane_corr = mobile_corr.loc[plane_columns, plane_columns]

# Keep only the strict upper triangle. A correlation matrix is symmetric with
# a diagonal of 1.0 (each trace against itself), so flattening the whole
# matrix would plot every pair twice and pull the mean towards 1.
pair_rows, pair_cols = np.triu_indices(len(plane_columns), k=1)
immobile_corr_values = immobile_plane_corr.to_numpy()[pair_rows, pair_cols]
mobile_corr_values = mobile_plane_corr.to_numpy()[pair_rows, pair_cols]

# One row per column pair; this table is also what gets exported to CSV.
scatter_data = pd.DataFrame(
    {
        "Immobile Correlation": immobile_corr_values,
        "Mobile Correlation": mobile_corr_values,
    }
)

# Scatter of the pairwise correlations with the identity line as reference:
# points above the line are more correlated while mobile.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=scatter_data, x="Immobile Correlation", y="Mobile Correlation", color="blue"
)
plt.title("Scatter Plot of Pearson Correlations (Mobile vs. Immobile)")
plt.xlabel("Immobile Correlation")
plt.ylabel("Mobile Correlation")
plt.axline((0, 0), slope=1, linestyle="--", color="red", label="y = x")

# Mark the mean pairwise correlation of each condition
mean_immobile_corr = immobile_corr_values.mean()
mean_mobile_corr = mobile_corr_values.mean()
plt.scatter(
    mean_immobile_corr,
    mean_mobile_corr,
    color="green",
    s=200,
    edgecolor="gray",
    linewidth=2,
    label="Mean Correlation",
)

plt.legend()
plt.tight_layout()
plt.savefig(join(sima_folders[0], f"xcorr_scatter_mob_immob_{cell_num}.png"), dpi=300)
plt.savefig(
    join(sima_folders[0], f"xcorr_scatter_mob_immob_{cell_num}.svg"),
    format="svg",
    dpi=300,
)
plt.show()


In [None]:
# Tag every row with its provenance, then store the table next to the
# recording it came from.
provenance = {"sima_folder": sima_folders[0], "cell_num": cell_num}
for column, value in provenance.items():
    scatter_data[column] = value

csv_path = join(sima_folders[0], f"xcorr_scatter_mob_immob_{cell_num}.csv")
scatter_data.to_csv(csv_path)

In [None]:
# Bar chart of the per-trace standard deviation, computed separately over
# mobile and immobile frames.
spks_df = data_df.copy()

# Frame masks for the two behavioral states
moving = spks_df["mobility"].eq(True)
still = spks_df["mobility"].eq(False)

# Only the traces are of interest, so leave the flag column out up front
traces = spks_df.drop(columns=["mobility"])
std_true = traces.loc[moving].std()
std_false = traces.loc[still].std()

# One row per trace, one column per state
std_df = pd.DataFrame({"Mobility = True": std_true, "Mobility = False": std_false})

std_df.plot(kind="bar", figsize=(14, 6), colormap="viridis")
plt.ylabel("Standard Deviation")
plt.title("Comparison of Standard Deviation (Split by Mobility)")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()

png_path = join(
    sima_folders[0], f"zscored_traces_standard_dev_mob_immob_{cell_num}.png"
)
svg_path = join(
    sima_folders[0], f"zscored_traces_standard_dev_mob_immob_{cell_num}.svg"
)
plt.savefig(png_path, dpi=300)
plt.savefig(svg_path, format="svg", dpi=300)
plt.show()


In [None]:
# Side-by-side heatmaps of the full correlation matrix (all traces) for
# immobile and mobile frames, drawn on a shared color scale.
spks_df = data_df.copy()

still = spks_df["mobility"].eq(False)
moving = spks_df["mobility"].eq(True)
immobile_spks_df = spks_df.loc[still].drop(columns=["mobility"])
mobile_spks_df = spks_df.loc[moving].drop(columns=["mobility"])

# Pearson correlation matrices for both conditions
immobile_corr = immobile_spks_df.corr()
mobile_corr = mobile_spks_df.corr()

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# One color range for both panels so their shades are directly comparable
vmin = min(immobile_corr.min().min(), mobile_corr.min().min())
vmax = max(immobile_corr.max().max(), mobile_corr.max().max())

panels = (
    (axes[0], immobile_corr, "Cross-Correlation (Immobile)"),
    (axes[1], mobile_corr, "Cross-Correlation (Mobile)"),
)
for panel_ax, corr_matrix, panel_title in panels:
    sns.heatmap(
        corr_matrix, annot=True, cmap="Greens", vmin=vmin, vmax=vmax, ax=panel_ax
    )
    panel_ax.set_title(panel_title)

plt.tight_layout()
png_path = join(sima_folders[0], f"correlation_matrix{cell_num}.png")
svg_path = join(sima_folders[0], f"correlation_matrix{cell_num}.svg")
plt.savefig(png_path, dpi=300)
plt.savefig(svg_path, format="svg", dpi=300)
plt.show()


In [None]:
# Correlate every "plane" trace with the soma trace, separately for mobile and
# immobile frames, and show the two sets of coefficients as grouped bars.
spks_df = data_df.copy()
# A mobility state must contain more than this many frames before a
# correlation is computed for it; otherwise NaN is reported.
interval_length = 50

# Columns whose name contains "plane", and the soma reference column
plane_columns = [col for col in spks_df.columns if "plane" in col]
soma_column = "soma_0"

# Split the frames by mobility state
mobility_true = spks_df[spks_df["mobility"].eq(True)]
mobility_false = spks_df[spks_df["mobility"].eq(False)]

# Despite the "mean" in the names, each entry is a single Pearson r between
# one plane column and the soma. The frame-count check does not depend on the
# column, so it is decided once per state rather than once per column.
if len(mobility_true) > interval_length:
    mean_corrs_true = [
        stats.pearsonr(mobility_true[col], mobility_true[soma_column])[0]
        for col in plane_columns
    ]
else:
    mean_corrs_true = [np.nan] * len(plane_columns)

if len(mobility_false) > interval_length:
    mean_corrs_false = [
        stats.pearsonr(mobility_false[col], mobility_false[soma_column])[0]
        for col in plane_columns
    ]
else:
    mean_corrs_false = [np.nan] * len(plane_columns)

# Prepare data for plotting
labels = plane_columns
x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots(figsize=(12, 6))

# Two bars per column: immobile on the left, mobile on the right
ax.bar(x - width / 2, mean_corrs_false, width, label="Mobility = False", color="blue")
ax.bar(x + width / 2, mean_corrs_true, width, label="Mobility = True", color="red")

ax.set_xlabel("Plane Columns")
ax.set_ylabel("Mean Pearson Correlation with Soma_0")
ax.set_title(
    "Mean Pearson Correlation of Plane Columns with Soma_0 (Split by Mobility)"
)
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=45, ha="right")
ax.set_ylim(-1, 1)  # full range of a correlation coefficient
ax.legend()

plt.tight_layout()
plt.savefig(
    join(sima_folders[0], f"soma_dendrite_corr_mob_immob_{cell_num}.png"), dpi=300
)
plt.savefig(
    join(sima_folders[0], f"soma_dendrite_corr_mob_immob_{cell_num}.svg"),
    format="svg",
    dpi=300,
)
plt.show()


In [None]:
# Correlate every "plane" trace, plus the frame-wise mean of all of them, with
# the soma trace, separately for mobile and immobile frames.
#
# Start from a fresh copy so that re-running the cell never folds a previously
# added "plane_mean" column into its own average.
spks_df = data_df.copy()
# A mobility state must contain more than this many frames before a
# correlation is computed for it; otherwise NaN is reported.
interval_length = 50

# Columns whose name contains "plane", and the soma reference column
plane_columns = [col for col in spks_df.columns if "plane" in col]
soma_column = "soma_0"

# Average of all plane columns per frame. It has to exist before the split
# below, otherwise the per-state frames would not contain it.
spks_df["plane_mean"] = spks_df[plane_columns].mean(axis=1)
corr_columns = plane_columns + ["plane_mean"]

# Split the frames by mobility state
mobility_true = spks_df[spks_df["mobility"].eq(True)]
mobility_false = spks_df[spks_df["mobility"].eq(False)]

# Despite the "mean" in the names, each entry is a single Pearson r between
# one column and the soma. The frame-count check does not depend on the
# column, so it is decided once per state rather than once per column.
if len(mobility_true) > interval_length:
    mean_corrs_true = [
        stats.pearsonr(mobility_true[col], mobility_true[soma_column])[0]
        for col in corr_columns
    ]
else:
    mean_corrs_true = [np.nan] * len(corr_columns)

if len(mobility_false) > interval_length:
    mean_corrs_false = [
        stats.pearsonr(mobility_false[col], mobility_false[soma_column])[0]
        for col in corr_columns
    ]
else:
    mean_corrs_false = [np.nan] * len(corr_columns)

# Prepare data for plotting
labels = corr_columns
x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots(figsize=(12, 6))

# Two bars per column: immobile on the left, mobile on the right
ax.bar(x - width / 2, mean_corrs_false, width, label="Mobility = False", color="blue")
ax.bar(x + width / 2, mean_corrs_true, width, label="Mobility = True", color="red")

ax.set_xlabel("Plane Columns")
ax.set_ylabel("Mean Pearson Correlation with Soma_0")
ax.set_title(
    "Mean Pearson Correlation of Plane Columns with Soma_0 (Split by Mobility)"
)
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=45, ha="right")
ax.set_ylim(-1, 1)  # full range of a correlation coefficient
ax.legend()

plt.tight_layout()
# Both formats share one file stem (the SVG name used to lack the underscore
# before the cell number).
plt.savefig(
    join(sima_folders[0], f"soma_dendrite_corr_mob_immob_with_mean_{cell_num}.png"),
    dpi=300,
)
plt.savefig(
    join(sima_folders[0], f"soma_dendrite_corr_mob_immob_with_mean_{cell_num}.svg"),
    format="svg",
    dpi=300,
)
plt.show()


In [None]:
# Tag every frame with its provenance, then store the table next to the
# recording it came from.
provenance = {"sima_folder": sima_folders[0], "cell_num": cell_num}
for column, value in provenance.items():
    spks_df[column] = value

csv_path = join(sima_folders[0], f"spks_df_cell{cell_num}.csv")
spks_df.to_csv(csv_path)
spks_df

In [None]:
# Regress the summed "plane" signal on the soma signal, once per mobility
# state, and annotate each panel with the fitted slope.
spks_df = data_df.copy()
# Frame-wise sum over every column whose name contains "plane"
spks_df["dendrite_sum"] = spks_df.filter(like="plane").sum(axis=1)

# Split the frames by mobility state
immobile_data = spks_df[spks_df["mobility"].eq(False)]
mobile_data = spks_df[spks_df["mobility"].eq(True)]

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# (panel title prefix, color, frames) for the left and right panel
panels = (
    ("Immobile", "blue", immobile_data),
    ("Mobile", "green", mobile_data),
)
for ax, (label, color, data) in zip(axes, panels):
    # Scatter with the least-squares line, no confidence band
    sns.regplot(data=data, x="soma_0", y="dendrite_sum", color=color, ci=None, ax=ax)
    ax.set(
        title=f"{label}: Dendrite Sum vs Soma Cell 0",
        xlabel="soma_0",
        ylabel="dendrite_sum",
    )

    # Fit the same line explicitly (with intercept) to read off the slope
    design = sm.add_constant(data["soma_0"])
    fit = sm.OLS(data["dendrite_sum"], design).fit()
    slope = fit.params["soma_0"]
    ax.annotate(
        f"Slope: {slope:.3f}",
        xy=(0.05, 0.9),
        xycoords="axes fraction",
        fontsize=12,
        color=color,
    )

# Give both panels the same y-range so the slopes can be compared by eye
dendrite_sums = (immobile_data["dendrite_sum"], mobile_data["dendrite_sum"])
y_min = min(series.min() for series in dendrite_sums)
y_max = max(series.max() for series in dendrite_sums)
for ax in axes:
    ax.set_ylim(y_min, y_max)

plt.tight_layout()
png_path = join(
    sima_folders[0], f"soma_sum_dendrite_scatter_mob_immob_{cell_num}.png"
)
svg_path = join(
    sima_folders[0], f"soma_sum_dendrite_scatter_mob_immob_{cell_num}.svg"
)
plt.savefig(png_path, dpi=300)
plt.savefig(svg_path, format="svg", dpi=300)
plt.show()


In [None]:
# Overview figure: one row per column of the table (every trace plus the
# mobility flag), with mobile frames highlighted in red.
spks_df = data_df.copy()
# Keep every 10th frame to lighten the figure
downsampled_df = spks_df.iloc[::10]

# One subplot row per column, the "mobility" column included
num_cells = len(downsampled_df.columns)
fig, axes = plt.subplots(
    nrows=num_cells, ncols=1, figsize=(12, num_cells * 2), sharex=True
)

frames = downsampled_df.index
is_mobile = downsampled_df["mobility"]
last_row = num_cells - 1

for idx, (ax, cell) in enumerate(zip(axes, downsampled_df.columns)):
    trace = downsampled_df[cell]

    if cell == "mobility":
        # The flag itself is drawn as a plain black line
        ax.plot(frames, trace, color="black", label="Mobility", linewidth=1.5)
        title, y_label = "Mobility State", "Mobility"
    else:
        # Whole trace in blue, then the mobile frames drawn over it in red;
        # ``where`` blanks the immobile frames so the red line breaks there.
        ax.plot(
            frames, trace, color="blue", label="Immobile (False)", linewidth=1.5
        )
        ax.plot(
            frames,
            trace.where(is_mobile),
            color="red",
            label="Mobile (True)",
            linewidth=1.5,
        )
        title, y_label = f"{cell} Activity", "Activity Level"

    ax.set_title(title)
    # The x axis is shared, so only the bottom row gets a label
    if idx == last_row:
        ax.set_xlabel("Time")
    ax.set_ylabel(y_label)

    # A single legend on the top row keeps the figure uncluttered
    if idx == 0:
        ax.legend(loc="upper right")

plt.tight_layout()
png_path = join(sima_folders[0], f"soma_dendrite_traces_mob_immob_{cell_num}.png")
svg_path = join(sima_folders[0], f"soma_dendrite_traces_mob_immob_{cell_num}.svg")
plt.savefig(png_path, dpi=300)
plt.savefig(svg_path, format="svg", dpi=300)
plt.show()


## The next cell compares the mean of each compartment between the two mobility groups. This is not very informative when the data are z-scored, hence the comparison of standard deviations above.

In [None]:
# Bar chart of the per-trace mean, computed separately over mobile and
# immobile frames of the current ``spks_df``.
moving = spks_df["mobility"].eq(True)
still = spks_df["mobility"].eq(False)

# Only the traces are of interest, so leave the flag column out up front
traces = spks_df.drop(columns=["mobility"])
mean_true = traces.loc[moving].mean()
mean_false = traces.loc[still].mean()

# One row per trace, one column per state
mean_df = pd.DataFrame({"Mobility = True": mean_true, "Mobility = False": mean_false})

mean_df.plot(kind="bar", figsize=(14, 6), colormap="viridis")
plt.ylabel("Average Value")
plt.title("Comparison of Average Values (Split by Mobility)")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()
