# Summary plot for the sparse labeling. 
This notebook was created in connection with the R01 application in 11/2024. It uses cells from one mouse, `dock13b3`; the recording date was `8_4`. The notebook that generated the data for this one is named `sparse_prototype.ipynb`. The data is stored in the mouse's folder.

In [None]:
from os.path import join

import matplotlib.pyplot as plt
import numpy as np

import pandas as pd
import seaborn as sns

# Figure-wide text settings: do not route text through LaTeX, and use the
# "none" SVG font type for exported SVG files.
plt.rcParams.update({"text.usetex": False, "svg.fonttype": "none"})


In [None]:
# Directory of the .sima dataset for this recording; the input CSV files are
# read from it and the summary figures are saved into it.
sima_folder = (
    "/data2/gergely/invivo_DATA/sleep/dock13b3/8_4/"
    "TSeries-08042024-1156-001/TSeries-08042024-1156-001.sima/"
)


In [None]:
# Paths of the per-cell CSV exports (files numbered 1-5) inside the sima folder
filenames = [join(sima_folder, f"xcorr_scatter_mob_immob_{i}.csv") for i in range(1, 6)]

# Read every CSV and strip leading/trailing whitespace from its column names
# *before* concatenating. Stripping only after the concat would be too late:
# headers that differ by whitespace alone would already have been placed in
# separate, NaN-padded columns and would then collapse into duplicate names.
dfs = [pd.read_csv(file).rename(columns=str.strip) for file in filenames]

# Stack all files into one long-form DataFrame with a fresh 0..n-1 index
long_form_df = pd.concat(dfs, ignore_index=True)

# Preview the first rows (rendered as the notebook cell output)
long_form_df.head()


In [None]:
# Per-cell averages: one row per 'cell_num' holding the mean of each
# correlation measure across that cell's rows.
correlation_columns = ["Mobile Correlation", "Immobile Correlation"]
aggregated_df = (
    long_form_df.groupby("cell_num")[correlation_columns].mean().reset_index()
)

immobile_values = long_form_df["Immobile Correlation"]
mobile_values = long_form_df["Mobile Correlation"]

# Square figure: immobile correlation on x, mobile correlation on y
fig, ax = plt.subplots(figsize=(8, 8))

# Every individual measurement as a small, semi-transparent gray dot
ax.scatter(
    immobile_values,
    mobile_values,
    color="gray",
    alpha=0.6,
    s=20,
    label="Individual Points",
)

# Per-cell averages as large black-edged markers, colored by cell number
scatter = ax.scatter(
    aggregated_df["Immobile Correlation"],
    aggregated_df["Mobile Correlation"],
    c=aggregated_df["cell_num"],
    cmap="rainbow",
    s=100,
    edgecolor="black",
    label="Averages",
)

# Identity line spanning the full range of both measures; points above it
# have a larger mobile than immobile correlation.
diagonal_min = min(immobile_values.min(), mobile_values.min())
diagonal_max = max(immobile_values.max(), mobile_values.max())
diagonal_span = [diagonal_min, diagonal_max]
ax.plot(diagonal_span, diagonal_span, "r--", label="y = x (Diagonal)")

ax.set_xlabel("Immobile Correlation")
ax.set_ylabel("Mobile Correlation")
ax.set_title("Average Scatter Plot of Immobile vs Mobile Correlation by Cell Number")
fig.colorbar(scatter, ax=ax, label="Cell Number")
ax.legend()
ax.axis("square")
plt.show()


In [None]:
# Per-cell mean and standard error of the mean for both correlation measures
aggregated_df = (
    long_form_df.groupby("cell_num")[["Mobile Correlation", "Immobile Correlation"]]
    .agg(["mean", "sem"])
    .reset_index()
)

# Collapse the two-level column index into flat names such as
# "Mobile Correlation_mean"; columns without a second level (cell_num)
# keep their plain name.
aggregated_df.columns = [
    f"{name}_{stat}".strip() if stat else name for name, stat in aggregated_df.columns
]

immobile_values = long_form_df["Immobile Correlation"]
mobile_values = long_form_df["Mobile Correlation"]

# Square figure: immobile correlation on x, mobile correlation on y
fig, ax = plt.subplots(figsize=(8, 8))

# Every individual measurement as a small, semi-transparent gray dot
ax.scatter(
    immobile_values,
    mobile_values,
    color="gray",
    alpha=0.6,
    s=20,
    label="Individual Points",
)

# Per-cell means as large black-edged markers, colored by cell number
scatter = ax.scatter(
    aggregated_df["Immobile Correlation_mean"],
    aggregated_df["Mobile Correlation_mean"],
    c=aggregated_df["cell_num"],
    cmap="rainbow",
    s=100,
    edgecolor="black",
    label="Averages",
)

# Identity line spanning the full range of both measures
diagonal_min = min(immobile_values.min(), mobile_values.min())
diagonal_max = max(immobile_values.max(), mobile_values.max())
diagonal_span = [diagonal_min, diagonal_max]
ax.plot(diagonal_span, diagonal_span, "r--", label="y = x (Diagonal)")

ax.set_xlabel("Immobile Correlation")
ax.set_ylabel("Mobile Correlation")
ax.set_title("Average Scatter Plot of Immobile vs Mobile Correlation by Cell Number")
fig.colorbar(scatter, ax=ax, label="Cell Number")
ax.legend()
ax.axis("square")

# Write the figure next to the data (vector and raster) before showing it
for extension in ("svg", "png"):
    plt.savefig(join(sima_folder, f"Summary_scatter_plot_cell1_5.{extension}"))
plt.show()

# Create a bar plot for aggregated values with standard error
plt.figure(figsize=(10, 6))

# Columns of aggregated_df holding the per-cell means. Their order defines
# the bar order: bar 0 = immobile, bar 1 = mobile.
mean_columns = ["Immobile Correlation_mean", "Mobile Correlation_mean"]
bar_labels = ["Immobile Correlation", "Mobile Correlation"]
bar_positions = list(range(len(mean_columns)))

# Long form for seaborn: one row per (cell, correlation type)
aggregated_long = aggregated_df.melt(
    id_vars="cell_num",
    value_vars=mean_columns,
    var_name="Correlation Type",
    value_name="Average Value",
)

# Attach each row's within-cell standard error by looking up the matching
# "<measure>_sem" column for that cell. Kept for reference in the table; the
# error bars drawn below are computed by seaborn across the per-cell means.
aggregated_long["Standard Error"] = aggregated_long.apply(
    lambda row: aggregated_df.loc[
        aggregated_df["cell_num"] == row["cell_num"],
        row["Correlation Type"].replace("_mean", "_sem"),
    ].values[0],
    axis=1,
)

# Bars show the mean across cells with a standard-error bar; `order` pins the
# bar positions so the connectors below can rely on them.
ax = sns.barplot(
    data=aggregated_long,
    x="Correlation Type",
    y="Average Value",
    order=mean_columns,
    errorbar="se",
    palette="pastel",
    edgecolor="black",
    errwidth=1.5,
    capsize=0.1,
)

# One connector per cell, drawn at the numeric bar positions. Passing x
# strings that differ from the bar categories (as "Immobile Correlation" does
# from "Immobile Correlation_mean") can make matplotlib register them as new
# categories and draw the connectors beside the bars instead of on them.
for _, row in aggregated_df.iterrows():
    ax.plot(
        bar_positions,
        [row[column] for column in mean_columns],
        color="gray",
        alpha=0.6,
        linestyle="-",
        marker="o",
    )

# Show clean tick labels without the "_mean" suffix
ax.set_xticks(bar_positions)
ax.set_xticklabels(bar_labels)

ax.set_xlabel("Correlation Type")
ax.set_ylabel("Average Value")
ax.set_title(
    "Bar Plot of Aggregated Correlations with Individual Points, Connectors, and Standard Error"
)
plt.savefig(join(sima_folder, "Summary_bar_plot_cell1_5.svg"))
plt.savefig(join(sima_folder, "Summary_bar_plot_cell1_5.png"), dpi=300)
plt.show()
