# Plotting individual correlation matrices using exported data
This notebook was made as a helper for the R01 submission on 11/3/2024.

In [None]:
import os
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram, leaves_list, linkage

# Global matplotlib settings for every figure in this notebook:
# no LaTeX text rendering, and SVG output keeps text as text elements
# (editable in vector-graphics tools) instead of converting it to paths.
plt.rcParams.update(
    {
        "text.usetex": False,
        "svg.fonttype": "none",
    }
)


In [None]:
# Path of the .sima dataset directory. The exported per-cell CSV
# (spks_df_cell<N>.csv) is read from here, and figures are saved into its
# "corr_plots" subfolder. Despite the plural name this is a single path.
sima_folders = "/data2/gergely/invivo_DATA/sleep/dock13b3/8_4/TSeries-08042024-1156-001/TSeries-08042024-1156-001.sima/"
# Number of the cell to plot; selects the CSV file and is embedded in the
# names of the saved figures.
cell_num = 5


In [None]:
# Name and full path of the exported CSV for the selected cell
raw_data_file_name = f"spks_df_cell{cell_num}.csv"
raw_data_file_path = join(sima_folders, raw_data_file_name)

# Read the CSV. A missing file is reported and leaves raw_df set to None,
# which the following cells check before doing any work.
try:
    raw_df = pd.read_csv(raw_data_file_path)
except FileNotFoundError:
    raw_df = None
    print(f"Data file not found: {raw_data_file_path}. Please check the path.")
else:
    print(f"Data successfully loaded from {raw_data_file_path}")
    # Preview the first few rows of the loaded data
    print(raw_df.head())


In [None]:
if raw_df is not None:
    # Bookkeeping columns that are not needed for the analysis
    columns_to_drop = ["sima_folder", "cell_num"]

    # Drop them only when every one of them is present; otherwise leave the
    # DataFrame untouched and report it.
    if set(columns_to_drop).issubset(raw_df.columns):
        raw_df = raw_df.drop(columns_to_drop, axis="columns")
        print("Dropped unnecessary columns: 'sima_folder' and 'cell_num'")
    else:
        print(f"One or both of the columns {columns_to_drop} not found in the data.")

    # Preview the resulting DataFrame
    print(raw_df.head())


In [None]:
# Side-by-side heatmaps of the mobile and immobile correlation matrices.
# Each matrix is hierarchically clustered on its own, so the row/column
# order may differ between the two panels. The figure is saved as SVG and
# PNG under <sima_folders>/corr_plots.
if raw_df is not None:
    # Drop the bookkeeping columns (only when both are present; an earlier
    # cell may already have removed them).
    columns_to_drop = ["sima_folder", "cell_num"]
    if all(col in raw_df.columns for col in columns_to_drop):
        raw_df = raw_df.drop(columns=columns_to_drop)
        print("Dropped unnecessary columns: 'sima_folder' and 'cell_num'")

    # Identify columns that contain 'plane' or 'soma'
    pattern_cols = [col for col in raw_df.columns if "plane" in col or "soma" in col]

    # Split the data based on the 'mobility' column
    if "mobility" in raw_df.columns:
        # Explicit comparisons (rather than truthiness / `~`) so rows whose
        # mobility value is neither True nor False end up in neither subset.
        is_mobile = raw_df["mobility"] == True  # noqa: E712
        is_immobile = raw_df["mobility"] == False  # noqa: E712
        mobile_df = raw_df.loc[is_mobile, pattern_cols]
        immobile_df = raw_df.loc[is_immobile, pattern_cols]

        # Calculate the pairwise correlation matrices for both subsets
        mobile_corr = mobile_df.corr()
        immobile_corr = immobile_df.corr()

        # Perform hierarchical clustering on each correlation matrix; every
        # row of the matrix is used as the observation vector of one column.
        mobile_linkage = linkage(mobile_corr, method="average")
        immobile_linkage = linkage(immobile_corr, method="average")

        mobile_order = leaves_list(mobile_linkage)
        immobile_order = leaves_list(immobile_linkage)

        # Reorder rows and columns by the dendrogram leaf order
        mobile_corr = mobile_corr.iloc[mobile_order, mobile_order]
        immobile_corr = immobile_corr.iloc[immobile_order, immobile_order]

        # Mask the upper triangle (diagonal included) of the correlation
        # matrices; both matrices have the same shape, so one mask serves both.
        mask = np.triu(np.ones_like(mobile_corr, dtype=bool))

        # Set the color scale limits to be the same for both plots
        vmin = min(mobile_corr.min().min(), immobile_corr.min().min())
        vmax = max(mobile_corr.max().max(), immobile_corr.max().max())

        # Plot the correlation matrices for mobile and immobile data side by side
        fig, axes = plt.subplots(1, 2, figsize=(20, 8))

        # Plot the correlation matrix for mobile data (lower triangle only)
        sns.heatmap(
            mobile_corr,
            mask=mask,
            annot=True,
            cmap="Greens",
            linewidths=0.5,
            ax=axes[0],
            vmin=vmin,
            vmax=vmax,
        )
        axes[0].set_title("Correlation Matrix - Mobile Data")

        # Plot the correlation matrix for immobile data (lower triangle only)
        sns.heatmap(
            immobile_corr,
            mask=mask,
            annot=True,
            cmap="Greens",
            linewidths=0.5,
            ax=axes[1],
            vmin=vmin,
            vmax=vmax,
        )
        axes[1].set_title("Correlation Matrix - Immobile Data")

        # Adjust layout
        plt.tight_layout()

        # savefig does not create missing directories, so make sure the
        # output folder exists before writing the figure.
        output_dir = join(sima_folders, "corr_plots")
        os.makedirs(output_dir, exist_ok=True)
        output_stem = join(
            output_dir, f"cell{cell_num}_clustered_correlation_matrices"
        )
        plt.savefig(f"{output_stem}.svg")
        plt.savefig(f"{output_stem}.png", dpi=300)
        plt.show()

    else:
        print("Column 'mobility' not found in the data.")


In [None]:
# Side-by-side heatmaps of the mobile and immobile correlation matrices,
# both ordered by the hierarchical clustering of the mobile matrix so the
# two panels share the same row/column order. Nothing is saved to disk.
if raw_df is not None:
    # Remove the bookkeeping columns, but only if both are still present
    # (an earlier cell may already have dropped them).
    columns_to_drop = ["sima_folder", "cell_num"]
    if set(columns_to_drop).issubset(raw_df.columns):
        raw_df = raw_df.drop(columns_to_drop, axis="columns")
        print("Dropped unnecessary columns: 'sima_folder' and 'cell_num'")

    # Signal columns are those whose name mentions 'plane' or 'soma'
    pattern_cols = [col for col in raw_df.columns if "plane" in col or "soma" in col]

    if "mobility" not in raw_df.columns:
        print("Column 'mobility' not found in the data.")
    else:
        # Rows recorded while mobile vs. immobile, restricted to signal columns
        mobility = raw_df["mobility"]
        mobile_df = raw_df.loc[mobility == True, pattern_cols]  # noqa: E712
        immobile_df = raw_df.loc[mobility == False, pattern_cols]  # noqa: E712

        # Pairwise correlations within each subset
        mobile_corr = mobile_df.corr()
        immobile_corr = immobile_df.corr()

        # Leaf order from average-linkage clustering of the mobile matrix only
        mobile_linkage = linkage(mobile_corr, method="average")
        mobile_order = leaves_list(mobile_linkage)

        # Apply the mobile ordering to both matrices
        mobile_corr = mobile_corr.iloc[mobile_order, mobile_order]
        immobile_corr = immobile_corr.iloc[mobile_order, mobile_order]

        # Hide the upper triangle (diagonal included) in both panels
        mask = np.triu(np.ones_like(mobile_corr, dtype=bool))

        # Shared colour limits so the two panels are directly comparable
        vmin = min(corr.min().min() for corr in (mobile_corr, immobile_corr))
        vmax = max(corr.max().max() for corr in (mobile_corr, immobile_corr))

        fig, axes = plt.subplots(1, 2, figsize=(20, 8))

        # Left panel: mobile data, right panel: immobile data
        panels = (
            (axes[0], mobile_corr, "Correlation Matrix - Mobile Data"),
            (axes[1], immobile_corr, "Correlation Matrix - Immobile Data"),
        )
        for ax, corr, title in panels:
            sns.heatmap(
                corr,
                mask=mask,
                annot=True,
                cmap="Greens",
                linewidths=0.5,
                ax=ax,
                vmin=vmin,
                vmax=vmax,
            )
            ax.set_title(title)

        plt.tight_layout()
        plt.show()


In [None]:
# Single square heatmap combining both conditions: the mobile correlations
# fill the lower triangle and the diagonal, the immobile correlations fill
# the upper triangle. Rows/columns follow the hierarchical clustering of
# the mobile matrix. Nothing is saved to disk.
if raw_df is not None:
    # Drop the bookkeeping columns (only when both are present; an earlier
    # cell may already have removed them).
    columns_to_drop = ["sima_folder", "cell_num"]
    if all(col in raw_df.columns for col in columns_to_drop):
        raw_df = raw_df.drop(columns=columns_to_drop)
        print("Dropped unnecessary columns: 'sima_folder' and 'cell_num'")

    # Identify columns that contain 'plane' or 'soma'
    pattern_cols = [col for col in raw_df.columns if "plane" in col or "soma" in col]

    # Split the data based on the 'mobility' column
    if "mobility" in raw_df.columns:
        mobile_df = raw_df[raw_df["mobility"] == True][pattern_cols]  # noqa: E712
        immobile_df = raw_df[raw_df["mobility"] == False][pattern_cols]  # noqa: E712

        # Calculate the pairwise correlation matrices for both subsets
        mobile_corr = mobile_df.corr()
        immobile_corr = immobile_df.corr()

        # Perform hierarchical clustering on the mobile correlation matrix
        mobile_linkage = linkage(mobile_corr, method="average")
        mobile_order = leaves_list(mobile_linkage)

        # Reorder both correlation matrices based on the mobile data clustering
        mobile_corr = mobile_corr.iloc[mobile_order, mobile_order]
        immobile_corr = immobile_corr.iloc[mobile_order, mobile_order]

        # Create the combined correlation matrix on an explicit NumPy copy.
        # Writing through `DataFrame.values` is not reliable: the array can be
        # a copy (write silently lost) or read-only under pandas
        # Copy-on-Write (write raises).
        combined_values = mobile_corr.to_numpy(copy=True)
        upper_idx = np.triu_indices_from(combined_values, k=1)
        combined_values[upper_idx] = immobile_corr.to_numpy()[upper_idx]
        combined_corr = pd.DataFrame(
            combined_values,
            index=mobile_corr.index,
            columns=mobile_corr.columns,
        )

        # Plot the combined correlation matrix with colour limits spanning
        # the values of both source matrices
        plt.figure(figsize=(10, 8))
        sns.heatmap(
            combined_corr,
            annot=True,
            cmap="Greens",
            linewidths=0.5,
            vmin=min(mobile_corr.min().min(), immobile_corr.min().min()),
            vmax=max(mobile_corr.max().max(), immobile_corr.max().max()),
        )
        plt.title(
            "Combined Correlation Matrix (Mobile - Lower Triangle, Immobile - Upper Triangle)"
        )
        plt.tight_layout()
        plt.show()

    else:
        print("Column 'mobility' not found in the data.")


In [None]:
# Single square heatmap combining both conditions without any reordering:
# the mobile correlations fill the lower triangle and the diagonal, the
# immobile correlations fill the upper triangle. The figure is saved as
# SVG and PNG under <sima_folders>/corr_plots.
if raw_df is not None:
    # Drop the bookkeeping columns (only when both are present; an earlier
    # cell may already have removed them).
    columns_to_drop = ["sima_folder", "cell_num"]
    if all(col in raw_df.columns for col in columns_to_drop):
        raw_df = raw_df.drop(columns=columns_to_drop)
        print("Dropped unnecessary columns: 'sima_folder' and 'cell_num'")

    # Identify columns that contain 'plane' or 'soma'
    pattern_cols = [col for col in raw_df.columns if "plane" in col or "soma" in col]

    # Split the data based on the 'mobility' column
    if "mobility" in raw_df.columns:
        mobile_df = raw_df[raw_df["mobility"] == True][pattern_cols]  # noqa: E712
        immobile_df = raw_df[raw_df["mobility"] == False][pattern_cols]  # noqa: E712

        # Calculate the pairwise correlation matrices for both subsets
        mobile_corr = mobile_df.corr()
        immobile_corr = immobile_df.corr()

        # Create the combined correlation matrix on an explicit NumPy copy.
        # Writing through `DataFrame.values` is not reliable: the array can be
        # a copy (write silently lost) or read-only under pandas
        # Copy-on-Write (write raises).
        combined_values = mobile_corr.to_numpy(copy=True)
        upper_idx = np.triu_indices_from(combined_values, k=1)
        combined_values[upper_idx] = immobile_corr.to_numpy()[upper_idx]
        combined_corr = pd.DataFrame(
            combined_values,
            index=mobile_corr.index,
            columns=mobile_corr.columns,
        )

        # Plot the combined correlation matrix with colour limits spanning
        # the values of both source matrices
        plt.figure(figsize=(10, 8))
        sns.heatmap(
            combined_corr,
            annot=True,
            cmap="Greens",
            linewidths=0.5,
            vmin=min(mobile_corr.min().min(), immobile_corr.min().min()),
            vmax=max(mobile_corr.max().max(), immobile_corr.max().max()),
        )
        plt.title(
            "Combined Correlation Matrix (Mobile - Lower Triangle, Immobile - Upper Triangle)"
        )
        plt.tight_layout()

        # savefig does not create missing directories, so make sure the
        # output folder exists before writing the figure.
        output_dir = join(sima_folders, "corr_plots")
        os.makedirs(output_dir, exist_ok=True)
        # NOTE(review): the file name says "clustered" although this cell does
        # not reorder the matrices; name kept unchanged - confirm intent.
        output_stem = join(
            output_dir, f"cell{cell_num}_clustered_correlation_matrices_sq"
        )
        plt.savefig(f"{output_stem}.svg")
        plt.savefig(f"{output_stem}.png", dpi=300)
        plt.show()

    else:
        print("Column 'mobility' not found in the data.")
