<a href="https://colab.research.google.com/github/GergelyTuri/sleep/blob/master/notebooks/Cosine_dist_calc_single_set.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prototype notebook to calculate cosine distance for one dataset
Designed to run on data stored in Google Drive.
* 3/10/2024 setup

In [None]:
# Clone the project repository into the current working directory; the next cell installs the package from this checkout.
!git clone https://github.com/GergelyTuri/sleep.git

In [None]:
%cd sleep
!pip install .

In [None]:
# %cd sleep
# !git pull

In [None]:
# Mount Google Drive into the Colab runtime at /gdrive.
from google.colab import drive
drive.mount('/gdrive')

In [None]:
from os.path import join
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

import numpy as np
import pandas as pd
import seaborn as sns
from sleep.src import google_drive
from sleep.src import clustering

In [None]:
# Fetch the dataset listing from the project's google_drive helper and display it.
# NOTE(review): the structure of the returned object is defined in sleep.src.google_drive and is not visible here.
useful_data = google_drive.useful_datasets()
useful_data

In [None]:
# Resolve the directory of the single recording analysed in this notebook; every file below is read from or written to it.
# NOTE(review): the three arguments look like (animal ID, experiment day, imaging session) — confirm against google_drive.return_exp_path.
exp_path = google_drive.return_exp_path('5HT2afl05b_1', '7_12_SD2', 'TSeries-07122021-0901-002')
exp_path

In [None]:
# Load the table stored under <exp_path>/eeg/velo_eeg.csv and display it.
velo_eeg_csv = join(exp_path, 'eeg', 'velo_eeg.csv')
processed_sleep_df = pd.read_csv(velo_eeg_csv)
processed_sleep_df

In [None]:
# Load the dF/F table for this recording and index it by ROI label.
file_name = 'dfof_mc.csv'
dfof_data = (
    pd.read_csv(join(exp_path, file_name))
    .set_index('roi_label', drop=True)
)

In [None]:
# Intervals whose "length" is below this threshold are excluded from the analysis.
# NOTE(review): the unit is whatever clustering.interval_length_calculator writes
# into the "length" column (frames vs. seconds) — confirm.
MIN_INTERVAL_LENGTH = 600


def _drop_short_intervals(summary, min_length=MIN_INTERVAL_LENGTH):
    """Return a copy of ``summary`` without the intervals that are too short.

    Args:
        summary: DataFrame with at least the columns ``"length"`` and ``"n"``.
        min_length: rows with ``length < min_length`` mark their ``"n"`` value
            for removal.

    Returns:
        A new DataFrame (fresh 0..k-1 index) holding only the rows whose
        ``"n"`` value was not marked. Every row sharing a marked ``"n"`` is
        dropped, not just the short row itself.
    """
    short_ids = summary.loc[summary["length"] < min_length, "n"].values
    keep = ~summary["n"].isin(short_ids)
    return summary[keep].reset_index(drop=True).copy()


# Generate summary dataframes
nrem_summary = clustering.interval_length_calculator(processed_sleep_df, 'NREM', 1)
awake_summary = clustering.interval_length_calculator(processed_sleep_df, 'awake', 1)

# Filter summary dataframes (same rule for both states)
summary_nrem = _drop_short_intervals(nrem_summary)
summary_awake = _drop_short_intervals(awake_summary)

In [None]:
# Inspect the awake intervals that survived the length filter.
summary_awake

In [None]:
# Build the per-state dF/F inputs for the distance calculation from the retained intervals.
# NOTE(review): the layout of the returned objects is set by clustering.process_dfof_intervals — confirm before relying on it.
awake_dfof_intervals = clustering.process_dfof_intervals(dfof_data, summary_awake)
nrem_dfof_intervals = clustering.process_dfof_intervals(dfof_data, summary_nrem)

In [None]:
# Quick sanity check on the size of the NREM dF/F data.
nrem_dfof_intervals.shape

In [None]:
# Cosine distances computed separately for each state, with the helper's normalisation switched on.
# NOTE(review): what `normalize=True` rescales is defined in clustering.calculate_cosine_distance — confirm.
cos_dist_nrem = clustering.calculate_cosine_distance(nrem_dfof_intervals, 'nrem', normalize=True)
cos_dist_awake = clustering.calculate_cosine_distance(awake_dfof_intervals, 'awake', normalize=True)

In [None]:
# Reorder each state's distance matrix with the project helper before plotting.
sorted_cos_dist_nrem = clustering.sort_distance_matrix(cos_dist_nrem)
sorted_cos_dist_awake = clustering.sort_distance_matrix(cos_dist_awake)

# One heatmap panel per state, drawn side by side.
# NOTE(review): with sharey=True both panels share one set of y tick labels; if
# sort_distance_matrix orders the rows differently for the two states, the labels
# shown can only be right for one panel — confirm and consider sharey=False.
heatmap_panels = (
    (sorted_cos_dist_nrem, "Sorted Cosine Distance - NREM"),
    (sorted_cos_dist_awake, "Sorted Cosine Distance - Awake"),
)
fig, axs = plt.subplots(1, 2, figsize=(16, 8), sharey=True)
for panel_ax, (sorted_matrix, panel_title) in zip(axs, heatmap_panels):
    sns.heatmap(sorted_matrix, ax=panel_ax, cmap="viridis")
    panel_ax.set_title(panel_title)

plt.tight_layout()

# Write the figure next to the experiment data, then display it.
fig.savefig(join(exp_path, "MC_sorted_cosine_dist.png"), dpi=300)

plt.show()

In [None]:
# Colour assigned to each state. The histogram and its legend both read from this
# mapping, so they cannot disagree. (Previously a positional palette list was
# matched against the alphabetically ordered categories 'Awake', 'NREM', which
# swapped the colours relative to the hand-written legend.)
STATE_COLORS = {'NREM': 'green', 'Awake': 'blue'}


def _off_diagonal_values(distance_matrix):
    """Return the non-NaN off-diagonal entries of a distance matrix as a 1-D array.

    The diagonal (self-distance) is excluded by position instead of by comparing
    values against 0. That keeps genuine zero distances between two different
    rows and still drops self-distances that are only approximately zero because
    of floating-point round-off.

    Args:
        distance_matrix: 2-D DataFrame of pairwise distances.
            Assumes row i and column i refer to the same item — TODO confirm
            against clustering.calculate_cosine_distance.

    Returns:
        1-D array in row-major order. Both triangles are kept, so every pair
        contributes twice when the matrix is symmetric.
    """
    values = distance_matrix.values
    off_diagonal = ~np.eye(*values.shape, dtype=bool)
    kept = values[off_diagonal]
    return kept[~np.isnan(kept)]


# Collect the pairwise distances of each state, leaving out the diagonal (self-distance) entries
distance_values_nrem = _off_diagonal_values(cos_dist_nrem)
distance_values_awake = _off_diagonal_values(cos_dist_awake)

# Combine the distance values into a single array with an associated label array
distance_values_combined = np.concatenate([distance_values_nrem, distance_values_awake])
states_combined = ['NREM'] * len(distance_values_nrem) + ['Awake'] * len(distance_values_awake)

# Create DataFrame
distances_df = pd.DataFrame({
    'Cosine Distance': distance_values_combined,
    'State': states_combined
})

distances_df['State'] = distances_df['State'].astype('category')

# Plot
hist_fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    data=distances_df,
    x='Cosine Distance',
    hue='State',
    hue_order=list(STATE_COLORS),  # fix the level order explicitly instead of relying on category sorting
    palette=STATE_COLORS,          # state -> colour mapping, independent of level order
    kde=True,
    element='step',
    bins=30,
    ax=ax,
)

ax.set_title('Distribution of Cosine Distance Values by State')
ax.set_xlabel('Cosine Distance')
ax.set_ylabel('Frequency')

# Build the legend from the same mapping that coloured the plot
legend_handles = [
    Patch(facecolor=color, label=state)
    for state, color in STATE_COLORS.items()
]
ax.legend(handles=legend_handles, title='State')

# Saving the figure
hist_fig.savefig(join(exp_path, 'MC_distrib_cosine_distance_values.png'), dpi=300)

plt.show()