In [None]:
from mne.datasets.sleep_physionet.age import fetch_data
import mne
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Reading annotation files


In [None]:
# Load the first annotation file and preview its leading rows.
first_annotation_file = 'sleep_annotations0.csv'
df_annot0 = pd.read_csv(first_annotation_file)
df_annot0.head()

In [None]:
# Load the remaining annotation files, keeping one named frame per file.
remaining_annotation_files = (
    'sleep_annotations1.csv',
    'sleep_annotations2.csv',
    'sleep_annotations3.csv',
    'sleep_annotations40.csv',
    'sleep_annotations41.csv',
    'sleep_annotations4.csv',
    'sleep_annotations5.csv',
)
(
    df_annot1,
    df_annot2,
    df_annot3,
    df_annot40,
    df_annot41,
    df_annot4,
    df_annot5,
) = map(pd.read_csv, remaining_annotation_files)

# Stack every frame (df_annot0 first) into one table; ignore_index=True
# discards the per-file row labels and renumbers the rows from 0.
all_annotation_frames = [
    df_annot0,
    df_annot1,
    df_annot2,
    df_annot3,
    df_annot40,
    df_annot41,
    df_annot4,
    df_annot5,
]
df_annots = pd.concat(all_annotation_frames, ignore_index=True)
df_annots.shape

In [None]:
# Persist the combined annotations; index=False omits the row index column.
df_annots.to_csv(path_or_buf='annotationsfirst55.csv', index=False)

## Plots

In [None]:
# Default figure size for plots that do not pass their own figsize.
plt.rcParams.update({'figure.figsize': (8, 4)})
# Apply seaborn's "whitegrid" style to subsequent figures.
sns.set(style="whitegrid")

### Stage distribution per subject

In [None]:
# Total duration per stage per subject (long format: one row per subject/stage pair)
dur_per_subj = df_annots.groupby(["subject", "description"])["duration"].sum().reset_index()
# Total annotated duration per subject, indexed by subject
total_dur = df_annots.groupby("subject")["duration"].sum()
# Percentage of each subject's total taken by each stage. Mapping the
# per-subject totals onto the rows keeps this vectorized instead of running
# a Python lambda once per row.
dur_per_subj["pct"] = 100 * dur_per_subj["duration"] / dur_per_subj["subject"].map(total_dur)
# Pivot to wide format (subjects as rows, stages as columns); a stage that a
# subject never has is shown as 0 %.
dist_pivot = dur_per_subj.pivot(index="subject", columns="description", values="pct").fillna(0)

# Plot: label the Axes returned by pandas rather than relying on pyplot's
# implicit "current axes".
ax = dist_pivot.plot(kind="bar", stacked=True, figsize=(12, 7))
ax.set_ylabel("Percentage of Night (%)")
ax.set_title("Sleep Stage Distribution per Subject")
ax.legend(title="Stage")
plt.tight_layout()
plt.show()

### Average duration per stage

In [None]:
# Total duration of each stage for each subject (subjects as rows, stages as
# columns). A stage a subject never has counts as 0 so it still contributes
# to the across-subject average.
per_subject_totals = (
    df_annots.groupby(["subject", "description"])["duration"]
      .sum()
      .unstack(fill_value=0)
)

# Average of the per-subject totals. The previous version summed over all
# subjects, which plotted a grand total under an "Average" label.
avg_dur = (
    per_subject_totals.mean(axis=0)
      .rename("total_duration")
      .rename_axis("description")
      .reset_index()
)

# Convert seconds -> minutes for readability
avg_dur["minutes"] = avg_dur["total_duration"] / 60

sns.barplot(data=avg_dur, x="description", y="minutes")
plt.xlabel("Sleep Stage")
plt.ylabel("Average Duration (min)")
plt.title("Average Total Duration per Stage Across Subjects")
plt.tight_layout()
plt.show()


### Transition probability matrix

In [None]:
# Sort by subject and onset
df_sorted = df_annots.sort_values(["subject", "onset"])

# Next stage column
df_sorted["next_stage"] = df_sorted.groupby("subject")["description"].shift(-1)

# Remove transitions between subjects
valid = df_sorted.dropna(subset=["next_stage"])

# Count transitions
trans_counts = valid.groupby(["description", "next_stage"]).size().unstack(fill_value=0)

# Convert to probabilities
trans_probs = trans_counts.div(trans_counts.sum(axis=1), axis=0)

plt.figure(figsize=(8, 5))
sns.heatmap(trans_probs, annot=True, fmt=".2f", cmap="Blues")
plt.title("Stage Transition Probability Matrix")
plt.ylabel("Current Stage")
plt.xlabel("Next Stage")
plt.tight_layout()
plt.show()


### Stage count per subject

In [None]:
# Number of annotation rows per (subject, stage) pair. This is computed here
# because no earlier cell defines `stage_counts`, so the cell would otherwise
# fail with NameError on a fresh kernel.
stage_counts = (
    df_annots.groupby(["subject", "description"])
      .size()
      .reset_index(name="count")
)

# Wide format: subjects as rows, stages as columns; a stage a subject never
# has is shown as 0.
pivot_counts = stage_counts.pivot(
    index="subject",
    columns="description",
    values="count"
).fillna(0)

plt.figure(figsize=(8, 8))
sns.heatmap(pivot_counts, cmap="crest", annot=False)
plt.title("Sleep Stage Counts per Subject (Heatmap)")
plt.tight_layout()
# Save before plt.show(), since showing can release the current figure.
plt.savefig("sleep_stage_heatmap.png", dpi=300, bbox_inches="tight")
plt.show()
