In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# --- PCA on the standardized feature matrix --------------------------------
# Non-feature columns: every column whose name contains 'Label', plus the
# epoch index and the target column itself.
label_like_cols = [name for name in final_df.columns if 'Label' in name]
cols_to_drop = label_like_cols + ['Epoch', 'Sleep_Stage']
X = final_df.drop(columns=cols_to_drop)
y = final_df['Sleep_Stage'].values

# Standardize first, then mean-impute the scaled matrix.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X_scaled)

# Keep at most 10 components (fewer when there are fewer feature columns).
n_components = min(10, X_imputed.shape[1])
pca = PCA(n_components=n_components)
principal_components = pca.fit_transform(X_imputed)

# Share of total variance captured by each retained component.
explained_variance = pca.explained_variance_ratio_
print("Explained Variance per PCA component:", explained_variance)

# Component scores as a frame (PC1, PC2, ...) with the stage label and epoch
# of each row attached positionally.
pc_names = [f'PC{idx}' for idx in range(1, principal_components.shape[1] + 1)]
pca_df = pd.DataFrame(principal_components, columns=pc_names)
pca_df['Sleep_Stage'] = final_df['Sleep_Stage'].values
pca_df['Epoch'] = final_df['Epoch'].values

print(pca_df)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Feature columns only: drop the epoch index and label columns when present.
features_df = final_df.drop(columns=['Epoch', 'Sleep_Stage', 'Label'], errors='ignore')

# Pairwise correlations of all features together with Sleep_Stage.
features_with_stage = features_df.join(final_df['Sleep_Stage'])
correlation_matrix = features_with_stage.corr()

# Keep just the Sleep_Stage column and remove its self-correlation row.
sleep_corr = correlation_matrix.loc[:, ['Sleep_Stage']].drop(index='Sleep_Stage')

# One-column heatmap: one row per feature.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(sleep_corr, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Feature Correlations with Sleep Stage')
plt.show()


In [None]:
# PCA loadings: one row per principal component, one column per input feature.
#
# The column labels must be exactly the features the 10-component PCA was
# fitted on, i.e. final_df without 'Epoch', 'Sleep_Stage' AND without every
# column whose name contains 'Label'. Dropping only 'Epoch' and 'Sleep_Stage'
# leaves any 'Label' column in, so the number of labels no longer matches
# pca.components_ and the DataFrame constructor fails.
non_feature_cols = [col for col in final_df.columns if 'Label' in col] + ['Epoch', 'Sleep_Stage']
pca_feature_names = final_df.drop(columns=non_feature_cols).columns

# Guard against `pca` having been refitted on a different feature set by
# another cell (notebook cells share one namespace).
if pca.components_.shape[1] != len(pca_feature_names):
    raise ValueError(
        f"`pca` was fitted on {pca.components_.shape[1]} features but "
        f"{len(pca_feature_names)} feature columns were found in final_df; "
        "re-run the PCA cell before computing loadings."
    )

loadings = pd.DataFrame(
    pca.components_,
    columns=pca_feature_names,
    index=[f'PC{i+1}' for i in range(pca.components_.shape[0])],
)

# Transposed so features run down the y-axis and components along the x-axis.
plt.figure(figsize=(12, 6))
sns.heatmap(loadings.T, annot=True, cmap='coolwarm', center=0)
plt.title('PCA Feature Loadings')
plt.xlabel('Principal Components')
plt.ylabel('Features')
plt.show()


In [None]:
# Frequency of each distinct value in the 'Gamma' column (shown as cell output).
gamma_counts = final_df['Gamma'].value_counts()
gamma_counts

In [None]:
from scipy.stats import f_oneway

feature = 'Theta'  # try different EEG/EMG/EOG features

# Build one sample per distinct Sleep_Stage value (in order of first
# appearance), with missing values of the chosen feature removed.
groups = []
for stage in final_df.Sleep_Stage.unique():
    in_stage = final_df.Sleep_Stage == stage
    groups.append(final_df.loc[in_stage, feature].dropna())

# One-way ANOVA across the per-stage samples.
F, p = f_oneway(*groups)

print(f"ANOVA for {feature}: F-statistic={F}, p-value={p}")


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Random Forest baseline: predict Sleep_Stage from every feature column.
# 'Epoch' and 'Label' are dropped when present (errors='ignore').
X = final_df.drop(['Epoch', 'Sleep_Stage', 'Label'], axis=1, errors='ignore')
y = final_df['Sleep_Stage']

# Mean-impute missing feature values.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Split the IMPUTED matrix. Splitting the raw `X` here would silently discard
# the imputation above and pass NaNs straight to the classifier.
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)

# Fit the classifier on the 80% training portion.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out 20%.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

# Target and feature matrix ('Epoch' / 'Label' dropped when present).
y = final_df['Sleep_Stage']
X = final_df.drop(['Epoch', 'Sleep_Stage', 'Label'], axis=1, errors='ignore')

# Mean-impute, then hold out 20% of the rows.
mean_imputer = SimpleImputer(strategy='mean')
X_imputed = mean_imputer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42
)

# Fit the forest on the training portion.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Importances labelled by feature name, largest first.
importance_df = pd.Series(clf.feature_importances_, index=X.columns)
importance_df = importance_df.sort_values(ascending=False)
print(importance_df)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Feature columns only ('Epoch' / 'Label' dropped when present).
features_df = final_df.drop(columns=['Epoch', 'Sleep_Stage', 'Label'], errors='ignore')

# Correlation of each feature column with Sleep_Stage (a Series indexed by
# feature name, despite the variable's name).
corr_matrix = features_df.corrwith(final_df['Sleep_Stage'])

# Horizontal bars: one per feature.
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(y=corr_matrix.index, x=corr_matrix.values, palette='coolwarm', ax=ax)
ax.set_xlabel("Correlation with Sleep Stage")
ax.set_ylabel("Features")
ax.set_title("Feature Correlations with Sleep Stage")
ax.grid(True, linestyle='--', alpha=0.5)
plt.show()

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Feature matrix ('Epoch' / 'Label' dropped when present).
X_epoch = final_df.drop(columns=['Epoch', 'Sleep_Stage', 'Label'], errors='ignore')

# Mean-impute missing values, then standardize the imputed matrix.
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X_epoch)

epoch_scaler = StandardScaler()
X_scaled_epoch = epoch_scaler.fit_transform(X_imputed)

# Project onto the first two principal components.
pca_epoch = PCA(n_components=2).fit_transform(X_scaled_epoch)

# Scores plus the stage label of each row (attached positionally).
pca_plot_df = pd.DataFrame(pca_epoch, columns=['PC1', 'PC2']).assign(
    Sleep_Stage=final_df['Sleep_Stage'].values
)

# Scatter of the 2-D projection, coloured by sleep stage.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=pca_plot_df, x="PC1", y="PC2", hue="Sleep_Stage", palette="viridis", ax=ax)
ax.set_title("PCA Projection of Sleep Stages")
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One boxplot per listed column, grouped by Sleep_Stage.
features_to_plot = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']  # Modify for your dataset

for feature in features_to_plot:
    # A fresh figure per feature so each plot is shown on its own.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.boxplot(data=final_df, x='Sleep_Stage', y=feature, palette='viridis', ax=ax)
    ax.set_title(f"{feature} Distribution Across Sleep Stages")
    plt.show()


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import pandas as pd

# One-vs-rest feature importance: for each sleep stage, train a binary
# Random Forest ("this stage" vs "any other") and report which features it
# relies on most.
X = final_df.drop(['Epoch', 'Sleep_Stage', 'Label'], axis=1, errors='ignore')

# Mean imputation depends only on X, not on the stage, so fit it once here
# instead of refitting the identical imputer on every loop iteration.
X_imputed = SimpleImputer(strategy='mean').fit_transform(X)

for stage in final_df['Sleep_Stage'].unique():
    print(f"\nFeature Importance for Sleep Stage {stage}:")

    # Binary target: 1 if the row belongs to the current stage, 0 otherwise.
    y_binary = (final_df['Sleep_Stage'] == stage).astype(int)

    # Same 80/20 split for every stage (fixed random_state).
    X_train, X_test, y_train, y_test = train_test_split(X_imputed, y_binary, test_size=0.2, random_state=42)

    # Train the one-vs-rest classifier.
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Importances labelled by feature name, largest first.
    importance_df = pd.Series(clf.feature_importances_, index=X.columns).sort_values(ascending=False)

    # Top 5 as text, top 10 as a bar chart.
    print(importance_df.head(5))

    importance_df.head(10).plot(kind='bar', figsize=(8, 5))
    plt.title(f"Feature Importance for Sleep Stage {stage}")
    plt.xlabel("Features")
    plt.ylabel("Importance Score")
    plt.show()


In [None]:
# Table of correlations: one row per column of X, one column per sleep stage.
# NOTE: relies on `X` already being defined by an earlier cell.
stage_corr_df = pd.DataFrame(index=X.columns)

for stage in final_df['Sleep_Stage'].unique():
    # Indicator series: 1 where the row is in this stage, else 0.
    y_binary = final_df['Sleep_Stage'].eq(stage).astype(int)
    column_name = f'Stage {stage} Corr'
    stage_corr_df[column_name] = X.corrwith(y_binary)

# Heatmap of the feature-by-stage correlation table.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(stage_corr_df, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title("Feature Correlations with Each Sleep Stage")
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Columns to summarise.
features_to_plot = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']  # Modify if needed

# Mean of each selected column within every Sleep_Stage group.
mean_values = final_df.groupby("Sleep_Stage")[features_to_plot].mean()

# One line per column, all drawn on a shared axis.
fig, ax = plt.subplots(figsize=(10, 6))
for feature in features_to_plot:
    sns.lineplot(
        x=mean_values.index,
        y=mean_values[feature],
        label=feature,
        marker="o",
        ax=ax,
    )

ax.set_xlabel("Sleep Stage")
ax.set_ylabel("EEG Power (Mean)")
ax.set_title("Trend of EEG Bands Across Sleep Stages")
ax.legend(title="EEG Band")
ax.grid(True, linestyle="--", alpha=0.5)
plt.show()


In [None]:
from sklearn.impute import SimpleImputer
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from scipy.stats import chi2_contingency

# ✅ Load and preprocess data
# The unsupervised inputs must not contain the target or stand-ins for it.
# Besides 'Sleep_Stage', drop the 'Epoch' index and every column whose name
# contains 'Label' (the same non-feature columns the other cells exclude);
# errors='ignore' keeps this working when a column is absent.
non_feature_cols = ['Sleep_Stage', 'Epoch'] + [col for col in final_df.columns if 'Label' in col]
features = final_df.drop(columns=non_feature_cols, errors='ignore')
labels = final_df['Sleep_Stage']

# ✅ Handle Missing Values (Impute NaNs)
imputer = SimpleImputer(strategy="mean")  # Replace NaNs with column mean
X_imputed = imputer.fit_transform(features)

# ✅ Normalize features for PCA, t-SNE, and clustering
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# ✅ 1️⃣ Dimensionality Reduction (PCA & t-SNE)
# PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# t-SNE
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
X_tsne = tsne.fit_transform(X_scaled)

# ✅ 2️⃣ Clustering (K-Means & Hierarchical)
# K-Means Clustering
# n_init is pinned so the result does not depend on the installed
# scikit-learn version's default (and no FutureWarning is emitted).
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(X_scaled)

# Hierarchical Clustering
hierarchical = AgglomerativeClustering(n_clusters=5)
hierarchical_labels = hierarchical.fit_predict(X_scaled)

# ✅ 3️⃣ Sleep Stage Transition Matrix
# Pair each row's stage with the NEXT row's stage. The pairing has to be
# positional: passing the two sliced Series directly lets pd.crosstab align
# them on their shared index, which tabulates every epoch against itself and
# yields a purely diagonal matrix instead of transitions.
# NOTE(review): assumes final_df rows are in chronological epoch order — confirm.
stage_sequence = labels.to_numpy()
transitions = pd.crosstab(
    stage_sequence[:-1],
    stage_sequence[1:],
    rownames=['From stage'],
    colnames=['To stage'],
)
# Row-normalise so each row holds P(next stage | current stage).
transitions_normalized = transitions.div(transitions.sum(axis=1), axis=0)

# ✅ 4️⃣ Multimodal vs. Unimodal Analysis
# PCA for EEG-only vs. EEG+EOG+EMG
eeg_features = features.filter(like='EEG')
if eeg_features.shape[1] == 0:
    # No column name contains 'EEG'. Fall back to the band columns that other
    # cells in this notebook treat as EEG features, keeping those present.
    eeg_band_cols = [col for col in ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma'] if col in features.columns]
    eeg_features = features[eeg_band_cols]
multimodal_features = final_df.drop(columns=non_feature_cols, errors='ignore')

# Fresh imputer/scaler instances per feature set, so the `imputer` and
# `scaler` fitted above are not refitted on different data as a side effect.
X_eeg_scaled = StandardScaler().fit_transform(
    SimpleImputer(strategy="mean").fit_transform(eeg_features)
)
X_multimodal_scaled = StandardScaler().fit_transform(
    SimpleImputer(strategy="mean").fit_transform(multimodal_features)
)

pca_eeg = PCA(n_components=2).fit_transform(X_eeg_scaled)
pca_multimodal = PCA(n_components=2).fit_transform(X_multimodal_scaled)

In [None]:

# ✅ Visualization
# 3x2 summary figure built from arrays computed in an earlier cell:
# X_pca, X_tsne, kmeans_labels, hierarchical_labels, transitions_normalized,
# pca_eeg, pca_multimodal and labels.
fig, axes = plt.subplots(3, 2, figsize=(14, 12))

# PCA Scatterplot
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=labels, palette="tab10", ax=axes[0, 0])
axes[0, 0].set_title("PCA: Sleep Stage Separation")

# t-SNE Scatterplot
sns.scatterplot(x=X_tsne[:, 0], y=X_tsne[:, 1], hue=labels, palette="tab10", ax=axes[0, 1])
axes[0, 1].set_title("t-SNE: Sleep Stage Clustering")

# K-Means Clustering
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=kmeans_labels, palette="tab10", ax=axes[1, 0])
axes[1, 0].set_title("K-Means Clustering (PCA)")

# Hierarchical Clustering
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=hierarchical_labels, palette="tab10", ax=axes[1, 1])
axes[1, 1].set_title("Hierarchical Clustering (PCA)")

# Sleep Stage Transition Matrix (Heatmap)
sns.heatmap(transitions_normalized, annot=True, cmap="coolwarm", fmt=".2f", ax=axes[2, 0])
axes[2, 0].set_title("Sleep Stage Transition Matrix")

# Compare EEG-only vs. Multimodal Features (PCA)
# Both projections go into one long frame and are drawn in a single call:
# colour encodes the sleep stage, marker style encodes the feature set.
# Two overlaid scatterplots with the same hue labels made the feature sets
# indistinguishable and duplicated every legend entry.
stage_values = pd.Series(labels).to_numpy()
eeg_vs_multimodal = pd.concat(
    [
        pd.DataFrame({
            "PC1": pca_eeg[:, 0],
            "PC2": pca_eeg[:, 1],
            "Sleep_Stage": stage_values,
            "Feature set": "EEG-only",
        }),
        pd.DataFrame({
            "PC1": pca_multimodal[:, 0],
            "PC2": pca_multimodal[:, 1],
            "Sleep_Stage": stage_values,
            "Feature set": "EEG+EOG+EMG",
        }),
    ],
    ignore_index=True,
)
sns.scatterplot(
    data=eeg_vs_multimodal,
    x="PC1",
    y="PC2",
    hue="Sleep_Stage",
    style="Feature set",
    palette="tab10",
    alpha=0.6,
    ax=axes[2, 1],
)
axes[2, 1].set_title("PCA: EEG-only vs. EEG+EOG+EMG")

plt.tight_layout()
plt.show()
