In [None]:
import os
# Directory that holds the saved feature/label arrays. The leading "~" is
# expanded to the current user's home directory.
_RESULTS_SUBPATH = '~/Desktop/math_results'
desktop_dir = os.path.expanduser(_RESULTS_SUBPATH)

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import torch

# Reduce the raw feature matrix to a small, standardized representation and
# wrap it (plus the labels) in PyTorch tensors:
#   load -> SelectKBest (ANOVA F-test) -> PCA -> StandardScaler -> tensors

# 1. Load the extracted features and their labels
X = np.load(os.path.join(desktop_dir, 'X_features.npy'))  # expected shape: (30, 79800)
y = np.load(os.path.join(desktop_dir, 'y_labels.npy'))    # expected shape: (30,)

print(f"Original shape: {X.shape}")

# NOTE(review): every transform below is fitted on the FULL dataset, and
# SelectKBest uses the labels. If X_tensor is later split into train/test
# (or cross-validation) folds, the held-out samples have already influenced
# feature selection, which biases the evaluation optimistically. For an
# unbiased estimate, fit these steps inside each training fold instead
# (e.g. with sklearn.pipeline.Pipeline).

# --- STAGE 1: Univariate Selection (Filter) ---
# Keep the connections with the highest ANOVA F-score against the group label.
# The target of 1000 is capped at the number of available features so that a
# narrower input does not make SelectKBest fail.
n_selected = min(1000, X.shape[1])
selector = SelectKBest(score_func=f_classif, k=n_selected)
X_selected = selector.fit_transform(X, y)
print(f"After SelectKBest: {X_selected.shape}")

# --- STAGE 2: PCA (Compress) ---
# Compress the selected features into (at most) 20 principal components.
# PCA cannot return more components than min(n_samples, n_features), so the
# target is capped by the shape of the selected matrix.
n_components = min(20, *X_selected.shape)
# random_state pins the result when scikit-learn picks a randomized SVD
# solver, so repeated runs produce the same components.
pca = PCA(n_components=n_components, random_state=0)
X_pca = pca.fit_transform(X_selected)
print(f"Final PCA shape: {X_pca.shape}")

# --- STAGE 3: Final Scaling ---
# Deep Learning models perform best when features are mean=0, std=1
scaler = StandardScaler()
X_final = scaler.fit_transform(X_pca)

# Convert to PyTorch Tensors
X_tensor = torch.tensor(X_final, dtype=torch.float32)
# Labels become a float column vector of shape (n_samples, 1).
y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)

print("✅ Data is ready for PyTorch MLP.")