In [None]:
### SETUP ###
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

from SCRIPTS.config import *
from SCRIPTS.xyzcoords import (
    load_coordinates,
    fix_outlier_coordinates,
    plot_3d_adj_matrix
)
from SCRIPTS.dataprep import (
    load_fnirs_data,
    verify_data_match,
    combine_fnirs_data,
    generate_scatter_coefficients,
    combine_scattering_data
)

# Plot appearance: matplotlib style sheet first, then the seaborn colour palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

# Make sure the project's output folders exist, then echo where data will live.
create_directories()
print(
    f"Data directory: {PREPARED_FNIRS_DIR}",
    f"Scattering directory: {SCATTERING_DIR}",
    sep="\n",
)

Data directory: /Users/judesack/Neurospectrum_Creativity/DATA/PREPARED_FNIRS_DATA
Scattering directory: /Users/judesack/Neurospectrum_Creativity/DATA/SCATTERING_COEFFICIENTS


In [4]:
### CONFIGURATION ###
# Boolean switches read by the cells below. Each one enables an optional step.

# True: call plot_3d_adj_matrix() on the channel coordinates after they are loaded.
VISUALIZE_ADJACENCY = False

# True: call generate_scatter_coefficients(); False: skip and rely on existing files.
GENERATE_COEFFICIENTS = False

# True: rebuild via combine_fnirs_data(); False: read the existing COMBINED_FNIRS CSV.
COMBINE_FNIRS = False

# True: rebuild via combine_scattering_data(); False: read the existing COMBINED_SCATTERING CSV.
COMBINE_SCATTERING = False


In [6]:
### LOAD 3D COORDINATES ###

# load_coordinates() yields probe-level and channel-level coordinates; the
# channel collection is iterated below as (subject_id, coords) pairs.
subject_probe_coords, subject_channel_coords = load_coordinates()
subject_ids = [subj_id for subj_id, _ in subject_channel_coords]
print(f"\nSubjects found: {len(subject_ids)}")
print(f"IDs: {', '.join(sorted(subject_ids))}")

# Fix known outliers: subject 15052902 has two channel nodes (0 and 3) whose
# recorded positions are replaced by fix_outlier_coordinates().
subject_channel_coords = fix_outlier_coordinates(
    subject_channel_coords,
    problem_subject="15052902",
    problem_nodes=[0, 3],
)
print("✓ Outlier coordinates fixed")

# Plotting is opt-in (see the configuration cell). Test the flag's truthiness
# directly instead of comparing it to True.
if VISUALIZE_ADJACENCY:
    print(f"\nVisualizing 3D channel networks (threshold={THRESHOLD})...")
    plot_3d_adj_matrix(subject_channel_coords, threshold=THRESHOLD)


Loading 3D channel coordinates from .pos files...

Processed 20 subjects
Probe matrix shape: (32, 3)
Channel matrix shape: (48, 3)

Subjects found: 20
IDs: 14073001, 14091102, 14091701, 14092201, 14101601, 15012001, 15040901, 15052902, 15053001sub1, 15053001sub2, 15072703, 15080601, 15081202sub1, 15081202sub2, 15111101, 16100101, 16100601, 16100801, 16101401, 16102002
Replacing coords for Subject 15052902, Node 0:
  Before: [-209.435   91.875  100.625]
  After: [ 70.2175 -52.3625  28.575 ]
Replacing coords for Subject 15052902, Node 3:
  Before: [-203.655  108.15   108.975]
  After: [ 70.2175 -52.3625  28.575 ]
✓ Outlier coordinates fixed


In [None]:
### LOAD RAW FNIRS DATA ###

subject_data_matrices = load_fnirs_data()

# Both collections are lists of (subject_id, payload) pairs; downstream steps
# receive them together, so they must describe the same subjects.
match_ok = verify_data_match(subject_channel_coords, subject_data_matrices)
if match_ok:
    print("✓ All subject IDs match between datasets")
else:
    print("✗ Warning: Subject ID mismatch detected")

    # Find common subjects
    xyz_ids = {sid for sid, _ in subject_channel_coords}
    fnr_ids = {sid for sid, _ in subject_data_matrices}
    common = xyz_ids & fnr_ids

    # Say exactly which subjects are about to be discarded, so the data loss
    # is visible in the notebook output rather than silent.
    coords_only = sorted(map(str, xyz_ids - fnr_ids))
    data_only = sorted(map(str, fnr_ids - xyz_ids))
    if coords_only:
        print(f"  Dropping (coordinates only): {', '.join(coords_only)}")
    if data_only:
        print(f"  Dropping (fNIRS data only): {', '.join(data_only)}")

    subject_channel_coords = [(sid, coords) for sid, coords in subject_channel_coords if sid in common]
    subject_data_matrices = [(sid, data) for sid, data in subject_data_matrices if sid in common]
    print(f"  Keeping {len(common)} subjects present in both datasets")
    

Excluding Subject 15081202sub1: shape (750, 48) ≠ (7850, 48)
Excluding Subject 15072703: shape (750, 48) ≠ (7850, 48)

Processed 17 subjects
Only 17/20 subjects processed
Subject IDs match between datasets
✓ All subject IDs match between datasets


In [17]:
### SCATTERING COEFFICIENTS ###

# Coefficient generation is opt-in (see the configuration cell); when it is
# off, later cells rely on files that already exist on disk.
if GENERATE_COEFFICIENTS:
    print("\nGenerating scattering coefficients...")
    generate_scatter_coefficients(subject_channel_coords, subject_data_matrices)
    print("✓ Scattering coefficients generated")
else:
    print("✓ Skipping coefficient generation (using existing files)")

✓ Skipping coefficient generation (using existing files)


In [12]:
### COMBINE RAW FNIRS DATA ###

# Either rebuild the combined table or reuse the CSV written by an earlier run.
if COMBINE_FNIRS:
    fnirs_df = combine_fnirs_data()
    print(f"✓ Saved to {COMBINED_FNIRS}")
else:
    print(f"✓ Loading existing combined fNIRS data from {COMBINED_FNIRS}")
    # Subject IDs mix purely numeric values ("14073001") with alphanumeric
    # ones ("15053001sub1"). Without a pinned dtype, read_csv's chunked type
    # inference can yield a column holding both ints and strings, which
    # breaks equality filters and groupby. Force every ID to str.
    fnirs_df = pd.read_csv(COMBINED_FNIRS, dtype={'subject_id': str})

# Summary: overall shape, number of node_* feature columns, and the row count
# of the first subject that appears in the table.
node_columns = [c for c in fnirs_df.columns if c.startswith('node_')]
first_subject = fnirs_df['subject_id'].iloc[0]
print(f"\nCombined fNIRS data shape: {fnirs_df.shape}")
print(f"Features: {len(node_columns)} nodes")
print(f"Samples per subject: {len(fnirs_df[fnirs_df['subject_id'] == first_subject])}")


Excluding Subject 15081202sub1: shape (750, 48) ≠ (7850, 48)
Excluding Subject 15072703: shape (750, 48) ≠ (7850, 48)

Dataset: 133450 samples, 48 features
Data saved to /Users/judesack/Neurospectrum_Creativity/DATA/PREPARED_FNIRS_DATA/combined_fnirs_data.csv
✓ Saved to /Users/judesack/Neurospectrum_Creativity/DATA/PREPARED_FNIRS_DATA/combined_fnirs_data.csv

Combined fNIRS data shape: (133450, 51)
Features: 48 nodes
Samples per subject: 7850


In [13]:
### COMBINE SCATTERING DATA ###

# Either rebuild the combined table or reuse the CSV written by an earlier run.
if COMBINE_SCATTERING:
    scat_df = combine_scattering_data()
    print(f"✓ Saved to {COMBINED_SCATTERING}")
else:
    print(f"✓ Loading existing combined scattering data from {COMBINED_SCATTERING}")
    # Subject IDs mix purely numeric values ("14073001") with alphanumeric
    # ones ("15053001sub1"). Pin the column to str so read_csv's chunked type
    # inference cannot return a mixed int/str column for this wide file.
    scat_df = pd.read_csv(COMBINED_SCATTERING, dtype={'subject_id': str})

# Summary: overall shape and the number of feature_* columns.
feature_columns = [c for c in scat_df.columns if c.startswith('feature_')]
print(f"\nCombined scattering data shape: {scat_df.shape}")
print(f"Features: {len(feature_columns)} scattering features")


Processing 17 subjects...
Processing subject 15080601...
Processing subject 15052902...
Processing subject 16102002...
Processing subject 14091102...
Processing subject 16100801...
Processing subject 16101401...
Processing subject 14101601...
Processing subject 14092201...
Processing subject 15053001sub1...
Processing subject 14091701...
Processing subject 15040901...
Processing subject 16100601...
Processing subject 15111101...
Processing subject 15081202sub2...
Processing subject 16100101...
Processing subject 15012001...
Processing subject 15053001sub2...

Dataset: 133450 samples, 768 features
Task distribution:
task
Rest      68000
Improv    27200
Scale     27200
Other     11050
Name: count, dtype: int64
Data saved to /Users/judesack/Neurospectrum_Creativity/DATA/SCATTERING_COEFFICIENTS/combined_scattering_data.csv
✓ Saved to /Users/judesack/Neurospectrum_Creativity/DATA/SCATTERING_COEFFICIENTS/combined_scattering_data.csv

Combined scattering data shape: (133450, 771)
Features: 76

In [None]:
### CHECK FOR MISSING VALUES ###

# Total number of null cells across every column of each table.
fnirs_missing = fnirs_df.isnull().sum().sum()
scat_missing = scat_df.isnull().sum().sum()

print("\nMissing values:")
print(f"  fNIRS data: {fnirs_missing}")
print(f"  Scattering data: {scat_missing}")

# Verify subject counts
fnirs_subjects = fnirs_df['subject_id'].nunique()
scat_subjects = scat_df['subject_id'].nunique()

print("\nSubject counts:")
print(f"  fNIRS: {fnirs_subjects} subjects")
print(f"  Scattering: {scat_subjects} subjects")

# Check data shapes per subject: rows per subject_id in each table, compared
# with the first dimension of EXPECTED_FNIRS_SHAPE.
expected_rows = EXPECTED_FNIRS_SHAPE[0]
fnirs_rows = fnirs_df.groupby('subject_id').size()
scat_rows = scat_df.groupby('subject_id').size()

print("\nData points per subject:")
print(f"  Expected: {expected_rows} timepoints")
print(f"  fNIRS: {fnirs_rows.iloc[0]} timepoints")
print(f"  Scattering: {scat_rows.iloc[0]} timepoints")

# The summary lines above only show the first subject. Inspect every subject
# so that a wrong row count anywhere in the table is reported.
for label, rows in (("fNIRS", fnirs_rows), ("Scattering", scat_rows)):
    deviating = rows[rows != expected_rows]
    if not deviating.empty:
        print(f"  ✗ {label}: {len(deviating)} subject(s) deviate from "
              f"{expected_rows} timepoints: {deviating.to_dict()}")


Missing values:
  fNIRS data: 0
  Scattering data: 0

Subject counts:
  fNIRS: 17 subjects
  Scattering: 17 subjects

Data points per subject:
  Expected: 7850 timepoints
  fNIRS: 7850 timepoints
  Scattering: 7850 timepoints
