## STEP 1: Understand and Prepare the Data

In [None]:
# Step 1: Imports and Setup
import pandas as pd
from pathlib import Path
from google.colab import drive
import json

# Mount Google Drive so the dataset stored under MyDrive is reachable.
drive.mount('/content/drive')
# NOTE: the nested "STData/STData" is intentional - the per-participant
# folders are looked up directly below this directory.
DATASET_ROOT = Path("/content/drive/MyDrive/STData/STData")
RESULTS_DIR = Path("/content/results")
# parents=True: create the directory even if an intermediate folder is absent.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

print(f"‚úÖ Dataset root set to {DATASET_ROOT}")

# Step 2: Detect or Create Target Function (Fixed for TIVA Engagement and PSY verdict)
def detect_target_column(participant_data, pid):
    """Pick (or create) the target column for one participant.

    Lookup order:
      1. a TIVA column named ``Accuracy`` or ``Engagement`` - exact names are
         tried first, then a case-insensitive match (e.g. ``engagement``);
      2. PSY ``matchOrnomatch`` mapped to 1 (``match``) / 0 (``nomatch``);
      3. PSY ``verdict`` mapped to 1 (``Correct``) / 0 (anything else).

    For cases 2 and 3 a ``Target`` column is written into the PSY frame in
    place, so the caller can read it from ``participant_data["PSY"]``.

    Args:
        participant_data: mapping of modality name to a DataFrame or None.
        pid: participant id, used only in the log messages.

    Returns:
        The column name exactly as it appears in the frame, or None when no
        known target column exists.
    """
    tiva_df = participant_data.get("TIVA")
    psy_df = participant_data.get("PSY")

    if tiva_df is not None:
        wanted = ("Accuracy", "Engagement")

        # Pass 1: exact names, in priority order.
        for col in wanted:
            if col in tiva_df.columns:
                print(f"‚úÖ P{pid}: Target column from TIVA ‚Üí {col}")
                return col

        # Pass 2: case-insensitive fallback; the first column wins when several
        # differ only by case.
        by_lower = {}
        for actual in tiva_df.columns:
            by_lower.setdefault(str(actual).lower(), actual)
        for col in wanted:
            actual = by_lower.get(col.lower())
            if actual is not None:
                print(f"‚úÖ P{pid}: Target column from TIVA ‚Üí {actual}")
                return actual

    # Fallback: PSY matchOrnomatch (values outside match/nomatch become NaN)
    if psy_df is not None and 'matchOrnomatch' in psy_df.columns:
        psy_df['Target'] = psy_df['matchOrnomatch'].map({'match': 1, 'nomatch': 0})
        print(f"‚úÖ P{pid}: Target from PSY 'matchOrnomatch' ‚Üí 'Target'")
        return 'Target'

    # Fallback: PSY verdict, binary 1/0 for Correct / anything else
    if psy_df is not None and 'verdict' in psy_df.columns:
        psy_df['Target'] = psy_df['verdict'].apply(lambda x: 1 if x == 'Correct' else 0)
        print(f"‚úÖ P{pid}: Target from PSY 'verdict' ‚Üí 'Target'")
        return 'Target'

    print(f"‚ö†Ô∏è P{pid}: No known target column found")
    return None

# Step 3: Load Participant Data Function (Fixed Path, Added Copy for P1-P9)
def step1_identify_inputs(pid):
    """Load every modality CSV of one participant and resolve the target column.

    Each modality is read from ``DATASET_ROOT/<pid>/<pid>_<MOD>.csv``. A file
    that is missing or cannot be parsed is recorded as ``None`` so the returned
    mapping always has one entry per modality.

    Args:
        pid: participant id (used for the folder name and the file prefix).

    Returns:
        A ``(data, target_col)`` tuple: ``data`` maps modality name to a
        DataFrame or None, ``target_col`` is whatever
        ``detect_target_column`` reports (possibly None).
    """
    participant_dir = DATASET_ROOT / str(pid)
    loaded = {}

    for modality in ("EEG", "GSR", "EYE", "IVT", "TIVA", "PSY"):
        csv_path = participant_dir / f"{pid}_{modality}.csv"
        if not csv_path.exists():
            print(f"‚ö†Ô∏è P{pid}: Missing file ‚Üí {modality}.csv")
            loaded[modality] = None
            continue
        try:
            loaded[modality] = pd.read_csv(csv_path, low_memory=False)
        except Exception as e:
            # Best effort: a broken file must not stop the remaining modalities.
            print(f"‚ö†Ô∏è P{pid}: Failed to read {modality} ‚Üí {e}")
            loaded[modality] = None

    # May add a 'Target' column to the PSY frame as a side effect.
    target_col = detect_target_column(loaded, pid)

    # One "MOD:shape" entry per modality, 'Missing' for the ones not loaded.
    shape_parts = []
    for modality, frame in loaded.items():
        shape_parts.append(f"{modality}:{frame.shape if frame is not None else 'Missing'}")
    shapes_info = ", ".join(shape_parts)
    print(f"P{pid} Shapes ‚Üí {shapes_info}")
    if target_col:
        print(f"P{pid}: Target = {target_col}")
    print("-" * 60)

    return loaded, target_col

# Step 4: Run for All Participants (P1-38, Fixed to Load P1-P9 if Exists)
participants = range(1, 39)
all_data = {}
for pid in participants:
    data, target_col = step1_identify_inputs(pid)
    if not data:
        continue
    all_data[pid] = {"data": data, "target_col": target_col}

# Step 5: Build all_trials.csv - one row per target sample, tagged with the
# participant it came from. PSY is consulted before TIVA for the target column.
all_trials = []
for pid, info in all_data.items():
    target_col = info['target_col']
    psy = info['data'].get('PSY')
    tiva = info['data'].get('TIVA')
    if not target_col:
        continue
    holders = [frame for frame in (psy, tiva)
               if frame is not None and target_col in frame.columns]
    if not holders:
        continue
    df = holders[0][[target_col]].copy()
    df['participant_id'] = pid
    all_trials.append(df)

if not all_trials:
    print("‚ö†Ô∏è No trials with target generated")
else:
    all_trials_df = pd.concat(all_trials, ignore_index=True)
    all_trials_df.to_csv(RESULTS_DIR / "all_trials.csv", index=False)
    print(f"‚úÖ all_trials.csv saved (Shape: {all_trials_df.shape})")

# Step 6: Persist only the per-participant target column name; the DataFrames
# themselves are not JSON-serialisable.
target_summary = {str(k): {"target_col": v["target_col"]} for k, v in all_data.items()}
with open(RESULTS_DIR / "all_data.json", "w") as f:
    json.dump(target_summary, f)
print(f"‚úÖ all_data.json saved")

print("‚úÖ Step 1 Complete - Ready for Step 2")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Dataset root set to /content/drive/MyDrive/STData/STData
✅ P1: Target column from TIVA → Engagement
P1 Shapes → EEG:(203240, 41), GSR:(95263, 14), EYE:(89327, 19), IVT:(89327, 29), TIVA:(7538, 44), PSY:(40, 11)
P1: Target = Engagement
------------------------------------------------------------
✅ P2: Target column from TIVA → Engagement
P2 Shapes → EEG:(294961, 41), GSR:(113969, 14), EYE:(106869, 19), IVT:(106869, 29), TIVA:(9018, 44), PSY:(33, 11)
P2: Target = Engagement
------------------------------------------------------------
✅ P3: Target column from TIVA → Engagement
P3 Shapes → EEG:(369917, 41), GSR:(175344, 14), EYE:(164413, 19), IVT:(164413, 29), TIVA:(13872, 44), PSY:(42, 11)
P3: Target = Engagement
------------------------------------------------------------
✅ P4: Target column from TIVA → Engagement
P4 Shapes → EEG:(28

In [None]:
# ============================================
# STEP 2: Preprocessing Pipeline
# Problem ID-15: Cross-Modal Knowledge Transfer
# ============================================

import pandas as pd
import numpy as np
import json
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder

# -------------------------------
# Load Data from Step 1
# -------------------------------
RESULTS_DIR = Path("/content/results")
# The banner labels the time as IST, so read the clock in Asia/Kolkata
# explicitly instead of relying on the machine's local timezone.
print(f"üîç Starting Step 2 at {pd.Timestamp.now(tz='Asia/Kolkata').strftime('%H:%M:%S IST on %Y-%m-%d')}")

# Load the per-sample targets written by Step 1. Errors are logged and
# re-raised: nothing below can run without these two files.
trials_path = RESULTS_DIR / "all_trials.csv"
try:
    print(f"üîç Checking {trials_path}: {trials_path.exists()}")
    all_trials_df = pd.read_csv(trials_path)
    print(f"‚úÖ Loaded all_trials.csv (Shape: {all_trials_df.shape}, Columns: {all_trials_df.columns.tolist()})")
except Exception as e:
    print(f"‚ùå Error loading all_trials.csv: {str(e)}")
    raise

# Load the participant -> target column mapping written by Step 1.
metadata_path = RESULTS_DIR / "all_data.json"
try:
    print(f"üîç Checking {metadata_path}: {metadata_path.exists()}")
    with open(metadata_path, "r") as f:
        all_data = json.load(f)
    print(f"‚úÖ Loaded all_data.json (Keys: {list(all_data.keys())})")
except Exception as e:
    print(f"‚ùå Error loading all_data.json: {str(e)}")
    raise

# -------------------------------
# 2.1 Feature Extraction
# -------------------------------
def extract_features(pid, mod, data_df):
    """Summarise one modality recording into a flat dict of scalar features.

    Features per modality:
      * EEG  - mean and variance of every column whose name contains a band
        name (Delta/Theta/Alpha/Beta/Gamma), keyed ``EEG_<column>_<stat>``.
      * EYE  - ``EYE_mean_pupil``: mean over the per-column means of all
        columns whose name contains ``Pupil``.
      * IVT  - mean ``Fixation Duration`` / ``Saccade Amplitude`` when present.
      * GSR  - mean conductance, mean first-difference slope against
        ``Timestamp``, and mean sample-to-sample relative change.
      * TIVA - ``TIVA_avg_AU``: mean over the per-column means of columns
        containing ``AU`` or named Anger/Joy/Sadness/Surprise.

    Args:
        pid: participant id, used only in log messages.
        mod: modality name (``EEG``, ``EYE``, ``IVT``, ``GSR`` or ``TIVA``).
        data_df: the modality DataFrame, or None.

    Returns:
        A dict mapping feature name to value; empty when there is no data,
        the expected columns are absent, or the modality is unknown.
    """
    if data_df is None or data_df.empty:
        print(f"‚ö†Ô∏è P{pid}: No data for {mod}")
        return {}

    print(f"‚ö†Ô∏è P{pid}: Available columns for {mod}: {data_df.columns.tolist()}")

    if mod == 'EEG':
        band_cols = [col for col in data_df.columns if any(band in col for band in ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma'])]
        if not band_cols:
            print(f"‚ö†Ô∏è P{pid}: No frequency band columns found in EEG")
            return {}
        # unstack() yields a Series indexed by (column, stat); flatten each
        # pair into a plain string so the keys are usable as column names.
        band_stats = data_df[band_cols].agg(['mean', 'var']).unstack()
        return {f"{mod}_{col}_{stat}": val for (col, stat), val in band_stats.items()}

    elif mod == 'EYE':
        pupil_cols = [col for col in data_df.columns if 'Pupil' in col]
        if not pupil_cols:
            print(f"‚ö†Ô∏è P{pid}: No Pupil columns found in EYE")
            return {}
        return {'EYE_mean_pupil': data_df[pupil_cols].mean().mean()}

    elif mod == 'IVT':
        stats = {}
        if 'Fixation Duration' in data_df.columns:
            stats['IVT_avg_fixation_duration'] = data_df['Fixation Duration'].mean()
        if 'Saccade Amplitude' in data_df.columns:
            stats['IVT_avg_saccade_amplitude'] = data_df['Saccade Amplitude'].mean()
        if not stats:
            print(f"‚ö†Ô∏è P{pid}: No expected columns found in IVT")
        return stats

    elif mod == 'GSR':
        stats = {}
        if 'GSR Conductance CAL' in data_df.columns and 'Timestamp' in data_df.columns:
            # Coerce so stray non-numeric cells become NaN instead of raising.
            conductance = pd.to_numeric(data_df['GSR Conductance CAL'], errors='coerce')
            timestamp = pd.to_numeric(data_df['Timestamp'], errors='coerce')
            stats['GSR_mean_conductance'] = conductance.mean()
            conductance_diff = conductance.diff()
            timestamp_diff = timestamp.diff()
            # Skip samples with undefined differences or a zero time step.
            valid_mask = (conductance_diff.notna() & timestamp_diff.notna() & (timestamp_diff != 0))
            if valid_mask.any():
                stats['GSR_slope'] = (conductance_diff[valid_mask] / timestamp_diff[valid_mask]).mean()
            else:
                stats['GSR_slope'] = np.nan
            # A zero conductance sample makes the relative change infinite,
            # which would turn the whole mean into inf; treat it as missing.
            relative_change = conductance.pct_change().replace([np.inf, -np.inf], np.nan)
            stats['GSR_recovery'] = relative_change.mean()
        if not stats:
            print(f"‚ö†Ô∏è P{pid}: No GSR Conductance or Timestamp found")
        return stats

    elif mod == 'TIVA':
        au_cols = [col for col in data_df.columns if 'AU' in col or col in ['Anger', 'Joy', 'Sadness', 'Surprise']]
        if not au_cols:
            print(f"‚ö†Ô∏è P{pid}: No AU or emotion columns found in TIVA")
            return {}
        return {'TIVA_avg_AU': data_df[au_cols].mean().mean()}

    return {}

# Extract features for all modalities per participant
# The participant folders live under the nested "STData/STData" directory.
# With a single "STData" no file is found and every modality is reported
# as "No data".
data_root = Path("/content/drive/MyDrive/STData/STData")

feature_list = []
for pid in all_trials_df['participant_id'].unique():
    # One feature row per participant; modalities without a file add nothing.
    pid_features = {'participant_id': pid}
    for mod in ['EEG', 'GSR', 'EYE', 'IVT', 'TIVA']:
        file_path = data_root / str(pid) / f"{pid}_{mod}.csv"
        if file_path.exists():
            data_df = pd.read_csv(file_path, low_memory=False)
            print(f"‚ö†Ô∏è P{pid}: {mod} columns: {data_df.columns.tolist()}")
            pid_features.update(extract_features(pid, mod, data_df))
        else:
            print(f"‚ö†Ô∏è P{pid}: No data for {mod}")
    feature_list.append(pid_features)

feature_df = pd.DataFrame(feature_list)
print(f"Columns in feature_df: {feature_df.columns.tolist()}")
print(f"‚úÖ Extracted features (Shape: {feature_df.shape})")

# -------------------------------
# 2.2 Feature Alignment
# -------------------------------
numerical_cols = feature_df.columns.drop('participant_id')
if not numerical_cols.empty:
    # StandardScaler rejects infinite values; treat them as missing.
    raw_features = feature_df[numerical_cols].replace([np.inf, -np.inf], np.nan)

    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(raw_features)
    # The scaler passes NaN through but PCA does not accept it. After
    # standardisation every column has mean 0, so filling with 0 is a
    # mean imputation.
    scaled_features = np.nan_to_num(scaled_features, nan=0.0)
    feature_df[numerical_cols] = scaled_features

    # PCA cannot extract more components than min(n_samples, n_features).
    n_components = min(10, *scaled_features.shape)
    pca = PCA(n_components=n_components)
    pca_features = pca.fit_transform(scaled_features)
    pca_df = pd.DataFrame(pca_features, columns=[f'PCA_{i}' for i in range(1, n_components + 1)])
    pca_df['participant_id'] = feature_df['participant_id']
    print(f"‚úÖ Aligned features with PCA (Shape: {pca_df.shape})")
else:
    print("‚ö†Ô∏è No numerical columns available for normalization and PCA. Check feature extraction.")
    # Keep the pipeline runnable: pass the (feature-less) frame through.
    pca_df = feature_df.copy()

# -------------------------------
# 2.3 Label Encoding
# -------------------------------
# Attach the participant-level PCA features to every target sample of that
# participant; participants present on only one side are dropped.
processed_df = pca_df.merge(all_trials_df, how='inner', on='participant_id')

# Map each distinct Target value to an integer class id.
le = LabelEncoder()
le.fit(processed_df['Target'])
processed_df['Target_encoded'] = le.transform(processed_df['Target'])
print(f"‚úÖ Encoded Target (Unique classes: {processed_df['Target_encoded'].nunique()})")

# Persist the modelling table for Step 3.
output_path = RESULTS_DIR / "preprocessed_trials.csv"
processed_df.to_csv(output_path, index=False)
print(f"‚úÖ Preprocessed data saved to {output_path}")

print("‚úÖ Step 2 Complete - Data is ready for modeling (Step 3)")

🔍 Starting Step 2 at 09:28:51 IST on 2025-09-27
🔍 Checking /content/results/all_trials.csv: True
✅ Loaded all_trials.csv (Shape: (425096, 2), Columns: ['participant_id', 'Target'])
🔍 Checking /content/results/all_data.json: True
✅ Loaded all_data.json (Keys: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38'])
⚠️ P1: No data for EEG
⚠️ P1: No data for GSR
⚠️ P1: No data for EYE
⚠️ P1: No data for IVT
⚠️ P1: No data for TIVA
⚠️ P2: No data for EEG
⚠️ P2: No data for GSR
⚠️ P2: No data for EYE
⚠️ P2: No data for IVT
⚠️ P2: No data for TIVA
⚠️ P3: No data for EEG
⚠️ P3: No data for GSR
⚠️ P3: No data for EYE
⚠️ P3: No data for IVT
⚠️ P3: No data for TIVA
⚠️ P4: No data for EEG
⚠️ P4: No data for GSR
⚠️ P4: No data for EYE
⚠️ P4: No data

In [None]:
from pathlib import Path
# Sanity check: confirm that participant 1's modality files exist under the
# nested STData/STData directory.
pid = 1
for mod in ['EEG', 'GSR', 'EYE', 'IVT', 'TIVA']:
    file_path = Path("/content/drive/MyDrive/STData/STData") / str(pid) / f"{pid}_{mod}.csv"
    found = file_path.exists()
    print(f"Checking {file_path}: {found}")

Checking /content/drive/MyDrive/STData/STData/1/1_EEG.csv: True
Checking /content/drive/MyDrive/STData/STData/1/1_GSR.csv: True
Checking /content/drive/MyDrive/STData/STData/1/1_EYE.csv: True
Checking /content/drive/MyDrive/STData/STData/1/1_IVT.csv: True
Checking /content/drive/MyDrive/STData/STData/1/1_TIVA.csv: True


In [None]:
# Debug: list the feature columns that would be fed to the scaler / PCA.
# Relies on `numerical_cols` left over from the Step 2 cell; an empty list
# means feature extraction produced no features.
print(f"Numerical columns for PCA: {numerical_cols.tolist()}")

Numerical columns for PCA: []


In [None]:
# Preview the first rows of participant 1's EEG recording.
pid = 1
file_path = Path(f"/content/drive/MyDrive/STData/STData/{pid}/{pid}_EEG.csv")
if file_path.exists():
    # Only the head is displayed, so read just five rows: this avoids loading
    # the whole recording and the mixed-dtype warning a chunked full read emits.
    df = pd.read_csv(file_path, nrows=5)
    print(f"P{pid} EEG sample:\n{df.head()}")
else:
    print(f"P{pid}: EEG file not found at {file_path}")

P1 EEG sample:
                 TimeStamp      UnixTime QuestionKey  Delta_TP9  Delta_AF7  \
0  2023-03-28 17:03:16.773  1.680003e+09         NaN   0.558597  -0.100425   
1  2023-03-28 17:03:16.776  1.680003e+09         NaN   0.558597  -0.100425   
2  2023-03-28 17:03:16.778  1.680003e+09         NaN   0.558597  -0.100425   
3  2023-03-28 17:03:16.798  1.680003e+09         NaN   0.558597  -0.100425   
4  2023-03-28 17:03:16.801  1.680003e+09         NaN   0.558597  -0.100425   

   Delta_AF8  Delta_TP10  Theta_TP9  Theta_AF7  Theta_AF8  ...    Gyro_X  \
0   0.475775     1.85526   0.509486  -0.135816  -0.222424  ... -0.710297   
1   0.475775     1.85526   0.509486  -0.135816  -0.222424  ... -0.710297   
2   0.475775     1.85526   0.509486  -0.135816  -0.222424  ... -0.710297   
3   0.475775     1.85526   0.509486  -0.135816  -0.222424  ... -0.710297   
4   0.475775     1.85526   0.509486  -0.135816  -0.222424  ... -0.710297   

     Gyro_Y   Gyro_Z  HeadBandOn  HSI_TP9  HSI_AF7  HSI_AF8

  df = pd.read_csv(file_path)


In [None]:
# ============================================
# STEP 2: Preprocessing Pipeline
# Problem ID-15: Cross-Modal Knowledge Transfer
# ============================================

import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# -------------------------------
# Load and Prepare Data
# -------------------------------
RESULTS_DIR = Path("/content/results")
# The banner labels the time as IST, so read the clock in Asia/Kolkata
# explicitly instead of relying on the machine's local timezone.
print(f"üîç Starting Step 2 at {pd.Timestamp.now(tz='Asia/Kolkata').strftime('%H:%M:%S IST on %Y-%m-%d')}")

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Load trial data (written by Step 1 into RESULTS_DIR)
all_trials_df = pd.read_csv(RESULTS_DIR / "all_trials.csv")
print(f"‚úÖ Loaded all_trials.csv (Shape: {all_trials_df.shape}, Columns: {all_trials_df.columns.tolist()})")

# Binarize Target
if 'Engagement' not in all_trials_df.columns:
    print("‚ö†Ô∏è 'Engagement' column not found. Using default binarization on Target.")
    # Fallback: the class id assigned by LabelEncoder, reduced modulo 2.
    # NOTE(review): this splits values by the parity of their rank among the
    # distinct Target values, not by a threshold - confirm this is intended.
    all_trials_df['Target_encoded'] = LabelEncoder().fit_transform(all_trials_df['Target']) % 2
else:
    # Engagement above 0.5 counts as the positive class.
    all_trials_df['Target_encoded'] = all_trials_df['Engagement'].gt(0.5).astype(int)
print(f"‚úÖ Encoded Target (Unique classes: {all_trials_df['Target_encoded'].nunique()})")

# Explore directory contents
# Root folder that holds one sub-directory per participant; the nested
# "STData/STData" is the real location on Drive.
base_dir = Path("/content/drive/MyDrive/STData/STData/")  # Corrected base directory
# List the plain files and the sub-directories separately to verify the layout.
print(f"üìÇ Contents of {base_dir}: {[f.name for f in base_dir.glob('*') if f.is_file()]}")
print(f"üìÇ Subdirectories of {base_dir}: {[d.name for d in base_dir.glob('*') if d.is_dir()]}")

# Extract features from modality files
# Collect one feature dict per participant and build the frame once at the
# end; this keeps participant_id an integer column and avoids concatenating
# onto an empty frame on every iteration.
feature_rows = []
for pid in range(1, 39):  # 38 participants
    participant_features = {}
    subdir_path = base_dir / str(pid)
    if not subdir_path.is_dir():
        # Report a missing participant folder once, not once per modality.
        print(f"‚ùå P{pid}: No subdirectory found at {subdir_path}")
        feature_rows.append({'participant_id': pid})
        continue

    for mod in ['EEG', 'GSR', 'EYE', 'IVT', 'TIVA']:
        # Sorted so that, if several files match, the processing order
        # (and therefore which one wins) is deterministic.
        matches = sorted(subdir_path.glob(f'*{pid}*{mod}*.csv'))
        if not matches:
            # glob() yields only existing paths, so a missing modality shows
            # up as an empty match list.
            print(f"‚ùå P{pid}: No {mod} file matching pattern in {subdir_path}")
            continue

        for file in matches:
            data_df = pd.read_csv(file, low_memory=False)  # Avoid DtypeWarning
            print(f"‚úÖ P{pid}: Loaded {mod} from {file} (Shape: {data_df.shape}, Columns: {data_df.columns.tolist()})")
            numeric_cols = data_df.select_dtypes(include=[np.number]).columns
            if numeric_cols.empty:
                print(f"‚ö†Ô∏è P{pid}: No numerical columns in {mod}")
                continue

            if mod == 'EEG':
                # Mean and variance of every numeric frequency-band column.
                freq_cols = [col for col in numeric_cols if any(band.lower() in col.lower() for band in ['delta', 'theta', 'alpha', 'beta', 'gamma'])]
                if freq_cols:
                    mean_var = data_df[freq_cols].agg(['mean', 'var']).unstack()
                    participant_features.update({f"{mod}_{col}_{stat}": val for (col, stat), val in mean_var.items()})
                else:
                    print(f"‚ö†Ô∏è P{pid}: No frequency band columns found in EEG")
            elif mod == 'GSR':
                # Linear trend of the first conductance column over the row index.
                conductance_col = next((col for col in numeric_cols if 'conductance' in col.lower()), None)
                if conductance_col is not None:
                    # Drop missing samples first: a NaN would poison the fit.
                    conductance = data_df[conductance_col].dropna()
                    if len(conductance) >= 2:
                        slope = np.polyfit(conductance.index, conductance, 1)[0]
                    else:
                        slope = np.nan
                    participant_features[f"{mod}_slope"] = slope
                else:
                    print(f"‚ö†Ô∏è P{pid}: No conductance column found in GSR")
            elif mod in ['EYE', 'IVT', 'TIVA']:
                # Per-column mean of every numeric column.
                mean_vals = data_df[numeric_cols].mean()
                participant_features.update({f"{mod}_{col}": val for col, val in mean_vals.items()})

    feature_rows.append({'participant_id': pid} | participant_features)

feature_df = pd.DataFrame(feature_rows)
print(f"‚úÖ Extracted features (Shape: {feature_df.shape}, Columns: {feature_df.columns.tolist()})")

# Align features with trials
# Left join: every feature row is kept, one output row per target sample of
# that participant (participants without samples get a NaN target).
merged_df = pd.merge(feature_df, all_trials_df[['participant_id', 'Target_encoded']], on='participant_id', how='left')
if merged_df['Target_encoded'].isnull().all():
    print("‚ùå No Target_encoded aligned. Check participant_id matching.")
    raise ValueError("No target alignment")
# Normalize and apply PCA
X = merged_df.drop(columns=['participant_id', 'Target_encoded']).select_dtypes(include=[np.number])
if X.empty:
    print("‚ùå No numerical columns for PCA. Check feature extraction.")
    raise ValueError("No numerical features")

# Check and clean X for inf/NaN/large values
print(f"üîç Checking X for inf/NaN: {np.any(np.isnan(X)) or np.any(np.isinf(X))}")
X = np.nan_to_num(X, posinf=0, neginf=0)  # Replace NaN and inf with 0
X = np.clip(X, -1e10, 1e10)  # Clip extreme values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# PCA cannot extract more components than min(n_samples, n_features), so cap
# the requested 10 instead of failing on a small feature set.
n_components = min(10, *X_scaled.shape)
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)
print(f"‚úÖ Aligned features with PCA (Shape: {X_pca.shape})")
# Combine results
pca_df = pd.DataFrame(X_pca, columns=[f'PCA_{i+1}' for i in range(X_pca.shape[1])])
final_df = pd.concat([merged_df[['participant_id', 'Target_encoded']].reset_index(drop=True), pca_df], axis=1)
final_df.to_csv(RESULTS_DIR / "preprocessed_trials.csv", index=False)
print(f"‚úÖ Saved preprocessed_trials.csv (Shape: {final_df.shape})")

print("‚úÖ Step 2 Complete - Preprocessing done")

🔍 Starting Step 2 at 09:29:39 IST on 2025-09-27
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Loaded all_trials.csv (Shape: (425096, 2), Columns: ['participant_id', 'Target'])
⚠️ 'Engagement' column not found. Using default binarization on Target.
✅ Encoded Target (Unique classes: 2)
📂 Contents of /content/drive/MyDrive/STData/STData: ['exposure_summary.txt', 'Pre-survery for MMA Research.csv', '.DS_Store']
📂 Subdirectories of /content/drive/MyDrive/STData/STData: ['2', '18', '26', '23', '6', '5', '12', '33', '19', '24', '22', '17', '7', '28', '32', '8', '34', '15', '29', '10', '13', '11', '36', '35', '16', '3', '20', '30', '14', '37', '25', '4', '31', '38', '21', '9', '1', '27']
✅ P1: Loaded EEG from /content/drive/MyDrive/STData/STData/1/1_EEG.csv (Shape: (203240, 41), Columns: ['TimeStamp', 'UnixTime', 'QuestionKey', 'Delta_TP9', 'Delta_AF7', 'Delta_AF8', 'Delta_TP10', 'Theta_TP9'

In [None]:
print(f"üìÇ Contents of {base_dir}: {[f.name for f in base_dir.glob('*') if f.is_file()]}")
print(f"üìÇ Subdirectories of {base_dir}: {[d.name for d in base_dir.glob('*') if d.is_dir()]}")

📂 Contents of /content/drive/MyDrive/STData/STData: ['exposure_summary.txt', 'Pre-survery for MMA Research.csv', '.DS_Store']
📂 Subdirectories of /content/drive/MyDrive/STData/STData: ['2', '18', '26', '23', '6', '5', '12', '33', '19', '24', '22', '17', '7', '28', '32', '8', '34', '15', '29', '10', '13', '11', '36', '35', '16', '3', '20', '30', '14', '37', '25', '4', '31', '38', '21', '9', '1', '27']


In [None]:
print(f"üîç P{pid}: Checking {subdir_path}: {[f.name for f in subdir_path.glob('*')]}")

🔍 P38: Checking /content/drive/MyDrive/STData/STData/38: ['38_GSR.csv', '38_externalEvents.csv', '38_IVT.csv', '.DS_Store', '38_NSTLX.csv', '38_EEG.csv', '38_DLOT.xlsx', '38_BlankScreenData.csv', '38_EYE.csv', '38_TIVA.csv', '38_PSY.csv']


In [None]:
# Skeleton for iterating the CSV files of one modality in a participant
# folder. `subdir_path`, `pid` and `mod` come from a previously executed cell.
for file in subdir_path.glob(f'*{pid}*{mod}*.csv'):
    # Defensive guard: skip a path that is no longer present on disk.
    if not file.exists():
        print(f"‚ùå P{pid}: File {file} does not exist, skipping")
        continue
    # Proceed with file processing (e.g., data_df = pd.read_csv(file))