In [2]:
# First, let's see what's actually in the .mat file
mat_path = r"D:\impress_project\eeg_signals\data\LRMI-21679035\organized_data_v2\raw_data\sub-01_task-motor-imagery_eeg.mat"
data = loadmat(mat_path)

print("Keys in the .mat file:")
for key in data.keys():
    if not key.startswith('__'):
        print(f"  '{key}': shape = {data[key].shape}, dtype = {data[key].dtype}")

# Let me check the structure of the main data
print("\nAvailable keys (excluding __ headers):")
available_keys = [k for k in data.keys() if not k.startswith('__')]
print(available_keys)

# Try different possible key names
for key in available_keys:
    arr = data[key]
    print(f"\n{key}: shape={arr.shape}, dtype={arr.dtype}")
    if len(arr.shape) == 3:
        print(f"  This looks like trial data! (trials, channels, samples)")
        print(f"  Trials: {arr.shape[0]}, Channels: {arr.shape[1]}, Samples: {arr.shape[2]}")
        
# Let me also check if it's stored differently
if len(available_keys) == 1:
    main_key = available_keys[0]
    print(f"\nUsing '{main_key}' as the main data")
    rawdata = data[main_key]
    
    # Check if labels are in a separate array or part of the data
    if rawdata.shape[1] == 33:  # 30 EEG + 2 EOG + 1 marker (from paper)
        print(f"Data shape matches expected: {rawdata.shape}")
        print(f"This should be: (40 trials, 33 channels, 4000 samples)")

Keys in the .mat file:
  'eeg': shape = (1, 1), dtype = [('rawdata', 'O'), ('label', 'O')]

Available keys (excluding __ headers):
['eeg']

eeg: shape=(1, 1), dtype=[('rawdata', 'O'), ('label', 'O')]

Using 'eeg' as the main data


In [3]:
# The data is in a structured array
eeg_struct = data['eeg'][0, 0]  # Get the structure
print(f"Field names in eeg struct: {eeg_struct.dtype.names}")

# Access the rawdata and labels
rawdata = eeg_struct['rawdata'][0]  # Get the actual array
labels = eeg_struct['label'][0].flatten()  # Get labels

print(f"Raw data shape: {rawdata.shape}")
print(f"Labels shape: {labels.shape}")
print(f"Labels: {labels}")
print(f"Unique labels: {np.unique(labels)}")
print(f"Left hand (1): {np.sum(labels == 1)}, Right hand (2): {np.sum(labels == 2)}")

Field names in eeg struct: ('rawdata', 'label')
Raw data shape: (33, 4000)
Labels shape: (1,)
Labels: [1]
Unique labels: [1]
Left hand (1): 1, Right hand (2): 0


In [4]:
# Check the total samples
total_samples = rawdata.shape[1]
print(f"Total samples: {total_samples}")
print(f"Expected for 40 trials: 40 * 4000 = 160000 samples")

# Check if it's concatenated trials
if total_samples == 160000:
    print("âœ“ Likely 40 trials concatenated (160000 samples)")
    # Reshape to (40 trials, 33 channels, 4000 samples)
    rawdata_reshaped = rawdata.reshape(33, 40, 4000).transpose(1, 0, 2)
    print(f"Reshaped data: {rawdata_reshaped.shape}")
    
    # Check labels - might be 40 labels
    print(f"\nCurrent labels: {labels}")
    print(f"Labels shape: {labels.shape}")
    
    # Try to get all labels
    all_labels = eeg_struct['label'][0]
    print(f"All labels shape: {all_labels.shape}")
    print(f"All labels: {all_labels}")
    
elif total_samples == 4000:
    print("Only 1 trial? Something might be wrong")
    print("Let me check if there are multiple .mat files")
else:
    print(f"Unexpected sample count: {total_samples}")

Total samples: 4000
Expected for 40 trials: 40 * 4000 = 160000 samples
Only 1 trial? Something might be wrong
Let me check if there are multiple .mat files


In [5]:
# Check what's in channel 33 (the marker channel)
if rawdata.shape[0] == 33:
    print(f"\nChecking channel 33 (event markers):")
    marker_channel = rawdata[32, :]  # Last channel
    print(f"Marker channel shape: {marker_channel.shape}")
    print(f"Unique values in marker channel: {np.unique(marker_channel[:1000])}")
    
    # Look for event markers (non-zero values)
    marker_indices = np.where(marker_channel != 0)[0]
    print(f"Number of markers found: {len(marker_indices)}")
    if len(marker_indices) > 0:
        print(f"First few marker indices: {marker_indices[:10]}")
        print(f"Marker values: {marker_channel[marker_indices[:10]]}")


Checking channel 33 (event markers):
Marker channel shape: (4000,)
Unique values in marker channel: [0. 1.]
Number of markers found: 3
First few marker indices: [   0 1003 3005]
Marker values: [1. 2. 3.]


In [7]:
import pandas as pd

# First, let's look at the event markers file
events_path = r"D:\impress_project\eeg_signals\data\LRMI-21679035\organized_data_v2\events\task-motor-imagery_events.tsv"

try:
    events_df = pd.read_csv(events_path, sep='\t')
    print("Event markers file loaded:")
    print(events_df.head(20))
    print(f"\nTotal events: {len(events_df)}")
    print(f"\nColumns: {events_df.columns.tolist()}")
    
    # Check for subject 01 specifically
    if 'subject' in events_df.columns:
        sub01_events = events_df[events_df['subject'] == '01']
        print(f"\nEvents for subject 01: {len(sub01_events)}")
        print(sub01_events.head())
        
except Exception as e:
    print(f"Error loading events file: {e}")
    print("\nTrying to find the file...")
    import os
    events_dir = os.path.join(base_path, "events")
    print(f"Files in events directory: {os.listdir(events_dir)}")

Event markers file loaded:
    onset  duration  trial_type  response_time  value  stim_file
0       1      2000           1            NaN      1  stim1.png
1    2001      4000           1            NaN      2  stim2.mp4
2    6001      2000           1            NaN      3  stim3.png
3    8001      2000           2            NaN      1  stim1.png
4   10001      4000           2            NaN      2  stim2.mp4
5   14001      2000           2            NaN      3  stim3.png
6   16001      2000           1            NaN      1  stim1.png
7   18001      4000           1            NaN      2  stim2.mp4
8   22001      2000           1            NaN      3  stim3.png
9   24001      2000           2            NaN      1  stim1.png
10  26001      4000           2            NaN      2  stim2.mp4
11  30001      2000           2            NaN      3  stim3.png
12  32001      2000           1            NaN      1  stim1.png
13  34001      4000           1            NaN      2  stim2.mp

In [9]:
# Check participants.tsv
base_path = r"D:\impress_project\eeg_signals\data\LRMI-21679035\organized_data_v2"
participants_path = os.path.join(base_path, "patient_info", "participants.tsv")
try:
    participants_df = pd.read_csv(participants_path, sep='\t')
    print("\nParticipants file:")
    print(participants_df.head())
    print(f"\nTotal participants: {len(participants_df)}")
except Exception as e:
    print(f"Error loading participants file: {e}")


Participants file:
  Participant_ID  Gender  Age  Duration ParalysisSide Handedness IsFirstTime  \
0         sub-01    male   45         1         right      right         yes   
1         sub-02    male   60         2          left      right         yes   
2         sub-03    male   60         2          left      right          no   
3         sub-04    male   56        14         right      right         yes   
4         sub-05  female   44         4          left      right         yes   

                                      StrokeLocation  NIHSS  MBI  mRS  
0                                          Left pons     11   50    4  
1                                         Right pons      3   55    4  
2  Left cerebellum, bilateral paraventricular, Ri...      3   90    1  
3  Left frontal parietal cortex, Left centrum sem...      6   90    3  
4                                          Left pons      4   60    4  

Total participants: 50
