# Import Libraries

In [1]:
import pandas as pd
import glob

# Load EEG Files for Subject 4

In [2]:
column_names = [
    "Sample Index", "EXG Channel 0", "EXG Channel 1", "EXG Channel 2", "EXG Channel 3",
    "EXG Channel 4", "EXG Channel 5", "EXG Channel 6", "EXG Channel 7", "EXG Channel 8",
    "EXG Channel 9", "EXG Channel 10", "EXG Channel 11", "EXG Channel 12", "EXG Channel 13",
    "EXG Channel 14", "EXG Channel 15", "Accel Channel 0", "Accel Channel 1", "Accel Channel 2",
    "Not Used 1", "Digital Channel 0 (D11)", "Digital Channel 1 (D12)", "Digital Channel 2 (D13)",
    "Digital Channel 3 (D17)", "Not Used 2", "Digital Channel 4 (D18)", "Analog Channel 0",
    "Analog Channel 1", "Analog Channel 2", "Timestamp", "Marker Channel", "Timestamp (Formatted)"
]

eeg_files = sorted(glob.glob("/data0/HAR-datasets/PLHI-HAR_EEG-2025/OpenBCISession_s4-*/BrainFlow-RAW_s4-*.csv"))

def load_eeg(file):
    df = pd.read_csv(file, sep="\t", skiprows=3, names=column_names, engine="python")
    df["Activity"] = file.split("/")[-2]
    return df

eeg_df_list = [load_eeg(file) for file in eeg_files]
eeg_df = pd.concat(eeg_df_list, ignore_index=True)

print(f"Successfully loaded {len(eeg_files)} EEG files for Subject 4 across all activities.")
print("Unique activities in EEG data:", eeg_df["Activity"].unique())
display(eeg_df.head())

Successfully loaded 50 EEG files for Subject 4 across all activities.
Unique activities in EEG data: ['OpenBCISession_s4-chair squats'
 'OpenBCISession_s4-light stationary cycling'
 'OpenBCISession_s4-marching in place'
 'OpenBCISession_s4-seated boxing hooks'
 'OpenBCISession_s4-seated leg extensions'
 'OpenBCISession_s4-seated medicine ball twists'
 'OpenBCISession_s4-seated side bends' 'OpenBCISession_s4-side-stepping'
 'OpenBCISession_s4-standing heel to toe walk'
 'OpenBCISession_s4-wall push-ups']


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
0,6.0,-187500.022352,-187500.022352,-55956.787581,-187500.022352,-187500.022352,-51450.43003,-69029.764119,-187500.022352,-81732.975749,...,0.0,0.0,0.0,0.0,0.0,0.0,1739048000.0,0.0,,OpenBCISession_s4-chair squats
1,8.0,-187500.022352,-187500.022352,-55974.3337,-187500.022352,-187500.022352,-51236.926167,-69619.93958,-187500.022352,-78892.024325,...,104.0,3.0,80.0,0.0,0.0,0.0,1739048000.0,0.0,,OpenBCISession_s4-chair squats
2,10.0,-187500.022352,-187500.022352,-57272.970053,-187500.022352,-187500.022352,-52769.205304,-70386.850284,-187500.022352,-82665.7811,...,0.0,0.0,0.0,0.0,0.0,0.0,1739048000.0,0.0,,OpenBCISession_s4-chair squats
3,12.0,-187500.022352,-187500.022352,-56677.966616,-187500.022352,-187500.022352,-52102.050436,-70274.666879,-187500.022352,-79903.329003,...,0.0,0.0,0.0,0.0,0.0,0.0,1739048000.0,0.0,,OpenBCISession_s4-chair squats
4,14.0,-187500.022352,-187500.022352,-59069.402107,-187500.022352,-187500.022352,-54448.603624,-72328.278104,-187500.022352,-83850.423557,...,0.0,0.0,0.0,0.0,0.0,0.0,1739048000.0,0.0,,OpenBCISession_s4-chair squats


# Display Last Rows of EEG Data

In [3]:
display(eeg_df.tail()) 

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
543664,224.0,-187500.022352,-50903.549898,-964.790698,-187500.022352,-171810.058869,4033.238176,-11341.006916,-187500.022352,-19596.48962,...,0.0,0.0,0.0,0.0,0.0,0.0,1739046000.0,0.0,,OpenBCISession_s4-wall push-ups
543665,226.0,-187500.022352,-51397.612858,-801.824129,-187500.022352,-172259.597154,4038.781409,-11127.279535,-187500.022352,-19734.645752,...,0.0,0.0,0.0,0.0,0.0,0.0,1739046000.0,0.0,,OpenBCISession_s4-wall push-ups
543666,228.0,-187500.022352,-52053.122467,-1036.83037,-187500.022352,-172611.14539,3892.623352,-11407.682169,-187500.022352,-19669.624528,...,8.0,5.0,16.0,0.0,0.0,0.0,1739046000.0,0.0,,OpenBCISession_s4-wall push-ups
543667,230.0,-187500.022352,-52375.993416,-875.987217,-187500.022352,-172779.655192,3977.470574,-11213.624324,-187500.022352,-19845.95744,...,0.0,0.0,0.0,0.0,0.0,0.0,1739046000.0,0.0,,OpenBCISession_s4-wall push-ups
543668,232.0,-187500.022352,-52798.441386,-1042.507713,-187500.022352,-172956.993932,3886.32016,-11440.673344,-187500.022352,-19687.841199,...,0.0,0.0,0.0,0.0,0.0,0.0,1739046000.0,0.0,,OpenBCISession_s4-wall push-ups


# Extract EEG Start and End Timestamps

In [4]:
eeg_timestamps = eeg_df[['Timestamp', 'Marker Channel']]
start_time = eeg_timestamps['Timestamp'].min()
end_time = eeg_timestamps['Timestamp'].max()
print(f" Extracted EEG activity start and end timestamps:")
print(f" Start Timestamp: {start_time}")
print(f" End Timestamp: {end_time}")

 Extracted EEG activity start and end timestamps:
 Start Timestamp: 1739044975.962612
 End Timestamp: 1739048526.121979


# Load and Filter HAR Data for Subject 4

In [5]:
har_file_path = "/data0/HAR-datasets/PLHI-HAR_EEG-2025/Combined_Gyro_Acg_Data_Subjects_1_to_6.csv"
har_df = pd.read_csv(har_file_path)

har_df['Timestamp_Accel'] = pd.to_numeric(har_df['Timestamp_Accel'], errors='coerce')
har_df['Timestamp_Gyro'] = pd.to_numeric(har_df['Timestamp_Gyro'], errors='coerce')

if har_df['Timestamp_Accel'].max() > 1e12:  
    har_df['Timestamp_Accel'] = har_df['Timestamp_Accel'] / 1000
    har_df['Timestamp_Gyro'] = har_df['Timestamp_Gyro'] / 1000

print("Before filtering:", har_df.shape)
har_df = har_df[har_df['Subject_ID_x'] == "Subject 4"]
print("After filtering:", har_df.shape)

valid_har = har_df[
    (har_df['Timestamp_Accel'].between(start_time, end_time)) |
    (har_df['Timestamp_Gyro'].between(start_time, end_time))
]

filtered_har = valid_har

print("HAR data is clipped to the EEG activity time range. All activities for Subject 4 are included.")
display(filtered_har.head())

Before filtering: (944460, 12)
After filtering: (177520, 12)
HAR data is clipped to the EEG activity time range. All activities for Subject 4 are included.


  har_df = pd.read_csv(har_file_path)


Unnamed: 0,Timestamp_Gyro,Gyro X (°/s),Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
547985,1739045000.0,0.665843,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
547986,1739045000.0,0.665843,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
547987,1739045000.0,0.665843,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
547988,1739045000.0,0.665843,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
547989,1739045000.0,0.665843,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,6.904878,-6.236896,-1.319205,Seated Leg Extensions,Subject 4


# Display Shape and Info of Filtered HAR Data

In [6]:
print(filtered_har.shape) 
print(filtered_har.info())  

(159475, 12)
<class 'pandas.core.frame.DataFrame'>
Index: 159475 entries, 547985 to 724899
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Timestamp_Gyro    159475 non-null  float64
 1   Gyro X (°/s)      159475 non-null  float64
 2   Gyro Y (°/s)      159455 non-null  float64
 3   Gyro Z (°/s)      159447 non-null  float64
 4   Activity_Label_x  159475 non-null  object 
 5   Subject_ID_x      159475 non-null  object 
 6   Timestamp_Accel   159475 non-null  float64
 7   Accel X (g)       159475 non-null  float64
 8   Accel Y (g)       159471 non-null  object 
 9   Accel Z (g)       159467 non-null  float64
 10  Activity_Label_y  159475 non-null  object 
 11  Subject_ID_y      159475 non-null  object 
dtypes: float64(7), object(5)
memory usage: 15.8+ MB
None


# Merge EEG and HAR Data by Timestamp

In [7]:
eeg_df_sorted = eeg_df.sort_values("Timestamp")
har_df_sorted = filtered_har.sort_values("Timestamp_Accel")

merged_df = pd.merge_asof(
    eeg_df_sorted, 
    har_df_sorted, 
    left_on="Timestamp", 
    right_on="Timestamp_Accel", 
    direction="nearest"
)

print(" EEG & HAR merged.")
display(merged_df.head())

 EEG & HAR merged.


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-187500.022352,-61496.645093,-3612.600698,-187500.022352,-177651.039082,-6056.383974,-19431.489042,-187500.022352,-28378.378258,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
1,8.0,-187500.022352,-62571.205207,-3609.382046,-187500.022352,-177513.844074,-6162.979443,-19418.077996,-187500.022352,-28387.944804,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
2,10.0,-187500.022352,-63922.077587,-3597.177994,-187500.022352,-177373.676285,-6238.394229,-19403.839934,-187500.022352,-28359.200461,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
3,12.0,-187500.022352,-65122.388616,-3572.792241,-187500.022352,-177356.197221,-6116.800739,-19394.519257,-187500.022352,-28366.330667,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
4,14.0,-187500.022352,-66104.412509,-3607.951535,-187500.022352,-177420.726707,-5992.413281,-19427.264563,-187500.022352,-28374.354944,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1739045000.0,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4


# Check Timestamp Alignment

In [8]:
print("Timestamp alignment (EEG vs HAR):")
print(merged_df[['Timestamp', 'Timestamp_Accel']])

Timestamp alignment (EEG vs HAR):
           Timestamp  Timestamp_Accel
0       1.739045e+09     1.739045e+09
1       1.739045e+09     1.739045e+09
2       1.739045e+09     1.739045e+09
3       1.739045e+09     1.739045e+09
4       1.739045e+09     1.739045e+09
...              ...              ...
543664  1.739049e+09     1.739049e+09
543665  1.739049e+09     1.739049e+09
543666  1.739049e+09     1.739049e+09
543667  1.739049e+09     1.739049e+09
543668  1.739049e+09     1.739049e+09

[543669 rows x 2 columns]


# Show Merged Data Columns

In [9]:
print("Unique Columns in Merged Data:")
print(merged_df.columns)

Unique Columns in Merged Data:
Index(['Sample Index', 'EXG Channel 0', 'EXG Channel 1', 'EXG Channel 2',
       'EXG Channel 3', 'EXG Channel 4', 'EXG Channel 5', 'EXG Channel 6',
       'EXG Channel 7', 'EXG Channel 8', 'EXG Channel 9', 'EXG Channel 10',
       'EXG Channel 11', 'EXG Channel 12', 'EXG Channel 13', 'EXG Channel 14',
       'EXG Channel 15', 'Accel Channel 0', 'Accel Channel 1',
       'Accel Channel 2', 'Not Used 1', 'Digital Channel 0 (D11)',
       'Digital Channel 1 (D12)', 'Digital Channel 2 (D13)',
       'Digital Channel 3 (D17)', 'Not Used 2', 'Digital Channel 4 (D18)',
       'Analog Channel 0', 'Analog Channel 1', 'Analog Channel 2', 'Timestamp',
       'Marker Channel', 'Timestamp (Formatted)', 'Activity', 'Timestamp_Gyro',
       'Gyro X (°/s)', 'Gyro Y (°/s)', 'Gyro Z (°/s)', 'Activity_Label_x',
       'Subject_ID_x', 'Timestamp_Accel', 'Accel X (g)', 'Accel Y (g)',
       'Accel Z (g)', 'Activity_Label_y', 'Subject_ID_y'],
      dtype='object')


# Display Final Merged Data

In [10]:
display(merged_df)

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-187500.022352,-61496.645093,-3612.600698,-187500.022352,-177651.039082,-6056.383974,-19431.489042,-187500.022352,-28378.378258,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1.739045e+09,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
1,8.0,-187500.022352,-62571.205207,-3609.382046,-187500.022352,-177513.844074,-6162.979443,-19418.077996,-187500.022352,-28387.944804,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1.739045e+09,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
2,10.0,-187500.022352,-63922.077587,-3597.177994,-187500.022352,-177373.676285,-6238.394229,-19403.839934,-187500.022352,-28359.200461,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1.739045e+09,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
3,12.0,-187500.022352,-65122.388616,-3572.792241,-187500.022352,-177356.197221,-6116.800739,-19394.519257,-187500.022352,-28366.330667,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1.739045e+09,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
4,14.0,-187500.022352,-66104.412509,-3607.951535,-187500.022352,-177420.726707,-5992.413281,-19427.264563,-187500.022352,-28374.354944,...,-0.085521,0.167377,Seated Leg Extensions,Subject 4,1.739045e+09,7.230489,-6.969521,-0.833182,Seated Leg Extensions,Subject 4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
543664,70.0,-187500.022352,-187500.022352,-63766.598852,-187500.022352,-187500.022352,-49210.628773,-74359.179957,-187500.022352,-87247.709900,...,0.041539,0.007330,Light Stationary Cycling,Subject 4,1.739049e+09,4.510676,-3.981557,8.068459,Light Stationary Cycling,Subject 4
543665,70.0,-187500.022352,-187500.022352,-63766.598852,-187500.022352,-187500.022352,-49210.628773,-74359.179957,-187500.022352,-87247.709900,...,0.041539,0.007330,Light Stationary Cycling,Subject 4,1.739049e+09,4.510676,-3.981557,8.068459,Light Stationary Cycling,Subject 4
543666,70.0,-187500.022352,-187500.022352,-63766.598852,-187500.022352,-187500.022352,-49210.628773,-74359.179957,-187500.022352,-87247.709900,...,0.041539,0.007330,Light Stationary Cycling,Subject 4,1.739049e+09,4.510676,-3.981557,8.068459,Light Stationary Cycling,Subject 4
543667,70.0,-187500.022352,-187500.022352,-63766.598852,-187500.022352,-187500.022352,-49210.628773,-74359.179957,-187500.022352,-87247.709900,...,0.041539,0.007330,Light Stationary Cycling,Subject 4,1.739049e+09,4.510676,-3.981557,8.068459,Light Stationary Cycling,Subject 4


# Save Merged Data to CSV

In [11]:
try:
    merged_df.to_csv("EEG-HAR_Subject_4_Merged.csv", index=False)
    print("File saved successfully as EEG-HAR_Subject_4_Merged.csv.")
except Exception as e:
    print(f"An error occurred while saving the file: {e}")

File saved successfully as EEG-HAR_Subject_4_Merged.csv.
