# Import Libraries

In [1]:
import pandas as pd
import glob

# Load EEG Files for Subject 1

In [2]:
column_names = [
    "Sample Index", "EXG Channel 0", "EXG Channel 1", "EXG Channel 2", "EXG Channel 3",
    "EXG Channel 4", "EXG Channel 5", "EXG Channel 6", "EXG Channel 7", "EXG Channel 8",
    "EXG Channel 9", "EXG Channel 10", "EXG Channel 11", "EXG Channel 12", "EXG Channel 13",
    "EXG Channel 14", "EXG Channel 15", "Accel Channel 0", "Accel Channel 1", "Accel Channel 2",
    "Not Used 1", "Digital Channel 0 (D11)", "Digital Channel 1 (D12)", "Digital Channel 2 (D13)",
    "Digital Channel 3 (D17)", "Not Used 2", "Digital Channel 4 (D18)", "Analog Channel 0",
    "Analog Channel 1", "Analog Channel 2", "Timestamp", "Marker Channel", "Timestamp (Formatted)"
]

eeg_files = sorted(glob.glob("/data0/HAR-datasets/PLHI-HAR_EEG-2025/OpenBCISession_s1-*/BrainFlow-RAW_s1-*.csv"))

def load_eeg(file):
    df = pd.read_csv(file, sep="\t", skiprows=3, names=column_names, engine="python")
    df["Activity"] = file.split("/")[-2]
    return df

eeg_df_list = [load_eeg(file) for file in eeg_files]
eeg_df = pd.concat(eeg_df_list, ignore_index=True)

print(f"Successfully loaded {len(eeg_files)} EEG files for Subject 1 across all activities.")
print("Unique activities in EEG data:", eeg_df["Activity"].unique())
display(eeg_df.head())

Successfully loaded 58 EEG files for Subject 1 across all activities.
Unique activities in EEG data: ['OpenBCISession_s1-chair squats'
 'OpenBCISession_s1-light stationary cycling'
 'OpenBCISession_s1-marching in place'
 'OpenBCISession_s1-seated boxing hooks'
 'OpenBCISession_s1-seated leg extensions'
 'OpenBCISession_s1-seated medicine ball twists'
 'OpenBCISession_s1-seated side bends' 'OpenBCISession_s1-side-stepping'
 'OpenBCISession_s1-standing heel to toe walk'
 'OpenBCISession_s1-wall push-ups']


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
0,6.0,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,...,0.0,0.0,0.0,0.0,0.0,0.0,1739028000.0,0.0,,OpenBCISession_s1-chair squats
1,8.0,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,...,16.0,0.0,104.0,0.0,0.0,0.0,1739028000.0,0.0,,OpenBCISession_s1-chair squats
2,10.0,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,...,0.0,0.0,0.0,0.0,0.0,0.0,1739028000.0,0.0,,OpenBCISession_s1-chair squats
3,12.0,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,...,0.0,0.0,0.0,0.0,0.0,0.0,1739028000.0,0.0,,OpenBCISession_s1-chair squats
4,14.0,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,-187500.022352,...,0.0,0.0,0.0,0.0,0.0,0.0,1739028000.0,0.0,,OpenBCISession_s1-chair squats


# Display Last Rows of EEG Data

In [3]:
display(eeg_df.tail()) 

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
737803,38.0,-181447.63934,3435.194902,-181431.88136,-172576.813111,-168771.33921,-181498.869538,-177937.76726,-181517.73441,-181360.89222,...,20.0,0.5,40.0,0.0,0.0,0.0,1739026000.0,0.0,,OpenBCISession_s1-wall push-ups
737804,38.0,-181421.934834,7684.99913,-181412.815322,-161921.445956,-165574.10098,-181476.830718,-181152.752775,-181498.936593,-181360.89222,...,10.0,0.25,20.0,0.0,0.0,0.0,1739026000.0,0.0,,OpenBCISession_s1-wall push-ups
737805,38.0,-181444.085413,4304.722465,-181427.411011,-170701.412344,-168189.411544,-181494.466245,-177425.241759,-181514.225187,-181360.89222,...,5.0,0.125,10.0,0.0,0.0,0.0,1739026000.0,0.0,,OpenBCISession_s1-wall push-ups
737806,38.0,-181431.054345,6555.833704,-181417.799761,-165540.998047,-166677.5619,-181483.424483,-178988.545118,-181502.781093,-181360.89222,...,2.5,0.0625,5.0,0.0,0.0,0.0,1739026000.0,0.0,,OpenBCISession_s1-wall push-ups
737807,52.0,-181439.235084,5683.266304,-181425.9805,-168450.144643,-167552.543289,-181492.119311,-179493.426322,-181510.715963,-174546.828812,...,0.0,0.0,0.0,0.0,0.0,0.0,1739026000.0,0.0,,OpenBCISession_s1-wall push-ups


# Extract EEG Start and End Timestamps

In [4]:
eeg_timestamps = eeg_df[['Timestamp', 'Marker Channel']]
start_time = eeg_timestamps['Timestamp'].min()
end_time = eeg_timestamps['Timestamp'].max()
print(f" Extracted EEG activity start and end timestamps:")
print(f" Start Timestamp: {start_time}")
print(f" End Timestamp: {end_time}")

 Extracted EEG activity start and end timestamps:
 Start Timestamp: 1739025350.139361
 End Timestamp: 1739046207.22878


# Load and Filter HAR Data for Subject 1

In [5]:
har_file_path = "/data0/HAR-datasets/PLHI-HAR_EEG-2025/Combined_Gyro_Acg_Data_Subjects_1_to_6.csv"
har_df = pd.read_csv(har_file_path)

har_df['Timestamp_Accel'] = pd.to_numeric(har_df['Timestamp_Accel'], errors='coerce')
har_df['Timestamp_Gyro'] = pd.to_numeric(har_df['Timestamp_Gyro'], errors='coerce')

if har_df['Timestamp_Accel'].max() > 1e12:  
    har_df['Timestamp_Accel'] = har_df['Timestamp_Accel'] / 1000
    har_df['Timestamp_Gyro'] = har_df['Timestamp_Gyro'] / 1000

print("Before filtering:", har_df.shape)
har_df = har_df[har_df['Subject_ID_x'] == "Subject 1"]
print("After filtering:", har_df.shape)

valid_har = har_df[
    (har_df['Timestamp_Accel'].between(start_time, end_time)) |
    (har_df['Timestamp_Gyro'].between(start_time, end_time))
]

filtered_har = valid_har

print("HAR data is clipped to the EEG activity time range. All activities for Subject 1 are included.")
display(filtered_har.head())

Before filtering: (944460, 12)
After filtering: (187577, 12)
HAR data is clipped to the EEG activity time range. All activities for Subject 1 are included.


  har_df = pd.read_csv(har_file_path)


Unnamed: 0,Timestamp_Gyro,Gyro X (°/s),Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
10449,1739025000.0,0.053756,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
10450,1739025000.0,0.053756,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
10451,1739025000.0,0.053756,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
10452,1739025000.0,0.053756,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
10453,1739025000.0,0.053756,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1


# Display Shape and Info of Filtered HAR Data

In [6]:
print(filtered_har.shape) 
print(filtered_har.info())  

(123001, 12)
<class 'pandas.core.frame.DataFrame'>
Index: 123001 entries, 10449 to 187576
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Timestamp_Gyro    123001 non-null  float64
 1   Gyro X (°/s)      122998 non-null  float64
 2   Gyro Y (°/s)      122996 non-null  float64
 3   Gyro Z (°/s)      122996 non-null  float64
 4   Activity_Label_x  123001 non-null  object 
 5   Subject_ID_x      123001 non-null  object 
 6   Timestamp_Accel   123001 non-null  float64
 7   Accel X (g)       123001 non-null  float64
 8   Accel Y (g)       123001 non-null  object 
 9   Accel Z (g)       122996 non-null  float64
 10  Activity_Label_y  123001 non-null  object 
 11  Subject_ID_y      123001 non-null  object 
dtypes: float64(7), object(5)
memory usage: 12.2+ MB
None


# Merge EEG and HAR Data by Timestamp

In [7]:
eeg_df_sorted = eeg_df.sort_values("Timestamp")
har_df_sorted = filtered_har.sort_values("Timestamp_Accel")

merged_df = pd.merge_asof(
    eeg_df_sorted, 
    har_df_sorted, 
    left_on="Timestamp", 
    right_on="Timestamp_Accel", 
    direction="nearest"
)

print(" EEG & HAR merged.")
display(merged_df.head())

 EEG & HAR merged.


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-166914.311578,-72778.933976,-162089.933406,42949.145192,-149585.562895,-163687.613748,-164460.179443,-166998.465895,-147736.224262,...,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
1,8.0,-166882.616804,-72250.270516,-158688.869618,43413.860311,-149614.396645,-159750.622183,-165931.795947,-166969.542738,-165048.857337,...,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
2,10.0,-166881.119237,-73205.584074,-160614.897086,42795.812225,-149898.77789,-162515.778841,-164265.831025,-166965.2065,-148667.040308,...,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
3,12.0,-166840.126138,-73581.33925,-160860.855682,42779.60721,-150113.734616,-161619.473889,-166378.562615,-166928.840211,-164713.201191,...,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
4,14.0,-166840.863745,-73833.131651,-159276.966366,42689.417921,-150154.973585,-161393.006014,-164366.816207,-166925.152174,-149120.557203,...,-0.023213,-0.006109,Marching in Place,Subject 1,1739025000.0,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1


# Check Timestamp Alignment

In [8]:
print("Timestamp alignment (EEG vs HAR):")
print(merged_df[['Timestamp', 'Timestamp_Accel']])

Timestamp alignment (EEG vs HAR):
           Timestamp  Timestamp_Accel
0       1.739025e+09     1.739025e+09
1       1.739025e+09     1.739025e+09
2       1.739025e+09     1.739025e+09
3       1.739025e+09     1.739025e+09
4       1.739025e+09     1.739025e+09
...              ...              ...
737803  1.739046e+09     1.739029e+09
737804  1.739046e+09     1.739029e+09
737805  1.739046e+09     1.739029e+09
737806  1.739046e+09     1.739029e+09
737807  1.739046e+09     1.739029e+09

[737808 rows x 2 columns]


# Show Merged Data Columns

In [9]:
print("Unique Columns in Merged Data:")
print(merged_df.columns)

Unique Columns in Merged Data:
Index(['Sample Index', 'EXG Channel 0', 'EXG Channel 1', 'EXG Channel 2',
       'EXG Channel 3', 'EXG Channel 4', 'EXG Channel 5', 'EXG Channel 6',
       'EXG Channel 7', 'EXG Channel 8', 'EXG Channel 9', 'EXG Channel 10',
       'EXG Channel 11', 'EXG Channel 12', 'EXG Channel 13', 'EXG Channel 14',
       'EXG Channel 15', 'Accel Channel 0', 'Accel Channel 1',
       'Accel Channel 2', 'Not Used 1', 'Digital Channel 0 (D11)',
       'Digital Channel 1 (D12)', 'Digital Channel 2 (D13)',
       'Digital Channel 3 (D17)', 'Not Used 2', 'Digital Channel 4 (D18)',
       'Analog Channel 0', 'Analog Channel 1', 'Analog Channel 2', 'Timestamp',
       'Marker Channel', 'Timestamp (Formatted)', 'Activity', 'Timestamp_Gyro',
       'Gyro X (°/s)', 'Gyro Y (°/s)', 'Gyro Z (°/s)', 'Activity_Label_x',
       'Subject_ID_x', 'Timestamp_Accel', 'Accel X (g)', 'Accel Y (g)',
       'Accel Z (g)', 'Activity_Label_y', 'Subject_ID_y'],
      dtype='object')


# Display Final Merged Data

In [10]:
display(merged_df)

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-166914.311578,-72778.933976,-162089.933406,42949.145192,-149585.562895,-163687.613748,-164460.179443,-166998.465895,-147736.224262,...,-0.023213,-0.006109,Marching in Place,Subject 1,1.739025e+09,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
1,8.0,-166882.616804,-72250.270516,-158688.869618,43413.860311,-149614.396645,-159750.622183,-165931.795947,-166969.542738,-165048.857337,...,-0.023213,-0.006109,Marching in Place,Subject 1,1.739025e+09,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
2,10.0,-166881.119237,-73205.584074,-160614.897086,42795.812225,-149898.777890,-162515.778841,-164265.831025,-166965.206500,-148667.040308,...,-0.023213,-0.006109,Marching in Place,Subject 1,1.739025e+09,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
3,12.0,-166840.126138,-73581.339250,-160860.855682,42779.607210,-150113.734616,-161619.473889,-166378.562615,-166928.840211,-164713.201191,...,-0.023213,-0.006109,Marching in Place,Subject 1,1.739025e+09,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
4,14.0,-166840.863745,-73833.131651,-159276.966366,42689.417921,-150154.973585,-161393.006014,-164366.816207,-166925.152174,-149120.557203,...,-0.023213,-0.006109,Marching in Place,Subject 1,1.739025e+09,-4.012682,-4.826711,7.534553,Marching in Place,Subject 1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
737803,240.0,-187500.022352,-69177.397391,-3457.591350,-187500.022352,-187500.022352,-2021.223548,-15931.183509,-187500.022352,-22948.200756,...,-0.057421,-0.133160,Light Stationary Cycling,Subject 1,1.739029e+09,-5.837064,2.9759927,6.959944,Light Stationary Cycling,Subject 1
737804,240.0,-187500.022352,-69177.397391,-3457.591350,-187500.022352,-187500.022352,-2021.223548,-15931.183509,-187500.022352,-22948.200756,...,-0.057421,-0.133160,Light Stationary Cycling,Subject 1,1.739029e+09,-5.837064,2.9759927,6.959944,Light Stationary Cycling,Subject 1
737805,240.0,-187500.022352,-69177.397391,-3457.591350,-187500.022352,-187500.022352,-2021.223548,-15931.183509,-187500.022352,-22948.200756,...,-0.057421,-0.133160,Light Stationary Cycling,Subject 1,1.739029e+09,-5.837064,2.9759927,6.959944,Light Stationary Cycling,Subject 1
737806,240.0,-187500.022352,-69177.397391,-3457.591350,-187500.022352,-187500.022352,-2021.223548,-15931.183509,-187500.022352,-22948.200756,...,-0.057421,-0.133160,Light Stationary Cycling,Subject 1,1.739029e+09,-5.837064,2.9759927,6.959944,Light Stationary Cycling,Subject 1


# Save Merged Data to CSV

In [11]:
try:
    merged_df.to_csv("EEG-HAR_Subject_1_Merged.csv", index=False)
    print("File saved successfully as EEG-HAR_Subject_1_Merged.csv.")
except Exception as e:
    print(f"An error occurred while saving the file: {e}")

File saved successfully as EEG-HAR_Subject_1_Merged.csv.
