# Import Libraries

In [1]:
import pandas as pd
import glob

# Load EEG Files for Subject 5

In [2]:
column_names = [
    "Sample Index", "EXG Channel 0", "EXG Channel 1", "EXG Channel 2", "EXG Channel 3",
    "EXG Channel 4", "EXG Channel 5", "EXG Channel 6", "EXG Channel 7", "EXG Channel 8",
    "EXG Channel 9", "EXG Channel 10", "EXG Channel 11", "EXG Channel 12", "EXG Channel 13",
    "EXG Channel 14", "EXG Channel 15", "Accel Channel 0", "Accel Channel 1", "Accel Channel 2",
    "Not Used 1", "Digital Channel 0 (D11)", "Digital Channel 1 (D12)", "Digital Channel 2 (D13)",
    "Digital Channel 3 (D17)", "Not Used 2", "Digital Channel 4 (D18)", "Analog Channel 0",
    "Analog Channel 1", "Analog Channel 2", "Timestamp", "Marker Channel", "Timestamp (Formatted)"
]

eeg_files = sorted(glob.glob("/data0/HAR-datasets/PLHI-HAR_EEG-2025/OpenBCISession_s5-*/BrainFlow-RAW_s5-*.csv"))

def load_eeg(file):
    df = pd.read_csv(file, sep="\t", skiprows=3, names=column_names, engine="python")
    df["Activity"] = file.split("/")[-2]
    return df

eeg_df_list = [load_eeg(file) for file in eeg_files]
eeg_df = pd.concat(eeg_df_list, ignore_index=True)

print(f" Successfully loaded {len(eeg_files)} EEG files for Subject 5 across all activities.")
print("Unique activities in EEG data:", eeg_df["Activity"].unique())
display(eeg_df.head())

 Successfully loaded 12 EEG files for Subject 5 across all activities.
Unique activities in EEG data: ['OpenBCISession_s5-marching in place'
 'OpenBCISession_s5-seated boxing hooks'
 'OpenBCISession_s5-standing heel to toe walk'
 'OpenBCISession_s5-wall push-ups']


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
0,6.0,-187500.022352,-187500.022352,-95681.827746,-187500.022352,-79511.59233,-63431.143872,-73532.500688,-187500.022352,-85267.680617,...,0.0,0.0,0.0,0.0,0.0,0.0,1739049000.0,0.0,,OpenBCISession_s5-marching in place
1,8.0,-187500.022352,-187500.022352,-95959.03408,-187500.022352,-79513.827504,-63390.955435,-73657.648105,-187500.022352,-84424.729279,...,96.0,126.5,96.0,0.0,0.0,0.0,1739049000.0,0.0,,OpenBCISession_s5-marching in place
2,10.0,-187500.022352,-187500.022352,-95751.900465,-187500.022352,-79737.657873,-63640.132682,-73717.28256,-187500.022352,-85352.103156,...,0.0,0.0,0.0,0.0,0.0,0.0,1739049000.0,0.0,,OpenBCISession_s5-marching in place
3,12.0,-187500.022352,-187500.022352,-95912.676562,-187500.022352,-79541.387205,-63425.73475,-73720.88119,-187500.022352,-84548.423832,...,0.0,0.0,0.0,0.0,0.0,0.0,1739049000.0,0.0,,OpenBCISession_s5-marching in place
4,14.0,-187500.022352,-187500.022352,-95820.296803,-187500.022352,-79945.484393,-63826.814452,-73858.836157,-187500.022352,-85439.744346,...,0.0,0.0,0.0,0.0,0.0,0.0,1739049000.0,0.0,,OpenBCISession_s5-marching in place


# Show Last Rows of EEG Data

In [3]:
display(eeg_df.tail()) 

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
146308,50.0,-187500.022352,-187500.022352,-85720.236387,-187500.022352,-103088.905285,-113209.574009,-101831.776122,-81578.189919,-99941.153817,...,0.0,0.0,0.0,0.0,0.0,0.0,1739050000.0,0.0,,OpenBCISession_s5-wall push-ups
146309,52.0,-187500.022352,-187500.022352,-85273.782644,-187500.022352,-101969.015833,-112031.458262,-100749.862283,-80395.648527,-102831.457893,...,0.0,0.0,0.0,0.0,0.0,0.0,1739050000.0,0.0,,OpenBCISession_s5-wall push-ups
146310,54.0,-187500.022352,-187500.022352,-85358.383996,-187500.022352,-102667.865475,-112793.630397,-101417.285373,-81159.921725,-99799.399054,...,0.0,0.0,0.0,0.0,0.0,0.0,1739050000.0,0.0,,OpenBCISession_s5-wall push-ups
146311,56.0,-187500.022352,-187500.022352,-85560.913153,-187500.022352,-102332.298736,-112404.553581,-101118.219032,-80776.857528,-102878.776536,...,40.0,3.5,24.0,0.0,0.0,0.0,1739050000.0,0.0,,OpenBCISession_s5-wall push-ups
146312,58.0,-187500.022352,-187500.022352,-84925.050727,-187500.022352,-102165.487667,-112291.699623,-100928.184501,-80634.074585,-99693.988227,...,0.0,0.0,0.0,0.0,0.0,0.0,1739050000.0,0.0,,OpenBCISession_s5-wall push-ups


# Extract Start and End Timestamps

In [4]:
eeg_timestamps = eeg_df[['Timestamp', 'Marker Channel']]
start_time = eeg_timestamps['Timestamp'].min()
end_time = eeg_timestamps['Timestamp'].max()
print(f" Extracted EEG activity start and end timestamps:")
print(f" Start Timestamp: {start_time}")
print(f" End Timestamp: {end_time}")

 Extracted EEG activity start and end timestamps:
 Start Timestamp: 1739048968.619637
 End Timestamp: 1739050140.62928


# Load and Filter HAR Data for Subject 5

In [5]:
har_file_path = "/data0/HAR-datasets/PLHI-HAR_EEG-2025/Combined_Gyro_Acg_Data_Subjects_1_to_6.csv"
har_df = pd.read_csv(har_file_path)

har_df['Timestamp_Accel'] = pd.to_numeric(har_df['Timestamp_Accel'], errors='coerce')
har_df['Timestamp_Gyro'] = pd.to_numeric(har_df['Timestamp_Gyro'], errors='coerce')

if har_df['Timestamp_Accel'].max() > 1e12:  
    har_df['Timestamp_Accel'] = har_df['Timestamp_Accel'] / 1000
    har_df['Timestamp_Gyro'] = har_df['Timestamp_Gyro'] / 1000

print("Before filtering:", har_df.shape)
har_df = har_df[har_df['Subject_ID_x'] == "Subject 5"]
print("After filtering:", har_df.shape)

valid_har = har_df[
    (har_df['Timestamp_Accel'].between(start_time, end_time)) |
    (har_df['Timestamp_Gyro'].between(start_time, end_time))
]

filtered_har = valid_har

print(" HAR data is clipped to the EEG activity time range. All activities for Subject 5 are included.")
display(filtered_har.head())

Before filtering: (944460, 12)
After filtering: (43411, 12)
 HAR data is clipped to the EEG activity time range. All activities for Subject 5 are included.


  har_df = pd.read_csv(har_file_path)


Unnamed: 0,Timestamp_Gyro,Gyro X (°/s),Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
725447,1739049000.0,-0.634078,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
725448,1739049000.0,-0.634078,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
725449,1739049000.0,-0.634078,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
725450,1739049000.0,-0.634078,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
725451,1739049000.0,-0.634078,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5


# Show Shape and Info of Filtered HAR Data

In [6]:
print(filtered_har.shape) 
print(filtered_har.info())  

(43069, 12)
<class 'pandas.core.frame.DataFrame'>
Index: 43069 entries, 725447 to 768769
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Timestamp_Gyro    43069 non-null  float64
 1   Gyro X (°/s)      43069 non-null  float64
 2   Gyro Y (°/s)      43069 non-null  float64
 3   Gyro Z (°/s)      43061 non-null  float64
 4   Activity_Label_x  43069 non-null  object 
 5   Subject_ID_x      43069 non-null  object 
 6   Timestamp_Accel   43069 non-null  float64
 7   Accel X (g)       43069 non-null  float64
 8   Accel Y (g)       43069 non-null  object 
 9   Accel Z (g)       43048 non-null  float64
 10  Activity_Label_y  43069 non-null  object 
 11  Subject_ID_y      43069 non-null  object 
dtypes: float64(7), object(5)
memory usage: 4.3+ MB
None


# Merge EEG and HAR Data by Nearest Timestamps

In [7]:
eeg_df_sorted = eeg_df.sort_values("Timestamp")
har_df_sorted = filtered_har.sort_values("Timestamp_Accel")

merged_df = pd.merge_asof(
    eeg_df_sorted, 
    har_df_sorted, 
    left_on="Timestamp", 
    right_on="Timestamp_Accel", 
    direction="nearest"
)

print(" EEG & HAR merged.")
display(merged_df.head())

 EEG & HAR merged.


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-187500.022352,-187500.022352,-95681.827746,-187500.022352,-79511.59233,-63431.143872,-73532.500688,-187500.022352,-85267.680617,...,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
1,8.0,-187500.022352,-187500.022352,-95959.03408,-187500.022352,-79513.827504,-63390.955435,-73657.648105,-187500.022352,-84424.729279,...,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
2,10.0,-187500.022352,-187500.022352,-95751.900465,-187500.022352,-79737.657873,-63640.132682,-73717.28256,-187500.022352,-85352.103156,...,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
3,12.0,-187500.022352,-187500.022352,-95912.676562,-187500.022352,-79541.387205,-63425.73475,-73720.88119,-187500.022352,-84548.423832,...,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
4,14.0,-187500.022352,-187500.022352,-95820.296803,-187500.022352,-79945.484393,-63826.814452,-73858.836157,-187500.022352,-85439.744346,...,0.172264,-0.128282,Marching in Place,Subject 5,1739049000.0,10.065224,-1.534683,2.006341,Marching in Place,Subject 5


# Show Timestamp Alignment

In [8]:
print("Timestamp alignment (EEG vs HAR):")
print(merged_df[['Timestamp', 'Timestamp_Accel']])

Timestamp alignment (EEG vs HAR):
           Timestamp  Timestamp_Accel
0       1.739049e+09     1.739049e+09
1       1.739049e+09     1.739049e+09
2       1.739049e+09     1.739049e+09
3       1.739049e+09     1.739049e+09
4       1.739049e+09     1.739049e+09
...              ...              ...
146308  1.739050e+09     1.739050e+09
146309  1.739050e+09     1.739050e+09
146310  1.739050e+09     1.739050e+09
146311  1.739050e+09     1.739050e+09
146312  1.739050e+09     1.739050e+09

[146313 rows x 2 columns]


# Show Merged Data Columns

In [9]:
print("Unique Columns in Merged Data:")
print(merged_df.columns)

Unique Columns in Merged Data:
Index(['Sample Index', 'EXG Channel 0', 'EXG Channel 1', 'EXG Channel 2',
       'EXG Channel 3', 'EXG Channel 4', 'EXG Channel 5', 'EXG Channel 6',
       'EXG Channel 7', 'EXG Channel 8', 'EXG Channel 9', 'EXG Channel 10',
       'EXG Channel 11', 'EXG Channel 12', 'EXG Channel 13', 'EXG Channel 14',
       'EXG Channel 15', 'Accel Channel 0', 'Accel Channel 1',
       'Accel Channel 2', 'Not Used 1', 'Digital Channel 0 (D11)',
       'Digital Channel 1 (D12)', 'Digital Channel 2 (D13)',
       'Digital Channel 3 (D17)', 'Not Used 2', 'Digital Channel 4 (D18)',
       'Analog Channel 0', 'Analog Channel 1', 'Analog Channel 2', 'Timestamp',
       'Marker Channel', 'Timestamp (Formatted)', 'Activity', 'Timestamp_Gyro',
       'Gyro X (°/s)', 'Gyro Y (°/s)', 'Gyro Z (°/s)', 'Activity_Label_x',
       'Subject_ID_x', 'Timestamp_Accel', 'Accel X (g)', 'Accel Y (g)',
       'Accel Z (g)', 'Activity_Label_y', 'Subject_ID_y'],
      dtype='object')


# Display Merged Data

In [10]:
display(merged_df)

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-187500.022352,-187500.022352,-95681.827746,-187500.022352,-79511.592330,-63431.143872,-73532.500688,-187500.022352,-85267.680617,...,0.172264,-0.128282,Marching in Place,Subject 5,1.739049e+09,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
1,8.0,-187500.022352,-187500.022352,-95959.034080,-187500.022352,-79513.827504,-63390.955435,-73657.648105,-187500.022352,-84424.729279,...,0.172264,-0.128282,Marching in Place,Subject 5,1.739049e+09,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
2,10.0,-187500.022352,-187500.022352,-95751.900465,-187500.022352,-79737.657873,-63640.132682,-73717.282560,-187500.022352,-85352.103156,...,0.172264,-0.128282,Marching in Place,Subject 5,1.739049e+09,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
3,12.0,-187500.022352,-187500.022352,-95912.676562,-187500.022352,-79541.387205,-63425.734750,-73720.881190,-187500.022352,-84548.423832,...,0.172264,-0.128282,Marching in Place,Subject 5,1.739049e+09,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
4,14.0,-187500.022352,-187500.022352,-95820.296803,-187500.022352,-79945.484393,-63826.814452,-73858.836157,-187500.022352,-85439.744346,...,0.172264,-0.128282,Marching in Place,Subject 5,1.739049e+09,10.065224,-1.534683,2.006341,Marching in Place,Subject 5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146308,112.0,-187500.022352,-187500.022352,-110451.212281,-187500.022352,-116857.177539,-116518.034520,-126059.971876,-103618.328705,-117849.013847,...,-0.245568,-0.340863,Seated Boxing Hooks,Subject 5,1.739050e+09,0.665588,2.738967,9.071630,Seated Boxing Hooks,Subject 5
146309,114.0,-187500.022352,-187500.022352,-110401.703167,-187500.022352,-116811.244704,-116448.095912,-125994.682431,-103546.691364,-118155.590374,...,-0.245568,-0.340863,Seated Boxing Hooks,Subject 5,1.739050e+09,0.665588,2.738967,9.071630,Seated Boxing Hooks,Subject 5
146310,116.0,-187500.022352,-187500.022352,-110409.369816,-187500.022352,-116832.478861,-116467.877205,-126013.569655,-103581.604788,-117849.773806,...,-0.245568,-0.340863,Seated Boxing Hooks,Subject 5,1.739050e+09,0.665588,2.738967,9.071630,Seated Boxing Hooks,Subject 5
146311,118.0,-187500.022352,-187500.022352,-110428.748778,-187500.022352,-116849.846166,-116473.241624,-126020.632806,-103597.116899,-118189.877950,...,-0.245568,-0.340863,Seated Boxing Hooks,Subject 5,1.739050e+09,0.665588,2.738967,9.071630,Seated Boxing Hooks,Subject 5


# Save Merged Data to CSV

In [11]:
try:
    merged_df.to_csv("EEG-HAR_Subject_5_Merged.csv", index=False)
    print("File saved successfully as EEG-HAR_Subject_5_Merged.csv.")
except Exception as e:
    print(f"An error occurred while saving the file: {e}")

File saved successfully as EEG-HAR_Subject_5_Merged.csv.
