# Import Libraries

In [1]:
import pandas as pd
import glob

# Load EEG Files for Subject 6

In [2]:
column_names = [
    "Sample Index", "EXG Channel 0", "EXG Channel 1", "EXG Channel 2", "EXG Channel 3",
    "EXG Channel 4", "EXG Channel 5", "EXG Channel 6", "EXG Channel 7", "EXG Channel 8",
    "EXG Channel 9", "EXG Channel 10", "EXG Channel 11", "EXG Channel 12", "EXG Channel 13",
    "EXG Channel 14", "EXG Channel 15", "Accel Channel 0", "Accel Channel 1", "Accel Channel 2",
    "Not Used 1", "Digital Channel 0 (D11)", "Digital Channel 1 (D12)", "Digital Channel 2 (D13)",
    "Digital Channel 3 (D17)", "Not Used 2", "Digital Channel 4 (D18)", "Analog Channel 0",
    "Analog Channel 1", "Analog Channel 2", "Timestamp", "Marker Channel", "Timestamp (Formatted)"
]

eeg_files = sorted(glob.glob("/data0/HAR-datasets/PLHI-HAR_EEG-2025/OpenBCISession_s6-*/BrainFlow-RAW_s6-*.csv"))

def load_eeg(file):
    df = pd.read_csv(file, sep="\t", skiprows=3, names=column_names, engine="python")
    df["Activity"] = file.split("/")[-2]
    return df

eeg_df_list = [load_eeg(file) for file in eeg_files]
eeg_df = pd.concat(eeg_df_list, ignore_index=True)

print(f" Successfully loaded {len(eeg_files)} EEG files for Subject 6 across all activities.")
print("Unique activities in EEG data:", eeg_df["Activity"].unique())
display(eeg_df.head())

 Successfully loaded 46 EEG files for Subject 6 across all activities.
Unique activities in EEG data: ['OpenBCISession_s6- standing heel to toe walk'
 'OpenBCISession_s6-chair squats'
 'OpenBCISession_s6-light stationary cycling'
 'OpenBCISession_s6-marchinginplace'
 'OpenBCISession_s6-seated boxing hooks'
 'OpenBCISession_s6-seated leg extensions'
 'OpenBCISession_s6-seated medicine ball twists'
 'OpenBCISession_s6-seated side bends' 'OpenBCISession_s6-side-stepping'
 'OpenBCISession_s6-wallpushups']


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
0,6.0,187500.0,-187500.022352,-187500.022352,-187500.022352,-152721.713808,-103351.381821,-177632.196561,-187500.022352,-102538.337116,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6- standing heel to toe walk
1,8.0,187500.0,-187500.022352,-187500.022352,-187500.022352,-151128.43676,-103442.241662,-177611.275329,-187500.022352,-102296.871221,...,80.0,8.0,80.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6- standing heel to toe walk
2,10.0,187500.0,-187500.022352,-187500.022352,-187500.022352,-149709.100987,-103328.31482,-177576.071331,-187500.022352,-101825.450578,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6- standing heel to toe walk
3,12.0,187500.0,-187500.022352,-187500.022352,-187500.022352,-148225.302783,-103457.798476,-177496.789693,-187500.022352,-101568.807849,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6- standing heel to toe walk
4,14.0,187500.0,-187500.022352,-187500.022352,-187500.022352,-146541.210597,-103444.365078,-177405.996907,-187500.022352,-100813.765921,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6- standing heel to toe walk


# Display Last Rows of EEG Data

In [3]:
display(eeg_df.tail()) 

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Digital Channel 3 (D17),Not Used 2,Digital Channel 4 (D18),Analog Channel 0,Analog Channel 1,Analog Channel 2,Timestamp,Marker Channel,Timestamp (Formatted),Activity
532702,140.0,-187500.022352,-34438.182049,187500.0,-179410.210778,-130859.249039,187500.0,187500.0,-117356.381399,187500.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6-wallpushups
532703,142.0,-187500.022352,-34409.169484,187500.0,-179416.335156,-131219.000366,187500.0,187500.0,-117693.333947,187500.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6-wallpushups
532704,144.0,-187500.022352,-34525.219741,187500.0,-179188.682638,-131475.151357,187500.0,187500.0,-118039.853041,187500.0,...,8.0,7.0,72.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6-wallpushups
532705,146.0,-187500.022352,-34501.325727,187500.0,-179329.163352,-132038.080041,187500.0,187500.0,-118347.658914,187500.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6-wallpushups
532706,148.0,-187500.022352,-34668.717941,187500.0,-179096.884024,-132538.557951,187500.0,187500.0,-118708.41607,187500.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1739052000.0,0.0,,OpenBCISession_s6-wallpushups


# Extract EEG Start and End Timestamps

In [4]:
eeg_timestamps = eeg_df[['Timestamp', 'Marker Channel']]
start_time = eeg_timestamps['Timestamp'].min()
end_time = eeg_timestamps['Timestamp'].max()
print(f" Extracted EEG activity start and end timestamps:")
print(f" Start Timestamp: {start_time}")
print(f" End Timestamp: {end_time}")

 Extracted EEG activity start and end timestamps:
 Start Timestamp: 1739050687.33644
 End Timestamp: 1739054138.620786


# Load and Filter HAR Data for Subject 6

In [5]:
har_file_path = "/data0/HAR-datasets/PLHI-HAR_EEG-2025/Combined_Gyro_Acg_Data_Subjects_1_to_6.csv"
har_df = pd.read_csv(har_file_path)

har_df['Timestamp_Accel'] = pd.to_numeric(har_df['Timestamp_Accel'], errors='coerce')
har_df['Timestamp_Gyro'] = pd.to_numeric(har_df['Timestamp_Gyro'], errors='coerce')

if har_df['Timestamp_Accel'].max() > 1e12:  
    har_df['Timestamp_Accel'] = har_df['Timestamp_Accel'] / 1000
    har_df['Timestamp_Gyro'] = har_df['Timestamp_Gyro'] / 1000

print("Before filtering:", har_df.shape)
har_df = har_df[har_df['Subject_ID_x'] == "Subject 6"]
print("After filtering:", har_df.shape)

valid_har = har_df[
    (har_df['Timestamp_Accel'].between(start_time, end_time)) |
    (har_df['Timestamp_Gyro'].between(start_time, end_time))
]

filtered_har = valid_har

print("HAR data is clipped to the EEG activity time range. All activities for Subject 6 are included.")
display(filtered_har.head())

Before filtering: (944460, 12)
After filtering: (175690, 12)
HAR data is clipped to the EEG activity time range. All activities for Subject 6 are included.


  har_df = pd.read_csv(har_file_path)


Unnamed: 0,Timestamp_Gyro,Gyro X (°/s),Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
768770,1739051000.0,-0.023213,0.023213,0.014661,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
768771,1739051000.0,-0.023213,0.023213,0.014661,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
768772,1739051000.0,-0.058643,0.026878,0.002443,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
768773,1739051000.0,-0.058643,0.026878,0.002443,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
768774,1739051000.0,-0.058643,0.026878,0.002443,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6


# Display Shape and Info of Filtered HAR Data

In [6]:
print(filtered_har.shape) 
print(filtered_har.info())

(137327, 12)
<class 'pandas.core.frame.DataFrame'>
Index: 137327 entries, 768770 to 944254
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Timestamp_Gyro    137327 non-null  float64
 1   Gyro X (°/s)      137327 non-null  float64
 2   Gyro Y (°/s)      137316 non-null  float64
 3   Gyro Z (°/s)      137316 non-null  float64
 4   Activity_Label_x  137327 non-null  object 
 5   Subject_ID_x      137327 non-null  object 
 6   Timestamp_Accel   137327 non-null  float64
 7   Accel X (g)       137327 non-null  float64
 8   Accel Y (g)       137313 non-null  object 
 9   Accel Z (g)       137309 non-null  float64
 10  Activity_Label_y  137327 non-null  object 
 11  Subject_ID_y      137327 non-null  object 
dtypes: float64(7), object(5)
memory usage: 13.6+ MB
None


# Merge EEG and HAR Data by Timestamp

In [7]:
eeg_df_sorted = eeg_df.sort_values("Timestamp")
har_df_sorted = filtered_har.sort_values("Timestamp_Accel")

merged_df = pd.merge_asof(
    eeg_df_sorted, 
    har_df_sorted, 
    left_on="Timestamp", 
    right_on="Timestamp_Accel", 
    direction="nearest"
)

print(" EEG & HAR merged.")
display(merged_df.head())

 EEG & HAR merged.


Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-187500.022352,-145365.0171,-155078.906426,-173815.367975,-121219.142821,187500.0,187500.0,-187500.022352,187500.0,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
1,8.0,-187500.022352,-144901.084292,-155199.739957,-173965.638753,-120902.530361,187500.0,187500.0,-187500.022352,187500.0,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
2,10.0,-187500.022352,-144769.834849,-155450.638288,-174066.802748,-120390.407192,187500.0,187500.0,-187500.022352,187500.0,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
3,12.0,-187500.022352,-144468.645092,-155465.01046,-174205.987061,-120049.163109,187500.0,187500.0,-187500.022352,187500.0,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
4,14.0,-187500.022352,-144460.129077,-155691.232466,-174355.453176,-119469.694134,187500.0,187500.0,-187500.022352,187500.0,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1739051000.0,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6


# Check Timestamp Alignment

In [8]:
print("Timestamp alignment (EEG vs HAR):")
print(merged_df[['Timestamp', 'Timestamp_Accel']])

Timestamp alignment (EEG vs HAR):
           Timestamp  Timestamp_Accel
0       1.739051e+09     1.739051e+09
1       1.739051e+09     1.739051e+09
2       1.739051e+09     1.739051e+09
3       1.739051e+09     1.739051e+09
4       1.739051e+09     1.739051e+09
...              ...              ...
532702  1.739054e+09     1.739054e+09
532703  1.739054e+09     1.739054e+09
532704  1.739054e+09     1.739054e+09
532705  1.739054e+09     1.739054e+09
532706  1.739054e+09     1.739054e+09

[532707 rows x 2 columns]


# Show Merged Data Columns

In [9]:
print("Unique Columns in Merged Data:")
print(merged_df.columns)

Unique Columns in Merged Data:
Index(['Sample Index', 'EXG Channel 0', 'EXG Channel 1', 'EXG Channel 2',
       'EXG Channel 3', 'EXG Channel 4', 'EXG Channel 5', 'EXG Channel 6',
       'EXG Channel 7', 'EXG Channel 8', 'EXG Channel 9', 'EXG Channel 10',
       'EXG Channel 11', 'EXG Channel 12', 'EXG Channel 13', 'EXG Channel 14',
       'EXG Channel 15', 'Accel Channel 0', 'Accel Channel 1',
       'Accel Channel 2', 'Not Used 1', 'Digital Channel 0 (D11)',
       'Digital Channel 1 (D12)', 'Digital Channel 2 (D13)',
       'Digital Channel 3 (D17)', 'Not Used 2', 'Digital Channel 4 (D18)',
       'Analog Channel 0', 'Analog Channel 1', 'Analog Channel 2', 'Timestamp',
       'Marker Channel', 'Timestamp (Formatted)', 'Activity', 'Timestamp_Gyro',
       'Gyro X (°/s)', 'Gyro Y (°/s)', 'Gyro Z (°/s)', 'Activity_Label_x',
       'Subject_ID_x', 'Timestamp_Accel', 'Accel X (g)', 'Accel Y (g)',
       'Accel Z (g)', 'Activity_Label_y', 'Subject_ID_y'],
      dtype='object')


# Display Final Merged Data

In [10]:
display(merged_df)

Unnamed: 0,Sample Index,EXG Channel 0,EXG Channel 1,EXG Channel 2,EXG Channel 3,EXG Channel 4,EXG Channel 5,EXG Channel 6,EXG Channel 7,EXG Channel 8,...,Gyro Y (°/s),Gyro Z (°/s),Activity_Label_x,Subject_ID_x,Timestamp_Accel,Accel X (g),Accel Y (g),Accel Z (g),Activity_Label_y,Subject_ID_y
0,6.0,-187500.022352,-145365.017100,-155078.906426,-173815.367975,-121219.142821,187500.000000,187500.000000,-187500.022352,187500.000000,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1.739051e+09,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
1,8.0,-187500.022352,-144901.084292,-155199.739957,-173965.638753,-120902.530361,187500.000000,187500.000000,-187500.022352,187500.000000,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1.739051e+09,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
2,10.0,-187500.022352,-144769.834849,-155450.638288,-174066.802748,-120390.407192,187500.000000,187500.000000,-187500.022352,187500.000000,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1.739051e+09,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
3,12.0,-187500.022352,-144468.645092,-155465.010460,-174205.987061,-120049.163109,187500.000000,187500.000000,-187500.022352,187500.000000,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1.739051e+09,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
4,14.0,-187500.022352,-144460.129077,-155691.232466,-174355.453176,-119469.694134,187500.000000,187500.000000,-187500.022352,187500.000000,...,0.023213,0.014661,Seated Leg Extensions,Subject 6,1.739051e+09,1.841141,-1.242591,9.411607,Seated Leg Extensions,Subject 6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
532702,198.0,187500.000000,-187500.022352,-187500.022352,-187500.022352,-187500.022352,23305.582798,11907.198954,-187500.022352,5359.076602,...,-0.046426,-0.057421,Light Stationary Cycling,Subject 6,1.739054e+09,1.996764,3.107674,8.841787,Light Stationary Cycling,Subject 6
532703,200.0,187500.000000,-187500.022352,-187500.022352,-187500.022352,-187500.022352,23927.028349,12049.892491,-187500.022352,5721.376028,...,-0.046426,-0.057421,Light Stationary Cycling,Subject 6,1.739054e+09,1.996764,3.107674,8.841787,Light Stationary Cycling,Subject 6
532704,202.0,187500.000000,-187500.022352,-187500.022352,-187500.022352,-187500.022352,23721.280542,12064.912863,-187500.022352,5997.218907,...,-0.046426,-0.057421,Light Stationary Cycling,Subject 6,1.739054e+09,1.996764,3.107674,8.841787,Light Stationary Cycling,Subject 6
532705,204.0,187500.000000,-187500.022352,-187500.022352,-187500.022352,-187500.022352,23718.464222,11898.057091,-187500.022352,5793.370997,...,-0.046426,-0.057421,Light Stationary Cycling,Subject 6,1.739054e+09,1.996764,3.107674,8.841787,Light Stationary Cycling,Subject 6


# Save Merged Data to CSV

In [11]:
try:
    merged_df.to_csv("EEG-HAR_Subject_6_Merged.csv", index=False)
    print("File saved successfully as EEG-HAR_Subject_6_Merged.csv.")
except Exception as e:
    print(f"An error occurred while saving the file: {e}")

File saved successfully as EEG-HAR_Subject_6_Merged.csv.
