In [None]:
import pandas as pd
import os

def merge_csv_files_in_folder(folder_path, activity_label):
    """Merge every CSV file in a folder into one activity-labelled DataFrame.

    Each ``.csv`` file is read with ``pd.read_csv``. Its first column is
    interpreted as a number of seconds and turned into a ``Timestamp`` column
    (offset from 1970-01-01), and an ``Activity`` column holding
    ``activity_label`` is added. The first file in sorted path order is the
    base frame; every later file contributes only the columns the merged
    frame does not already have, aligned to the base rows with
    ``pd.merge_asof`` on ``Timestamp``.

    Parameters
    ----------
    folder_path : str
        Directory that is scanned (non-recursively) for ``.csv`` files.
    activity_label : str
        Value written into the ``Activity`` column of every row.

    Returns
    -------
    pandas.DataFrame
        The merged frame, or an empty DataFrame when the folder contains no
        CSV files or none of them holds any data.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # of base frame (and therefore the merged rows) reproducible across runs.
    csv_files = sorted(
        os.path.join(folder_path, file)
        for file in os.listdir(folder_path)
        if file.endswith('.csv')
    )

    if not csv_files:
        print(f"No CSV files found in {folder_path}")
        return pd.DataFrame()

    base_time = pd.Timestamp('1970-01-01')
    dataframes = []

    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file)
        except pd.errors.EmptyDataError:
            # A zero-byte file has nothing to contribute; skip it instead of
            # aborting the whole folder.
            print(f"Skipping empty CSV file {csv_file}")
            continue
        df['Activity'] = activity_label
        # The first CSV column is treated as elapsed seconds.
        df['Timestamp'] = base_time + pd.to_timedelta(df[df.columns[0]], unit='s')
        dataframes.append(df)

    if not dataframes:
        print(f"No valid dataframes to merge in {folder_path}")
        return pd.DataFrame()

    merged_df = dataframes[0]
    columns_added = []

    for df in dataframes[1:]:
        added_columns = [
            col for col in df.columns
            if col not in merged_df.columns and col != 'Timestamp'
        ]
        columns_added.extend(added_columns)

        left = merged_df.sort_values('Timestamp')
        if not added_columns:
            # Nothing new to attach; keep the frame sorted and re-indexed as
            # a merge would have left it.
            merged_df = left.reset_index(drop=True)
            continue

        # Select only the new columns from the right frame so no overlapping
        # columns exist after the merge. This avoids the old name-based
        # cleanup, which also deleted real columns containing "drop".
        right = df[['Timestamp'] + added_columns].sort_values('Timestamp')
        merged_df = pd.merge_asof(left, right, on='Timestamp')

    print(f"Columns added in {activity_label}: {columns_added}")
    return merged_df


In [None]:
# Recording folders to process; each entry lines up by position with the
# label at the same index in `activities`.
folder_paths = [
    '/content/drive/MyDrive/MLQL/biking',
    '/content/drive/MyDrive/MLQL/running',
    '/content/drive/MyDrive/MLQL/sitting_1',
    '/content/drive/MyDrive/MLQL/sitting_2',
    '/content/drive/MyDrive/MLQL/standing',
    '/content/drive/MyDrive/MLQL/walking_1'
]

# Activity label per folder; both sitting folders share the 'sitting' label.
activities = [
    'biking',
    'running',
    'sitting',
    'sitting',
    'standing',
    'walking'
]

# Fail fast on a mismatch instead of hitting an IndexError partway through
# the folders or silently ignoring surplus labels.
if len(folder_paths) != len(activities):
    raise ValueError(
        f"folder_paths has {len(folder_paths)} entries but activities has {len(activities)}"
    )

all_dataframes = []

# One merged frame per folder, in the same order as `folder_paths`.
for folder_path, activity_label in zip(folder_paths, activities):
    merged_df = merge_csv_files_in_folder(folder_path, activity_label)
    all_dataframes.append(merged_df)

In [None]:
# Stack the per-folder frames into one table with a fresh index, remove the
# Timestamp column, then write the result to Drive without the index.
final_df = (
    pd.concat(all_dataframes, ignore_index=True)
    .drop(columns=['Timestamp'])
)
final_df.to_csv('/content/drive/MyDrive/MLQL/final_merged_data.csv', index=False)