In [35]:
import os
import pandas as pd
import re

# ✅ Base dataset path
base_path = r'C:\Users\88019\Downloads\p2\BB-MAS_Dataset (keystroke and swipe)\BB-MAS_Dataset\BB-MAS_Dataset'

keystroke_dfs = []
touch_dfs = []
keystroke_count = 0
touch_count = 0

# ✅ Iterate over user folders
for user_folder in os.listdir(base_path):
    user_path = os.path.join(base_path, user_folder)

    if os.path.isdir(user_path):
        try:
            # ✅ Extract numeric user ID
            match_folder = re.match(r"(\d+)", user_folder)
            if not match_folder:
                print(f"⚠️ Skipping folder with unexpected name format: {user_folder}")
                continue

            user_id = int(match_folder.group(1))
            if user_id > 100:
                continue  # ✅ Skip users beyond 100

            # ✅ Get keyboard and touch files (for any device)
            key_files = [f for f in os.listdir(user_path) if "Keyboard" in f and f.endswith(".csv")]
            touch_files = [f for f in os.listdir(user_path) if "TouchEvent" in f and f.endswith(".csv")]

            # ✅ Handle keystroke file
            if len(key_files) == 1:
                key_path = os.path.join(user_path, key_files[0])
                df_key = pd.read_csv(key_path)
                df_key['user'] = user_id
                keystroke_dfs.append(df_key)
                keystroke_count += 1
                print(f"✅ Added keystroke CSV for user {user_id}")
            elif len(key_files) == 0:
                print(f"⚠️ No keystroke CSV found for user {user_id}")
            else:
                print(f"⚠️ Multiple keystroke files for user {user_id}, skipping...")

            # ✅ Handle touch gesture file
            if len(touch_files) == 1:
                touch_path = os.path.join(user_path, touch_files[0])
                df_touch = pd.read_csv(touch_path)
                df_touch['user'] = user_id
                touch_dfs.append(df_touch)
                touch_count += 1
                print(f"✅ Added touch gesture CSV for user {user_id}")
            elif len(touch_files) == 0:
                print(f"⚠️ No touch gesture CSV found for user {user_id}")
            else:
                print(f"⚠️ Multiple touch gesture files for user {user_id}, skipping...")

        except Exception as e:
            print(f"❌ Error processing {user_folder}: {e}")

# ✅ Merge and sort by 'user' and 'eid' (if exists)
def save_data(dataframes, file_prefix):
    if dataframes:
        merged = pd.concat(dataframes, ignore_index=True)

        # ✅ Check if 'eid' exists, then sort
        sort_cols = ['user']
        if 'eid' in merged.columns:
            sort_cols.append('eid')
        elif 'EID' in merged.columns:
            sort_cols.append('EID')

        merged.sort_values(by=sort_cols, inplace=True)

        # ✅ Save as CSV
        csv_file = f'{file_prefix}.csv'
        merged.to_csv(csv_file, index=False)

        # ✅ Save as JSON
        json_file = f'{file_prefix}.json'
        merged.to_json(json_file, orient='records', lines=True)

        print(f"\n📁 {file_prefix.capitalize()} saved as CSV and JSON.")
        print(f"👤 Unique users in {file_prefix} data:")
        print(" ".join(str(uid) for uid in sorted(merged['user'].unique())))
    else:
        print(f"\n⚠️ No {file_prefix} files were added.")

# ✅ Final save
save_data(keystroke_dfs, 'merged_keystroke')
save_data(touch_dfs, 'merged_touch_gesture')


✅ Added keystroke CSV for user 1
✅ Added touch gesture CSV for user 1
✅ Added keystroke CSV for user 10
✅ Added touch gesture CSV for user 10
✅ Added keystroke CSV for user 100
✅ Added touch gesture CSV for user 100
✅ Added keystroke CSV for user 11
✅ Added touch gesture CSV for user 11
✅ Added keystroke CSV for user 12
✅ Added touch gesture CSV for user 12
✅ Added keystroke CSV for user 13
✅ Added touch gesture CSV for user 13
✅ Added keystroke CSV for user 14
✅ Added touch gesture CSV for user 14
✅ Added keystroke CSV for user 15
✅ Added touch gesture CSV for user 15
✅ Added keystroke CSV for user 16
✅ Added touch gesture CSV for user 16
✅ Added keystroke CSV for user 17
✅ Added touch gesture CSV for user 17
✅ Added keystroke CSV for user 18
✅ Added touch gesture CSV for user 18
✅ Added keystroke CSV for user 19
✅ Added touch gesture CSV for user 19
✅ Added keystroke CSV for user 2
✅ Added touch gesture CSV for user 2
✅ Added keystroke CSV for user 20
✅ Added touch gesture CSV for us