In [1]:
import pandas as pd
import os
from tqdm import tqdm

ModuleNotFoundError: No module named 'pandas'

In [None]:
# adjust the input and output directories to the correct path
input_dir = "../raw/watch_acc_ml"
output_dir = "../processed/watch_acc_ml"


def resample_to_minute_level(df, base_timestamp):
    """Turn raw accelerometer samples into 1-minute means keyed by Unix seconds.

    Args:
        df: Frame with a ``timestamp`` column holding millisecond offsets and
            the numeric axis columns to be averaged (``x``, ``y``, ``z``).
        base_timestamp: Value in seconds that the millisecond offsets are
            added to.

    Returns:
        A new frame indexed by ``timestamp`` (float Unix seconds, UTC) with
        one row per minute; minutes without samples are linearly interpolated.
        The input frame is not modified.

    Raises:
        Whatever ``tz_localize`` raises for wall-clock times that are
        ambiguous or nonexistent in US/Eastern (DST transitions).
    """
    # absolute sample time in seconds = base + millisecond offset
    samples = df.assign(
        timestamp=base_timestamp + df["timestamp"].to_numpy() / 1000
    )
    # keep only the first reading for any repeated timestamp
    samples = samples.drop_duplicates(subset="timestamp", keep="first")
    samples = samples.assign(
        timestamp=pd.to_datetime(samples["timestamp"], unit="s")
    ).set_index("timestamp")

    # resample data to 1-minute level, filling empty minutes linearly
    minute_means = samples.resample("1min").mean().interpolate(method="linear")
    minute_means = minute_means.reset_index()

    # convert timestamp to UTC assuming the data is in US/Eastern timezone
    # NOTE(review): this treats the reconstructed values as US/Eastern
    # wall-clock time and will raise inside DST transitions - confirm that
    # this matches the data source.
    utc_times = (
        minute_means["timestamp"]
        .dt.tz_localize("US/Eastern")
        .dt.tz_convert("UTC")
    )

    # convert timestamp to Unix seconds; dividing a timedelta by one second
    # does not depend on the stored datetime resolution or on the platform's
    # default integer width, unlike astype(int) / 10**9
    unix_epoch = pd.Timestamp("1970-01-01", tz="UTC")
    minute_means["timestamp"] = (utc_times - unix_epoch) / pd.Timedelta(seconds=1)
    return minute_means.set_index("timestamp")


def process_file(file_path, output_dir):
    """Read one raw file, resample it and write the result as CSV.

    Args:
        file_path: Path of a space-separated, header-less file with four
            columns (timestamp offset, x, y, z).
        output_dir: Existing directory that receives the output CSV.

    Returns:
        The path of the CSV file that was written.

    Raises:
        ValueError: If the part of the filename before the first ``.`` is not
            a number, or the file does not have exactly four columns.
    """
    file_name = os.path.basename(file_path)

    # the following code reconstructs the timestamp from the filename (assuming
    # the base timestamp is in the filename) - this might vary depending on the
    # data source; done first so unrelated files fail before being parsed
    base_timestamp = float(file_name.split(".")[0])

    # read the file - potentially adjust the separator and header
    df = pd.read_csv(file_path, sep=" ", header=None)
    df.columns = ["timestamp", "x", "y", "z"]

    df_resampled = resample_to_minute_level(df, base_timestamp)

    # export data to csv
    stem = os.path.splitext(file_name)[0]
    output_filename = os.path.join(output_dir, f"{stem}_watch_acc_ml.csv")
    df_resampled.to_csv(output_filename, index=True)
    return output_filename


if os.path.isdir(input_dir):
    os.makedirs(output_dir, exist_ok=True)
    # get all the file paths in the input directory (sorted so that runs are
    # reproducible; os.listdir order is arbitrary)
    file_paths = sorted(
        os.path.join(input_dir, f)
        for f in os.listdir(input_dir)
        if os.path.isfile(os.path.join(input_dir, f))
    )
else:
    # nothing to process: report it instead of creating an empty output tree
    print(f"Input directory not found: {input_dir}")
    file_paths = []

# process each file; a failure in one file must not stop the remaining ones
for file_path in file_paths:
    try:
        output_filename = process_file(file_path, output_dir)
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        continue
    print(f"Processed and saved: {output_filename}")