In [1]:
import jupyter_black
from IPython.display import display


# Turn on jupyter_black for this notebook session.
# NOTE(review): line_length=999 presumably keeps the formatter from wrapping long lines — confirm this is intended.
jupyter_black.load(line_length=999)

In [2]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
from tqdm.notebook import tqdm

In [3]:
# Load the 2024.11.05 CSV from the ACCELERATION data folder (path is relative to the working directory).
# NOTE(review): `date` holds a DataFrame, not a date — a more descriptive name would read better.
date = pd.read_csv("data_csv/ACCELERATION/2024.11.05.csv")
# Preview the first rows as plain text.
print(date.head())

  SID     Timestamp   Column1   Column2   Column3
0  hm  07:22:26.993  6.326678  3.201048 -6.657078
1  hm  07:22:26.993  6.326678  3.201048 -6.657078
2  hm  07:22:27.012  6.579865  3.186084 -6.359000
3  hm  07:22:26.993  6.326678  3.201048 -6.657078
4  hm  07:22:27.012  6.579865  3.186084 -6.359000


In [None]:
def clean_incremental_timestamps(df: pd.DataFrame, bar: bool = False) -> pd.DataFrame:
    """Keep only the rows whose timestamp strictly advances within each SID.

    Rows are grouped by the "SID" column and every group is scanned in its
    existing row order. A row is kept when its "Timestamp" value parses with
    the format ``%H:%M:%S.%f`` and is later than the last kept timestamp of
    the same group. Duplicated, out-of-order and unparseable timestamps are
    dropped.

    Args:
        df: Frame containing at least the columns "SID" and "Timestamp".
        bar: When True, show one tqdm progress bar per SID group.

    Returns:
        A new DataFrame with the columns of ``df`` holding the kept rows,
        ordered group by group and carrying their original index labels.
        ``df`` itself is not modified.
    """
    kept_groups = []

    for sid, group in df.groupby("SID"):
        # Only the timestamp column is needed to decide which rows survive, so
        # iterate over it directly instead of materialising every row.
        timestamps = group["Timestamp"]
        if bar:
            timestamps = tqdm(timestamps, total=len(group), desc=str(sid))

        kept_positions = []
        previous_time = None
        for position, raw_time in enumerate(timestamps):
            try:
                current_time = datetime.strptime(raw_time, "%H:%M:%S.%f")
            except (ValueError, TypeError):
                # ValueError: malformed text. TypeError: not a string at all
                # (e.g. NaN produced by an empty CSV field).
                continue
            if previous_time is None or current_time > previous_time:
                kept_positions.append(position)
                previous_time = current_time

        if kept_positions:
            # Positional selection keeps the original dtypes and index labels,
            # and is safe even when index labels repeat.
            kept_groups.append(group.iloc[kept_positions])

    if not kept_groups:
        return pd.DataFrame(columns=df.columns)
    return pd.concat(kept_groups)

In [8]:
def process_time_window(df, date_str, window="1min", interpolate=False):
    """Average each SID's numeric columns over fixed time windows.

    Args:
        df: Frame with a "SID" column, a "Timestamp" column of time-of-day
            strings, and the value columns to average. It is not modified.
        date_str: Date text that is prepended (followed by a space) to every
            timestamp before parsing with ``pd.to_datetime``.
        window: Resampling frequency handed to ``DataFrame.resample``.
        interpolate: When True, windows without samples are filled by linear
            interpolation, separately for each SID.

    Returns:
        DataFrame with "SID" and "Timestamp" columns followed by the averaged
        numeric columns, one row per SID and window.
    """
    # 1. Data Preprocessing
    # assign() builds a new frame, so the caller's "Timestamp" strings survive.
    parsed = pd.to_datetime(date_str + " " + df["Timestamp"])
    indexed = df.assign(Timestamp=parsed).set_index("Timestamp")

    # 2. Group and Resample
    def resample_group(group):
        # numeric_only avoids a TypeError on leftover text columns.
        resampled = group.resample(window).mean(numeric_only=True)
        if interpolate:
            # Interpolating inside the group keeps one SID's values from
            # bleeding into the gaps of another SID.
            resampled = resampled.interpolate(method="linear")
        return resampled

    # The grouping key is excluded from the frames handed to resample_group;
    # group_keys=True puts it back as the outer index level of the result.
    value_columns = [column for column in indexed.columns if column != "SID"]
    df_resampled = indexed.groupby("SID", group_keys=True)[value_columns].apply(resample_group)

    return df_resampled.reset_index()


# Calendar date prepended to the time-of-day strings; it matches the name of the CSV file loaded earlier.
date_str = "2024.11.05"
# Drop rows whose timestamp does not advance within each SID (bar=True shows progress per SID).
# NOTE(review): this rebinds `date`, so the raw frame can only be recovered by re-reading the CSV.
date = clean_incremental_timestamps(date, bar=True)

# A copy is passed so `date` keeps its string timestamps whatever process_time_window does to its input.
processed_df = process_time_window(date.copy(), date_str, window="1min", interpolate=True)
processed_df

  0%|          | 0/734149 [00:00<?, ?it/s]

  0%|          | 0/291570 [00:00<?, ?it/s]

Unnamed: 0,SID,Timestamp,Column1,Column2,Column3
0,gzj,2024-11-05 09:04:00,0.046955,1.547882,9.767433
1,gzj,2024-11-05 09:05:00,0.047049,1.546342,9.790846
2,gzj,2024-11-05 09:06:00,0.048713,1.546277,9.807136
3,gzj,2024-11-05 09:07:00,0.623564,2.571579,9.362404
4,gzj,2024-11-05 09:08:00,0.259619,2.536268,9.469775
...,...,...,...,...,...
1374,hm,2024-11-05 22:44:00,0.148401,4.977747,8.069240
1375,hm,2024-11-05 22:45:00,0.205018,4.762940,8.229225
1376,hm,2024-11-05 22:46:00,0.261635,4.548134,8.389209
1377,hm,2024-11-05 22:47:00,1.967541,4.924504,8.170405
