In [1]:
import pathlib
import pickle 
import time
from typing import Union


import numpy as np
import pandas as pd

In [72]:
def pretty_time(sec: int) -> str:
    """Render a second count as a zero-padded ``HH:MM:SS`` string.

    ``sec`` is converted with ``time.gmtime`` (i.e. read as seconds since the
    epoch, in UTC) and only the hour/minute/second fields are formatted, so
    the hour field wraps around once a full day has elapsed.
    """
    utc_fields = time.gmtime(sec)
    return time.strftime("%H:%M:%S", utc_fields)


def make_df(data: np.ndarray, cols: list[str]) -> pd.DataFrame:
    """Wrap ``data`` in a DataFrame whose columns are labelled with ``cols``."""
    frame = pd.DataFrame(data=data, columns=cols)
    return frame


def generate_time_intervals(
    series: pd.Series, rolling_window: int = 5, threshold: int = 2, pretty: bool = True
) -> list[Union[tuple[str, str], tuple[int, int]]]:
    """Find padded index intervals where ``series`` is densely "on".

    Steps:

    1. Take a centered rolling sum over ``rolling_window`` rows and keep the
       index labels whose sum is at least ``threshold``.
    2. Group those labels into runs of consecutive labels (step of exactly 1).
    3. Keep only runs where ``last - first >= rolling_window``.
    4. Widen each run by ``rolling_window + threshold`` on both sides, clamping
       at 0.

    Args:
        series: Values to scan. The run detection relies on an integer index
            that increases by 1 per row (presumably one row per second, since
            labels are fed to ``pretty_time`` -- confirm against the caller).
        rolling_window: Size of the centered rolling window.
        threshold: Minimum rolling sum for a row to count as a hit.
        pretty: If true, format both bounds with ``pretty_time``; otherwise
            return the raw index values.

    Returns:
        One ``(start, end)`` tuple per surviving run, in index order. Because
        each run is padded independently, neighbouring intervals may overlap,
        and ``end`` is not clamped to the length of ``series``.

    Raises:
        AssertionError: If ``rolling_window`` is smaller than ``threshold``.
    """
    assert (
        rolling_window >= threshold
    ), "rolling_window must be greater than or equal to threshold"

    window_sums = series.rolling(rolling_window, center=True).sum()
    # NaN sums (window edges) compare False and are dropped here.
    hit_labels = window_sums[window_sums >= threshold].index.to_series()
    if hit_labels.empty:
        return []

    # A new run starts wherever the gap to the previous hit label is not 1.
    run_id = hit_labels.diff().ne(1).cumsum()
    runs = hit_labels.groupby(run_id).agg(["first", "last"])
    runs = runs.loc[(runs["last"] - runs["first"]) >= rolling_window]

    padding = rolling_window + threshold
    fmt = pretty_time if pretty else (lambda value: value)
    # Build the tuples in one plain pass rather than via DataFrame.applymap
    # (deprecated in recent pandas); .tolist() yields plain Python scalars.
    return [
        (fmt(max(0, first - padding)), fmt(max(0, last + padding)))
        for first, last in zip(runs["first"].tolist(), runs["last"].tolist())
    ]


def generate_json(
    file: pathlib.Path,
    data: np.ndarray,
    cols: list[str],
    rolling_window: int = 5,
    threshold: int = 2,
    pretty: bool = True,
) -> None:
    """Compute the time intervals of every column and write them to ``file`` as JSON.

    ``data`` is wrapped with ``make_df(data, cols)``, each column is passed to
    ``generate_time_intervals`` with the given ``rolling_window``,
    ``threshold`` and ``pretty`` settings, and the resulting mapping of column
    label to list of ``(start, end)`` pairs is written with an indent of 4.

    Args:
        file: Destination path of the JSON document.
        data: 2-D values, one column per entry of ``cols``.
        cols: Column labels; they become the top-level JSON keys.
        rolling_window: Forwarded to ``generate_time_intervals``.
        threshold: Forwarded to ``generate_time_intervals``.
        pretty: Forwarded to ``generate_time_intervals``.
    """
    frame = make_df(data, cols)
    # Collect results in an explicit mapping instead of DataFrame.apply:
    # apply() expands list results into a DataFrame when every column happens
    # to yield the same number of intervals, which silently changes the JSON
    # layout depending on the data.
    intervals_by_col = {
        label: generate_time_intervals(column, rolling_window, threshold, pretty)
        for label, column in frame.items()
    }
    pd.Series(intervals_by_col, dtype=object).to_json(file, indent=4)


In [3]:
# Input/output locations for this run.
# NOTE(review): these are absolute paths on one machine; a fresh checkout
# cannot re-run this cell without editing them.
json_file = pathlib.Path('/home/mahyar/test/result_new.json')
# Despite its name, `data_file` holds the object returned by np.load (it is
# passed on as the `data` array below), not a path.
data_file = np.load('/home/mahyar/test/arr.np')
# SECURITY: pickle.load can execute arbitrary code embedded in the file;
# only load pickles from a trusted source.
with open('/home/mahyar/test/PICS.PICKLE', 'rb') as f:
    pics = pickle.load(f)
# The keys of the unpickled mapping are used as the DataFrame column labels.
cols = list(pics.keys())

In [22]:
# Write the intervals of every column to `json_file`, using the default
# rolling_window/threshold and HH:MM:SS-formatted bounds.
generate_json(json_file, data_file, cols, pretty=True)

In [73]:
# Preview the intervals for the column at position 1 only (default
# rolling_window/threshold, pretty-printed bounds).
generate_time_intervals(make_df(data_file, cols).iloc[:, 1])

[('00:04:53', '00:05:25'),
 ('00:05:19', '00:05:43'),
 ('00:08:59', '00:09:18'),
 ('00:16:55', '00:17:16'),
 ('00:21:28', '00:21:47'),
 ('00:34:13', '00:34:51'),
 ('00:37:05', '00:37:27'),
 ('00:37:35', '00:37:56'),
 ('00:42:28', '00:42:51'),
 ('00:47:11', '00:47:31'),
 ('00:47:22', '00:47:41')]

In [60]:
# Debugging aid: keep only the rows of column 1 selected by its own values
# (presumably a boolean column -- confirm), take the first 50 of them and
# compute a trailing 5-row rolling sum.
# NOTE: the filter runs *before* the rolling window, so each window spans five
# kept rows regardless of the gaps between their index labels; that is why
# every complete window sums to 5.0.
make_df(data_file, cols).iloc[:, 1].loc[lambda x: x][:50].rolling(5).sum()

22      NaN
301     NaN
302     NaN
303     NaN
304     5.0
305     5.0
306     5.0
307     5.0
308     5.0
309     5.0
310     5.0
311     5.0
314     5.0
315     5.0
316     5.0
317     5.0
327     5.0
328     5.0
330     5.0
333     5.0
334     5.0
336     5.0
378     5.0
379     5.0
547     5.0
548     5.0
549     5.0
550     5.0
559     5.0
560     5.0
561     5.0
566     5.0
698     5.0
798     5.0
910     5.0
1023    5.0
1024    5.0
1025    5.0
1026    5.0
1027    5.0
1028    5.0
1204    5.0
1230    5.0
1295    5.0
1297    5.0
1298    5.0
1299    5.0
1757    5.0
1767    5.0
1768    5.0
Name: Hoseyn Alaei, dtype: float64