In [10]:
from itertools import islice


def batched(iterable, n):
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError("n must be at least one")
    it = iter(iterable)
    while batch := tuple(islice(it, n)):
        yield batch

In [12]:
# Python code to
# demonstrate readlines()
from itertools import islice
from collections import deque

L = ["L{}\n".format(i) for i in range(0, 10)]

# writing to file
file1 = open(file="./myfile.txt", mode="w")
file1.writelines(L)
file1.close()

# Using readlines()
with open("myfile.txt", "r") as file1:
    while True:
        r1: deque[str] = deque(islice(file1, 3))
        if not r1:
            break
        print(r1)

deque(['L0\n', 'L1\n', 'L2\n'])
deque(['L3\n', 'L4\n', 'L5\n'])
deque(['L6\n', 'L7\n', 'L8\n'])
deque(['L9\n'])


In [1]:
l1: list[str] = [1, 2, 3, 4, 5, 100, 120]
l2: list[str] = [2, 3, 4, 5, 6]

l2 = l2 - l1
print(l2)

TypeError: unsupported operand type(s) for -: 'list' and 'list'

In [1]:
INPUT_PATH: str = "/home/python/host/data/"

In [2]:
import pandas as pd
import os


files: list[str] = os.listdir(path=INPUT_PATH)
files = [f for f in files if f.endswith(".zip")]

sensor_names: list[str] = [name.split("_")[0] for name in files]
print(sensor_names)

['MOS2E03230475']


In [6]:
from utils import standardize_dataframe

for f in files:
    full_data: pd.DataFrame = pd.read_csv(
        filepath_or_buffer="{}/{}".format(INPUT_PATH, f),
    )

    # Drop duplicated rows.
    full_data.drop_duplicates(inplace=True)

    # Sort values based on timestamps.
    full_data.sort_values(
        by=["timestamps"],
        ascending=True,
        inplace=True,
    )
    full_data = full_data.reset_index(drop=True)

    # Convert numerical values (input data) to float types.
    full_data_export: pd.DataFrame = full_data.copy()
    full_data.iloc[:, 5:] = full_data.iloc[:, 5:].astype("float")

    # Keep only the timestamp and the data columns.
    timestamp_col_index: int = full_data.columns.get_loc("timestamps")
    kept_cols: list[str] = full_data.columns[timestamp_col_index:]
    full_data = full_data[kept_cols]

    # Standardize data.


full_data.head()

Unnamed: 0,serial_number,nickname,animalID,sample_rate,timestamps,acc_axis1,acc_axis2,acc_axis3,acc_mag,lpf_axis1,...,dis_axis1_denoised,dis_axis2_denoised,dis_axis3_denoised,dis_mag_denoised,tilt_axis1,tilt_axis2,tilt_axis3,tilt_axis1_denoised,tilt_axis2_denoised,tilt_axis3_denoised
0,MOS2E03230475,,,30,2023-04-14 18:01:01.000,0.645,0.309,0.715,1.011302,0.636648,...,1874.156185,1694.906462,192.005856,2534.173547,39.120814,18.021817,44.646077,39.105397,18.027755,44.634478
1,MOS2E03230475,,,30,2023-04-14 18:01:01.034,0.633,0.309,0.719,1.006544,0.636649,...,1875.197894,1695.848197,192.113417,2535.581948,39.120815,18.021889,44.64612,39.105401,18.027766,44.634488
2,MOS2E03230475,,,30,2023-04-14 18:01:01.067,0.629,0.313,0.715,1.002415,0.63665,...,1876.239602,1696.789933,192.220978,2536.990348,39.120819,18.021961,44.646166,39.105405,18.027776,44.634498
3,MOS2E03230475,,,30,2023-04-14 18:01:01.100,0.633,0.309,0.715,1.003691,0.636651,...,1877.281309,1697.731668,192.328538,2538.398747,39.12082,18.022032,44.64621,39.105409,18.027787,44.634509
4,MOS2E03230475,,,30,2023-04-14 18:01:01.134,0.633,0.309,0.719,1.006544,0.636651,...,1878.323016,1698.673403,192.436099,2539.807146,39.120823,18.022104,44.646254,39.105413,18.027798,44.634519
