In [None]:
# Generation parameters, kept as strings; the generation cell converts them
# with int() before use.
num_rows = "10000"  # rows requested per fake.json() call (telemetry and events each)
num_files = "40"  # number of log files to write

In [None]:
!pip install faker

In [None]:
import itertools
import json
from datetime import datetime
from functools import partial
from itertools import chain
from pathlib import Path
from datetime import datetime, timedelta
from faker import Faker
from faker.providers import BaseProvider
from random import randint

In [None]:
# Root directory for generated logs; the generation loop writes each file into
# a per-day (YYYY-MM-DD) subdirectory beneath it.
logs_dir = Path('/') / 'hmd' / 'data' / 'raw'

In [None]:
# Faker instance used to generate the fake data in this notebook.
fake = Faker()
# NOTE(review): `localized` is not referenced anywhere in the visible code —
# confirm whether it is still needed.
localized = False

def _rand_int(base):
    bottom = base - 2
    top = base + 2
    if base == 0:
        bottom = base
        top = base + 4
    elif base == 1:
        bottom = base - 1
        top = base + 3
    return randint(bottom, top)

def _rand_float(min, max, base):
    """Return a random float near ``base`` that stays inside ``[min, max]``.

    The sampling window is ``[base - 2, base + 2]``. If its lower edge falls
    below ``min`` the window becomes ``[min, min + 4]``; otherwise, if its
    upper edge exceeds ``max``, it becomes ``[max - 4, max]``. The value is
    produced by ``fake.pyfloat`` with two digits after the decimal point.
    """
    window = (base - 2, base + 2)
    if window[0] < min:
        window = (min, min + 4)
    elif window[1] > max:
        window = (max - 4, max)

    low, high = window
    return fake.pyfloat(right_digits=2, min_value=low, max_value=high)


# Per-axis angle generators: each takes the previous angle as its only
# argument and calls _rand_float with the bounds fixed to [-90.0, 90.0].
rand_pitch = partial(_rand_float, -90.0, 90.0)
rand_roll = partial(_rand_float, -90.0, 90.0)
rand_yaw = partial(_rand_float, -90.0, 90.0)


class RotationProvider(BaseProvider):
    """Faker provider that yields a sequence of related rotation readings.

    Every call to :meth:`rotation` derives new pitch/roll/yaw values from the
    values returned by the previous call, so consecutive readings stay close
    to each other.
    """

    # Most recent reading. The class-level value is the starting point; the
    # first call to rotation() shadows it with an instance attribute.
    last = {"pitch": 0.0, "roll": 0.0, "yaw": 0.0}

    def rotation(self) -> dict:
        """Return the next rotation reading.

        Returns:
            A dict with ``"pitch"``, ``"roll"`` and ``"yaw"`` keys, each
            produced by the matching ``rand_*`` generator from the previous
            value of that key. The same dict is stored as ``self.last``.
        """
        previous = self.last
        self.last = {
            "pitch": rand_pitch(previous["pitch"]),
            "roll": rand_roll(previous["roll"]),
            "yaw": rand_yaw(previous["yaw"]),
        }

        return self.last

class TimestampRollingProvider(BaseProvider):
    """Faker provider that hands out strictly increasing ISO timestamps."""

    # Current position of the rolling clock. The default is evaluated once,
    # when the class body runs.
    last: datetime = fake.past_datetime()

    def set_base_timestamp(self, x):
        """Reset the rolling clock so the next timestamp is computed from ``x``."""
        self.last = x

    def timestamp_rolling(self) -> str:
        """Advance the clock by 1-100 ms and return it in ISO 8601 format."""
        step = timedelta(milliseconds=randint(1, 100))
        self.last += step
        return self.last.isoformat()

class PositionProvider(BaseProvider):
    """Faker provider that yields a sequence of related position readings.

    Every call to :meth:`position` derives new x/y/z values from the values
    returned by the previous call, so consecutive readings stay close to
    each other.
    """

    # Most recent reading. The class-level value is the starting point; the
    # first call to position() shadows it with an instance attribute.
    last = {"x": 0, "y": 0, "z": 0}

    def position(self) -> dict:
        """Return the next position reading.

        Returns:
            A dict with ``"x"``, ``"y"`` and ``"z"`` keys, each produced by
            ``_rand_int`` from the previous value of that key. The same dict
            is stored as ``self.last``.
        """
        previous = self.last
        self.last = {
            "x": _rand_int(previous["x"]),
            "y": _rand_int(previous["y"]),
            "z": _rand_int(previous["z"]),
        }

        return self.last

def pluck(key, dictionary, default=None):
    """Look up ``key`` in ``dictionary``, falling back to ``default``.

    The key comes first so the function can be specialised with
    ``functools.partial`` into a single-argument getter.
    """
    value = dictionary.get(key, default)
    return value

# Sort key for log records: returns a record's "timestamp" value, or None when
# the key is absent.
timestamp_getter = partial(pluck, "timestamp")

# Register the custom providers on the `fake` instance created at the top of
# this cell, so their methods (position, rotation, timestamp_rolling,
# set_base_timestamp) can be called through it.
fake.add_provider(PositionProvider)
fake.add_provider(RotationProvider)
fake.add_provider(TimestampRollingProvider)

print('faker providers defined')

In [None]:
# Generate `num_files` log files. Each file interleaves `num_rows` telemetry
# records and `num_rows` event records, ordered by timestamp, one JSON object
# per line.
num_files_int = int(num_files)
num_rows_int = int(num_rows)

# Column spec for telemetry records (values name Faker providers; a leading
# "@" marks a literal string).
TELEMETRY_COLUMNS = {
    "timestamp": "timestamp_rolling",
    "type": "@telemetry",
    "position": "position",
    "rotation": "rotation",
}

# Codes an event's "detail.code" field is picked from.
EVENT_CODES = (100, 104, 107, 109, 110, 111, 112, 200, 250, 400)

# Column spec for event records; "detail" is a nested object.
EVENT_COLUMNS = [
    ("timestamp", "timestamp_rolling"),
    ("type", "@event"),
    (
        "detail",
        (
            ("code", "random_element", {"elements": EVENT_CODES}),
            ("message", "text"),
        ),
    ),
]

for i in range(num_files_int):
    base_dt = fake.past_datetime()

    # Both streams restart from the same base timestamp so that they overlap
    # in time before being merged.
    fake.set_base_timestamp(base_dt)
    telem_data_str = fake.json(data_columns=TELEMETRY_COLUMNS, num_rows=num_rows_int)
    telem_data = json.loads(telem_data_str)

    fake.set_base_timestamp(base_dt)
    event_data_str = fake.json(data_columns=EVENT_COLUMNS, num_rows=num_rows_int)
    event_data = json.loads(event_data_str)

    # Merge the two streams into one timeline.
    data = sorted(chain(telem_data, event_data), key=timestamp_getter)
    log_lines = (json.dumps(record) + "\n" for record in data)

    # Directory is the day of the simulated data; the file name is the wall
    # clock time at which the file is generated.
    now = datetime.now()
    day = base_dt.strftime("%Y-%m-%d")
    file_path = logs_dir / day / f"{now.isoformat()}.demo.json"
    file_path.parent.mkdir(exist_ok=True, parents=True)

    progress = f"{i+1}/{num_files}"
    progress_width = len(str(num_files)) * 2 + 1
    print(progress.rjust(progress_width), str(file_path))

    with file_path.open("w") as file_handle:
        file_handle.writelines(log_lines)