In [None]:
# Run parameters. They are held as strings here and converted with int() in
# the generation cell before use.
# NOTE(review): presumably string-typed because an external tool injects them
# as text — confirm before changing the types.
# Number of iterations of the outermost (date) loop.
num_dates = "50"
# Number of device iterations per date.
num_devices = "4"
# Number of case iterations per device.
num_cases = "6"
# Number of file iterations per case.
num_files = "5"
# Rows requested from fake.json() for each telemetry set and each event set.
num_rows = "10000"

In [None]:
!pip install faker

In [None]:
import copy
import itertools
import json
import sys
from datetime import datetime, date
from functools import partial
from itertools import chain
from pathlib import Path
from datetime import datetime, timedelta
from faker import Faker
from faker.providers import BaseProvider
from random import randint
from dateutil.relativedelta import relativedelta

In [None]:
# Filesystem layout, all rooted at /hmd.
project_name = 'demo-notebooks-idl'
hmd_dir = Path('/hmd')
# Root directory under which the generated log files are written.
logs_dir = hmd_dir.joinpath('data', 'raw')
project_dir = hmd_dir.joinpath('projects', project_name)
idl_dir = project_dir.joinpath('src', 'idl')

In [None]:
# Faker instance used while this cell executes (the TimestampRollingProvider
# class body calls fake.past_datetime()). The name is rebound to a new
# Faker() further down in the same cell, before the providers are registered.
fake = Faker()
# NOTE(review): `localized` is not referenced anywhere in the visible cells —
# confirm whether it is still needed.
localized = False

def _rand_int(base):
    bottom = base - 2
    top = base + 2
    if base == 0:
        bottom = base
        top = base + 4
    elif base == 1:
        bottom = base - 1
        top = base + 3
    return randint(bottom, top)

def _rand_float(min, max, base):
    """Ask Faker for a two-decimal float from a window around ``base``.

    The window starts as ``[base - 2, base + 2]``. If its lower edge falls
    below ``min`` it becomes ``[min, min + 4]``; otherwise, if its upper edge
    exceeds ``max`` it becomes ``[max - 4, max]``.

    Args:
        min: Lowest allowed lower edge for the window.
        max: Highest allowed upper edge for the window.
        base: Value the window is centred on, typically the previous reading.

    Returns:
        The value produced by ``fake.pyfloat`` for the chosen window.
    """
    low, high = base - 2, base + 2
    if low < min:
        low, high = min, min + 4
    elif high > max:
        low, high = max - 4, max
    return fake.pyfloat(right_digits=2, min_value=low, max_value=high)


# Per-axis generators: _rand_float with its min/max bounds pre-bound to
# -90.0 and 90.0, leaving only the previous value (base) to be supplied.
_ROTATION_BOUNDS = (-90.0, 90.0)
rand_pitch = partial(_rand_float, *_ROTATION_BOUNDS)
rand_roll = partial(_rand_float, *_ROTATION_BOUNDS)
rand_yaw = partial(_rand_float, *_ROTATION_BOUNDS)


class RotationProvider(BaseProvider):
    """Faker provider that yields successive pitch/roll/yaw readings.

    Each reading is derived from the previous one, so consecutive calls
    produce values that stay close to each other.
    """

    # Starting reading. ``rotation`` rebinds ``last`` on the instance, so this
    # class-level dict itself is never mutated.
    last = {"pitch": 0.0, "roll": 0.0, "yaw": 0.0}

    def rotation(self) -> dict:
        """Return the next rotation reading.

        Returns:
            dict: ``{"pitch": ..., "roll": ..., "yaw": ...}`` where each value
            comes from the matching ``rand_*`` helper called with the
            previous value for that axis. The same dict is stored as
            ``self.last`` for the next call.
        """
        previous = self.last
        self.last = {
            "pitch": rand_pitch(previous["pitch"]),
            "roll": rand_roll(previous["roll"]),
            "yaw": rand_yaw(previous["yaw"]),
        }

        return self.last

class TimestampRollingProvider(BaseProvider):
    """Faker provider that yields strictly increasing ISO-8601 timestamps."""

    # Default starting point, evaluated once when the class body runs.
    last: datetime = fake.past_datetime()

    def set_base_timestamp(self, x):
        """Restart the rolling clock at ``x`` (a ``datetime``)."""
        self.last = x

    def timestamp_rolling(self) -> str:
        """Advance the clock by a random 1-100 ms and return it as ISO text."""
        step = timedelta(milliseconds=randint(1, 100))
        self.last = self.last + step
        return self.last.isoformat()

class PositionProvider(BaseProvider):
    """Faker provider that yields successive x/y/z integer positions.

    Each position is derived from the previous one via ``_rand_int``, so
    consecutive calls produce coordinates that stay close to each other.
    """

    # Starting position. ``position`` rebinds ``last`` on the instance, so this
    # class-level dict itself is never mutated.
    last = {"x": 0, "y": 0, "z": 0}

    def position(self) -> dict:
        """Return the next position.

        Returns:
            dict: ``{"x": ..., "y": ..., "z": ...}`` where each coordinate is
            ``_rand_int`` of the previous coordinate. The same dict is stored
            as ``self.last`` for the next call.
        """
        previous = self.last
        self.last = {
            "x": _rand_int(previous["x"]),
            "y": _rand_int(previous["y"]),
            "z": _rand_int(previous["z"]),
        }

        return self.last

def pluck(key, dictionary, default=None):
    """Look up ``key`` in ``dictionary`` through its ``get`` method.

    The key comes first so the function can be specialised with
    ``functools.partial`` and then used as a sort key.

    Returns:
        The stored value, or ``default`` when ``key`` is absent.
    """
    found = dictionary.get(key, default)
    return found

# Sort key that extracts the "timestamp" field from a generated record.
timestamp_getter = partial(pluck, "timestamp")

# Fresh Faker instance, extended with the three custom providers so their
# public methods become available on ``fake``.
fake = Faker()
for provider_cls in (PositionProvider, RotationProvider, TimestampRollingProvider):
    fake.add_provider(provider_cls)

print('faker providers defined')

In [None]:
def gen_telem_data(case_num_str, serial_number):
    """Generate telemetry records for one case on one device.

    The number of rows is taken from the module-level ``num_rows_int``, which
    must be defined before this function is called. Timestamps continue from
    wherever the rolling timestamp provider currently stands.

    Args:
        case_num_str: Case identifier written verbatim into every record.
        serial_number: Device serial number written verbatim into every record.

    Returns:
        list[dict]: One dict per row with keys ``case``, ``serial_number``,
        ``timestamp``, ``type`` (always ``"telemetry"``), ``position`` and
        ``rotation``.
    """
    telem_data_str = fake.json(
        data_columns={
            # A leading "@" makes Faker emit the rest of the string literally.
            "case": f"@{case_num_str}",
            "serial_number": f"@{serial_number}",
            "timestamp": "timestamp_rolling",
            "type": "@telemetry",
            "position": "position",
            "rotation": "rotation",
        },
        num_rows=num_rows_int,
    )
    telem_data = json.loads(telem_data_str)
    # Faker's json() serialises a bare object instead of a one-element array
    # when num_rows == 1; wrap it so callers always receive a list of records.
    if isinstance(telem_data, dict):
        telem_data = [telem_data]
    return telem_data

def gen_event_data(case_num_str, serial_number):
    """Generate event records for one case on one device.

    The number of rows is taken from the module-level ``num_rows_int``, which
    must be defined before this function is called. Timestamps continue from
    wherever the rolling timestamp provider currently stands.

    Args:
        case_num_str: Case identifier written verbatim into every record.
        serial_number: Device serial number written verbatim into every record.

    Returns:
        list[dict]: One dict per row with keys ``case``, ``serial_number``,
        ``timestamp``, ``type`` (always ``"event"``) and ``detail``, where
        ``detail`` holds a ``code`` picked from a fixed set and a ``message``.
    """
    event_codes = (100, 104, 107, 109, 110, 111, 112, 200, 250, 400)
    event_data_str = fake.json(
        data_columns=[
            # A leading "@" makes Faker emit the rest of the string literally.
            ("case", f"@{case_num_str}"),
            ("serial_number", f"@{serial_number}"),
            ("timestamp", "timestamp_rolling"),
            ("type", "@event"),
            (
                "detail",
                (
                    ("code", "random_element", {"elements": event_codes}),
                    ("message", "text"),
                ),
            ),
        ],
        num_rows=num_rows_int,
    )
    event_data = json.loads(event_data_str)
    # Faker's json() serialises a bare object instead of a one-element array
    # when num_rows == 1; wrap it so callers always receive a list of records.
    if isinstance(event_data, dict):
        event_data = [event_data]
    return event_data
print('data generators defined')

In [None]:
# Convert the string-typed run parameters to integers.
num_files_int = int(num_files)
num_rows_int = int(num_rows)
num_cases_int = int(num_cases)
num_dates_int = int(num_dates)
num_devices_int = int(num_devices)
total_num = num_dates_int * num_devices_int * num_cases_int * num_files_int


def _next_whole_second(moment):
    """Return the first whole-second datetime strictly after ``moment``."""
    return moment.replace(microsecond=0) + timedelta(seconds=1)


current_num = 1
first_day = date.today() - relativedelta(months=3)
for date_num in range(1, num_dates_int + 1):
    # Random moment between first_day and now; only its date part is reused.
    base_dt = fake.past_datetime(first_day)
    for device_num in range(1, num_devices_int + 1):
        # Same calendar day, fresh random time of day for each device.
        new_dt = fake.past_datetime(first_day)
        base_dt = datetime(base_dt.year, base_dt.month, base_dt.day, new_dt.hour, new_dt.minute, new_dt.second)

        day = base_dt.strftime("%Y-%m-%d")
        serial_number = str(fake.random_element([1001, 1004, 2205, 4423, 5555]))

        # Start time of the next case/file for this device. It is rolled
        # forward after every file so that each case id and each file name is
        # distinct. Previously both were derived from the unchanging base_dt,
        # so every (case, file) iteration overwrote the same path.
        # Consequence: the run now really leaves total_num files on disk.
        # NOTE: long runs can roll past midnight; files still go under the
        # `day` directory chosen for the device.
        next_start = base_dt
        for case_num in range(1, num_cases_int + 1):
            case_dt = next_start
            case_num_str = f"case_{case_dt.strftime('%Y%m%d_%H%M%S')}"
            for file_num in range(1, num_files_int + 1):
                file_dt = next_start

                # Telemetry and events both start rolling from the file's
                # start time, so they interleave once sorted.
                fake.set_base_timestamp(file_dt)
                telem_data = gen_telem_data(case_num_str, serial_number)

                fake.set_base_timestamp(file_dt)
                event_data = gen_event_data(case_num_str, serial_number)

                data = sorted(chain(telem_data, event_data), key=timestamp_getter)
                log_lines = (f"{json.dumps(row)}\n" for row in data)

                file_path = logs_dir / day / serial_number / case_num_str / f"{file_dt.isoformat()}.demo.json"
                file_path.parent.mkdir(exist_ok=True, parents=True)

                print(f"{current_num}/{total_num}".rjust(len(str(total_num)) * 2 + 1), str(file_path))
                with file_path.open("w") as file_handle:
                    file_handle.writelines(log_lines)
                current_num += 1

                # The next file starts on the first whole second after this
                # file's latest record (or after its start when it is empty),
                # which keeps file names at second resolution and unique.
                last_dt = datetime.fromisoformat(data[-1]["timestamp"]) if data else file_dt
                next_start = _next_whole_second(last_dt)

            # With zero files per case the clock has not moved; step it so the
            # next case still gets a distinct identifier.
            if next_start == case_dt:
                next_start = _next_whole_second(case_dt)