In [5]:
import pandas as pd
import json
import os

In [6]:
# Identifier passed as the "Origin" of the generated payload; it is also
# embedded in every record Id and in the output file name.
origin_json = "202409PROD01JVMHEAP"

# Location of the raw metrics export, relative to the notebook's working
# directory.
data_file_path = os.path.join(
    "..",
    "data",
    "taxone_infra_reqs_13092024_1500_1600_jvm_heap.json",
)

In [7]:

# The export is a nested JSON document (data -> attributes -> times/values),
# not a table, so it is parsed with the standard json module rather than
# pd.read_json, which would coerce the document into a DataFrame first.
with open(data_file_path, encoding="utf-8") as f:
    data = json.load(f)

attributes = data["data"]["attributes"]

# Sample times as integers; they are interpreted as epoch milliseconds when
# the DataFrame column is converted later in the notebook.
times = attributes["times"]

# "values" is a list of lists; only the first inner list is used here.
# NOTE(review): presumably one inner list per metric series - confirm that the
# export never carries more than one series that should be kept.
values = attributes["values"][0]

times, values

([1726250400000,
  1726250700000,
  1726251000000,
  1726251300000,
  1726251600000,
  1726251900000,
  1726252200000,
  1726252500000,
  1726252800000,
  1726253100000,
  1726253400000,
  1726253700000],
 [36.978315,
  40.770189,
  43.723675,
  40.639361,
  36.641989,
  40.785003,
  43.060353,
  42.009965,
  40.181335,
  41.938788,
  36.945715,
  44.707987])

In [8]:
# Build one row per sample by pairing each time with its reading.
# NOTE(review): the reading column is called 'cpu_usage' although the origin
# and the input file name refer to JVM heap. The name is kept because
# create_custom_fields reads this exact column - confirm the intended label.
dataset = pd.DataFrame(dict(timestamp=times, cpu_usage=values))

dataset

Unnamed: 0,timestamp,cpu_usage
0,1726250400000,36.978315
1,1726250700000,40.770189
2,1726251000000,43.723675
3,1726251300000,40.639361
4,1726251600000,36.641989
5,1726251900000,40.785003
6,1726252200000,43.060353
7,1726252500000,42.009965
8,1726252800000,40.181335
9,1726253100000,41.938788


In [9]:
# Interpret the raw integers as milliseconds since the Unix epoch. The result
# is timezone-naive.
# NOTE(review): the displayed values start at 18:00 while the input file name
# mentions a 1500_1600 window - presumably that window is local time; confirm
# whether downstream consumers expect UTC or local timestamps.
dataset = dataset.assign(
    timestamp=pd.to_datetime(dataset["timestamp"], unit="ms")
)
dataset

Unnamed: 0,timestamp,cpu_usage
0,2024-09-13 18:00:00,36.978315
1,2024-09-13 18:05:00,40.770189
2,2024-09-13 18:10:00,43.723675
3,2024-09-13 18:15:00,40.639361
4,2024-09-13 18:20:00,36.641989
5,2024-09-13 18:25:00,40.785003
6,2024-09-13 18:30:00,43.060353
7,2024-09-13 18:35:00,42.009965
8,2024-09-13 18:40:00,40.181335
9,2024-09-13 18:45:00,41.938788


In [10]:
import time

def create_custom_fields(row):
    """Build the ``CustomFields`` list for a single record.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pandas.Series`` or ``dict``)
        Must provide the keys ``"timestamp"`` and ``"cpu_usage"``. A non-null
        ``"timestamp"`` must expose ``strftime``.

    Returns
    -------
    list[dict]
        One dict per field, in the order timestamp, cpu_usage, each with the
        keys ``"FieldName"``, ``"FieldType"`` and ``"FieldValue"``.
        Datetime values are rendered as ``dd/mm/YYYY HH:MM:SS`` strings.
        Null values (``NaT``, ``NaN``, ``None``) are emitted as ``None`` so
        they serialise to JSON ``null``.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the expected fields.
    """
    # Insertion order of this mapping defines the order of the output list.
    field_types = {"timestamp": "datetime", "cpu_usage": "numeric"}

    custom_fields = []
    for field, field_type in field_types.items():
        field_value = row[field]

        if pd.isnull(field_value):
            # NaT is not JSON serialisable and NaN is written as the invalid
            # token "NaN"; None becomes a proper JSON null instead.
            field_value = None
        elif field_type == "datetime":
            field_value = field_value.strftime("%d/%m/%Y %H:%M:%S")

        custom_fields.append(
            {
                "FieldName": field,
                "FieldType": field_type,
                "FieldValue": field_value,
            }
        )

    return custom_fields


def generate_json_files_in_chunks(origin, df, chunk_size, output_dir="../output"):
    """Write ``df`` to one or more compact JSON files of at most ``chunk_size`` records.

    Each file holds ``{"Origin": origin, "Data": [...]}``. Every record in
    ``"Data"`` carries an ``"Id"`` built from ``origin`` and the row's index
    label zero-padded to 10 digits, a ``"Date"`` formatted as
    ``dd/mm/YYYY HH:MM:SS``, a fixed ``"quantity"`` of 1 and the
    ``"CustomFields"`` produced by ``create_custom_fields``.

    Parameters
    ----------
    origin : str
        Value of the ``"Origin"`` key; also used in record ids and file names.
    df : pandas.DataFrame
        Source rows. Needs a ``"timestamp"`` column whose values expose
        ``strftime``, plus whatever ``create_custom_fields`` reads. The index
        labels must be integers (they are formatted with ``:010``).
    chunk_size : int
        Maximum number of records per file; must be positive.
    output_dir : str, optional
        Directory that receives the files. It is created if missing.
        Defaults to ``"../output"``.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive.

    Notes
    -----
    When a single file is enough, its name is
    ``taxone_<origin>_comportamental_<YYYYmmddHHMMSS>.json``. When several
    files are needed, a ``_NNN`` part number is appended after the timestamp,
    so parts written within the same second no longer overwrite each other.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    # Ceiling division: number of parts needed to cover every row.
    num_chunks = (len(df) + chunk_size - 1) // chunk_size
    if num_chunks == 0:
        return  # Empty frame: nothing to write.

    os.makedirs(output_dir, exist_ok=True)

    # Taken once so that all parts of the same run share one timestamp.
    timestamp_file = time.strftime("%Y%m%d%H%M%S")

    for i in range(num_chunks):
        chunk_df = df.iloc[i * chunk_size : (i + 1) * chunk_size]

        data = {
            "Origin": origin,
            "Data": [
                {
                    "Id": f"{origin}-{index:010}",
                    "Date": row["timestamp"].strftime("%d/%m/%Y %H:%M:%S"),
                    "quantity": 1,
                    "CustomFields": create_custom_fields(row),
                }
                for index, row in chunk_df.iterrows()
            ],
        }

        # The part number is only added when there is more than one file,
        # which keeps the single-file name unchanged.
        part_suffix = f"_{i + 1:03d}" if num_chunks > 1 else ""
        chunk_filename = (
            f"{output_dir}/taxone_{origin}_comportamental_{timestamp_file}"
            f"{part_suffix}.json"
        )
        with open(chunk_filename, "w", encoding="utf-8") as json_file:
            json.dump(data, json_file, separators=(",", ":"), ensure_ascii=False)
        print(f"Arquivo {chunk_filename} gerado com sucesso.")


# Export the dataset, splitting the JSON into parts of at most 100000 records each.
generate_json_files_in_chunks(origin_json, dataset, chunk_size=100000)

Arquivo ../output/taxone_202409PROD01JVMHEAP_comportamental_20240918102523.json gerado com sucesso.
