In [1]:
import os

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

In [2]:
import os

# Build a synthetic "detections" table and persist it as Parquet.
N_DETECTIONS = 100          # number of synthetic rows to generate
SEED = 42                   # fixed seed: re-running the cell regenerates identical data
TS_2021_START = 1609459200  # 2021-01-01T00:00:00Z as Unix seconds (inclusive lower bound)
TS_2022_START = 1640995200  # 2022-01-01T00:00:00Z as Unix seconds (exclusive upper bound)

# A dedicated seeded generator keeps the random columns reproducible and
# independent of NumPy's global random state.
rng = np.random.default_rng(SEED)

trans_data = {
    # 1-based ids: 1..N_DETECTIONS
    'geographical_location_oid': np.arange(1, N_DETECTIONS + 1, dtype=np.int64),
    # Camera ids drawn with replacement from 1..99 (upper bound is exclusive).
    'video_camera_oid': rng.integers(1, 100, size=N_DETECTIONS),
    # 0-based ids: 0..N_DETECTIONS-1
    'detection_oid': np.arange(N_DETECTIONS, dtype=np.int64),
    'item_name': [f'item_{i}' for i in range(N_DETECTIONS)],
    # Random timestamps in 2021 (UTC), as Unix seconds.
    'timestamp_detected': rng.integers(TS_2021_START, TS_2022_START, size=N_DETECTIONS),
}

df = pd.DataFrame(trans_data)

# Explicit Arrow schema so the Parquet column types do not depend on
# whatever dtypes pandas/NumPy inferred on this platform.
schema = pa.schema([
    ('geographical_location_oid', pa.int64()),
    ('video_camera_oid', pa.int64()),
    ('detection_oid', pa.int64()),
    ('item_name', pa.string()),
    ('timestamp_detected', pa.int64()),
])

table = pa.Table.from_pandas(df, schema=schema)
display(table)

# write_table does not create missing parent directories, so make sure it exists.
os.makedirs('./data', exist_ok=True)
pq.write_table(table, './data/trans.parquet')


pyarrow.Table
geographical_location_oid: int64
video_camera_oid: int64
detection_oid: int64
item_name: string
timestamp_detected: int64
----
geographical_location_oid: [[1,2,3,4,5,...,96,97,98,99,100]]
video_camera_oid: [[21,86,82,38,73,...,34,24,87,45,50]]
detection_oid: [[0,1,2,3,4,...,95,96,97,98,99]]
item_name: [["item_0","item_1","item_2","item_3","item_4",...,"item_95","item_96","item_97","item_98","item_99"]]
timestamp_detected: [[1638409500,1638955689,1624946747,1612021762,1630675440,...,1631977510,1632600580,1624492628,1639167816,1613540319]]

In [7]:
# Build the geographical-location reference table and persist it as Parquet.
N_LOCATIONS = 100  # number of reference rows to generate

sample_data = {
    # 1-based ids: 1..N_LOCATIONS
    'geographical_location_oid': list(range(1, N_LOCATIONS + 1)),
    # Names are numbered from 0, so id 1 pairs with 'location_0', id 2 with 'location_1', ...
    'geographical_location': [f'location_{i}' for i in range(N_LOCATIONS)],
}

df = pd.DataFrame(sample_data)

# Explicit Arrow schema so the Parquet column types are fixed.
schema = pa.schema([
    ('geographical_location_oid', pa.int64()),
    ('geographical_location', pa.string()),
])

table = pa.Table.from_pandas(df, schema=schema)
display(table)

# Create the output directory here as well (a no-op when it already exists), so
# this cell does not fail if it is run before any other cell has created it.
os.makedirs('./data', exist_ok=True)
pq.write_table(table, './data/ref_table.parquet')


pyarrow.Table
geographical_location_oid: int64
geographical_location: string
----
geographical_location_oid: [[1,2,3,4,5,...,96,97,98,99,100]]
geographical_location: [["location_0","location_1","location_2","location_3","location_4",...,"location_95","location_96","location_97","location_98","location_99"]]