In [1]:
import pandas as pd

import numpy as np

def make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None):
    index = pd.date_range(start=start, end=end, freq=freq, name="timestamp")
    n = len(index)
    state = np.random.RandomState(seed)
    columns = {
        "name": state.choice(["Alice", "Bob", "Charlie"], size=n),
        "id": state.poisson(1000, size=n),
        "x": state.rand(n) * 2 - 1,
        "y": state.rand(n) * 2 - 1,
    }
    df = pd.DataFrame(columns, index=index, columns=sorted(columns))
    if df.index[-1] == end:
        df = df.iloc[:-1]
    return df

In [2]:
ts = make_timeseries(freq="30s", seed=0)

ts.to_parquet("timeseries.parquet")

ts = pd.read_parquet("timeseries.parquet")

In [3]:
ts

Unnamed: 0_level_0,id,name,x,y
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01 00:00:00,1041,Alice,0.889987,0.281011
2000-01-01 00:00:30,988,Bob,-0.455299,0.488153
2000-01-01 00:01:00,1018,Alice,0.096061,0.580473
2000-01-01 00:01:30,992,Bob,0.142482,0.041665
2000-01-01 00:02:00,960,Bob,-0.036235,0.802159
...,...,...,...,...
2000-12-30 23:58:00,1022,Alice,0.266191,0.875579
2000-12-30 23:58:30,974,Alice,-0.009826,0.413686
2000-12-30 23:59:00,1028,Charlie,0.307108,-0.656789
2000-12-30 23:59:30,1002,Alice,0.202602,0.541335


In [4]:
ts.memory_usage(deep=True)

Index     8409608
id        8409608
name     56766826
x         8409608
y         8409608
dtype: int64

In [5]:
ts2 = ts.copy()

ts2["name"] = ts2["name"].astype("category")

ts2.memory_usage(deep=True)

Index    8409608
id       8409608
name     1051471
x        8409608
y        8409608
dtype: int64

In [6]:
ts2["id"] = pd.to_numeric(ts2["id"], downcast="unsigned")

ts2[["x", "y"]] = ts2[["x", "y"]].apply(pd.to_numeric, downcast="float")

ts2.dtypes

id        uint16
name    category
x        float32
y        float32
dtype: object

In [7]:
ts2.memory_usage(deep=True)

Index    8409608
id       2102402
name     1051471
x        4204804
y        4204804
dtype: int64

In [11]:
reduction = ts2.memory_usage(deep=True).sum() / ts.memory_usage(deep=True).sum()

print(f"{reduction*100:0.2f}%")

22.09%
