In [None]:
import os
import shutil

import boto3
import numpy as np
import pandas as pd
from dotenv import load_dotenv

In [None]:
# load_dotenv() runs first so that values from a .env file (python-dotenv)
# are available through the process environment for the lookups below.
load_dotenv()

# Credentials are None when the corresponding variable is not set.
aws_key, aws_secret = (
    os.environ.get(var_name)
    for var_name in ("ACCESS_KEY_ID", "SECRET_ACCESS_KEY")
)
# The region falls back to "eu-north-1" when REGION is not set.
region = os.environ.get("REGION", "eu-north-1")

In [None]:
# Build a synthetic measurement table with one row per minute.
# A fixed seed makes the generated values identical on every
# Restart & Run All, so downstream results can be compared between runs.
SEED = 42
rng = np.random.default_rng(SEED)

n = 1_000
df = pd.DataFrame({
    # One of three experiment labels per row; used later as the partition column.
    "experiment_id": rng.choice(["expA", "expB", "expC"], size=n),
    # Consecutive minute-spaced timestamps starting at 2025-01-01 00:00.
    "timestamp": pd.date_range("2025-01-01", periods=n, freq="min"),
    # Uniform values in [0, 100).
    "intensity": rng.random(size=n) * 100,
    # Standard-normal noise centred on 20.
    "temperature": 20 + rng.standard_normal(n)
})

In [None]:
# Write df as a parquet dataset partitioned by experiment_id (one
# "experiment_id=<value>" sub-directory per label).
#
# A partitioned write adds newly named part files and leaves existing ones in
# place, so re-running this cell against an existing directory would
# accumulate a second copy of the data. Start from an empty directory instead.
output_dir = "scattering_data.parquet"
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)

df.to_parquet(
    output_dir,
    engine="pyarrow",
    partition_cols=["experiment_id"],
    index=False
)

In [None]:
# Upload destination: target bucket and the key prefix placed in front of
# every uploaded object.
bucket = "scatterin-master-thesis"
prefix = "parquet-test/"

# Credentials and region come from the environment lookups made earlier in
# the notebook and are handed to the client explicitly.
s3_client_config = {
    "aws_access_key_id": aws_key,
    "aws_secret_access_key": aws_secret,
    "region_name": region,
}
s3 = boto3.client("s3", **s3_client_config)

In [None]:
def s3_key_for(local_path, local_root, key_prefix):
    """Return the S3 object key for a file inside the local dataset directory.

    The key is ``key_prefix`` followed by the path of ``local_path`` relative
    to ``local_root``. S3 keys use "/" as the separator, so the platform
    separator (a backslash on Windows) is replaced to keep the
    "experiment_id=<value>/<file>" layout intact on every OS.
    """
    relative_path = os.path.relpath(local_path, local_root)
    return key_prefix + relative_path.replace(os.sep, "/")


# Upload every file of the local dataset, mirroring its directory layout
# under the prefix.
# NOTE(review): objects already stored under the prefix by earlier uploads are
# not removed here - confirm whether the prefix should be emptied first.
local_root = "scattering_data.parquet"
for root, _, files in os.walk(local_root):
    for fname in files:
        local_path = os.path.join(root, fname)
        key = s3_key_for(local_path, local_root, prefix)
        s3.upload_file(local_path, bucket, key)

# Read the partitioned dataset back from S3

In [None]:
# Location of the uploaded dataset: everything stored under the prefix.
dataset_uri = "s3://{}/{}".format(bucket, prefix)

# Credentials and region handed to the S3 filesystem layer through pandas'
# storage_options argument.
storage_opts = dict(
    key=aws_key,
    secret=aws_secret,
    client_kwargs=dict(region_name=region),
)

# Only the "intensity" column is requested from the dataset.
df_intensity = pd.read_parquet(
    dataset_uri,
    engine="pyarrow",
    columns=["intensity"],
    storage_options=storage_opts,
)

In [None]:
# Show the first five rows of the frame read back from S3.
df_intensity.head(n=5)