In [14]:
!pip -q install minio pandas pyarrow

In [15]:
import os

from minio import Minio
from minio.error import S3Error

In [16]:
# Connection settings for the MinIO server.
# Each value can be overridden through an environment variable of the same
# name, so real credentials never need to be written into the notebook.
# The fallbacks are the values previously hard-coded here.
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "localhost:9000")
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "minioadmin123")
# The TLS flag arrives as text; only 1/true/yes/on (case-insensitive) enable it.
MINIO_SECURE = os.environ.get("MINIO_SECURE", "false").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}

In [17]:
# Build the MinIO client from the connection settings defined above;
# every argument is passed by keyword so the call reads as a settings list.
client = Minio(
    endpoint=MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=MINIO_SECURE,
)

In [18]:
# Connectivity smoke test: list the buckets visible to these credentials.
# An S3Error is reported as text instead of a traceback.
try:
    buckets = client.list_buckets()
except S3Error as err:
    print("S3Error:", err)
else:
    print("Connected. Buckets:", [bucket.name for bucket in buckets])

Connected. Buckets: ['hw-satantsev']


In [19]:
bucket_name = "hw-satantsev"

# Create the bucket only when it is missing, so re-running this cell is harmless.
if client.bucket_exists(bucket_name=bucket_name):
    print("Bucket already exists:", bucket_name)
else:
    client.make_bucket(bucket_name=bucket_name)
    print("Bucket created:", bucket_name)


Bucket already exists: hw-satantsev


In [20]:
import pandas as pd

# Three-row demo table, written one record per row.
df = pd.DataFrame(
    [
        {"id": 1, "name": "Alice", "score": 95.5},
        {"id": 2, "name": "Bob", "score": 88.0},
        {"id": 3, "name": "Charlie", "score": 91.2},
    ]
)

df

Unnamed: 0,id,name,score
0,1,Alice,95.5
1,2,Bob,88.0
2,3,Charlie,91.2


In [21]:
from io import BytesIO

object_name_csv = "data/sample.csv"

# Serialise the frame to CSV text (no index column) and encode it as UTF-8;
# put_object is given both the byte stream and its exact length.
csv_text = df.to_csv(index=False)
csv_bytes = csv_text.encode("utf-8")
csv_buffer = BytesIO(csv_bytes)

client.put_object(
    bucket_name=bucket_name,
    object_name=object_name_csv,
    data=csv_buffer,
    length=len(csv_bytes),
    content_type="text/csv",
)

uploaded_csv_uri = f"s3://{bucket_name}/{object_name_csv}"
print("Uploaded:", uploaded_csv_uri)


Uploaded: s3://hw-satantsev/data/sample.csv


In [22]:
# Fetch the CSV object and read its whole body into memory.
resp = client.get_object(bucket_name=bucket_name, object_name=object_name_csv)
try:
    downloaded = resp.read()
finally:
    # Close the response and release its connection even if the read fails.
    resp.close()
    resp.release_conn()

# Parse the downloaded bytes back into a DataFrame and display it.
csv_stream = BytesIO(downloaded)
df_back = pd.read_csv(csv_stream)
df_back


Unnamed: 0,id,name,score
0,1,Alice,95.5
1,2,Bob,88.0
2,3,Charlie,91.2


In [23]:
object_name_parquet = "data/sample.parquet"

# Write the frame as Parquet into an in-memory buffer (no index column),
# then take the raw bytes so the upload length is known up front.
parquet_buffer = BytesIO()
df.to_parquet(parquet_buffer, index=False)
parquet_bytes = parquet_buffer.getvalue()

# Upload from a fresh stream over the bytes.
parquet_stream = BytesIO(parquet_bytes)
client.put_object(
    bucket_name=bucket_name,
    object_name=object_name_parquet,
    data=parquet_stream,
    length=len(parquet_bytes),
    content_type="application/octet-stream",
)

uploaded_parquet_uri = f"s3://{bucket_name}/{object_name_parquet}"
print("Uploaded:", uploaded_parquet_uri)


Uploaded: s3://hw-satantsev/data/sample.parquet


In [24]:
# Fetch the Parquet object and read its whole body into memory.
resp = client.get_object(bucket_name=bucket_name, object_name=object_name_parquet)
try:
    downloaded = resp.read()
finally:
    # Close the response and release its connection even if the read fails.
    resp.close()
    resp.release_conn()

# Decode the downloaded bytes back into a DataFrame and display it.
parquet_download_stream = BytesIO(downloaded)
df_parquet_back = pd.read_parquet(parquet_download_stream)
df_parquet_back


Unnamed: 0,id,name,score
0,1,Alice,95.5
1,2,Bob,88.0
2,3,Charlie,91.2
