In [1]:
import pandas as pd
from minio import Minio
from io import BytesIO
import os
from dotenv import load_dotenv
import warnings
# NOTE(review): called without a category or module filter, this silences
# every warning for the rest of the session (deprecations included).
# Consider narrowing it to the specific warning that is known to be noisy.
warnings.filterwarnings("ignore")

# Load inference data from MinIO

In [2]:
# Read connection settings from the .env file one directory above the
# current working directory.
env_path = os.path.join("..", ".env")
load_dotenv(env_path)

# Fail fast with a readable message: without this check a missing host or
# port is silently formatted into the endpoint string as "None".
minio_host = os.getenv("minio_host")
minio_port = os.getenv("minio_port")
if not minio_host or not minio_port:
    raise RuntimeError(
        "'minio_host' and 'minio_port' must be set in the environment "
        f"or in {env_path!r}."
    )

# Create MinIO client
client = Minio(
    f"{minio_host}:{minio_port}",
    access_key=os.getenv("access_key"),
    secret_key=os.getenv("secret_key"),
    secure=False,  # connect without TLS
)

In [3]:
# Load the most recently modified Parquet object from a MinIO bucket
def load_from_minio(bucket_name: str) -> "pd.DataFrame | None":
    """Read the newest object in ``bucket_name`` into a DataFrame.

    Uses the module-level ``client`` to list the bucket, picks the entry
    with the greatest ``last_modified`` value, downloads it and parses the
    bytes with ``pd.read_parquet``.

    Parameters
    ----------
    bucket_name : str
        Name of the bucket to read from.

    Returns
    -------
    pd.DataFrame or None
        The parsed data, or ``None`` (after printing a message) when the
        bucket holds no object that can be ordered by modification time.

    Raises
    ------
    Exception
        Anything raised by the client calls or by ``pd.read_parquet`` is
        propagated; the response is closed and released first.
    """
    # Entries without a timestamp (e.g. prefix placeholders in a
    # non-recursive listing) cannot be compared against real timestamps,
    # so they are skipped rather than allowed to break the ordering.
    candidates = [
        obj
        for obj in client.list_objects(bucket_name)
        if obj.last_modified is not None
    ]

    if not candidates:
        print(f"No objects found in bucket '{bucket_name}'.")
        return None

    # Get latest file; max() avoids sorting the whole listing.
    latest = max(candidates, key=lambda obj: obj.last_modified)

    # Read data, always releasing the connection even if parsing fails.
    response = client.get_object(bucket_name, latest.object_name)
    try:
        df = pd.read_parquet(BytesIO(response.read()))
    finally:
        response.close()
        response.release_conn()

    print(f"loaded {latest.object_name} from bucket '{bucket_name}'")
    return df

In [4]:
df = load_from_minio("gold")
# load_from_minio returns None when the bucket has nothing to load; stop here
# with a clear message instead of an AttributeError on the next line.
assert df is not None, "Bucket 'gold' returned no data; nothing to preview."
df.head(10)

loaded sales_feature_20260106_222814.parquet from bucket 'gold'


Unnamed: 0,distributor,channel,sub_channel,city,product_name,product_class,sales_team,year,month,total_quantity,total_sales,avg_price,rolling_avg_3m_sales,sales_growth_pct
0,Carter-Conn,Hospital,Government,Bielsk Podlaski,Alisteride Pemidizem,Antiseptics,Delta,2018,April,9.0,315.0,35.0,315.0,0.0
1,Carter-Conn,Hospital,Government,Grodzisk Mazowiecki,Metapatch,Antipiretics,Alfa,2018,March,10.0,4380.0,438.0,4380.0,0.0
2,Carter-Conn,Hospital,Government,Głuchołazy,Solasteride,Mood Stabilizers,Bravo,2018,March,10.0,5200.0,520.0,5200.0,0.0
3,Carter-Conn,Hospital,Government,Jarosław,Dexacilin Triline,Analgesics,Delta,2018,April,5.0,645.0,129.0,645.0,0.0
4,Carter-Conn,Hospital,Government,Jaworzno,Alisteride Pemidizem,Antiseptics,Charlie,2018,March,48.0,1680.0,35.0,1680.0,0.0
5,Carter-Conn,Hospital,Government,Jaworzno,Tesnorphine Silotamine,Antibiotics,Alfa,2018,April,15.0,4785.0,319.0,4785.0,0.0
6,Carter-Conn,Hospital,Government,Mikołów,Tiaracil,Antipiretics,Charlie,2018,March,79.0,12798.0,162.0,12798.0,0.0
7,Carter-Conn,Hospital,Government,Szczytno,Aclonuma,Antimalarial,Delta,2018,January,1500.0,711000.0,474.0,711000.0,0.0
8,Carter-Conn,Hospital,Government,Ziębice,Albuterenone,Antimalarial,Alfa,2018,March,90.0,7380.0,82.0,7380.0,0.0
9,Carter-Conn,Hospital,Government,Ziębice,Eproline,Antibiotics,Charlie,2018,March,10.0,7800.0,780.0,7800.0,0.0


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot distribution of total_sales and rolling_avg_3m_sales.
# Kernel density curves are used so that the figure matches its title and
# its "Sales Amount" / "Density" axis labels; both columns share one axes
# so the two distributions can be compared directly.
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(data=df, x='total_sales', label='Total Sales', fill=True, alpha=0.5, ax=ax)
sns.kdeplot(
    data=df,
    x='rolling_avg_3m_sales',
    label='3-Month Rolling Average Sales',
    fill=True,
    alpha=0.5,
    ax=ax,
)
ax.set_title('Distribution of Total Sales and 3-Month Rolling Average Sales')
ax.set_xlabel('Sales Amount')
ax.set_ylabel('Density')
ax.legend()
plt.show()