In [0]:
import pandas as pd
from datetime import datetime
import os


In [0]:
# Input and output locations used by this ingestion notebook
TARGET_DIR = "../Bronze"
SOURCE_PATH = "../Datasets/products.csv"
TARGET_FILE = TARGET_DIR + "/products.csv"

# Create the output folder up front; an already existing folder is not an error
os.makedirs(name=TARGET_DIR, exist_ok=True)

In [0]:
# Load the products CSV found at SOURCE_PATH into a DataFrame
df = pd.read_csv(filepath_or_buffer=SOURCE_PATH)

In [0]:
# Schema check: stop immediately if any expected column is absent from the frame
required_columns = [
    "product_id", "brand", "title", "price",
    "category", "rating", "image_url", "product_url",
]

# Required names that do not appear among the loaded columns
missing_cols = set(required_columns).difference(df.columns)
if len(missing_cols) > 0:
    raise ValueError(f"Missing columns: {missing_cols}")

In [0]:
# Normalise the text columns to lower case, one column at a time
for text_col in ("brand", "title", "category"):
    df[text_col] = df[text_col].str.lower()


In [0]:
# Metadata
# Tag every row with when and from where it was ingested. The timestamp is
# timezone-aware UTC instead of naive local time, so values written on
# different machines (or on either side of a DST change) stay comparable.
df["ingestion_timestamp"] = pd.Timestamp.now(tz="UTC")
df["source_system"] = "products_csv"
df.head()  # preview the first rows, including the two new metadata columns


In [0]:
# Persist the frame to the bronze target file as CSV, leaving out the index
df.to_csv(path_or_buf=TARGET_FILE, index=False)

print("Products ingestion completed successfully.")