In [1]:
import pandas as pd
from src.utils import load_config, save_df
from src.bronze_ingest import bronze_ingest
from src.silver_transform import silver_transform
from src.gold_enrich import gold_enrich
import random

# --- Function to generate sample furniture CSVs ---
def generate_bronze_data(n=100, output_dir="data/bronze"):
    """Generate two sample furniture CSV files for the bronze layer.

    Both files share the same ``name`` column; the first carries a
    ``description_a`` column and the second a ``description_b`` column,
    each filled with independently drawn random descriptions.

    Args:
        n: Number of furniture items to generate. Values <= 0 produce
            header-only CSV files.
        output_dir: Directory that receives ``products_api1.csv`` and
            ``products_api2.csv``. It is created if it does not exist.

    Returns:
        None. The data is written to disk and a summary is printed.
    """
    # Function-scope import so the block does not depend on a file-level import.
    from pathlib import Path

    furniture_types = ["Chair", "Sofa", "Table", "Desk", "Bed", "Cabinet", "Shelf", "Stool"]
    materials = ["Oak", "Pine", "Metal", "Glass", "Leather", "Fabric", "Walnut", "Bamboo"]
    adjectives = ["Elegant", "Modern", "Rustic", "Classic", "Compact", "Luxurious", "Stylish", "Durable"]
    features = ["comfortable", "space-saving", "ergonomic", "sturdy", "lightweight", "multi-functional", "easy to assemble", "adjustable"]

    def random_description():
        """Return one random sentence such as 'Modern Oak Chair that is sturdy.'"""
        return f"{random.choice(adjectives)} {random.choice(materials)} {random.choice(furniture_types)} that is {random.choice(features)}."

    names = []
    description_a_list = []
    description_b_list = []

    # The order of random draws (name, then description A, then description B)
    # is kept per item so a seeded ``random`` yields the same data as before.
    for i in range(1, n + 1):
        name = f"{random.choice(adjectives)} {random.choice(furniture_types)} {i}"
        names.append(name)
        description_a_list.append(random_description())
        description_b_list.append(random_description())

    df_a = pd.DataFrame({
        "name": names,
        "description_a": description_a_list,
    })
    df_b = pd.DataFrame({
        "name": names,
        "description_b": description_b_list,
    })

    # to_csv does not create missing parent directories, so make sure the
    # target directory exists before writing.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    api1_path = out_dir / "products_api1.csv"
    api2_path = out_dir / "products_api2.csv"

    df_a.to_csv(api1_path, index=False)
    df_b.to_csv(api2_path, index=False)

    # Report the number of rows actually written rather than a fixed "100".
    print(f"✅ Generated {len(names)} furniture items in:")
    print(f"   - {api1_path.as_posix()}")
    print(f"   - {api2_path.as_posix()}")

def main():
    """Run the sample pipeline end to end: bronze -> silver -> gold.

    Writes fresh sample bronze CSVs, loads ``config/settings.yaml``, ingests
    the two bronze sources, then saves the silver and gold DataFrames to the
    paths named in the config's ``data_paths`` section.
    """
    # Seed the bronze layer with generated sample data.
    generate_bronze_data(100)

    settings = load_config("config/settings.yaml")
    paths = settings["data_paths"]

    # --- Bronze: combine the two source CSVs ---
    raw_df = bronze_ingest(api1_path=paths["api1_csv"], api2_path=paths["api2_csv"])

    # --- Silver: transform, then persist ---
    cleaned_df = silver_transform(raw_df)
    save_df(cleaned_df, paths["silver_output"])
    print("✅ Silver layer saved at:", paths["silver_output"])

    # --- Gold: enrich using the Ollama settings from the config ---
    llm = settings["ollama"]
    enriched_df = gold_enrich(
        df=cleaned_df,
        model=llm["model"],
        system_prompt=llm["system"],
        prompt_template=llm["prompt"],
    )
    save_df(enriched_df, paths["gold_output"])
    print("✅ Gold layer saved at:", paths["gold_output"])

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()



✅ Generated 100 furniture items in:
   - data/bronze/products_api1.csv
   - data/bronze/products_api2.csv
✅ Silver layer saved at: data/silver/clean_products.csv


100%|██████████| 100/100 [53:26<00:00, 32.07s/it] 

✅ Gold layer saved at: data/gold/final_products.csv



