# In [0]:
# 00_generate_prices: Daily Price Generator

import random
from datetime import datetime
import pandas as pd

# -----------------------------
# CONFIG
# -----------------------------
# Destination prefix for the generated price files (replace with your bucket name).
S3_BUCKET = "s3://price-inflator-tracker/bronze/prices"

# Constant metadata stamped onto every generated row.
CURRENCY = "NPR"
SOURCE = "MockMarket"
LOCATION = "Kathmandu"

# Catalogue of tracked items, one dict per item with the keys
# "item", "category" and "base_price" (the price the daily
# fluctuation is applied to).
ITEMS = [
    {"item": name, "category": category, "base_price": base_price}
    for name, category, base_price in (
        ("Petrol", "Fuel", 180),
        ("Diesel", "Fuel", 165),
        ("Rice", "Grocery", 70),
        ("Cooking Oil", "Grocery", 250),
        ("Milk", "Grocery", 95),
    )
]

# -----------------------------
# GENERATE TODAY'S DATA
# -----------------------------
# Date stamp (YYYY-MM-DD) used both as the "date" column value and as
# the name of the date=... folder under the bucket prefix.
today = f"{datetime.today():%Y-%m-%d}"
folder_path = f"{S3_BUCKET}/date={today}/"


def _simulated_price(base_price):
    """Return base_price shifted by a uniform random amount in [-2.5, 2.5], rounded to 2 decimals."""
    return round(base_price + random.uniform(-2.5, 2.5), 2)


# One record per configured item; exactly one random draw is made per
# item, in ITEMS order.
rows = [
    {
        "date": today,
        "item": entry["item"],
        "category": entry["category"],
        "price": _simulated_price(entry["base_price"]),
        "currency": CURRENCY,
        "source": SOURCE,
        "location": LOCATION,
    }
    for entry in ITEMS
]

df = pd.DataFrame(rows)

# -----------------------------
# WRITE TO S3 USING SPARK
# -----------------------------
import pyspark.sql

# NOTE(review): `spark` and `display` are not defined in this file;
# presumably they are globals supplied by the notebook runtime — confirm
# before running this as a standalone script.
spark_df = spark.createDataFrame(df)
s3_output_path = f"{folder_path}prices_csv_output"

# "overwrite" replaces whatever already exists at the output path, so
# re-running for the same date does not accumulate duplicate rows.
# header=True writes the column names as the first line of the CSV output.
spark_df.write.mode("overwrite").csv(s3_output_path, header=True)

# The status marker was previously mojibake ("âœ…": the UTF-8 bytes of the
# check-mark emoji decoded as cp1252); restored to the intended character.
print(f"✅ Daily prices generated and written to: {s3_output_path}")
display(df)
