# Product Dimension & Transaction Bridge Modeling

## Objective
This notebook builds a normalized product dimension (`dim_products`)
and a transaction–product bridge table from retail basket data.


In [None]:
import pandas as pd
import ast

# LOAD DATA

# Location of the cleaned retail CSV that this cell reads.
input_path = r"C:\Users\Envy\Desktop\pro\data\processed\cleaned_retail.csv"

# Read the file and expose the "Product" column under the lowercase name
# "products", which is the name the rest of the cell uses.
df = pd.read_csv(input_path).rename(columns={"Product": "products"})

def parse_products(x):
    """Parse a stringified product collection into a de-duplicated list.

    Args:
        x: A cell value expected to hold a Python literal such as
            ``"['milk', 'bread']"``.

    Returns:
        The distinct items in first-seen order. A single quoted string
        (e.g. ``"'milk'"``) is treated as one product. An empty list is
        returned when ``x`` is not a parseable literal, or when the parsed
        value is not an iterable of hashable items.
    """
    try:
        parsed = ast.literal_eval(x)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # These are the errors literal_eval raises for malformed or
        # non-string input (including None / NaN cells).
        return []

    if isinstance(parsed, str):
        # Iterating a bare string would yield its characters as "products".
        return [parsed]

    try:
        # REMOVE DUPLICATES HERE: dict.fromkeys keeps first-seen order, so the
        # result is stable across runs (set ordering of strings is not).
        return list(dict.fromkeys(parsed))
    except TypeError:
        # Parsed value is not iterable (e.g. a number) or holds unhashable items.
        return []

# Decode every stringified basket into a list of distinct product names.
df["products"] = df["products"].map(parse_products)


# CREATE dim_products

# Collect each distinct product name that appears in any basket.
unique_names = set()
for basket in df["products"]:
    unique_names.update(basket)
all_products = sorted(unique_names)

# Surrogate keys are 1-based and follow the sorted order of the names.
dim_products = pd.DataFrame({"product_name": all_products})
dim_products.insert(0, "product_id", dim_products.index + 1)

# product_name -> product_id mapping used when building the bridge table.
product_lookup = {
    name: pid
    for name, pid in zip(dim_products["product_name"], dim_products["product_id"])
}


# CREATE BRIDGE TABLE

# One output row per (transaction, product) pair. explode() expands each
# basket list into one row per element, keeping the original row order;
# baskets that are empty lists become NaN and are dropped.
bridge_pairs = (
    df[["Transaction_ID", "products"]]
    .explode("products")
    .dropna(subset=["products"])
    .reset_index(drop=True)
)

# Translate product names to their surrogate keys from dim_products.
bridge_pairs["product_id"] = (
    bridge_pairs["products"].map(product_lookup).astype("int64")
)

# Keep only the two bridge columns, so the frame (and its CSV header) has
# the same shape even when there are no pairs at all.
fact_transaction_products = bridge_pairs.rename(
    columns={"Transaction_ID": "transaction_id"}
)[["transaction_id", "product_id"]]

# FINAL SAFETY DEDUP
fact_transaction_products = fact_transaction_products.drop_duplicates()


# SAVE FILES

# Directory that receives both CSV exports.
output_dir = r"C:\ProgramData\MySQL\MySQL Server 8.0\Uploads"

# NOTE(review): only product_name is written for the dimension; product_id is
# not exported. The bridge file's product_id values (1..N in sorted-name
# order) therefore match only if the consumer assigns ids in file order
# starting at 1 -- presumably an auto-increment key; confirm against the loader.
exports = (
    (dim_products[["product_name"]], "dim_products.csv"),
    (fact_transaction_products, "fact_transaction_products.csv"),
)

for frame, filename in exports:
    # index=False keeps the pandas row index out of the exported file.
    frame.to_csv(f"{output_dir}\\{filename}", index=False)

print("✅ Product dimension & bridge table regenerated (duplicates removed)")


✅ Product dimension & bridge table regenerated (duplicates removed)
