# Retail Data Modeling: Star and Snowflake Schemas

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from src.data_generator import generate_products, generate_customers, generate_time, generate_sales, save_dataframes
from src.star_schema import apply_scd_type1, apply_scd_type2, create_star_schema
from src.snowflake_schema import create_snowflake_schema

ModuleNotFoundError: No module named 'src'

In [None]:
# Generate mock data
# The integer arguments are presumably row counts for each table -- confirm
# against src.data_generator before changing them.
product_arg, customer_arg, sales_arg = 10, 20, 100

products = generate_products(product_arg)
customers = generate_customers(customer_arg)
time = generate_time()

# Sales are generated from the three tables created above.
sales = generate_sales(products, customers, time, sales_arg)

# Hand all four frames to the project's save helper.
save_dataframes(products, customers, time, sales)

In [None]:
# Apply SCD Type 1
# Pass a copy so the helper is not given the original `customers` frame.
scd1_input = customers.copy()
customers_scd1 = apply_scd_type1(
    scd1_input,
    customer_id=1,
    new_address='123 New St',
)

In [None]:
# Apply SCD Type 2
# Pass a copy so the helper is not given the original `customers` frame.
scd2_input = customers.copy()
scd2_effective_date = pd.to_datetime('2024-06-01')
customers_scd2 = apply_scd_type2(
    scd2_input,
    customer_id=2,
    new_address='456 Updated Rd',
    effective_date=scd2_effective_date,
)

In [None]:
# Create schemas
# Build the star schema first, then the snowflake schema (same order as before).
# NOTE(review): the plotting cells read CSVs under data/processed/star and
# data/processed/snowflake; presumably these builders write them -- confirm.
for build_schema in (create_star_schema, create_snowflake_schema):
    build_schema()

In [None]:
# Visualize sales by category (Star Schema)
# Requires the import cell to have been run first (uses `pd` and `plt`).
star_sales = pd.read_csv('data/processed/star/fact_sales.csv')
star_products = pd.read_csv('data/processed/star/dim_products.csv')

# Single join: fact rows pick up product attributes via product_id.
star_sales_with_products = star_sales.merge(star_products, on='product_id')

# Sum total_amount within each category.
amount_by_category = star_sales_with_products.groupby('category')['total_amount']
sales_by_category = amount_by_category.sum()

sales_by_category.plot(kind='bar', title='Sales by Category (Star Schema)')
plt.show()

In [1]:
# Visualize sales by month (Snowflake Schema)
# Requires the import cell to have been run first (uses `pd` and `plt`);
# running this cell in a fresh kernel raises NameError on `pd`.
snow_sales = pd.read_csv('data/processed/snowflake/fact_sales.csv')
snow_time = pd.read_csv('data/processed/snowflake/dim_time.csv')
snow_months = pd.read_csv('data/processed/snowflake/dim_months.csv')

# Two-hop join: fact -> dim_time (on date_id) -> dim_months (on month_id).
snow_sales_with_time = snow_sales.merge(snow_time, on='date_id')
snow_sales_with_months = snow_sales_with_time.merge(snow_months, on='month_id')

# Sum total_amount for each (year, month) pair; the result is indexed by both.
amount_by_year_month = snow_sales_with_months.groupby(['year', 'month'])['total_amount']
sales_by_month = amount_by_year_month.sum()

sales_by_month.plot(kind='line', title='Sales by Month (Snowflake Schema)')
plt.show()

NameError: name 'pd' is not defined