# Clean E-commerce Data

This notebook cleans the raw CSV files in `data/raw/` and writes the cleaned results to `data/processed/`.

## Cleaning Operations
- Type conversions (date columns are parsed into datetimes)
- Data validation
- Removal of invalid records (orders whose `total_amount` is not greater than 0)


In [None]:
import pandas as pd
import os

# Make sure the output folder for cleaned files exists; an already-present
# folder is not an error.
processed_dir = "data/processed"
os.makedirs(processed_dir, exist_ok=True)


## Clean fact_orders


In [None]:
# Load the raw orders extract.
raw_orders = pd.read_csv("data/raw/orders.csv")

# Parse order_date into datetimes, replacing the column in place.
raw_orders["order_date"] = pd.to_datetime(raw_orders["order_date"])

# Keep only the rows whose total_amount is strictly greater than zero.
has_positive_total = raw_orders["total_amount"] > 0
orders = raw_orders.loc[has_positive_total]

# Persist the cleaned table, report the row count, and preview the result.
orders.to_csv("data/processed/orders.csv", index=False)
print(f"Cleaned {len(orders)} orders")
orders.head()


## Clean dim_customers


In [None]:
# Load the raw customers extract and parse signup_date into datetimes in one
# chained expression; assign() overwrites the existing column.
customers = pd.read_csv("data/raw/customers.csv").assign(
    signup_date=lambda frame: pd.to_datetime(frame["signup_date"])
)

# Persist the cleaned table, report the row count, and preview the result.
customers.to_csv("data/processed/customers.csv", index=False)
print(f"Cleaned {len(customers)} customers")
customers.head()


## Clean fact_deliveries


In [None]:
# Load the raw deliveries extract.
deliveries = pd.read_csv("data/raw/deliveries.csv")

# Parse both delivery date columns into datetimes, in the same order as before.
for date_col in ("promised_delivery_date", "actual_delivery_date"):
    deliveries[date_col] = pd.to_datetime(deliveries[date_col])

# Persist the cleaned table, report the row count, and preview the result.
deliveries.to_csv("data/processed/deliveries.csv", index=False)
print(f"Cleaned {len(deliveries)} deliveries")
deliveries.head()


## Summary

All cleaned data files have been saved to `data/processed/`:
- orders.csv
- customers.csv
- deliveries.csv
