In [1]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Datetime columns for each Olist CSV, keyed by bare filename (no directory).
# Files with no date columns are deliberately left out of the mapping:
#   olist_customers_dataset.csv
#   olist_geolocation_dataset.csv
#   olist_order_payments_dataset.csv
#   olist_products_dataset.csv
#   olist_sellers_dataset.csv
#   product_category_name_translation.csv
date_cols = {
    "olist_orders_dataset.csv": [
        "order_purchase_timestamp",
        "order_approved_at",
        "order_delivered_carrier_date",
        "order_delivered_customer_date",
        "order_estimated_delivery_date",
    ],
    "olist_order_items_dataset.csv": ["shipping_limit_date"],
    "olist_order_reviews_dataset.csv": [
        "review_creation_date",
        "review_answer_timestamp",
    ],
}

def read_olist_csv(path):
    """
    Load a single Olist CSV, asking pandas to parse its known date columns.

    The file's base name (directory stripped) is looked up in ``date_cols``;
    a file with no entry there is read with an empty ``parse_dates`` list.

    Args:
        path (str): Path to the CSV file.
    Returns:
        pd.DataFrame: The file's contents, with the columns registered for
            this filename in ``date_cols`` handed to ``parse_dates``.
    """
    # Key on the bare filename so the lookup is independent of the directory.
    datetime_columns = date_cols.get(os.path.basename(path), [])
    return pd.read_csv(path, parse_dates=datetime_columns)

# Load every Olist table from ../data into its own DataFrame.

# Tables that have registered date columns in `date_cols`.
df_orders = read_olist_csv("../data/olist_orders_dataset.csv")
# Customers has no registered date columns; it is loaded here to keep the
# original read order.
df_customers = read_olist_csv("../data/olist_customers_dataset.csv")
df_items = read_olist_csv("../data/olist_order_items_dataset.csv")
df_payments = read_olist_csv("../data/olist_order_payments_dataset.csv")
df_reviews = read_olist_csv("../data/olist_order_reviews_dataset.csv")

# Remaining tables, none of which have an entry in `date_cols`.
df_products = read_olist_csv("../data/olist_products_dataset.csv")
df_prod_cat_tr = read_olist_csv("../data/product_category_name_translation.csv")
df_sellers = read_olist_csv("../data/olist_sellers_dataset.csv")
df_geo = read_olist_csv("../data/olist_geolocation_dataset.csv")