# Transform Block (Mage) — Star Schema Tables

This is `transform.py` rewritten as a Jupyter notebook cell; it preserves the same output dictionary keys and dict orientation.

In [None]:
import pandas as pd

# Mage decorators: imported only when they are not already defined in globals()
if 'transformer' not in globals():
    from mage_ai.data_preparation.decorators import transformer

if 'test' not in globals():
    from mage_ai.data_preparation.decorators import test

# Display names for the `RatecodeID` values, used to fill `rate_code_name`.
# The codes are consecutive integers starting at 1, so they are generated by
# enumerating the labels in code order.
RATE_CODE_MAP = dict(
    enumerate(
        (
            "Standard rate",
            "JFK",
            "Newark",
            "Nassau or Westchester",
            "Negotiated fare",
            "Group ride",
        ),
        start=1,
    )
)

# Display names for the `payment_type` values, used to fill `payment_type_name`.
# The codes are consecutive integers starting at 1, so they are generated by
# enumerating the labels in code order.
PAYMENT_TYPE_MAP = dict(
    enumerate(
        (
            "Credit card",
            "Cash",
            "No charge",
            "Dispute",
            "Unknown",
            "Voided trip",
        ),
        start=1,
    )
)

def build_dimension(df: pd.DataFrame, columns: list[str], id_column_name: str) -> pd.DataFrame:
    """Build a dimension table from the distinct values of ``columns``.

    Args:
        df: Source frame; it is not modified.
        columns: Column labels whose distinct combinations form the dimension.
        id_column_name: Name of the surrogate-key column to append.

    Returns:
        A new frame with one row per distinct combination of ``columns`` (in
        order of first appearance), a fresh 0-based index, and that index
        repeated in ``id_column_name`` as the last column.
    """
    distinct_rows = df[columns].drop_duplicates().reset_index(drop=True)
    # The surrogate key is simply the row position after de-duplication.
    return distinct_rows.assign(**{id_column_name: lambda frame: frame.index})

@transformer
def transform(df: pd.DataFrame, *args, **kwargs):
    """Split flat trip records into star-schema dimension tables and a fact table.

    Each dimension holds the distinct values of its source column(s) plus a
    0-based surrogate key produced by ``build_dimension``. The fact table is
    the input inner-joined to every dimension on those source columns, reduced
    to the surrogate keys and the measure columns.

    Args:
        df: Trip records. Must contain ``tpep_pickup_datetime``,
            ``tpep_dropoff_datetime``, ``passenger_count``, ``trip_distance``,
            ``RatecodeID``, the pickup/dropoff longitude and latitude columns,
            ``payment_type`` and every column selected for the fact table.
            The frame is not modified.
        *args: Unused.
        **kwargs: Unused.

    Returns:
        A dict mapping each table name (``datetime_dim``,
        ``passenger_count_dim``, ``trip_distance_dim``, ``rate_code_dim``,
        ``pickup_location_dim``, ``dropoff_location_dim``,
        ``payment_type_dim``, ``fact_table``) to that table converted with
        ``DataFrame.to_dict(orient="dict")``.

    Raises:
        KeyError: If a required column is missing from ``df``.
    """
    # Convert the datetime columns on a copy: ``assign`` returns a new frame,
    # so the caller's DataFrame is left untouched.
    df = df.assign(
        tpep_pickup_datetime=pd.to_datetime(df["tpep_pickup_datetime"]),
        tpep_dropoff_datetime=pd.to_datetime(df["tpep_dropoff_datetime"]),
    )

    # DATETIME DIMENSION
    datetime_dim = build_dimension(
        df,
        ["tpep_pickup_datetime", "tpep_dropoff_datetime"],
        "datetime_id",
    )

    # Derive pick_* / drop_* calendar parts from the matching timestamp column.
    datetime_parts = ("hour", "day", "month", "year", "weekday")
    datetime_sources = (
        ("pick", "tpep_pickup_datetime"),
        ("drop", "tpep_dropoff_datetime"),
    )
    for prefix, source_column in datetime_sources:
        accessor = datetime_dim[source_column].dt
        for part in datetime_parts:
            datetime_dim[f"{prefix}_{part}"] = getattr(accessor, part)

    datetime_dim = datetime_dim[
        [
            "datetime_id",
            "tpep_pickup_datetime",
            "pick_hour",
            "pick_day",
            "pick_month",
            "pick_year",
            "pick_weekday",
            "tpep_dropoff_datetime",
            "drop_hour",
            "drop_day",
            "drop_month",
            "drop_year",
            "drop_weekday",
        ]
    ]

    # OTHER DIMENSIONS
    passenger_count_dim = build_dimension(df, ["passenger_count"], "passenger_count_id")
    trip_distance_dim = build_dimension(df, ["trip_distance"], "trip_distance_id")

    rate_code_dim = build_dimension(df, ["RatecodeID"], "rate_code_id")
    # Codes missing from RATE_CODE_MAP get NaN as their name.
    rate_code_dim["rate_code_name"] = rate_code_dim["RatecodeID"].map(RATE_CODE_MAP)
    rate_code_dim = rate_code_dim[["rate_code_id", "RatecodeID", "rate_code_name"]]

    pickup_location_dim = build_dimension(
        df,
        ["pickup_longitude", "pickup_latitude"],
        "pickup_location_id",
    )
    pickup_location_dim = pickup_location_dim[
        ["pickup_location_id", "pickup_latitude", "pickup_longitude"]
    ]

    dropoff_location_dim = build_dimension(
        df,
        ["dropoff_longitude", "dropoff_latitude"],
        "dropoff_location_id",
    )
    dropoff_location_dim = dropoff_location_dim[
        ["dropoff_location_id", "dropoff_latitude", "dropoff_longitude"]
    ]

    payment_type_dim = build_dimension(df, ["payment_type"], "payment_type_id")
    # Codes missing from PAYMENT_TYPE_MAP get NaN as their name.
    payment_type_dim["payment_type_name"] = payment_type_dim["payment_type"].map(PAYMENT_TYPE_MAP)
    payment_type_dim = payment_type_dim[["payment_type_id", "payment_type", "payment_type_name"]]

    # FACT TABLE
    # Join each dimension back on its natural-key column(s) to pick up the
    # surrogate id; the dimensions are de-duplicated on exactly those columns.
    fact_table = (
        df.merge(passenger_count_dim, on="passenger_count")
        .merge(trip_distance_dim, on="trip_distance")
        .merge(rate_code_dim, on="RatecodeID")
        .merge(pickup_location_dim, on=["pickup_longitude", "pickup_latitude"])
        .merge(dropoff_location_dim, on=["dropoff_longitude", "dropoff_latitude"])
        .merge(datetime_dim, on=["tpep_pickup_datetime", "tpep_dropoff_datetime"])
        .merge(payment_type_dim, on="payment_type")
    )

    fact_table = fact_table[
        [
            "VendorID",
            "datetime_id",
            "passenger_count_id",
            "trip_distance_id",
            "rate_code_id",
            "store_and_fwd_flag",
            "pickup_location_id",
            "dropoff_location_id",
            "payment_type_id",
            "fare_amount",
            "extra",
            "mta_tax",
            "tip_amount",
            "tolls_amount",
            "improvement_surcharge",
            "total_amount",
        ]
    ]

    tables = {
        "datetime_dim": datetime_dim,
        "passenger_count_dim": passenger_count_dim,
        "trip_distance_dim": trip_distance_dim,
        "rate_code_dim": rate_code_dim,
        "pickup_location_dim": pickup_location_dim,
        "dropoff_location_dim": dropoff_location_dim,
        "payment_type_dim": payment_type_dim,
        "fact_table": fact_table,
    }
    return {name: table.to_dict(orient="dict") for name, table in tables.items()}

@test
def test_output(output, *args) -> None:
    """Check that the transformer block produced an output at all.

    Args:
        output: The value returned by the block under test.
        *args: Unused.
    """
    has_output = output is not None
    assert has_output, "Transformation output is None"


# --- Local quick run (optional) ---
# from load_notebook import load_data_from_api  # if you saved the load code as a module
# df = load_data_from_api()
# result = transform(df)
# result.keys()
