In [9]:
import requests
import pandas as pd
from utils.config_loader import load_config


def fetch_carts_data(api_url: str, timeout: float = 30.0) -> pd.DataFrame:
    """
    Fetch carts data from an API and return it as a pandas DataFrame.

    The JSON payload is flattened with ``pd.json_normalize``: every entry of
    a cart's ``products`` list becomes one row, and the cart-level ``id``,
    ``userId`` and ``date`` values are repeated on each of that cart's rows.

    Args:
        api_url: URL of the carts endpoint.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without this a stalled
            connection would block the notebook kernel indefinitely.

    Returns:
        One row per cart line: the fields of each ``products`` entry followed
        by the ``id``, ``userId`` and ``date`` columns of the owning cart.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        KeyError: If a cart record lacks ``products`` or one of the meta keys.
    """
    response = requests.get(api_url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()
    data = response.json()
    carts_df = pd.json_normalize(
        data,
        record_path=["products"],
        meta=["id", "userId", "date"]
    )
    return carts_df


def fetch_products_data(api_url: str, timeout: float = 30.0) -> pd.DataFrame:
    """
    Fetch products data from an API and return it as a pandas DataFrame.

    The JSON payload is flattened with ``pd.json_normalize``, so nested
    objects become dot-separated columns (e.g. a ``rating`` object with a
    ``rate`` field becomes the column ``rating.rate``).

    Args:
        api_url: URL of the products endpoint.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without this a stalled
            connection would block the notebook kernel indefinitely.

    Returns:
        One row per product record in the payload.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(api_url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()
    data = response.json()
    products_df = pd.json_normalize(data)
    return products_df


def merge_carts_products(carts_df: pd.DataFrame, products_df: pd.DataFrame) -> pd.DataFrame:
    """
    Merge carts and products data on productId.

    Each cart line is joined (inner join) to the product whose ``id`` equals
    the line's ``productId``. The product-side key is dropped afterwards
    because it duplicates ``productId``, and dots in column names are
    replaced by underscores.

    Args:
        carts_df: Cart lines; must contain a ``productId`` column.
        products_df: Products; must contain an ``id`` column.

    Returns:
        A new DataFrame with the cart columns followed by the product
        columns. When both inputs carry an ``id`` column, the cart one is
        returned as ``id_cart``. Neither input frame is modified.

    Raises:
        KeyError: If ``productId`` is missing from ``carts_df`` or ``id`` is
            missing from ``products_df``.
    """
    merged_df = carts_df.merge(
        products_df,
        left_on="productId",
        right_on="id",
        suffixes=("_cart", "_product")
    )

    # pandas only appends suffixes when a column name exists on both sides,
    # so the product key is "id_product" if carts_df has its own "id" column
    # and plain "id" otherwise. Dropping a hard-coded "id_product" would
    # raise KeyError in the second case.
    product_key = "id_product" if "id" in carts_df.columns else "id"
    merged_df = merged_df.drop(columns=[product_key])

    # Rename nested JSON fields for Snowflake compatibility
    merged_df.columns = merged_df.columns.str.replace(".", "_", regex=False)

    return merged_df

if __name__ == "__main__":
    # Both endpoint URLs live under the "api" section of the loaded config.
    config = load_config()
    carts_url, products_url = (
        config["api"]["carts_url"],
        config["api"]["products_url"],
    )

    # Download each dataset, then join cart lines to their product details.
    carts_df = fetch_carts_data(carts_url)
    products_df = fetch_products_data(products_url)
    merged_df = merge_carts_products(carts_df, products_df)

    # Show the first rows of the joined result.
    preview = merged_df.head()
    print(preview)

   productId  quantity id_cart userId                      date  \
0          1         4       1      1  2020-03-02T00:00:00.000Z   
1          2         1       1      1  2020-03-02T00:00:00.000Z   
2          3         6       1      1  2020-03-02T00:00:00.000Z   
3          2         4       2      1  2020-01-02T00:00:00.000Z   
4          1        10       2      1  2020-01-02T00:00:00.000Z   

                                               title   price  \
0  Fjallraven - Foldsack No. 1 Backpack, Fits 15 ...  109.95   
1             Mens Casual Premium Slim Fit T-Shirts    22.30   
2                                 Mens Cotton Jacket   55.99   
3             Mens Casual Premium Slim Fit T-Shirts    22.30   
4  Fjallraven - Foldsack No. 1 Backpack, Fits 15 ...  109.95   

                                         description        category  \
0  Your perfect pack for everyday use and walks i...  men's clothing   
1  Slim-fitting style, contrast raglan long sleev...  men's clothing

In [5]:
# Inspect the column dtypes of the merged frame.
# NOTE(review): the recorded output for this cell looks stale. Its execution
# count (In [5]) is lower than that of the cell defining merge_carts_products
# (In [9]), and it still lists "rating.rate" / "rating.count" with dots even
# though the merge step replaces "." with "_". Restart the kernel and run all
# cells in order before relying on this output.
merged_df.dtypes

productId         int64
quantity          int64
id_cart          object
userId           object
date             object
title            object
price           float64
description      object
category         object
image            object
rating.rate     float64
rating.count      int64
dtype: object