In [1]:
import logging

from delta import DeltaTable
from pyspark.sql import DataFrame
from typing import List, Tuple

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 5, Finished, Available, Finished)

In [9]:
# ABFS path of the Delta table that merge_playlists() is pointed at later in this notebook.
# NOTE(review): the two GUIDs in the path are presumably the Fabric workspace and
# lakehouse IDs, which would tie this notebook to a single environment — confirm,
# and consider supplying the path as a notebook parameter instead.
RAW_TABLE = "abfss://fd12376e-2797-4027-bb8e-42a3a8228a70@onelake.dfs.fabric.microsoft.com/77b89b44-1bcf-42fa-a9ac-7d0593123d3d/Tables/playlists"

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 15, Finished, Available, Finished)

In [3]:
%run Google-Helpers

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 9, Finished, Available, Finished)

In [4]:
# Module-level logger used by create_playlists_dataframe() and merge_playlists().
# setup_logger is neither defined nor imported in this notebook's own cells;
# presumably it is brought into scope by the `%run Google-Helpers` cell — confirm.
logger = setup_logger()

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 10, Finished, Available, Finished)

In [5]:
def create_playlists_dataframe(data: List[tuple], columns: List[str]) -> DataFrame:
    """
    Build a Spark DataFrame from in-memory playlist rows.

    Relies on the notebook-level ``spark`` session and ``logger``.

    Args:
        data (List[tuple]): Row tuples; each tuple supplies one value per column.
        columns (List[str]): Column names, in the same order as the tuple fields.

    Returns:
        DataFrame: The DataFrame produced by ``spark.createDataFrame``.

    Raises:
        Exception: Any error raised while building the DataFrame is logged
            with its traceback and then re-raised unchanged.
    """
    try:
        logger.info("Creating playlists DataFrame")
        playlists_frame = spark.createDataFrame(data, columns)
        logger.debug("Playlist dataframe created")
        return playlists_frame
    except Exception:
        # logger.exception records the active traceback, so the exception
        # object itself does not need to be bound to a name.
        logger.exception("Failed to create playlists DataFrame")
        raise

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 11, Finished, Available, Finished)

In [6]:
# One (year, playlist ID) row per year, 2020 through 2025.
# NOTE(review): the IDs are presumably YouTube playlist IDs — confirm.
data = [
    (2025, "PLmWYEDTNOGUJG7RV2ARlG2OCpq8oNwz2s"),
    (2024, "PLmWYEDTNOGULUJYEhh-EUa32rEcHuNXO7"),
    (2023, "PLmWYEDTNOGUIr757MlL8s9iyvYx-0lToh"),
    (2022, "PLmWYEDTNOGULG6eg0zgzvRercwqRP6mII"),
    (2021, "PLmWYEDTNOGUIFKZpE5Z2uOA5i48KVfqju"),
    (2020, "PLmWYEDTNOGUL69D2wj9m2onBKV2s3uT5Y")
]
# Column names, in the same order as the tuple fields above.
# "Year" is the column merge_playlists() matches on.
columns = ["Year", "PlaylistId"]

# Source DataFrame that is later merged into the Delta table.
playlists_df = create_playlists_dataframe(data, columns)

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 12, Finished, Available, Finished)

2025-03-16 22:01:39,740 - INFO - Creating playlists DataFrame


In [10]:
def _log_merge_metrics(target_table: "DeltaTable") -> None:
    """
    Log the row counts recorded for the table's most recent commit.

    Reporting is skipped with a warning when the history is empty or when the
    latest commit is not a MERGE, so counts belonging to some other operation
    are never reported as the result of the merge.

    Args:
        target_table (DeltaTable): The Delta table whose latest commit is inspected.
    """
    history_rows = target_table.history(1).collect()
    if not history_rows:
        logger.warning("No commit history found; skipping merge metrics")
        return

    last_commit = history_rows[0]
    operation = last_commit["operation"]
    if operation != "MERGE":
        # Another commit landed after the merge (or the merge produced none),
        # so these metrics would not describe this merge.
        logger.warning(
            "Latest commit is %s, not MERGE; skipping merge metrics", operation
        )
        return

    # Defensive: operationMetrics may be null (for example when operation
    # metrics are turned off), in which case every count falls back to 0.
    metrics = last_commit["operationMetrics"] or {}
    for label, metric_key in (
        ("inserted", "numTargetRowsInserted"),
        ("updated", "numTargetRowsUpdated"),
        ("deleted", "numTargetRowsDeleted"),
    ):
        # Metric values are converted with int() because they may arrive as strings.
        logger.info("Rows %s: %s", label, int(metrics.get(metric_key, 0)))


def merge_playlists(playlists_df: DataFrame, table_path: str) -> None:
    """
    Merge the playlists DataFrame into the target Delta table.

    Rows are matched on ``Year``: matching target rows are overwritten with the
    source row, and source rows without a match are inserted. After the merge,
    the inserted/updated/deleted row counts from the table history are logged.

    Relies on the notebook-level ``spark`` session and ``logger``.

    Args:
        playlists_df (DataFrame): The DataFrame containing playlists data.
        table_path (str): The Delta table ABFS path to merge into.

    Raises:
        Exception: Any error raised while opening the table, merging, or
            reading the history is logged with its traceback and re-raised.
    """
    try:
        logger.info("Accessing target Delta table: %s", table_path)
        target_table = DeltaTable.forPath(spark, table_path)

        logger.info("Starting merge operation")
        (
            target_table.alias("target")
            .merge(playlists_df.alias("source"), "target.Year = source.Year")
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute()
        )
        logger.info("Merge operation completed successfully")

        _log_merge_metrics(target_table)
    except Exception as e:
        logger.exception("Exception details: %s", str(e))
        raise

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 16, Finished, Available, Finished)

In [12]:
# Upsert the in-memory playlist rows into the Delta table at RAW_TABLE,
# matching existing rows on Year.
merge_playlists(playlists_df, RAW_TABLE)

StatementMeta(, 7dfdb7b2-2c9c-4c57-a3f0-b28b72d1e4d0, 18, Finished, Available, Finished)

2025-03-16 22:06:03,087 - INFO - Accessing target Delta table: abfss://fd12376e-2797-4027-bb8e-42a3a8228a70@onelake.dfs.fabric.microsoft.com/77b89b44-1bcf-42fa-a9ac-7d0593123d3d/Tables/playlists
2025-03-16 22:06:12,479 - INFO - Starting merge operation
2025-03-16 22:06:26,710 - INFO - Merge operation completed successfully
2025-03-16 22:06:27,492 - INFO - Rows inserted: 0
2025-03-16 22:06:27,492 - INFO - Rows updated: 6
2025-03-16 22:06:27,493 - INFO - Rows deleted: 0
