### Importing the Needed Modules

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

import sys
# Make the project package (`src.*`) importable from this notebook.
# The membership check keeps the cell idempotent: re-running it no longer
# appends the same entry to sys.path again on every execution.
REPO_ROOT = '/Workspace/Users/mohammedthoufiq9360@gmail.com/Retail-And-Ecommerce-Analytics-Platform'
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

from src.paths import SILVER_PRODUCTS_PATH, DIM_PRODUCTS_PATH
from src.schema_definitions import DIM_PRODUCTS_SCHEMA
from src.utils import add_gold_metadata
from delta.tables import DeltaTable

### Querying Silver Products Table

In [0]:
# Load the silver-layer products table registered under SILVER_PRODUCTS_PATH.
prod_silver_df = spark.read.table(SILVER_PRODUCTS_PATH)

# Render a five-row sample so the incoming columns can be inspected.
silver_preview_df = prod_silver_df.limit(5)
silver_preview_df.display()

product_id,category,sub_category,description_PT,description_DE,description_FR,description_ES,description_EN,description_ZH,color,sizes,production_cost,ingestion_ts,_source_file
3388,Feminine,Suits And Sets,Conjunto De Blusa De Malha E Calça Com Detalhes De Drapeado,Set Von Gestrickten Blusen Und Hosen Mit Drapierten Details,Ensemble De Chemisier Tricoté Et De Pantalons Avec Des Détails Drapés,Conjunto De Blusa Y Pantalones Tejidos Con Detalles Drapeados,Set Of Knitted Blouse And Pants With Draped Details,一套针织上衣和带有细节的裤子,UnKnown,S|M|L,47.68,2026-01-14T05:45:59.737Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/products.csv
4324,Feminine,Suits And Sets,Fato De Saia Longa E Blusa De Manga Curta,Longrock Und Kurze Bluse,Fait De La Jupe Longue Et Du Chemisier À Couture Courte,Hecho De Falda Larga Y Blusa Corta,Fact Of Long Skirt And Short -Sleeved Blouse,长裙和短裙上衣的事实,UnKnown,S|M|L|XL,15.0,2026-01-14T05:45:59.737Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/products.csv
5088,Masculine,Sportswear,Jaqueta Masculina De Corrida Com Zíper Total,Herrenjacke Mit Totalem Reißverschluss,Veste Pour Hommes Coulant Avec Une Fermeture Éclair Totale,Chaqueta Para Hombres Corriendo Con Cremallera Total,Men'S Jacket Running With Total Zipper,男士夹克跑步齐全,UnKnown,M|L|XL|XXL,15.13,2026-01-14T05:45:59.737Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/products.csv
6593,Masculine,Shirts,Camisa Masculina Com Manga Dobrável,Herrenhemd Mit Faltem Ärmel,Chemise Pour Hommes Avec Manche Pliante,Camisa De Hombres Con Manga Plegable,Men'S Shirt With Folding Sleeve,男士衬衫带折叠套,UnKnown,M|L|XL|XXL,12.17,2026-01-14T05:45:59.737Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/products.csv
7737,Masculine,Shirts,Camisa Masculina De Manga Curta Com Estampa De Animais,Kurzes Hemd Für Männer Mit Tierdruck,Short-Sleeved Men'S Shirt With Animal Print,Camisa Para Hombres De Manga Corta Con Estampado De Animales,Short -Sleeved Men'S Shirt With Animal Print,短 - 戴上动物印花的男士衬衫,UnKnown,M|L|XL|XXL,20.07,2026-01-14T05:45:59.737Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/products.csv


### Dim_products Schema Reference

In [0]:
# Show the column-name -> type mapping for dim_products imported from
# src.schema_definitions; it is the reference for the select and DDL cells below.
DIM_PRODUCTS_SCHEMA

{'product_sk': 'long',
 'product_id': 'integer',
 'category': 'string',
 'sub_category': 'string',
 'color': 'string',
 'sizes': 'string',
 'production_cost': 'double',
 'description_PT': 'string',
 'description_DE': 'string',
 'description_FR': 'string',
 'description_ES': 'string',
 'description_EN': 'string',
 'description_ZH': 'string',
 '_created_at': 'timestamp',
 '_updated_at': 'timestamp'}

### Selecting the Needed Columns for dim_products

In [0]:
# Silver columns that feed dim_products, in the order the dimension declares them.
# Ingestion-only columns (ingestion_ts, _source_file) are intentionally left out.
dim_product_source_cols = [
    "product_id",
    "category",
    "sub_category",
    "color",
    "sizes",
    "production_cost",
    "description_PT",
    "description_DE",
    "description_FR",
    "description_ES",
    "description_EN",
    "description_ZH",
]

prod_silver_df = prod_silver_df.select(*dim_product_source_cols)

### Creating Metadata Columns: `_created_at` and `_updated_at`

In [0]:
# Run the projected silver frame through the shared gold-layer helper.
# NOTE(review): add_gold_metadata lives in src.utils and is not visible here;
# it is expected to add the _created_at / _updated_at columns used by the merge.
dim_prod_df = prod_silver_df.transform(add_gold_metadata)

### Creating Dim_products Table with surrogate key

In [0]:
# DDL for the dimension table. `product_sk` is a Delta identity column, so the
# surrogate key is generated by the table itself and is never supplied by writers.
# `if not exists` keeps this cell safe to re-run.
create_dim_products_ddl = f"""
        create table if not exists {DIM_PRODUCTS_PATH}(
            product_sk long generated always as identity,
            product_id integer,
            category string,
            sub_category string,
            color string,
            sizes string,
            production_cost double,
            description_PT string,
            description_DE string,
            description_FR string,
            description_ES string,
            description_EN string,
            description_ZH string,
            _created_at timestamp,
            _updated_at timestamp
        ) using delta
    """

spark.sql(create_dim_products_ddl)

DataFrame[]

### Updating the Dim_products Table

In [0]:
# Upsert the prepared product rows into dim_products, keyed on the natural key
# `product_id`. The surrogate key `product_sk` is never written here: it is an
# identity column generated by the table.

# Descriptive attributes copied from source to target on both insert and update.
dim_product_attribute_cols = [
    "category",
    "sub_category",
    "color",
    "sizes",
    "production_cost",
    "description_PT",
    "description_DE",
    "description_FR",
    "description_ES",
    "description_EN",
    "description_ZH",
]

dim_prod_tbl = DeltaTable.forName(spark, DIM_PRODUCTS_PATH)

attribute_assignments = {
    f"tgt.{col_name}": f"src.{col_name}" for col_name in dim_product_attribute_cols
}

# Only touch a matched row when at least one attribute actually differs.
# Without this guard every run rewrote every matched row and bumped
# `_updated_at`, so the column no longer told you when a product last changed.
# `<=>` is null-safe equality, so NULL vs NULL counts as "unchanged".
row_changed_condition = " OR ".join(
    f"NOT (tgt.{col_name} <=> src.{col_name})"
    for col_name in dim_product_attribute_cols
)

# `product_id` is omitted from the update: the merge key already guarantees
# tgt.product_id = src.product_id. `_created_at` is deliberately preserved.
matched_update_set = {
    **attribute_assignments,
    "tgt._updated_at": "src._updated_at",
}

# New products get the natural key, all attributes and both audit timestamps.
not_matched_insert_values = {
    "tgt.product_id": "src.product_id",
    **attribute_assignments,
    "tgt._created_at": "src._created_at",
    "tgt._updated_at": "src._updated_at",
}

(
    dim_prod_tbl.alias("tgt")
    .merge(dim_prod_df.alias("src"), "tgt.product_id = src.product_id")
    .whenMatchedUpdate(condition=row_changed_condition, set=matched_update_set)
    .whenNotMatchedInsert(values=not_matched_insert_values)
    .execute()
)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Read the dimension back and render five rows to eyeball the merge result,
# including the generated product_sk and the audit timestamps.
dim_products_df = spark.read.table(DIM_PRODUCTS_PATH)
dim_products_df.limit(5).display()

product_sk,product_id,category,sub_category,color,sizes,production_cost,description_PT,description_DE,description_FR,description_ES,description_EN,description_ZH,_created_at,_updated_at
1,3388,Feminine,Suits And Sets,UnKnown,S|M|L,47.68,Conjunto De Blusa De Malha E Calça Com Detalhes De Drapeado,Set Von Gestrickten Blusen Und Hosen Mit Drapierten Details,Ensemble De Chemisier Tricoté Et De Pantalons Avec Des Détails Drapés,Conjunto De Blusa Y Pantalones Tejidos Con Detalles Drapeados,Set Of Knitted Blouse And Pants With Draped Details,一套针织上衣和带有细节的裤子,2026-01-17T12:39:59.566Z,2026-01-17T12:39:59.566Z
2,4324,Feminine,Suits And Sets,UnKnown,S|M|L|XL,15.0,Fato De Saia Longa E Blusa De Manga Curta,Longrock Und Kurze Bluse,Fait De La Jupe Longue Et Du Chemisier À Couture Courte,Hecho De Falda Larga Y Blusa Corta,Fact Of Long Skirt And Short -Sleeved Blouse,长裙和短裙上衣的事实,2026-01-17T12:39:59.566Z,2026-01-17T12:39:59.566Z
3,5088,Masculine,Sportswear,UnKnown,M|L|XL|XXL,15.13,Jaqueta Masculina De Corrida Com Zíper Total,Herrenjacke Mit Totalem Reißverschluss,Veste Pour Hommes Coulant Avec Une Fermeture Éclair Totale,Chaqueta Para Hombres Corriendo Con Cremallera Total,Men'S Jacket Running With Total Zipper,男士夹克跑步齐全,2026-01-17T12:39:59.566Z,2026-01-17T12:39:59.566Z
4,6593,Masculine,Shirts,UnKnown,M|L|XL|XXL,12.17,Camisa Masculina Com Manga Dobrável,Herrenhemd Mit Faltem Ärmel,Chemise Pour Hommes Avec Manche Pliante,Camisa De Hombres Con Manga Plegable,Men'S Shirt With Folding Sleeve,男士衬衫带折叠套,2026-01-17T12:39:59.566Z,2026-01-17T12:39:59.566Z
5,7737,Masculine,Shirts,UnKnown,M|L|XL|XXL,20.07,Camisa Masculina De Manga Curta Com Estampa De Animais,Kurzes Hemd Für Männer Mit Tierdruck,Short-Sleeved Men'S Shirt With Animal Print,Camisa Para Hombres De Manga Corta Con Estampado De Animales,Short -Sleeved Men'S Shirt With Animal Print,短 - 戴上动物印花的男士衬衫,2026-01-17T12:39:59.566Z,2026-01-17T12:39:59.566Z


In [0]:
# Total number of rows currently stored in dim_products.
dim_products_reader = spark.read
dim_products_reader.table(DIM_PRODUCTS_PATH).count()

17940