In [67]:
try:
    spark.stop()
except Exception:
    pass

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder.appName("Iceberg via REST")
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.local.type", "rest")
    .config("spark.sql.catalog.local.uri", "http://iceberg-rest:8181")
    .config("spark.sql.catalog.local.warehouse", "s3://lake/warehouse")
    .config("spark.sql.catalog.local.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
    .config("spark.sql.catalog.local.s3.endpoint", "http://minio:9000")
    .config("spark.sql.catalog.local.s3.path-style-access", "true")
    .config("spark.sql.catalog.local.s3.access-key-id", "admin")
    .config("spark.sql.catalog.local.s3.secret-access-key", "admin123")
    .config("spark.sql.catalog.local.s3.region", "us-east-1")
    .getOrCreate()
)

spark

25/11/03 01:16:13 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [68]:
spark.sql("SHOW NAMESPACES IN local").show(truncate=False)

+---------+
|namespace|
+---------+
|raw      |
+---------+



In [69]:
spark.sql("SHOW TABLES IN local.raw").show(truncate=False)

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|raw      |avito    |false      |
+---------+---------+-----------+



In [96]:
tbl = "local.raw.avito"
spark.sql(f"SELECT COUNT(*) AS rows FROM {tbl}").show()

+----+
|rows|
+----+
| 250|
+----+



In [97]:
import os

# Define the export path relative to the current directory of the Jupyter notebook
export_path = os.path.join(os.getcwd(), "exported_avito_table")

# Export the Iceberg table to Parquet format in the current directory
spark.read.format("iceberg").load("local.raw.avito") \
    .write.format("parquet").save(export_path)

# Confirm the files were saved by checking the directory content
print(f"Table exported to: {export_path}")

25/11/03 01:25:08 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Table exported to: /opt/work/src/notebooks/exported_avito_table


In [71]:
# Load RAW table
raw_df = spark.table("local.raw.avito")   # or local.raw.sarouty

In [72]:
# Cr√©er un DataFrame "Silver" minimal
silver_df = raw_df.select("id").distinct()

# Afficher quelques lignes
silver_df.show(5, truncate=False)

print("‚úÖ Silver dataset initialized with only 'id' column.")
print("Total IDs:", silver_df.count())

+--------+
|id      |
+--------+
|55924046|
|57028285|
|57027014|
|57066031|
|57153879|
+--------+
only showing top 5 rows

‚úÖ Silver dataset initialized with only 'id' column.
Total IDs: 160


In [73]:
from pyspark.sql import functions as F, types as T

# 1) D√©finir le sch√©ma du JSON dans "payload"
payload_schema = T.StructType([
    T.StructField("id", T.StringType()),
    T.StructField("url", T.StringType()),
    T.StructField("error", T.StringType()),
    T.StructField("title", T.StringType()),
    T.StructField("price_text", T.StringType()),
    T.StructField("breadcrumbs", T.StringType()),
    T.StructField("category", T.StringType()),
    T.StructField("description", T.StringType()),
    T.StructField("attributes", T.StringType()),  # JSON imbriqu√© sous forme de string
    T.StructField("equipments", T.StringType()),
    T.StructField("seller_name", T.StringType()),
    T.StructField("seller_type", T.StringType()),
    T.StructField("published_date", T.StringType()),
    T.StructField("image_urls", T.StringType()),
])

# 2) Parser le JSON depuis la colonne string "payload"
parsed = (raw_df
    .select(
        *[c for c in raw_df.columns if c != "payload"],  # ex: garder ingest_ts s'il existe
        F.from_json(F.col("payload"), payload_schema).alias("p")
    )
    .filter(F.col("p").isNotNull())  # ignorer les lignes avec JSON invalide
)

# 3) Parser le JSON imbriqu√© "attributes" -> Map<String,String>
attrs_map = F.from_json(F.col("p.attributes"), T.MapType(T.StringType(), T.StringType()))

# 4) Nettoyages utiles:
# - price_value (MAD) √† partir de "price_text" (ex: "6 000 DH" -> 6000.0)
price_value = F.regexp_replace(F.col("p.price_text"), r"[^0-9]", "").cast("double")

# - image_urls -> array<string> en splittant sur " | " et trim de chaque url
image_urls_arr = F.transform(
    F.split(F.col("p.image_urls"), r"\s*\|\s*"),
    lambda x: F.trim(x)
)

# - equipments -> array<string> en splittant sur ";"
equipments_arr = F.transform(
    F.split(F.col("p.equipments"), r"\s*;\s*"),
    lambda x: F.trim(x)
)

# 5) Construire le DataFrame silver (colonnes √† plat)
silver_df = parsed.select(
    F.col("p.id").alias("id"),
    F.col("p.url").alias("url"),
    F.col("p.title").alias("title"),
    F.col("p.price_text").alias("price_text"),
    price_value.alias("price_value_mad"),
    F.col("p.category").alias("category"),
    F.col("p.breadcrumbs").alias("breadcrumbs"),
    F.col("p.description").alias("description"),
    F.col("p.seller_name").alias("seller_name"),
    F.col("p.seller_type").alias("seller_type"),
    F.col("p.published_date").alias("published_date_text"),
    image_urls_arr.alias("image_urls"),
    equipments_arr.alias("equipments"),
    attrs_map.alias("attributes_map"),
    # garder le timestamp d'ingestion s'il est pr√©sent dans ton raw_df
    *([F.col("ingest_ts")] if "ingest_ts" in raw_df.columns else [])
)

In [74]:
import pandas as pd
from IPython.display import display

# Keep things compact
pd.set_option("display.max_columns", 20)   # don't try to show hundreds
pd.set_option("display.max_colwidth", 80)  # clamp long cells to ~80 chars

# pick a small sample and flatten newlines so rows stay short
pdf = (silver_df.limit(10)
       .toPandas()
       .replace({r'[\r\n\t]+': ' '}, regex=True))

# simple, compact table with ellipsis in long cells
display(
    pdf.style
      .set_table_styles([
          {'selector': 'table', 'props': [('table-layout','fixed'), ('width','100%')]},
          {'selector': 'th, td', 'props': [
              ('max-width','280px'),
              ('white-space','nowrap'),
              ('overflow','hidden'),
              ('text-overflow','ellipsis')
          ]}
      ])
      .hide(axis='index')  # remove row numbers
)

id,url,title,price_text,price_value_mad,category,breadcrumbs,description,seller_name,seller_type,published_date_text,image_urls,equipments,attributes_map,ingest_ts
57153832,https://www.avito.ma/fr/sala_el_jadida/local/Magasin_√†_vendre_57153832.htm,Magasin √† vendre,,,"Local, √† vendre",Home Icon Accueil > Tout le Maroc > Sal√© > Sala El Jadida > Avito Immobilier > Local > Magasin √† vendre,"Salam, magasin √† vendre, si vous √™tes int√©ress√©, bienvenue, prix n√©gociable.",Hicham Alaoui,Particulier,2025-11-03 01:07:16,"['https://content.avito.ma/classifieds/images/10141390515?t=images', 'https://content.avito.ma/classifieds/images/10141390516?t=images']","['Bon √©tat', '13', '21+ ans', 'S√©curit√©']","{'Condition': 'Bon √©tat', 'Surface totale': '13', '√Çge du bien': '21+ ans'}",2025-11-03 00:30:30.003000
57066031,https://www.avito.ma/fr/autre_secteur/local/Local_commercial_3_garages_57066031.htm,Local commercial 3 garages,15 000 DH,15000.0,"Local, √† vendre",Home Icon Accueil > Tout le Maroc > Casablanca > Autre secteur > Avito Immobilier > Local > Local commercial 3 garages,"Soci√©t√© by home vous propose des magasines sur * bouskoura Victoria superficies 66m2 commerce et 12m2 voiture * Dar bouaaza errahma 41m2, RDC 21m2 suppent et 50m2 cave. dans une r√©sidence haut-standing. pour plus d'informations contactez nous.",By home,Particulier,2025-11-03 00:59:18,"['https://content.avito.ma/classifieds/images/10140466849?t=images', 'https://content.avito.ma/classifieds/images/10140466850?t=images', 'https://content.avito.ma/classifieds/images/10140466852?t=images', 'https://content.avito.ma/classifieds/images/10140466851?t=images']","['Neuf', 'Imm√©diate', '1', '66', 'Moins de 1 an', 'Chauffage', 'Climatisation', 'Parking', 'S√©curit√©']","{'Condition': 'Neuf', 'Disponibilit√©': 'Imm√©diate', 'Salle de bain': '1', 'Surface totale': '66', '√Çge du bien': 'Moins de 1 an'}",2025-11-03 00:30:30.003000
56708685,https://www.avito.ma/fr/alliance/appartements/√Ä_VENDRE_Bel_appartement_lumineux_2_fa√ßades_56708685.htm,√Ä VENDRE Bel appartement lumineux 2 fa√ßades,490 000 DH,490000.0,"Appartements, √† vendre",Home Icon Accueil > Tout le Maroc > K√©nitra > alliance > Avito Immobilier > Ventes Immobili√®res > Appartements > √Ä VENDRE Bel appartement lumineux 2 fa√ßades,"Bonjour, Je met en vente mon appartement presque neuf jamais habit√© situ√© au quartier Alliance Mehdia. Composition : 2 chambres, 1 salon, 2 salles de bain, 2 balcons. L‚Äôappartement est de deux fa√ßades oppos√©es offrant une excellente a√©ration et un ensoleillement optimal toute la journ√©e. Orientation Rabat. Prix raisonnable et l√©g√®rement n√©gociable. Les interm√©diaires √† s‚Äôabstenir svp.",Bouchra,Particulier,2025-11-03 00:58:20,"['https://content.avito.ma/classifieds/images/10137258041?t=images', 'https://content.avito.ma/classifieds/images/10137258042?t=images', 'https://content.avito.ma/classifieds/images/10137258040?t=images', 'https://content.avito.ma/classifieds/images/10137258043?t=images', 'https://content.avito.ma/classifieds/images/10137258038?t=images', 'https://content.avito.ma/classifieds/images/10137258045?t=images']","['2', '2', 'Neuf', 'Imm√©diate', '100', '1', 'Balcon']","{'Condition': 'Neuf', 'Disponibilit√©': 'Imm√©diate', 'Salle de bain': '2', 'Frais de syndic / mois': '100', 'Chambres': '2', 'Salons': '1'}",2025-11-03 00:30:30.003000
57133177,https://www.avito.ma/fr/autre_secteur/bureaux/Bureau_√†_louer_Bendibane_57133177.htm,Bureau √† louer Bendibane,5 500 DH,5500.0,"Bureaux, √† louer",Home Icon Accueil > Tout le Maroc > Tanger > Autre secteur > Avito Immobilier > Bureaux > Bureau √† louer Bendibane,"Quatre bureaux √† louer √† Ben Diban1 au 3eme √©tage, 1 au 4eme √©tage, 2 au 5eme √©tage sur la rue principale. Du Rambouan du cin√©ma Tarek jusqu‚Äô√† Ben D‚Äôiban emplacement strat√©gique sur la rue principale. Chaque bureau comprend 3 pi√®ces, salon d‚Äôaccueil, salle de bain, cuisine parmi les Activit√©s autoris√©es : M√©decin esth√©tique, dentiste, m√©decin g√©n√©raliste, notaire. Conditions de location : 1 mois de loyer, 1 mois de d√©p√¥t, charge de syndic, frais d‚Äôagence √©quivalant √† 1 mois de loyer, non n√©gociables. ÿ£ÿ±ÿ®ÿπ ŸÖŸÉÿßÿ™ÿ® ŸÑŸÑŸÉÿ±ÿßÿ° 1ŸÅÿßŸÑÿ´ÿßŸÑÿ´ 1ŸÅÿßŸÑÿ±ÿßÿ®ÿπ 2ŸÅÿßŸÑÿÆÿßŸÖÿ≥ ÿ®ŸÜÿØŸäÿ®ÿßŸÜ ÿπŸÑŸâ ÿßŸÑÿ¥ÿßÿ±ÿπ ÿßŸÑÿ±ÿ¶Ÿäÿ≥Ÿä ŸÖŸÜ ÿ±ŸÖÿ®ŸàÿßŸÜ ÿ≥ŸäŸÜŸÖÿß ÿ∑ÿßÿ±ŸÇ ÿ•ŸÑŸâ ÿ®ŸÜ ÿØŸäÿ®ÿßŸÜ ŸÖŸàŸÇÿπ ŸÖŸÖŸäÿ≤ ÿπŸÑŸâ ÿßŸÑÿ¥ÿßÿ±ÿπ ÿßŸÑÿ±ÿ¶Ÿäÿ≥Ÿä ŸÉŸÑ ŸÖŸÉÿ™ÿ® Ÿäÿ™ŸÉŸàŸÜ ŸÖŸÜ 3 ÿ∫ÿ±ŸÅ ÿµÿßŸÑŸàŸÜ ÿßÿ≥ÿ™ŸÇÿ®ÿßŸÑ ŸÇÿßÿπÿ© Ÿàÿ≠ŸÖÿßŸÖ ŸàŸÖÿ∑ÿ®ÿÆ ŸàŸÖŸÜ ÿ®ŸäŸÜ ÿßŸÑÿ£ŸÜÿ¥ÿ∑ÿ© ÿßŸÑŸÖÿ≥ŸÖŸàÿ≠ ÿ®Ÿáÿß ÿ∑ÿ®Ÿäÿ® ÿ™ÿ¨ŸÖŸäŸÑ ÿ∑ÿ®Ÿäÿ® ÿ£ÿ≥ŸÜÿßŸÜ ÿ∑ÿ®Ÿäÿ® ÿπÿßŸÖ ÿπÿØŸàŸÑ ÿ™ÿ≤ŸäŸäŸÜ ÿßŸÑÿπÿ±ÿßÿ¶ÿ≥ ÿ¥ÿ±Ÿàÿ∑ ÿßŸÑŸÉÿ±ÿßÿ° ÿ¥Ÿáÿ± ÿßŸÑŸÉÿ±ÿßÿ° ÿ¥Ÿáÿ± ÿ∂ŸÖÿßŸÜ Ÿàÿ™ŸÉŸÑŸÅÿ© ÿßŸÑÿ≥ŸÜÿØŸäŸÉ 200 ŸÖÿµÿßÿ±ŸäŸÅ ÿßŸÑŸàŸÉÿßŸÑÿ© ÿ™ÿπÿßÿØŸÑ ÿ¥Ÿáÿ± ÿßŸÑŸÉÿ±ÿßÿ°ÿåÿ∫Ÿäÿ± ŸÇÿßÿ®ŸÑÿ© ŸÑŸÑŸÜŸÇÿßÿ¥",AWRACH imobilier,Particulier,2025-11-02 12:15:03,"['https://content.avito.ma/classifieds/images/10141178118?t=images', 'https://content.avito.ma/classifieds/images/10141214219?t=images', 'https://content.avito.ma/classifieds/images/10141214220?t=images', 'https://content.avito.ma/classifieds/images/10141214221?t=images']","['1', '75', '200', '3', '4', 'Ascenseur']","{'Nombre de pi√®ces': '3', 'Salle de bain': '1', 'Frais de syndic / mois': '200', '√âtage': '4', 'Surface totale': '75'}",2025-11-03 01:15:15.002000
56787541,https://www.avito.ma/fr/el_alia/bureaux/Plateau_bureau_neuf_√†_louer_56787541.htm,Plateau bureau neuf √† louer,7 000 DH,7000.0,"Bureaux, √† louer",Home Icon Accueil > Tout le Maroc > Mohammedia > El Alia > Avito Immobilier > Bureaux > Plateau bureau neuf √† louer,"plateau bureau √† louer 76 m2 neuf jamais utilis√©e climatisation centralis√©e fa√ßade vitr√©e au 1er √©tage avec ascenseur garage sous sol immeuble professionnel neuf,",rachid,Particulier,2025-11-02 13:15:05,"['https://content.avito.ma/classifieds/images/10140925036?t=images', 'https://content.avito.ma/classifieds/images/10140924991?t=images', 'https://content.avito.ma/classifieds/images/10140925019?t=images', 'https://content.avito.ma/classifieds/images/10140925031?t=images', 'https://content.avito.ma/classifieds/images/10140925034?t=images']","['1', '76', '3', '1']","{'Nombre de pi√®ces': '3', 'Salle de bain': '1', '√âtage': '1', 'Surface totale': '76'}",2025-11-03 01:15:15.002000
57113050,https://www.avito.ma/fr/souissi/appartements/Sublime_appartement_√†_louer_√†_souissi_57113050.htm,Sublime appartement √† louer √† souissi,20 000 DH,20000.0,"Appartements, √† louer",Home Icon Accueil > Tout le Maroc > Rabat > Souissi > Avito Immobilier > Locations Immobili√®res > Appartements > Sublime appartement √† louer √† souissi,"Nous proposons √† la location un bel appartement situ√© au coeur de toutes les commodit√©s √† souissi Rabat. 3chambres sublimes avec deux Salles de bains et toilettes pour plus de d√©tails, n'h√©sitez pas √† nous contacter.",WIN HABITAT,Particulier,2025-11-02 13:15:07,"['https://content.avito.ma/classifieds/images/10140975957?t=images', 'https://content.avito.ma/classifieds/images/10140975959?t=images', 'https://content.avito.ma/classifieds/images/10140975955?t=images', 'https://content.avito.ma/classifieds/images/10140975954?t=images', 'https://content.avito.ma/classifieds/images/10140975956?t=images', 'https://content.avito.ma/classifieds/images/10140975958?t=images']","['3', '2', '1 mois', '500', '1', '150', 'Ascenseur', 'Balcon', 'Chauffage', 'Climatisation', 'Concierge', 'Cuisine √©quip√©e']","{'Salle de bain': '2', 'Frais de syndic / mois': '500', 'Surface habitable': '150', 'Chambres': '3', 'Caution': '1 mois', 'Salons': '1'}",2025-11-03 01:15:15.002000
57153902,https://www.avito.ma/fr/hamria/appartements/Appartement_√†_louer_110_m¬≤_√†_Mekn√®s_57153902.htm,Appartement √† louer 110 m¬≤ √† Mekn√®s,3 000 DH,3000.0,"Appartements, √† louer",Home Icon Accueil > Tout le Maroc > Mekn√®s > Hamria > Avito Immobilier > Locations Immobili√®res > Appartements > Appartement √† louer 110 m¬≤ √† Mekn√®s,"Appartement √† louer au centre ville se compose de deux chambre salon avec climatiseur s√©jour cuisine √©quip√©e avec balcon salle de bain dans un emplacement proche de toute commodit√©s ,",Aqarcom Meknes,Particulier,2025-11-03 01:59:09,"['https://content.avito.ma/classifieds/images/10141391487?t=images', 'https://content.avito.ma/classifieds/images/10141391488?t=images']","['2', '1', '1 mois', '1', '110', '3', 'Ascenseur', 'Balcon', 'Climatisation', 'Cuisine √©quip√©e', 'Parking', 'S√©curit√©']","{'Salle de bain': '1', '√âtage': '3', 'Surface habitable': '110', 'Chambres': '2', 'Caution': '1 mois', 'Salons': '1'}",2025-11-03 01:15:15.002000
57139537,https://www.avito.ma/fr/guich_oudaya/appartements/Appartement_√†_louer_57139537.htm,Appartement √† louer,6 500 DH,6500.0,"Appartements, √† louer",Home Icon Accueil > Tout le Maroc > Rabat > Guich Oudaya > Avito Immobilier > Locations Immobili√®res > Appartements > Appartement √† louer,Appartement √† louer,karim immo,Particulier,2025-11-03 01:57:11,"['https://content.avito.ma/classifieds/images/10141242792?t=images', 'https://content.avito.ma/classifieds/images/10141242793?t=images', 'https://content.avito.ma/classifieds/images/10141242795?t=images', 'https://content.avito.ma/classifieds/images/10141242796?t=images']","['2', '2', '70', '1 mois', '1', '100', 'Climatisation', 'Meubl√©']","{'Salle de bain': '2', 'Surface habitable': '100', 'Chambres': '2', 'Caution': '1 mois', 'Salons': '1', 'Surface totale': '70'}",2025-11-03 01:15:15.002000
57031791,https://www.avito.ma/fr/anfa/appartements/Studio_√†_louer_73_m¬≤_√†_bd_Anfa_Casablanca_57031791.htm,Studio √† louer 73 m¬≤ √† bd Anfa Casablanca,7 000 DH,7000.0,"Appartements, √† louer",Home Icon Accueil > Tout le Maroc > Casablanca > Anfa > Avito Immobilier > Locations Immobili√®res > Appartements > Studio √† louer 73 m¬≤ √† bd Anfa Casablanca,"Studio de 73m meubl√© avec ascenseur garage Situ√© 3e √©tage Compos√© d‚Äôune chambre, cuisine s√©par√©e, salon principal, un 2e s√©jour et salle de bain prix 7000 dirhams",MedZaim immobilier,Particulier,2025-11-03 01:56:13,"['https://content.avito.ma/classifieds/images/10140077685?t=images', 'https://content.avito.ma/classifieds/images/10140077686?t=images', 'https://content.avito.ma/classifieds/images/10140077687?t=images', 'https://content.avito.ma/classifieds/images/10140077688?t=images', 'https://content.avito.ma/classifieds/images/10140077689?t=images', 'https://content.avito.ma/classifieds/images/10140077692?t=images', 'https://content.avito.ma/classifieds/images/10140077694?t=images', 'https://content.avito.ma/classifieds/images/10140077693?t=images', 'https://content.avito.ma/classifieds/images/10140077695?t=images', 'https://content.avito.ma/classifieds/images/10140077696?t=images', 'https://content.avito.ma/classifieds/images/10140077697?t=images', 'https://content.avito.ma/classifieds/images/10140077698?t=images']","['1', '1', '2', '73', '3', 'Ascenseur', 'Climatisation', 'Concierge', 'Cuisine √©quip√©e', 'Meubl√©', 'Parking']","{'Salle de bain': '1', '√âtage': '3', 'Surface habitable': '73', 'Chambres': '1', 'Salons': '2'}",2025-11-03 01:15:15.002000
57153882,https://www.avito.ma/fr/mehdia/appartements/Appartement_√†_louer_53_m¬≤_√†_Mehdia_57153882.htm,Appartement √† louer 53 m¬≤ √† Mehdia,,,"Appartements, √† louer",Home Icon Accueil > Tout le Maroc > Mehdia > Toute la ville > Avito Immobilier > Locations Immobili√®res > Appartements > Appartement √† louer 53 m¬≤ √† Mehdia,"Joli appartement √† louer √† Mehdia plage, juste pour famille maximum 4 personnes ou couple mari√©.",Laila dihi,Particulier,2025-11-03 01:37:16,"['https://content.avito.ma/classifieds/images/10141391161?t=images', 'https://content.avito.ma/classifieds/images/10141391162?t=images', 'https://content.avito.ma/classifieds/images/10141391163?t=images']","['1', '0', '1', '53', 'Studio', '1', 'Ascenseur', 'Balcon', 'Chauffage', 'Climatisation', 'Concierge', 'Cuisine √©quip√©e']","{""Type d'appartement"": 'Studio', 'Salle de bain': '0', '√âtage': '1', 'Surface habitable': '53', 'Chambres': '1', 'Salons': '1'}",2025-11-03 00:45:30.003000


In [75]:
silver_df.printSchema()

root
 |-- id: string (nullable = true)
 |-- url: string (nullable = true)
 |-- title: string (nullable = true)
 |-- price_text: string (nullable = true)
 |-- price_value_mad: double (nullable = true)
 |-- category: string (nullable = true)
 |-- breadcrumbs: string (nullable = true)
 |-- description: string (nullable = true)
 |-- seller_name: string (nullable = true)
 |-- seller_type: string (nullable = true)
 |-- published_date_text: string (nullable = true)
 |-- image_urls: array (nullable = true)
 |    |-- element: string (containsNull = false)
 |-- equipments: array (nullable = true)
 |    |-- element: string (containsNull = false)
 |-- attributes_map: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- ingest_ts: timestamp (nullable = true)



In [76]:
# Drop the 'price_text' column
silver_df = silver_df.drop('price_text')

# Rename 'price_value_mad' to 'price'
silver_df = silver_df.withColumnRenamed('price_value_mad', 'price')

# Replace NULL 'price' values with 0.0
silver_df = silver_df.fillna({'price': 0.0})

silver_df.select('price').show(10, truncate=False)

+-------+
|price  |
+-------+
|7500.0 |
|12500.0|
|6000.0 |
|0.0    |
|14000.0|
|6300.0 |
|7300.0 |
|0.0    |
|2300.0 |
|7500.0 |
+-------+
only showing top 10 rows



In [77]:
from pyspark.sql import functions as F

# Transform 'category' into 'offre' and 'category_type'
silver_df = (
    silver_df
    # Create 'offre' column for 'rent' or 'sale'
    .withColumn(
        "offre",
        F.when(F.col("category").contains("√† louer"), "rent")
         .when(F.col("category").contains("√† vendre"), "sale")
         .otherwise(None)
    )
    # Create 'category_type' column with only the property type (e.g., Maisons, Appartements)
    .withColumn(
        "type",
        F.when(
            F.col("category").contains("√† louer") | F.col("category").contains("√† vendre"),
            F.split(F.col("category"), ",")[0]
        ).otherwise(None)
    )
    # Drop the original 'category' column
    .drop("category")
)

# Check the result
silver_df.select("offre", "type").show(30, truncate=False)


+-----+------------------+
|offre|type              |
+-----+------------------+
|sale |Terrains et fermes|
|sale |Villas et Riads   |
|sale |Terrains et fermes|
|sale |Local             |
|sale |Appartements      |
|sale |Maisons           |
|rent |Bureaux           |
|rent |Appartements      |
|rent |Appartements      |
|rent |Appartements      |
|rent |Bureaux           |
|sale |Local             |
|sale |Local             |
|sale |Appartements      |
|rent |Appartements      |
|rent |Appartements      |
|rent |Appartements      |
|rent |Bureaux           |
|rent |Appartements      |
|rent |Bureaux           |
|rent |Appartements      |
|rent |Local             |
|rent |Appartements      |
|rent |Appartements      |
|rent |Appartements      |
|rent |Appartements      |
|rent |Local             |
|rent |Appartements      |
|rent |Appartements      |
|rent |Bureaux           |
+-----+------------------+
only showing top 30 rows



In [78]:
from pyspark.sql import functions as F

# Split 'breadcrumbs' based on '>'
split_breadcrumbs = F.split(F.col("breadcrumbs"), " > ")

# Create new columns for each segment
silver_df = (
    silver_df
    .withColumn("city", split_breadcrumbs.getItem(2))  # e.g., "Casablanca"
    .withColumn("neighborhood", split_breadcrumbs.getItem(3))  # e.g., "Maarif"
    .withColumn("site", split_breadcrumbs.getItem(4))  # e.g., "Avito Immobilier"
    .drop("breadcrumbs")  # Drop the original column if not needed
)

# Check the result
silver_df.select(
     "city", "neighborhood", "site", 
).show(30, truncate=False)


+----------+--------------+----------------+
|city      |neighborhood  |site            |
+----------+--------------+----------------+
|Casablanca|Lissasfa      |Avito Immobilier|
|Rabat     |Hay Riad      |Avito Immobilier|
|Marrakech |Targa         |Avito Immobilier|
|Casablanca|Val Fleuri    |Avito Immobilier|
|Casablanca|Bourgogne     |Avito Immobilier|
|Casablanca|Florida       |Avito Immobilier|
|Agadir    |Hay Dakhla    |Avito Immobilier|
|Mohammedia|Autre secteur |Avito Immobilier|
|Tit Mellil|Toute la ville|Avito Immobilier|
|Tanger    |Autre secteur |Avito Immobilier|
|Mohammedia|El Alia       |Avito Immobilier|
|Rabat     |Souissi       |Avito Immobilier|
|Mekn√®s    |Hamria        |Avito Immobilier|
|Rabat     |Guich Oudaya  |Avito Immobilier|
|Casablanca|Anfa          |Avito Immobilier|
|Casablanca|C.I.L         |Avito Immobilier|
|Tanger    |De La Plage   |Avito Immobilier|
|Nouaceur  |Toute la ville|Avito Immobilier|
|Casablanca|Sidi Bernoussi|Avito Immobilier|
|Temara  

In [79]:
from pyspark.sql import functions as F

# Change all values in the 'site' column to 'avito'
silver_df = silver_df.withColumn('site', F.lit('avito'))

# Show the updated data
silver_df.select('site').show(10, truncate=False)

+-----+
|site |
+-----+
|avito|
|avito|
|avito|
|avito|
|avito|
|avito|
|avito|
|avito|
|avito|
|avito|
+-----+
only showing top 10 rows



In [89]:
# Group by 'seller_name' and 'seller_type', and count the occurrences
silver_df.groupBy('seller_type').count().orderBy('count', ascending=False).show(10, truncate=False)

+-----------+-----+
|seller_type|count|
+-----------+-----+
|Particulier|238  |
|Boutique   |1    |
|NULL       |1    |
+-----------+-----+



In [90]:
from pyspark.sql import functions as F

# Convert 'seller_type' to lowercase
silver_df = silver_df.withColumn("seller_type", F.lower(F.col("seller_type")))

# Show the distinct values of 'seller_type' with counts, including NULLs
silver_df.groupBy('seller_type').count().orderBy('count', ascending=False).show(30, truncate=False)

# Show rows where 'seller_type' is NULL
silver_df.filter(F.col('seller_type').isNull()).show(5, truncate=False)

+-----------+-----+
|seller_type|count|
+-----------+-----+
|particulier|238  |
|boutique   |1    |
|NULL       |1    |
+-----------+-----+

+--------+---------------------------------------------------------------------------------------------+-----+-----+-----------+-----------+-----------+----------+----------+-----------------------+-----+----+----+------------+-----+-----------------+-------+------+------------------+--------------+-----+-----------+-------------+----------------+--------+----------------------+---------+--------------+-------------+------+--------+--------------+
|id      |url                                                                                          |title|price|description|seller_name|seller_type|image_urls|equipments|ingest_ts              |offre|type|city|neighborhood|site |Surface habitable|Caution|Zoning|Type d'appartement|Surface totale|√âtage|√Çge du bien|Salle de bain|Nombre de pi√®ces|Chambres|Frais de syndic / mois|Condition|Nombre d'√©ta

In [91]:
# Drop rows where 'seller_type' is NULL
silver_df = silver_df.filter(F.col('seller_type').isNotNull())

# Show the updated result to confirm the rows are dropped
silver_df.select('seller_type').distinct().show(10, truncate=False)

+-----------+
|seller_type|
+-----------+
|particulier|
|boutique   |
+-----------+



In [94]:
# Group by 'seller_name' and 'seller_type', and count the occurrences
silver_df.groupBy('seller_name').count().orderBy('count', ascending=False).show(5, truncate=False)

+----------------------------+-----+
|seller_name                 |count|
+----------------------------+-----+
|MedZaim immobilier          |10   |
|AGENCE IMMOBILIER MOSTAGIMMO|10   |
|SAKAN LIK GROUPE            |8    |
|SAFIN Immobilier            |6    |
|ABAMNY Immobilier           |6    |
+----------------------------+-----+
only showing top 5 rows



In [83]:
from pyspark.sql.functions import to_timestamp

# Convert 'published_date_text' to timestamp and rename it to 'published_date'
silver_df = silver_df.withColumn(
    "published_date", 
    to_timestamp("published_date_text", "yyyy-MM-dd HH:mm:ss")  # Adjust format if needed
).drop("published_date_text")  # Drop the original 'published_date_text' column

# Show the result
silver_df.select("id", "published_date").show(5, truncate=False)

+--------+-------------------+
|id      |published_date     |
+--------+-------------------+
|57020711|2025-11-02 11:17:06|
|57055336|2025-11-02 18:17:08|
|57140550|2025-11-02 13:17:10|
|56620071|2025-11-03 00:58:12|
|56920617|2025-11-03 00:59:14|
+--------+-------------------+
only showing top 5 rows



In [80]:
from pyspark.sql.functions import col, udf
from pyspark.sql.types import ArrayType, StringType
import re

# Define a UDF to clean the 'equipments' list, handling NoneType
def clean_equipments(equipments):
    # Handle NoneType case
    if equipments is None:
        return []
    
    # Filter out unwanted entries (numbers and time-related strings)
    cleaned = [item for item in equipments if not re.match(r'^\d+$', str(item)) and not re.match(r'.*(mois|an).*', str(item))]
    return cleaned

# Register the UDF
clean_equipments_udf = udf(clean_equipments, ArrayType(StringType()))

# Apply the UDF to clean 'equipments' directly (in place)
silver_df = silver_df.withColumn("equipments", clean_equipments_udf(col("equipments")))

# Show the cleaned 'equipments' column
silver_df.select("equipments").show(5, truncate=False)

+--------------------------------------------------------------+
|equipments                                                    |
+--------------------------------------------------------------+
|[Bon √©tat, S√©curit√©]                                          |
|[Neuf, Imm√©diate, Chauffage, Climatisation, Parking, S√©curit√©]|
|[Neuf, Imm√©diate, Balcon]                                     |
|[Ascenseur]                                                   |
|[]                                                            |
+--------------------------------------------------------------+
only showing top 5 rows



                                                                                

In [81]:
from pyspark.sql import functions as F

# Extract the keys from 'attributes_map'
keys = silver_df.select(F.explode(F.map_keys(F.col("attributes_map"))).alias("attribute")).distinct().rdd.flatMap(lambda x: x).collect()

# For each key, create a new column with its corresponding value from 'attributes_map'
for key in keys:
    silver_df = silver_df.withColumn(
        key, 
        F.when(F.col("attributes_map").getItem(key).isNotNull(), 
               F.col("attributes_map").getItem(key)).otherwise(None)
    )

# Drop the 'attributes_map' column after extracting the keys and values
silver_df = silver_df.drop("attributes_map")

# Show the result with the new columns and drop the 'attributes_map' column
silver_df.select("id", *keys).show(5, truncate=False)

+--------+-----------------+-------+------+------------------+--------------+-----+-----------+-------------+----------------+--------+----------------------+---------+--------------+-------------+------+----------+
|id      |Surface habitable|Caution|Zoning|Type d'appartement|Surface totale|√âtage|√Çge du bien|Salle de bain|Nombre de pi√®ces|Chambres|Frais de syndic / mois|Condition|Nombre d'√©tage|Disponibilit√©|Salons|Standing  |
+--------+-----------------+-------+------+------------------+--------------+-----+-----------+-------------+----------------+--------+----------------------+---------+--------------+-------------+------+----------+
|56941776|NULL             |NULL   |NULL  |NULL              |290           |NULL |NULL       |4            |NULL            |4       |NULL                  |Bon √©tat |4             |Imm√©diate    |NULL  |NULL      |
|57106819|NULL             |NULL   |NULL  |NULL              |NULL          |NULL |NULL       |NULL         |NULL            |NUL

In [95]:
import pandas as pd
from IPython.display import display

# Keep things compact
pd.set_option("display.max_columns", 20)   # don't try to show hundreds
pd.set_option("display.max_colwidth", 80)  # clamp long cells to ~80 chars

# pick a small sample and flatten newlines so rows stay short
pdf = (silver_df.limit(10)
       .toPandas()
       .replace({r'[\r\n\t]+': ' '}, regex=True))

# simple, compact table with ellipsis in long cells
display(
    pdf.style
      .set_table_styles([
          {'selector': 'table', 'props': [('table-layout','fixed'), ('width','100%')]},
          {'selector': 'th, td', 'props': [
              ('max-width','280px'),
              ('white-space','nowrap'),
              ('overflow','hidden'),
              ('text-overflow','ellipsis')
          ]}
      ])
      .hide(axis='index')  # remove row numbers
)

id,url,title,price,description,seller_name,seller_type,image_urls,equipments,ingest_ts,offre,type,city,neighborhood,site,Surface habitable,Caution,Zoning,Type d'appartement,Surface totale,√âtage,√Çge du bien,Salle de bain,Nombre de pi√®ces,Chambres,Frais de syndic / mois,Condition,Nombre d'√©tage,Disponibilit√©,Salons,Standing,published_date
57020711,https://www.avito.ma/fr/lissasfa/bureaux/Bureau_√†_louer_d_une_superficie_141_m¬≤_57020711.htm,Bureau √† louer d'une superficie 141 m¬≤,12690.0,"FBC met √† votre disposition un plateau bureau 311 √† louer de 141 m¬≤, situ√© dans un espace professionnel, s√©curis√©, calme et propre. Situ√© au 3√®me √©tage, bien √©quip√© : climatisation, 2 ascenseurs, cloisons, chauffage, parking et internet haut d√©bit (fibre optique), kitchenette, acc√®s direct √† deux autoroutes, celle de rond-point Azbane et la Rocade sud-ouest. Pour plus d'information, n'h√©sitez pas √† nous contacter.",FACILITIES BUSINESS CENTER,particulier,"['https://content.avito.ma/classifieds/images/10139961083?t=images', 'https://content.avito.ma/classifieds/images/10139961084?t=images', 'https://content.avito.ma/classifieds/images/10139961070?t=images', 'https://content.avito.ma/classifieds/images/10139961074?t=images', 'https://content.avito.ma/classifieds/images/10139961073?t=images', 'https://content.avito.ma/classifieds/images/10139961069?t=images', 'https://content.avito.ma/classifieds/images/10139961068?t=images', 'https://content.avito.ma/classifieds/images/10139961072?t=images', 'https://content.avito.ma/classifieds/images/10139961076?t=images', 'https://content.avito.ma/classifieds/images/10139961077?t=images', 'https://content.avito.ma/classifieds/images/10139961078?t=images', 'https://content.avito.ma/classifieds/images/10139961082?t=images']","['Ascenseur', 'Chauffage', 'Climatisation', 'C√¢blage t√©l√©phonique', 'Parking', 'S√©curit√©']",2025-11-03 00:17:15.003000,rent,Bureaux,Casablanca,Lissasfa,avito,,,,,141.0,3.0,,0.0,3.0,,,,,,,,2025-11-02 11:17:06
57055336,https://www.avito.ma/fr/hay_riad/appartements/Appartement_en_location_√†_Hay_Riad_Rabat_57055336.htm,Appartement en location √† Hay Riad Rabat,0.0,"Bel appartement de 195 m¬≤ en location au 1 er √©tage situ√© √† Hay Riad Rabat Compos√© de deux salons avec chemin√©e s√©jour cuisine salle d'inviter deux chambres avec salle de bain suite parental avec salle de bain et drissing prix de location 16000dhs / mois pour plus d'informations veuillez contacter nous ,",Immosaadaoui,particulier,"['https://content.avito.ma/classifieds/images/10140311388?t=images', 'https://content.avito.ma/classifieds/images/10140311398?t=images', 'https://content.avito.ma/classifieds/images/10140311399?t=images', 'https://content.avito.ma/classifieds/images/10140311404?t=images', 'https://content.avito.ma/classifieds/images/10140311389?t=images', 'https://content.avito.ma/classifieds/images/10140311393?t=images', 'https://content.avito.ma/classifieds/images/10140311387?t=images']",[],2025-11-03 00:17:15.003000,rent,Appartements,Rabat,Hay Riad,avito,195.0,,,,,1.0,,,,4.0,,,,,,,2025-11-02 18:17:08
57140550,https://www.avito.ma/fr/targa/bureaux/Bureaux_priv√©s_Domiciliation_√†_Targa__Marrakech_57140550.htm,"Bureaux priv√©s-Domiciliation √† Targa, Marrakech",1200.0,üè¢ Bureaux priv√©s & domiciliation √† Marrakech Besoin d‚Äôun espace professionnel cl√© en main √† Marrakech ? Excellentia Business Center vous propose : ‚úÖ Bureaux priv√©s √©quip√©s et climatis√©s ‚Äì pr√™ts √† l‚Äôemploi ‚úÖ Internet fibre optique haut d√©bit inclus ‚úÖ Service de domiciliation 100 % en ligne ‚Äì obtenez votre adresse professionnelle sans vous d√©placer ‚úÖ R√©ception et gestion de votre courrier Offrez √† votre entreprise une image professionnelle et un environnement de travail moderne au c≈ìur de Marrakech. üìû Contactez-nous d√®s aujourd‚Äôhui et profitez d‚Äôune solution compl√®te pour votre bureau et domiciliation.,Excellentia Business Center,particulier,"['https://content.avito.ma/classifieds/images/10141255291?t=images', 'https://content.avito.ma/classifieds/images/10141255293?t=images', 'https://content.avito.ma/classifieds/images/10141255292?t=images', 'https://content.avito.ma/classifieds/images/10141255294?t=images', 'https://content.avito.ma/classifieds/images/10141255301?t=images', 'https://content.avito.ma/classifieds/images/10141255302?t=images']","['Ascenseur', 'Climatisation', 'C√¢blage t√©l√©phonique', 'Parking']",2025-11-03 00:17:15.003000,rent,Bureaux,Marrakech,Targa,avito,,,,,7.0,0.0,,1.0,5.0,,,,,,,,2025-11-02 13:17:10
56620071,https://www.avito.ma/fr/val_fleuri/appartements/Studio_meubl√©_Val_Fleuri_TRAMWAY_56620071.htm,Studio meubl√© Val Fleuri TRAMWAY,6300.0,"Studio bien meubl√© √† louer, √† Val Fleuri, 45m¬≤, ETG1, 1 Salon balcon, 1 Cuisine √©quip√©e, 1 Chambre √† coucher,1 SDB douche, ascenseur parking concierge Loyer 6300 Dh par mois",ABAMNY Immobilier,particulier,"['https://content.avito.ma/classifieds/images/10141320194?t=images', 'https://content.avito.ma/classifieds/images/10141320200?t=images', 'https://content.avito.ma/classifieds/images/10141320202?t=images', 'https://content.avito.ma/classifieds/images/10141320211?t=images', 'https://content.avito.ma/classifieds/images/10141320220?t=images', 'https://content.avito.ma/classifieds/images/10141320224?t=images', 'https://content.avito.ma/classifieds/images/10141320228?t=images', 'https://content.avito.ma/classifieds/images/10141320229?t=images', 'https://content.avito.ma/classifieds/images/10141320230?t=images']","['Ascenseur', 'Balcon', 'Climatisation', 'Concierge', 'Cuisine √©quip√©e', 'Meubl√©']",2025-11-03 00:17:15.003000,rent,Appartements,Casablanca,Val Fleuri,avito,45.0,1 mois,,,45.0,,,1.0,,1.0,,,,,1.0,,2025-11-03 00:58:12
56920617,https://www.avito.ma/fr/bourgogne/local/Magasin_√†_louer_plein_centre_ville_56920617.htm,Magasin √† louer plein centre ville,10000.0,Local √† louer totalement am√©nag√© disponible en place centre-ville. Si vous √™tes vraiment int√©ress√©. Appelle-moi sur le num√©ro Cafe sur l‚Äôannonce. Merci,New Adresse SARL,particulier,"['https://content.avito.ma/classifieds/images/10141252952?t=images', 'https://content.avito.ma/classifieds/images/10141252967?t=images', 'https://content.avito.ma/classifieds/images/10141252968?t=images', 'https://content.avito.ma/classifieds/images/10141252971?t=images']",[],2025-11-03 00:17:15.003000,rent,Local,Casablanca,Bourgogne,avito,,,,,100.0,,,1.0,,,,,,,,,2025-11-03 00:59:14
57133177,https://www.avito.ma/fr/autre_secteur/bureaux/Bureau_√†_louer_Bendibane_57133177.htm,Bureau √† louer Bendibane,5500.0,"Quatre bureaux √† louer √† Ben Diban1 au 3eme √©tage, 1 au 4eme √©tage, 2 au 5eme √©tage sur la rue principale. Du Rambouan du cin√©ma Tarek jusqu‚Äô√† Ben D‚Äôiban emplacement strat√©gique sur la rue principale. Chaque bureau comprend 3 pi√®ces, salon d‚Äôaccueil, salle de bain, cuisine parmi les Activit√©s autoris√©es : M√©decin esth√©tique, dentiste, m√©decin g√©n√©raliste, notaire. Conditions de location : 1 mois de loyer, 1 mois de d√©p√¥t, charge de syndic, frais d‚Äôagence √©quivalant √† 1 mois de loyer, non n√©gociables. ÿ£ÿ±ÿ®ÿπ ŸÖŸÉÿßÿ™ÿ® ŸÑŸÑŸÉÿ±ÿßÿ° 1ŸÅÿßŸÑÿ´ÿßŸÑÿ´ 1ŸÅÿßŸÑÿ±ÿßÿ®ÿπ 2ŸÅÿßŸÑÿÆÿßŸÖÿ≥ ÿ®ŸÜÿØŸäÿ®ÿßŸÜ ÿπŸÑŸâ ÿßŸÑÿ¥ÿßÿ±ÿπ ÿßŸÑÿ±ÿ¶Ÿäÿ≥Ÿä ŸÖŸÜ ÿ±ŸÖÿ®ŸàÿßŸÜ ÿ≥ŸäŸÜŸÖÿß ÿ∑ÿßÿ±ŸÇ ÿ•ŸÑŸâ ÿ®ŸÜ ÿØŸäÿ®ÿßŸÜ ŸÖŸàŸÇÿπ ŸÖŸÖŸäÿ≤ ÿπŸÑŸâ ÿßŸÑÿ¥ÿßÿ±ÿπ ÿßŸÑÿ±ÿ¶Ÿäÿ≥Ÿä ŸÉŸÑ ŸÖŸÉÿ™ÿ® Ÿäÿ™ŸÉŸàŸÜ ŸÖŸÜ 3 ÿ∫ÿ±ŸÅ ÿµÿßŸÑŸàŸÜ ÿßÿ≥ÿ™ŸÇÿ®ÿßŸÑ ŸÇÿßÿπÿ© Ÿàÿ≠ŸÖÿßŸÖ ŸàŸÖÿ∑ÿ®ÿÆ ŸàŸÖŸÜ ÿ®ŸäŸÜ ÿßŸÑÿ£ŸÜÿ¥ÿ∑ÿ© ÿßŸÑŸÖÿ≥ŸÖŸàÿ≠ ÿ®Ÿáÿß ÿ∑ÿ®Ÿäÿ® ÿ™ÿ¨ŸÖŸäŸÑ ÿ∑ÿ®Ÿäÿ® ÿ£ÿ≥ŸÜÿßŸÜ ÿ∑ÿ®Ÿäÿ® ÿπÿßŸÖ ÿπÿØŸàŸÑ ÿ™ÿ≤ŸäŸäŸÜ ÿßŸÑÿπÿ±ÿßÿ¶ÿ≥ ÿ¥ÿ±Ÿàÿ∑ ÿßŸÑŸÉÿ±ÿßÿ° ÿ¥Ÿáÿ± ÿßŸÑŸÉÿ±ÿßÿ° ÿ¥Ÿáÿ± ÿ∂ŸÖÿßŸÜ Ÿàÿ™ŸÉŸÑŸÅÿ© ÿßŸÑÿ≥ŸÜÿØŸäŸÉ 200 ŸÖÿµÿßÿ±ŸäŸÅ ÿßŸÑŸàŸÉÿßŸÑÿ© ÿ™ÿπÿßÿØŸÑ ÿ¥Ÿáÿ± ÿßŸÑŸÉÿ±ÿßÿ°ÿåÿ∫Ÿäÿ± ŸÇÿßÿ®ŸÑÿ© ŸÑŸÑŸÜŸÇÿßÿ¥",AWRACH imobilier,particulier,"['https://content.avito.ma/classifieds/images/10141178118?t=images', 'https://content.avito.ma/classifieds/images/10141214219?t=images', 'https://content.avito.ma/classifieds/images/10141214220?t=images', 'https://content.avito.ma/classifieds/images/10141214221?t=images']",['Ascenseur'],2025-11-03 01:15:15.002000,rent,Bureaux,Tanger,Autre secteur,avito,,,,,75.0,4.0,,1.0,3.0,,200.0,,,,,,2025-11-02 12:15:03
56787541,https://www.avito.ma/fr/el_alia/bureaux/Plateau_bureau_neuf_√†_louer_56787541.htm,Plateau bureau neuf √† louer,7000.0,"plateau bureau √† louer 76 m2 neuf jamais utilis√©e climatisation centralis√©e fa√ßade vitr√©e au 1er √©tage avec ascenseur garage sous sol immeuble professionnel neuf,",rachid,particulier,"['https://content.avito.ma/classifieds/images/10140925036?t=images', 'https://content.avito.ma/classifieds/images/10140924991?t=images', 'https://content.avito.ma/classifieds/images/10140925019?t=images', 'https://content.avito.ma/classifieds/images/10140925031?t=images', 'https://content.avito.ma/classifieds/images/10140925034?t=images']",[],2025-11-03 01:15:15.002000,rent,Bureaux,Mohammedia,El Alia,avito,,,,,76.0,1.0,,1.0,3.0,,,,,,,,2025-11-02 13:15:05
57113050,https://www.avito.ma/fr/souissi/appartements/Sublime_appartement_√†_louer_√†_souissi_57113050.htm,Sublime appartement √† louer √† souissi,20000.0,"Nous proposons √† la location un bel appartement situ√© au coeur de toutes les commodit√©s √† souissi Rabat. 3chambres sublimes avec deux Salles de bains et toilettes pour plus de d√©tails, n'h√©sitez pas √† nous contacter.",WIN HABITAT,particulier,"['https://content.avito.ma/classifieds/images/10140975957?t=images', 'https://content.avito.ma/classifieds/images/10140975959?t=images', 'https://content.avito.ma/classifieds/images/10140975955?t=images', 'https://content.avito.ma/classifieds/images/10140975954?t=images', 'https://content.avito.ma/classifieds/images/10140975956?t=images', 'https://content.avito.ma/classifieds/images/10140975958?t=images']","['Ascenseur', 'Balcon', 'Chauffage', 'Climatisation', 'Concierge', 'Cuisine √©quip√©e']",2025-11-03 01:15:15.002000,rent,Appartements,Rabat,Souissi,avito,150.0,1 mois,,,,,,2.0,,3.0,500.0,,,,1.0,,2025-11-02 13:15:07
57153902,https://www.avito.ma/fr/hamria/appartements/Appartement_√†_louer_110_m¬≤_√†_Mekn√®s_57153902.htm,Appartement √† louer 110 m¬≤ √† Mekn√®s,3000.0,"Appartement √† louer au centre ville se compose de deux chambre salon avec climatiseur s√©jour cuisine √©quip√©e avec balcon salle de bain dans un emplacement proche de toute commodit√©s ,",Aqarcom Meknes,particulier,"['https://content.avito.ma/classifieds/images/10141391487?t=images', 'https://content.avito.ma/classifieds/images/10141391488?t=images']","['Ascenseur', 'Balcon', 'Climatisation', 'Cuisine √©quip√©e', 'Parking', 'S√©curit√©']",2025-11-03 01:15:15.002000,rent,Appartements,Mekn√®s,Hamria,avito,110.0,1 mois,,,,3.0,,1.0,,2.0,,,,,1.0,,2025-11-03 01:59:09
57139537,https://www.avito.ma/fr/guich_oudaya/appartements/Appartement_√†_louer_57139537.htm,Appartement √† louer,6500.0,Appartement √† louer,karim immo,particulier,"['https://content.avito.ma/classifieds/images/10141242792?t=images', 'https://content.avito.ma/classifieds/images/10141242793?t=images', 'https://content.avito.ma/classifieds/images/10141242795?t=images', 'https://content.avito.ma/classifieds/images/10141242796?t=images']","['Climatisation', 'Meubl√©']",2025-11-03 01:15:15.002000,rent,Appartements,Rabat,Guich Oudaya,avito,100.0,1 mois,,,70.0,,,2.0,,2.0,,,,,1.0,,2025-11-03 01:57:11


In [85]:
silver_df.printSchema()

root
 |-- id: string (nullable = true)
 |-- url: string (nullable = true)
 |-- title: string (nullable = true)
 |-- price: double (nullable = false)
 |-- description: string (nullable = true)
 |-- seller_name: string (nullable = true)
 |-- seller_type: string (nullable = true)
 |-- image_urls: array (nullable = true)
 |    |-- element: string (containsNull = false)
 |-- equipments: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- ingest_ts: timestamp (nullable = true)
 |-- offre: string (nullable = true)
 |-- type: string (nullable = true)
 |-- city: string (nullable = true)
 |-- neighborhood: string (nullable = true)
 |-- site: string (nullable = false)
 |-- Surface habitable: string (nullable = true)
 |-- Caution: string (nullable = true)
 |-- Zoning: string (nullable = true)
 |-- Type d'appartement: string (nullable = true)
 |-- Surface totale: string (nullable = true)
 |-- √âtage: string (nullable = true)
 |-- √Çge du bien: string (nullable = true)
 |-- 