# **Does Dior’s pricing and lifecycle strategy correlate with secondary-market appreciation?**

## Install Libraries

In [None]:
# Install the required libraries (Playwright also needs its browsers and OS packages)
!pip install playwright beautifulsoup4 pandas
!playwright install
!playwright install-deps

Installing dependencies...
Hit:1 https://cli.github.com/packages stable InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:10 http://security.ubuntu.com/ubuntu jammy-security/restricted amd64 Packages [6,468 kB]
Get:11 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,298 kB]
Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [3,708 kB]
Fetched 11.6 MB in 2s (4,674 kB/s)
Reading package lists... Done
W: Skipping acquire of configured file

In [None]:
from google.colab import auth
from google.cloud import bigquery
import pandas as p
import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Authenticate the current user through google.colab.auth, then confirm on stdout.
auth.authenticate_user()
print("Authenticated.")

AuthorizationError: Failed to fetch user credentials

In [None]:
# Destination of the scraped output, one constant per path component
PROJECT_ID = "asli-api"
DATASET_ID = "data_management_projet"
TABLE_ID = "dior_data"   # change if you want

# BigQuery client bound to the project above
client = bigquery.Client(project=PROJECT_ID)

# Fully-qualified table name in the form project.dataset.table
FULL_TABLE = ".".join((PROJECT_ID, DATASET_ID, TABLE_ID))
FULL_TABLE

In [None]:
from google.cloud import bigquery
import pandas as pd
import os

# Project the client is bound to
project_id = 'asli-api'

# Passing the project explicitly avoids the empty "projects//jobs" path error
client = bigquery.Client(project=project_id)

# Most frequently scraped (url, title) pairs whose content is longer than 100 characters
query = """
    SELECT
        url,
        title,
        COUNT(1) as num_occurrences,
        MAX(scrape_date) as last_scraped
    FROM
        `asli-api.data_management_projet.dior_data`
    WHERE
        LENGTH(content) > 100
    GROUP BY
        url, title
    ORDER BY
        num_occurrences DESC
    LIMIT 50;
"""

try:
    print(f"Running query on project: {project_id}...")
    query_job = client.query(query)
    df = query_job.to_dataframe()
    print("Query complete!")
    # display() renders the frame as a rich table in Jupyter
    display(df)
except Exception as e:
    # Any failure is reported on stdout and the cell carries on
    print(f"An error occurred: {e}")


Pull the data from the BigQuery tables provided to us.

In [None]:
import pandas as pd
from google.cloud import bigquery
import os

# 1. Setup Authentication
# Path to a service-account JSON key file; replace the placeholder with a real path.
SERVICE_ACCOUNT_KEY_PATH = "path/to/your/key.json"

# 2. Initialize the BigQuery Client
# Only point the Google client libraries at the key file when it actually exists.
# Exporting the placeholder path unconditionally makes credential discovery fail
# even when the session is already authenticated by other means.
if os.path.isfile(SERVICE_ACCOUNT_KEY_PATH):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SERVICE_ACCOUNT_KEY_PATH
    client = bigquery.Client()
else:
    # Fall back to the ambient credentials; the project is passed explicitly
    # because a client without a project fails with the "projects//jobs" error.
    client = bigquery.Client(project=PROJECT_ID)

# 3. Your SQL Query
# Joins sales, brand, price-monitoring and country reference tables, keeping Dior rows only.
# NOTE(review): the geo join is on currency code, so a currency shared by several
# countries presumably multiplies rows — confirm this is intended.
# NOTE(review): LIMIT 10 returns only a small sample — confirm before using for analysis.
query = """
SELECT
  sales.* EXCEPT (country, currency),
  brand_info.brand_name,
  price_monitor.* EXCEPT (country),
  geo.country_name
FROM `edhec-business-manageme.luxurydata2502.ficticious-sales` AS sales
INNER JOIN `edhec-business-manageme.luxurydata2502.brand_id` AS brand_info
  ON sales.brand_id = brand_info.brand_id
INNER JOIN `edhec-business-manageme.luxurydata2502.price-monitoring-2022` AS price_monitor
  ON sales.reference_code = price_monitor.reference_code
INNER JOIN `gdelt-bq.extra.countryinfo2` AS geo
  ON price_monitor.currency = geo.currency_code
WHERE brand_info.brand_name = "Dior" AND price_monitor.brand = "Dior"
LIMIT 10
"""

print("Fetching data from BigQuery...")

# 4. Load results into a Pandas DataFrame
df_prof = client.query(query).to_dataframe()



In [None]:
# 5. Export to CSV
# Export the sales frame loaded in step 4 (df_prof). The previous code wrote `df`,
# which holds the result of the unrelated dior_data query from an earlier cell.
output_file = "dior_sales_export.csv"
df_prof.to_csv(output_file, index=False)

print(f"Success! Data exported to {output_file}")
display(df_prof.head())  # Preview the first few rows

## Dior Scraping




In [None]:
from datetime import datetime
import asyncio
import pandas as pd
from bs4 import BeautifulSoup

def _restore_dior_url(url, target_url):
    """Swap a *.translate.goog proxy host back to the origin of ``target_url``."""
    from urllib.parse import urlsplit, urlunsplit

    parts = urlsplit(url)
    if not parts.netloc.endswith(".translate.goog"):
        return url
    origin = urlsplit(target_url)
    return urlunsplit((origin.scheme, origin.netloc, parts.path, parts.query, parts.fragment))


def _parse_dior_product_cards(content, category_name, target_url):
    """Build one record per titled product card found in the rendered category HTML."""
    soup = BeautifulSoup(content, 'html.parser')
    scrape_date = datetime.now().strftime("%Y-%m-%d")
    products = []

    for item in soup.select('div[data-testid^="product-card-"]'):
        name_el = item.select_one('[data-testid="product-title"]')
        product_name = name_el.get_text(separator=" ", strip=True) if name_el else "N/A"
        # Cards without a title are not recorded.
        if product_name == "N/A":
            continue

        testid = item.get('data-testid', '')
        retail_product_id = testid.replace('product-card-', '') if testid else "N/A"

        price_el = item.select_one('[data-testid="price-line"]')
        retail_price = price_el.get_text(separator=" ", strip=True) if price_el else "N/A"

        img_el = item.select_one('img.main-asset')
        image_url = img_el.get('src') if img_el else "N/A"

        # A link element without an href used to crash on None.split(); treat it as missing.
        link_el = item.select_one('a.product-card__link')
        href = link_el.get('href') if link_el else None
        if href:
            # Drop the query string, then undo the proxy host rewrite so the stored
            # URL points at the retailer's site rather than at the translation proxy.
            product_url = _restore_dior_url(href.split('?')[0], target_url)
        else:
            product_url = "N/A"

        # The page is requested with tl=fr, so the out-of-stock marker is searched in French.
        full_text = item.get_text().lower()
        availability = "Unavailable" if "indisponible" in full_text else "In Stock"

        products.append({
            "retail_product_id": retail_product_id,
            "product_name": product_name,
            "category": category_name,
            "retail_price": retail_price,
            "currency": "EUR",
            "product_url": product_url,
            "image_url": image_url,
            "availability": availability,
            "scrape_date": scrape_date
        })

    return products


async def scrape_dior_category(target_url, category_name="General"):
    """
    Scrape one Dior category page, loaded through the Google Translate proxy.

    Args:
        target_url: URL of the category page to scrape.
        category_name: Label stored in the "category" field of every record.

    Returns:
        A list of dicts, one per product card that has a title. The list is
        empty when the page could not be loaded or contains no product cards.
        Navigation errors are printed, not raised.
    """
    from urllib.parse import quote

    # Percent-encode the target so any '?', '&' or '#' it contains stays part of
    # the `u` parameter instead of being parsed as part of the proxy URL itself.
    bypass_url = (
        "https://translate.google.com/translate?sl=auto&tl=fr&u="
        + quote(target_url, safe="")
    )

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
        )
        page = await context.new_page()

        print(f"[Dior] Scraping category '{category_name}' via Proxy...")

        try:
            await page.goto(bypass_url, wait_until="domcontentloaded", timeout=60000)
            await asyncio.sleep(5) # Give translation time to settle

            # Scroll to load dynamic content
            for _ in range(5):
                await page.mouse.wheel(0, 2000)
                await asyncio.sleep(2)

            content = await page.content()
        except Exception as e:
            print(f"[Error] Failed to scrape {category_name}: {e}")
            content = ""
        finally:
            await browser.close()

    if not content:
        return []

    return _parse_dior_product_cards(content, category_name, target_url)

async def scrape_all_dior_categories(categories_dict):
    """
    Scrape several Dior category pages one after another.

    Args:
        categories_dict: Either a mapping of category label -> URL, or an
            iterable of (label, URL) pairs. Pairs allow the same label to be
            used for several URLs, which a dict cannot express because a
            repeated key keeps only its last value.

    Returns:
        A single list with the records of every category, in scrape order.
    """
    if hasattr(categories_dict, "items"):
        label_url_pairs = categories_dict.items()
    else:
        label_url_pairs = categories_dict

    all_results = []
    for cat_name, url in label_url_pairs:
        data = await scrape_dior_category(url, cat_name)
        all_results.extend(data)
        print(f"[Done] Collected {len(data)} items from {cat_name}.")
    return all_results

# --- CATEGORY CONFIGURATION ---
# One dict per department. A single dict literal cannot hold "Bags" or
# "Ready-to-Wear" twice: Python keeps only the last value of a repeated key,
# so the men's URLs were silently dropped and never scraped.
categories_to_scrape_men = {
    "Bags": "https://www.dior.com/fr_fr/fashion/mode-homme/sacs/tous-les-sacs",
    "Ready-to-Wear": "https://www.dior.com/fr_fr/fashion/mode-homme/pret-a-porter/tout-le-pret-a-porter",
    "TShirts-Polos": "https://www.dior.com/fr_fr/fashion/mode-homme/pret-a-porter/polos-t-shirts",
}
categories_to_scrape_women = {
    "Bags": "https://www.dior.com/fr_fr/fashion/mode-femme/sacs/tous-les-sacs",
    "Ready-to-Wear": "https://www.dior.com/fr_fr/fashion/mode-femme/pret-a-porter/tout-le-pret-a-porter",
}
categories_to_scrape = [categories_to_scrape_men, categories_to_scrape_women]

import nest_asyncio
nest_asyncio.apply()

# Scrape each department in turn and pool the records into one list.
all_data = []
for category_batch in categories_to_scrape:
    all_data.extend(asyncio.run(scrape_all_dior_categories(category_batch)))

df_dior = pd.DataFrame(all_data)
print(f"TOTAL: {len(df_dior)} produits extraits avec succès !")
df_dior.head()

[Dior] Scraping category 'Bags' via Proxy...
[Done] Collected 46 items from Bags.
[Dior] Scraping category 'Ready-to-Wear' via Proxy...
[Done] Collected 44 items from Ready-to-Wear.
[Dior] Scraping category 'TShirts-Polos' via Proxy...
[Done] Collected 0 items from TShirts-Polos.
TOTAL: 90 produits extraits avec succès !


Unnamed: 0,retail_product_id,product_name,category,retail_price,currency,product_url,image_url,availability,scrape_date
0,M0714OUQO_M900_TU,Sac Dior Bow Medium,Bags,"4 100,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
1,M0715OUQO_M900_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
2,M0715PUQO_M20N_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
3,M0715PUQO_M030_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
4,M0715OUQO_M36Z_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11


In [None]:
# One dict per department: repeating "Shoes" / "Shirts" inside a single dict
# literal keeps only the last URL of each key, so the men's pages were skipped.
categories_retries = [
    {
        "Shoes": "https://www.dior.com/fr_fr/fashion/mode-homme/chaussures/toutes-les-chaussures",
        "Shirts": "https://www.dior.com/fr_fr/fashion/mode-homme/pret-a-porter/chemises",
    },
    {
        "Shirts": "https://www.dior.com/fr_fr/fashion/mode-femme/pret-a-porter/chemises",
        "Shoes": "https://www.dior.com/fr_fr/fashion/mode-femme/souliers/tous-les-souliers",
    },
]

print("Relance des catégories ciblées...")
retry_data = []
for category_batch in categories_retries:
    retry_data.extend(asyncio.run(scrape_all_dior_categories(category_batch)))
df_retry = pd.DataFrame(retry_data)

if not df_retry.empty:
    # Merge into the main DataFrame (df_dior) and drop duplicated product ids.
    # In a notebook cell locals() is globals(), so one lookup is enough.
    if 'df_dior' in globals():
        df_dior = pd.concat([df_dior, df_retry], ignore_index=True)
        df_dior = df_dior.drop_duplicates(subset=['retail_product_id'])
        print(f"Succès ! {len(df_retry)} produits ajoutés. Total actuel: {len(df_dior)} produits.")
    else:
        df_dior = df_retry
        print(f"Nouveau DataFrame créé avec {len(df_dior)} produits.")
else:
    print("Aucun produit trouvé lors de la tentative de relance.")

df_dior.tail()

Relance des catégories ciblées...
[Dior] Scraping category 'Shoes' via Proxy...
[Done] Collected 45 items from Shoes.
[Dior] Scraping category 'Shirts' via Proxy...
[Done] Collected 0 items from Shirts.
Succès ! 45 produits ajoutés. Total actuel: 135 produits.


Unnamed: 0,retail_product_id,product_name,category,retail_price,currency,product_url,image_url,availability,scrape_date
130,KCO523LMA_S59K,Mule 30M Dior Or,Shoes,"950,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
131,KCO523LMA_S49K,Mule 30M Dior Or,Shoes,"950,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
132,KCO522LMA_S49K,Mule à talon 30M Dior Or,Shoes,"990,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
133,KCO488LAM_S49K,Sandale à plateforme D-Lane Dior Or,Shoes,"1 150,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
134,KCV445DLL_S62K,Escarpin slingback J'Adior,Shoes,"1 690,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11


In [None]:
# Beauty category pages to try, keyed by the label stored in the "category" field
categories_retries = {
    "Home Makeup": "https://www.dior.com/fr_fr/beauty/home-makeup/home-makeup.html",
    "Skin Care": "https://www.dior.com/fr_fr/beauty/le-soin/les-categories",
    "Bath and Body": "https://www.dior.com/fr_fr/beauty/page/bath-and-body-by-category.html",
}

print("Relance des catégories ciblées...")
retry_data = asyncio.run(scrape_all_dior_categories(categories_retries))
df_retry = pd.DataFrame(retry_data)

if df_retry.empty:
    print("Aucun produit trouvé lors de la tentative de relance.")
elif 'df_dior' in globals():
    # Append to the main DataFrame (df_dior), then drop duplicated product ids
    df_dior = pd.concat([df_dior, df_retry], ignore_index=True).drop_duplicates(
        subset=['retail_product_id']
    )
    print(f"Succès ! {len(df_retry)} produits ajoutés. Total actuel: {len(df_dior)} produits.")
else:
    # No main DataFrame yet: the retry result becomes the main one
    df_dior = df_retry
    print(f"Nouveau DataFrame créé avec {len(df_dior)} produits.")

df_dior.tail()

Relance des catégories ciblées...
[Dior] Scraping category 'Home Makeup' via Proxy...
[Done] Collected 0 items from Home Makeup.
[Dior] Scraping category 'Skin Care' via Proxy...
[Done] Collected 0 items from Skin Care.
[Dior] Scraping category 'Bath and Body' via Proxy...
[Done] Collected 0 items from Bath and Body.
Aucun produit trouvé lors de la tentative de relance.


Unnamed: 0,retail_product_id,product_name,category,retail_price,currency,product_url,image_url,availability,scrape_date
130,KCO523LMA_S59K,Mule 30M Dior Or,Shoes,"950,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
131,KCO523LMA_S49K,Mule 30M Dior Or,Shoes,"950,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
132,KCO522LMA_S49K,Mule à talon 30M Dior Or,Shoes,"990,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
133,KCO488LAM_S49K,Sandale à plateforme D-Lane Dior Or,Shoes,"1 150,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
134,KCV445DLL_S62K,Escarpin slingback J'Adior,Shoes,"1 690,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11


## Dior Data:

In [None]:
# Show up to the first 221 rows of the combined retail DataFrame.
df_dior.head(221)

Unnamed: 0,retail_product_id,product_name,category,retail_price,currency,product_url,image_url,availability,scrape_date
0,M0714OUQO_M900_TU,Sac Dior Bow Medium,Bags,"4 100,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
1,M0715OUQO_M900_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
2,M0715PUQO_M20N_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
3,M0715PUQO_M030_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
4,M0715OUQO_M36Z_TU,Sac Dior Bow Petit,Bags,"3 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
...,...,...,...,...,...,...,...,...,...
217,613D497A3037_C585,Surchemise Dior Ribbon,Shirts,"2 200,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
218,613D590A3031_C820,Surchemise,Shirts,"1 450,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
219,613D581A3040_C830,Surchemise Cannage,Shirts,"2 700,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11
220,313C509F3088_C978,Surchemise Cannage à broderie Christian Dior C...,Shirts,"2 400,00 €",EUR,https://www-dior-com.translate.goog/fr_fr/fash...,,In Stock,2026-02-11


## Scraping Vestiaire Collective

In [None]:
import re
import asyncio
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup

def _build_vestiaire_search_query(product_name):
    """Turn a Dior retail product name into a Vestiaire search string."""
    # Match whole words only: the earlier substring match also removed "dior"
    # from inside names such as "J'Adior", leaving a mangled keyword.
    keywords = re.sub(
        r'\b(?:dior|christian|sac|handbag|pochette)\b',
        ' ',
        product_name,
        flags=re.IGNORECASE,
    )
    # Collapse the gaps left behind by the removed words.
    keywords = " ".join(keywords.split())
    return f"Dior {keywords}".strip()


async def scrape_vestiaire_for_product(product_row):
    """
    Search Vestiaire Collective for resale listings of one Dior retail product.

    Args:
        product_row: Row-like object providing 'product_name',
            'retail_product_id', 'retail_price' and 'category'.

    Returns:
        A list of dicts, one per listing card, taken from at most the first
        five cards of the search page. The list is empty when the page could
        not be loaded or shows no cards. Navigation errors are printed, not raised.
    """
    from urllib.parse import quote_plus, urljoin

    product_name = product_row['product_name']
    retail_id = product_row['retail_product_id']
    retail_price = product_row['retail_price']
    retail_cat = product_row['category']

    # Derive keywords (remove brand and generic words)
    search_query = _build_vestiaire_search_query(product_name)

    base_url = "https://fr.vestiairecollective.com"
    # quote_plus encodes spaces as '+' and also escapes '&', '#', apostrophes and
    # accents, which a plain space replacement left raw in the query string.
    url = f"{base_url}/search/?q={quote_plus(search_query)}"

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
        )
        page = await context.new_page()

        try:
            await page.goto(url, wait_until="networkidle", timeout=30000)
            await page.mouse.wheel(0, 500)
            await asyncio.sleep(1)
            content = await page.content()
        except Exception as e:
            # `except Exception` (not a bare except) lets task cancellation and
            # keyboard interrupts propagate; the failure is reported, not hidden.
            print(f"[Error] Failed to scrape Vestiaire for '{search_query}': {e}")
            content = ""
        finally:
            await browser.close()

    if not content:
        return []

    soup = BeautifulSoup(content, 'html.parser')
    listings = []
    scrape_date = datetime.now().strftime("%Y-%m-%d")
    # Keep only the first five result cards per product.
    cards = soup.select('a[class*="product-card_productCard"]')[:5]

    for card in cards:
        aria_label = card.get('aria-label', '')
        # urljoin copes with absolute and relative hrefs; plain concatenation
        # produced a broken URL for anything but a root-relative path.
        listing_url = urljoin(base_url, card.get('href', ''))
        resale_id = card.get('id', '').replace('product_id_', '')
        name_el = card.select_one('h3')
        listing_title = name_el.get_text(separator=" ", strip=True) if name_el else "N/A"

        price_el = card.select_one('span[class*="productDetails__price"]') or card.select_one('p[class*="price"]')
        resale_price = price_el.get_text(strip=True) if price_el else "N/A"

        # Fall back to the aria-label text when no price element was found
        if resale_price == "N/A" and aria_label:
            price_match = re.search(r"(\d[\d\s]*€)", aria_label.replace('\u00a0', ' '))
            resale_price = price_match.group(1).strip() if price_match else "N/A"

        country_match = re.search(r"Expédié depuis ([^,\.]+)", aria_label)
        seller_country = country_match.group(1).strip() if country_match else "N/A"
        condition = "Vintage" if "vintage" in aria_label.lower() else "Pre-owned"

        listings.append({
            "listing_id": resale_id,
            "listing_title": listing_title,
            "category": retail_cat,
            "resale_price": resale_price,
            "currency": "EUR" if "€" in str(resale_price) else "N/A",
            "condition": condition,
            # NOTE(review): this stores the scrape date, not the date the listing was posted.
            "listing_date": scrape_date,
            "listing_url": listing_url,
            "seller_country": seller_country,
            "parent_retail_id": retail_id,
            "parent_retail_price": retail_price,
            "scrape_date": scrape_date
        })
    return listings

async def run_seed_based_resale_extraction(retail_df):
    """
    Run the Vestiaire scrape once per distinct retail product.

    Rows sharing a 'retail_product_id' are scraped only once. Products are
    processed sequentially, with a pause after each one, and a progress line
    is printed every five products and after the last one.

    Returns:
        A flat list with the listings found for every product.
    """
    seeds = retail_df.drop_duplicates(subset=['retail_product_id'])
    total_seeds = len(seeds)
    print(f"[Info] Starting resale scrape for {total_seeds} unique retail products...")

    all_resale = []
    product_rows = (row for _, row in seeds.iterrows())
    for idx, product in enumerate(product_rows):
        listings = await scrape_vestiaire_for_product(product)
        all_resale.extend(listings)

        is_last = (idx + 1) == total_seeds
        if is_last or (idx + 1) % 5 == 0:
            print(f"[Progress] Processed {idx+1}/{total_seeds} products... ({len(all_resale)} listings found)")

        # Pause between products to avoid being blocked
        await asyncio.sleep(1.5)

    return all_resale

import nest_asyncio
nest_asyncio.apply()

# Run the resale scrape only when a non-empty retail DataFrame is available
if 'df_dior' not in globals() or df_dior.empty:
    print("Erreur: df_dior est vide ou inexistant.")
else:
    resale_results = asyncio.run(run_seed_based_resale_extraction(df_dior))
    df_raw_resale = pd.DataFrame(resale_results)
    print(f"TOTAL: {len(df_raw_resale)} annonces Vestiaire liées extraites !")
    display(df_raw_resale.head())


[Info] Starting resale scrape for 221 unique retail products...
[Progress] Processed 5/221 products... (10 listings found)
[Progress] Processed 10/221 products... (10 listings found)


ERROR:asyncio:Future exception was never retrieved
future: <Future finished exception=TargetClosedError('Target page, context or browser has been closed\nCall log:\n  - navigating to "https://fr.vestiairecollective.com/search/?q=Dior+Book+Tote+Small+%C3%A0+bandouli%C3%A8re+amour", waiting until "networkidle"\n')>
playwright._impl._errors.TargetClosedError: Target page, context or browser has been closed
Call log:
  - navigating to "https://fr.vestiairecollective.com/search/?q=Dior+Book+Tote+Small+%C3%A0+bandouli%C3%A8re+amour", waiting until "networkidle"

