## VeryChic Scraping Tool

#### Setup and Library Imports

In [2]:
from bs4 import BeautifulSoup
import re
import pandas as pd
import json
import requests
import csv
from datetime import datetime

#### Configuring and Sending the API Request

In [3]:
import requests

# Browser-like headers sent with the API call (the User-Agent and sec-ch-ua
# values describe a Chrome 131 / macOS session).
headers = {
    'sec-ch-ua-platform': '"macOS"',
    'Referer': 'https://www.verychic.fr/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'Accept': 'application/json, text/plain, */*',
    'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
}

# Query-string parameters for the product listing endpoint.
# NOTE(review): page 0 with size 2000 presumably returns every live offer in a
# single page -- confirm against the API if the catalogue grows.
params = {
    'branding': 'VRC',
    'currency': 'EUR',
    'detailed': 'false',
    'language': 'fr',
    'opinionCount': '0',
    'page': '0',
    'size': '2000',
    'memberStatus': 'PROSPECT',
    'channel': 'B2C_HOTEL',
    'publishingStatus': 'nonexpired',
    'channelVersion': '24.12.0',
}

API_URL = 'https://api.verychic.com/verychic-endpoints/v1/products.json'
REQUEST_TIMEOUT_SECONDS = 30  # requests waits indefinitely when no timeout is given

response = requests.get(
    API_URL,
    params=params,
    headers=headers,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Fail here, with the HTTP status in the message, instead of letting a 4xx/5xx
# error body surface later as a confusing JSON or KeyError.
response.raise_for_status()

#### Parsing and Extracting Hotel Information

In [4]:
data = response.json()  # Decode the response body from JSON; the extraction cell below iterates over data['content']

In [5]:
# Preview the payload structure: keep the response envelope but show only the
# first record, reusing the already-parsed `data` instead of decoding the body
# a second time and dumping every record into the cell output.
{**data, 'content': data['content'][:1]}

{'content': [{'source': 'ORCHESTRA',
   'externalId': 373346,
   'name': 'Hôtel Origines par Adrien Descouls *****',
   'image': 'https://admin-verychic.orchestra-platform.com/admin/TS/fckUserFiles/Image/CFE_Hotel_Origines_par_Adrien_Descouls/2024_01_25_Origines_par_Adrien_Descouls_15.jpg',
   'imageHash': '60b109abf5355485158c09d001c2b672',
   'productCurrency': 'EUR',
   'normalizedPrice': 106.0,
   'destinationName': 'Puy-de-Dôme, France',
   'country': 'France',
   'countryCode': 'FR',
   'shortDesc': 'Adresse raffinée, luxe et gastronomie dans les volcans d’Auvergne.',
   'discount': 0.0,
   'latitude': 45.50137896,
   'longitude': 3.24346232,
   'language': 'fr',
   'transportation': 'NONE',
   'advantages': ['Petits déjeuners offerts',
    '1 tablette de chocolat et 1 boisson de bienvenue offertes',
    'VeryFlexible : réservez en toute sérénité !'],
   'vipAdvantages': {'PLATINUM': [], 'GOLD': [], 'SILVER': []},
   'thematics': ['NATURE',
    'PDJ',
    'EUROPE',
    'SUD_FRANC

#### Saving Extracted Data to a JSON File

In [6]:
# Extract name, destination, price, discount and offer start/end dates for each
# hotel, and stamp every record with the retrieval date.
current_date = datetime.now().strftime('%Y-%m-%d')  # Retrieval date as YYYY-MM-DD

# Note: dict.get() only falls back to its default when the key is absent; a key
# that is present with a JSON null is kept as None in the extracted record.
hotels = [
    {
        'name': hotel.get('name', 'N/A'),  # Hotel name
        'destinationName': hotel.get('destinationName', 'N/A'),  # Destination label
        'normalizedPrice': hotel.get('normalizedPrice', 0.0),  # Normalized price
        'discount': hotel.get('discount', 0.0),  # Discount
        'offerStartDate': hotel.get('offerStartDate', 'N/A'),  # Offer start date
        'offerEndDate': hotel.get('offerEndDate', 'N/A'),  # Offer end date
        'retrievalDate': current_date,  # Date the data was fetched
    }
    for hotel in data['content']  # One entry per item under the 'content' key
]

# Save the extracted data to a JSON file (ensure_ascii=False keeps accented
# characters readable in the file)
with open('extracted_hotels.json', 'w', encoding='utf-8') as output_file:
    json.dump(hotels, output_file, ensure_ascii=False, indent=4)

# Print a short preview to verify the extraction without flooding the cell
# output with every record
PREVIEW_ROWS = 5
for hotel in hotels[:PREVIEW_ROWS]:
    print(hotel)
print(f"{len(hotels)} hotels extracted (showing first {min(PREVIEW_ROWS, len(hotels))}).")


{'name': 'Hôtel Origines par Adrien Descouls *****', 'destinationName': 'Puy-de-Dôme, France', 'normalizedPrice': 106.0, 'discount': 0.0, 'offerStartDate': '2025-01-02T00:05+0100', 'offerEndDate': '2025-01-10T23:55+0100', 'retrievalDate': '2025-01-10'}
{'name': '25hours Hotel Terminus Nord ****', 'destinationName': 'Paris, France', 'normalizedPrice': 135.0, 'discount': 44.0, 'offerStartDate': '2025-01-09T00:05+0100', 'offerEndDate': '2025-01-10T23:55+0100', 'retrievalDate': '2025-01-10'}
{'name': 'Kappa Club Seven Islands Resort ****', 'destinationName': 'Watamu, Kenya', 'normalizedPrice': 972.0, 'discount': None, 'offerStartDate': '2024-12-27T00:05+0100', 'offerEndDate': '2025-01-10T23:55+0100', 'retrievalDate': '2025-01-10'}
{'name': 'Hôtel Mercure Dijon Centre Clemenceau ****', 'destinationName': 'Dijon, France', 'normalizedPrice': 109.0, 'discount': 40.0, 'offerStartDate': '2024-12-20T00:05+0100', 'offerEndDate': '2025-01-10T23:55+0100', 'retrievalDate': '2025-01-10'}
{'name': 'Saf

#### Exporting Data to a CSV File

In [14]:
# Split a destination label into its city and country parts
def split_destination(destination):
    """Return a ``(city, country)`` tuple parsed from ``destination``.

    The text is divided at the first comma only: what precedes it is the city
    and everything after it is the country. A label without a comma is treated
    as a country with an empty city. A falsy ``destination`` (empty string or
    None) yields ``('', '')``. Both parts are stripped of surrounding
    whitespace.
    """
    if not destination:
        return '', ''  # Nothing to split

    head, comma, tail = destination.partition(',')
    if comma:
        return head.strip(), tail.strip()
    # No comma found: the whole label is the country
    return '', head.strip()

# Derive the star category from the asterisks embedded in the hotel name
def get_category(hotel_name):
    """Return '5*', '4*' or 'Other' depending on the star run in ``hotel_name``.

    The name is searched for a run of five asterisks, then four; any name
    containing neither is classified as 'Other'.
    """
    # Order matters: a name containing '*****' also contains '****'.
    star_labels = (('*****', '5*'), ('****', '4*'))
    for stars, label in star_labels:
        if stars in hotel_name:
            return label
    return 'Other'
    
from datetime import datetime

def transform_date(date_string):
    """Convert an API offer timestamp to ``YYYY-MM-DD HH:MM:SS.ffffff``.

    Parameters:
        date_string: Timestamp shaped like ``2025-01-02T00:05+0100``
            (``%Y-%m-%dT%H:%M%z``), or a missing-value marker (None, an empty
            string, or the ``'N/A'`` placeholder used when a hotel has no
            offer date).

    Returns:
        The formatted timestamp string (wall-clock time as given, without the
        UTC offset), or an empty string when the date is missing.

    Raises:
        ValueError: if a non-missing ``date_string`` does not match the
            expected format.
    """
    # Missing dates reach this function as 'N/A' (the default used when a
    # hotel has no offer date); strptime would raise on that placeholder and
    # abort the whole export, so map missing values to an empty string.
    if not date_string or date_string == 'N/A':
        return ''

    # Parse the input date string to a timezone-aware datetime object
    dt = datetime.strptime(date_string, "%Y-%m-%dT%H:%M%z")

    # Format the datetime object to the desired string format
    return dt.strftime("%Y-%m-%d %H:%M:%S.%f")

# NOTE: no separate DD/MM/YY formatter is defined here; dates are formatted by
# transform_date above as YYYY-MM-DD HH:MM:SS.ffffff.


# Turn each extracted hotel into a CSV-ready record with derived fields
def _to_csv_row(hotel):
    """Build the CSV record for one extracted ``hotel`` dict.

    Adds the city/country split, the star category, the reformatted offer
    dates and an 'X' marker when the name contains "Dernière minute".
    """
    name = hotel.get('name', '')
    city, country = split_destination(hotel.get('destinationName', ''))
    return {
        'hotel_name': hotel.get('name', 'N/A'),
        'City': city,
        'Country': country,
        'Category': get_category(name),
        'Price': hotel.get('normalizedPrice', 0.0),
        # The default only applies when the key is absent; a None stored
        # under 'discount' is passed through unchanged.
        'Discount': hotel.get('discount', 0.0),
        'start_date': transform_date(hotel.get('offerStartDate', 'N/A')),  # Format start date
        'end_date': transform_date(hotel.get('offerEndDate', 'N/A')),  # Format end date
        # Flag last-minute offers, identified by their name
        'Dernière Minute': 'X' if 'Dernière minute' in name else '',
    }


verychic_online_hotels = [_to_csv_row(hotel) for hotel in hotels]



CSV file created with 'Dernière Minute' column at verychic_hotels_10_01_2025.csv.


In [None]:
# Write the transformed records to a CSV file named after today's date (DD_MM_YYYY)
today = datetime.today().strftime('%d_%m_%Y')
csv_file_path = f"verychic_hotels_{today}.csv"

# Column order of the exported file
fieldnames = [
    'hotel_name',
    'City',
    'Country',
    'Category',
    'Price',
    'Discount',
    'start_date',
    'end_date',
    'Dernière Minute',
]

# 'utf-8-sig' prefixes the file with a BOM (presumably so spreadsheet tools
# detect the encoding -- confirm); newline='' lets the csv module control
# line endings.
with open(csv_file_path, 'w', newline='', encoding='utf-8-sig') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=',')
    writer.writeheader()
    for record in verychic_online_hotels:
        writer.writerow(record)

print(f"CSV file created with 'Dernière Minute' column at {csv_file_path}.")

