In [1]:
import requests
from bs4 import BeautifulSoup
import mysql.connector
from datetime import datetime
import re


# URL template of the listing pages to scrape; the "{}" placeholder is filled
# with the page number via base_url.format(...) in job().
base_url= "https://www.parafendri.tn/11-visage?page={}"
# Extract the numeric part of a raw price string and format it
def extract_and_convert_price(raw_price):
    """Return the price found in *raw_price* as a formatted string.

    Every character other than a digit or a dot is stripped (currency
    labels such as "TND"/"TTC", spaces and also commas), the remainder is
    parsed as a float and rendered with no decimals and a comma as the
    thousands separator, e.g. "44,000 TND" -> "44,000".

    Returns None when nothing numeric is left after cleaning, or when the
    input cannot be converted (a message is printed in the latter case).
    """
    try:
        digits_and_dots = re.sub(r'[^\d.]', '', raw_price)
        # An empty remainder means there was no price at all; fall through
        # to the shared "return None" below without printing anything.
        if digits_and_dots:
            return format(float(digits_and_dots), ',.0f')
    except (ValueError, TypeError):
        # ValueError: malformed number such as "1.2.3";
        # TypeError: raw_price was not a string (e.g. None).
        print(f"Erreur lors de la conversion du prix : {raw_price}")
    return None

# Function to scrape a page and return a list of products
def scrape_page(url, timeout=30):
    """Download *url* and return the product elements found on the page.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The elements matching the CSS selector '.product-miniature', or an
        empty list when the request fails (connection error, timeout or
        HTTP error status); the failure is printed, not raised.
    """
    # Only the network call can raise RequestException, so keep the try
    # body limited to it.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Erreur lors de la requête vers {url}: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.select('.product-miniature')

# Function to extract product information from a product element
def extract_product_info(product):
    """Build a dict describing one product from its HTML element.

    Args:
        product: A parsed element (as returned by scrape_page) exposing
            ``find``; the name and image come from its
            'replace-2x img-responsive' <img>, the link from the <a> inside
            its 'h3 product-title' <h2>, and the price from its
            <span class="price">.

    Returns:
        A dict with the keys 'product_name', 'image_link', 'product_price',
        'product_link' and 'date_prix'. Missing pieces are reported as ''
        (name, image, link) or None (price); 'date_prix' is today's date
        formatted as YYYY-MM-DD.
    """
    # The same <img> carries both the picture URL (src) and the name (title).
    image_element = product.find('img', class_='replace-2x img-responsive')
    if image_element is not None:
        image_link = image_element.get('src', '')
        product_name = image_element.get('title', '').strip()
    else:
        image_link = ''
        product_name = ''

    # A title without an <a>, or an <a> without href, yields an empty link
    # instead of raising TypeError/KeyError and aborting the whole run.
    product_link = ''
    title_element = product.find('h2', class_='h3 product-title')
    if title_element is not None:
        anchor = title_element.find('a')
        if anchor is not None:
            product_link = anchor.get('href', '')

    price_element = product.find('span', class_='price')
    if price_element is not None:
        cleaned_price = extract_and_convert_price(price_element.text.strip())
    else:
        cleaned_price = None

    date_prix = datetime.today().strftime('%Y-%m-%d')

    return {'product_name': product_name, 'image_link': image_link,
            'product_price': cleaned_price, 'product_link': product_link,
            'date_prix': date_prix}

# Helper: store one scraped product and its price of the day
def _save_product(cursor, row_data):
    """Insert *row_data* into ``produits`` and its price into ``prix``.

    Args:
        cursor: Open MySQL cursor used for all statements.
        row_data: Dict produced by extract_product_info.

    Returns:
        The ID reported by LAST_INSERT_ID() for the new ``produits`` row.
    """
    cursor.execute("""
        INSERT INTO produits (nom, nom_site, image, lien)
                    VALUES (%s, %s, %s, %s)
    """, (row_data['product_name'], 'ParaFendri', row_data['image_link'],
          row_data['product_link']))

    print(f"Produit extrait : {row_data}")

    # Get the ID of the last inserted product
    cursor.execute("SELECT LAST_INSERT_ID()")
    product_id = cursor.fetchone()[0]
    print(f"Nouveau produit inséré. ID: {product_id}")

    # Insert into the "prix" table with the current date
    cursor.execute("""
        INSERT INTO prix (id_produit, prix, date_prix)
        VALUES (%s, %s, %s)
    """, (product_id, row_data['product_price'], row_data['date_prix']))
    print(f"Prix ajouté pour le produit. ID: {product_id}")

    return product_id


# Function to execute the scraping and save to MySQL database
def job():
    """Scrape every listing page and store the products in MySQL.

    Pages are requested one after another, starting at page 1, by filling
    the page number into ``base_url``. Scraping stops at the first page for
    which scrape_page returns no products (which also happens when the
    request itself fails). Each product is written to the ``produits``
    table and its price to the ``prix`` table.

    All inserts are committed together once the loop ends. If anything
    raises before that, nothing is committed, and the cursor and connection
    are still closed before the exception propagates.
    """
    # Database configuration
    db_config = {
        'host': 'localhost',
        'user': 'root',
        'database': 'scrapping_db'
    }

    # Initialize starting page number
    page_number = 1

    # Create a MySQL connection
    connection = mysql.connector.connect(**db_config)
    try:
        cursor = connection.cursor()
        try:
            # Loop through each page
            while True:
                # Construct the URL for the current page
                url = base_url.format(page_number)
                print(f"Scraping de la page : {url}")

                # Scrape the page
                products = scrape_page(url)

                if not products:
                    print(f"Aucun produit trouvé sur la page {url}. Fin du scraping.")
                    break

                for product in products:
                    _save_product(cursor, extract_product_info(product))

                # Move to the next page
                page_number += 1

            # Commit once, after every page was processed
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when scraping or SQL failed
        connection.close()

    print("Données insérées dans la base de données MySQL.")

# Manual execution: run one full scraping pass when this file is executed
# directly as a script.
if __name__ == "__main__":
    job()


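# Scheduled Execution
# Uncomment the following lines to run job() every day at 00:00.
# They also need `import schedule` (the `schedule` package) and `import time`
# at the top of the file, neither of which is imported yet.
# schedule.every().day.at("00:00").do(job)
# while True:
#     schedule.run_pending()
#     time.sleep(1)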


Scraping de la page : https://www.parafendri.tn/11-visage?page=1
Produit extrait : {'product_name': 'AVENE CRÈME RICHE HYDRANCE SPF30 PEAUX SÈCHES', 'image_link': 'https://www.parafendri.tn/761-home_default/creme-peau-intolerante-riche-avene.jpg', 'product_price': '44,000', 'product_link': 'https://www.parafendri.tn/beaute/856-creme-peau-intolerante-riche-avene.html', 'date_prix': '2023-12-10'}
Nouveau produit inséré. ID: 45172
Prix ajouté pour le produit. ID: 45172
Produit extrait : {'product_name': 'XEN Acnoz Gel Nettoyant 250ml peaux grasses', 'image_link': 'https://www.parafendri.tn/2258-home_default/xen-acnoz-gel-nettoyant-150g.jpg', 'product_price': '28,000', 'product_link': 'https://www.parafendri.tn/visage/28-xen-acnoz-gel-nettoyant-150g.html', 'date_prix': '2023-12-10'}
Nouveau produit inséré. ID: 45173
Prix ajouté pour le produit. ID: 45173
Produit extrait : {'product_name': 'SVR AMPOULE A LIFT - Concentré lissant Retexturisant', 'image_link': 'https://www.parafendri.tn/744-h