Web Scraping Project:

The purpose of this code is to scrape all relevant information from the Freeglisse website, specifically focusing on the page dedicated to second-hand skis: https://freeglisse.com/fr/12-ski-occasion.

The goal of scraping this page is to analyze the types of products sold on the platform, as well as the price ranges applied according to product quality. This will help determine whether my client should consider adding this website's offerings to his online marketplace and assess if the products sold are a good fit for his target market.

In [5]:
# Importing all needed libraries 
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Function to build the URLs of all the paginated listing pages
def get_all_pages(last_page=35):
    """Build the URLs of the paginated second-hand ski listing pages.

    No request is made here: each URL is the listing's base URL with a
    page number appended.

    Args:
        last_page: Number of the last listing page to include. Defaults to
            35, the value that used to be hard-coded; adjust it to the
            number of pages the site currently exposes.

    Returns:
        A list of listing-page URLs for pages 1 through ``last_page``
        (an empty list when ``last_page`` is lower than 1).
    """
    base_url = "https://freeglisse.com/fr/12-ski-occasion?page="
    return [f"{base_url}{page_number}" for page_number in range(1, last_page + 1)]

# Function to extract product URLs from a page
def extract_product_urls(page_url, timeout=10):
    """Collect the product-page links found on one listing page.

    Args:
        page_url: URL of a listing page.
        timeout: Seconds to wait for the server before giving up, so that
            a stalled connection cannot hang the whole run.

    Returns:
        A list with the ``href`` of the thumbnail link of every
        ``<article class="product-miniature">`` on the page. Articles
        without such a link (or without an ``href``) are skipped. The list
        is empty when the request fails.
    """
    try:
        r = requests.get(page_url, timeout=timeout)
        r.raise_for_status()  # Check if the request was successful
    except requests.RequestException as e:
        print(f"Erreur lors de la requête pour {page_url}: {e}")
        return []

    soup = BeautifulSoup(r.content, "html.parser")
    articles = soup.find_all('article', {"class": 'product-miniature'})

    urls = []
    for article in articles:
        link = article.find("a", {"class": "thumbnail product-thumbnail"})
        # find() returns None when nothing matches, and subscripting None
        # raises TypeError (not AttributeError), so test explicitly.
        # Tag.get() returns None instead of raising when href is missing.
        href = link.get("href") if link is not None else None
        if href:
            urls.append(href)

    return urls

# Function to extract product information
def _field_text(locate):
    """Return the stripped text of the element found by ``locate``, or "".

    ``locate`` is a zero-argument callable that walks the parsed page. A
    missing element surfaces as AttributeError (attribute access on None)
    or IndexError (position past the end of a result list); both simply
    mean "field absent on this page".
    """
    try:
        return locate().text.strip()
    except (AttributeError, IndexError):
        return ""


def infos_articles_onepage(url, timeout=10):
    """Scrape the details of one product page.

    Args:
        url: URL of the product page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys ``Reference``, ``Nom``, ``Prix_de_vente``,
        ``Année_fabrication``, ``Prix_neuf``, ``Longueur_ski``,
        ``Type_produit``, ``Type``, ``Niveau`` and
        ``Disponibilité_produit``. A field that cannot be located on the
        page is left as an empty string. An empty dict is returned when
        the request itself fails.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()  # Check if the request was successful
    except requests.RequestException as e:
        print(f"Erreur lors de la requête pour {url}: {e}")
        return {}

    soup = BeautifulSoup(r.content, "html.parser")

    # Looked up once and shared by the three fields read from it; stays
    # None when the section is absent, which _field_text treats as a miss.
    features = soup.find('section', {'class': 'product-features'})

    # NOTE(review): several fields below are addressed by position
    # (dd[0], dd[6], span[9], fourth element after the first dd). These
    # positions are kept as they were; confirm them against the live page
    # layout if values look shifted.
    # `string=` is the current name of the deprecated `text=` filter.
    return {
        "Reference": _field_text(
            lambda: soup.find('div', {'class': 'product-reference rb-tag-cate'}).span
        ),
        "Nom": _field_text(
            lambda: soup.find("h1", {'class': 'h1 product-detail-name'})
        ),
        "Prix_de_vente": _field_text(
            lambda: soup.find('span', {'class': 'current-price-value'})
        ),
        "Année_fabrication": _field_text(
            lambda: soup.find_all('div', {'class': 'product-description'})[1]
            .find_all('p', string=re.compile('Année'))[0]
        ),
        "Prix_neuf": _field_text(
            lambda: soup.find('p', string=re.compile('Prix neuf'))
        ),
        # Previously only AttributeError was caught here, so a page with
        # fewer than 10 spans in the variants block raised IndexError.
        "Longueur_ski": _field_text(
            lambda: soup.find('div', {'class': 'product-variants js-product-variants'})
            .find_all("span")[9]
        ),
        "Type_produit": _field_text(lambda: features.find_all("dd")[6]),
        "Type": _field_text(lambda: features.find_all("dd")[0]),
        "Niveau": _field_text(
            lambda: features.dl.dd.find_next().find_next().find_next().find_next()
        ),
        "Disponibilité_produit": _field_text(
            lambda: soup.find('span', {'id': 'availability_message'})
        ),
    }

# Function to scrape all pages and create a DataFrame
def infos_articles_allpages(max_pages=1, max_products_per_page=None):
    """Scrape product details across listing pages into a DataFrame.

    Args:
        max_pages: How many listing pages to visit, starting from the
            first. Defaults to 1, the test limit that used to be
            hard-coded; pass ``None`` to visit every page returned by
            ``get_all_pages()``.
        max_products_per_page: How many products to scrape per listing
            page. ``None`` (the default) scrapes all of them.

    Returns:
        A pandas DataFrame with one row per product whose page could be
        fetched; products whose request failed are left out.
    """
    all_data = []
    # Slicing with None as the upper bound keeps the whole list.
    for onepage in get_all_pages()[:max_pages]:
        product_urls = extract_product_urls(onepage)
        for url in product_urls[:max_products_per_page]:
            # Announce the URL before fetching it, so a slow or failing
            # request can be attributed to the right product.
            print(f"Scraping {url}")
            data = infos_articles_onepage(url)
            if data:
                all_data.append(data)

    # Create a DataFrame from the collected data
    return pd.DataFrame(all_data)

# Run the scraper and keep the result in `df` (also read by later cells)
df = infos_articles_allpages()
print(df)

# Save the DataFrame to a CSV file, without writing the row index
df.to_csv("données_ski.csv", index=False)


  article_data["Année_fabrication"] = soup.find_all('div',{'class':'product-description'})[1].find_all('p',text=re.compile('Année'))[0].text.strip()#.find_all.text#.dl.dd.find_next().find_next().find_next().find_next().text#find_next_siblings()#.dd[3].text#.span#.strip()
  article_data["Prix_neuf"] = soup.find('p', text=re.compile('Prix neuf')).text.strip()


Scraping https://freeglisse.com/fr/ski-occasion-junior-loisir/11445-508748-ski-occasion-junior-rossignol-star-wars-fixations.html#/3-etat_du_materiel-qualite_a/846-taille_skis-104_cm


In [2]:
len(df)

24

In [4]:
df

Unnamed: 0,Reference,Nom,Prix_de_vente,Année_fabrication,Prix_neuf,Longueur_ski,Type_produit,Type,Niveau,Disponibilité_produit
0,11445_a44,Ski occasion junior Rossignol Star Wars + fixa...,"49,00 €",Année 2018,Prix neuf : 170 €,104 cm,Ski occasion junior loisir,Piste,Débutant,Disponible
1,12453_a40,Ski occasion junior Rossignol Hero J + fixations,"59,00 €",Année: 2017,,100 cm,,Piste,Loisir sport,Disponible
2,16575_g35_marron,Ski occasion Rossignol Experience 76 Ci LTD + ...,"145,00 €",Année : 2019,,146 cm,,All mountain,Loisir,Disponible
3,18084_a50,Ski occasion junior toutes marques à 29 € + Fi...,"29,00 €",,,68 cm,,Piste,Loisir,Disponible
4,1795_a60,Ski occasion junior toutes marques à 19 € + Fi...,"19,00 €",,,68 cm,,Piste,Loisir,Disponible
5,11512_b13,Ski occasion junior Rossignol fun Girl + fixat...,"55,00 €",Année : 2014,Prix neuf : 149 €,100 cm,,Piste,Loisir,Disponible
6,18502_g26,Ski occasion Atomic Redster XT + fixations,"159,00 €",Année : 2019,,156 cm,,Piste,Loisir sport,Disponible
7,18372_i34,Ski occasion Volkl Deacon 7.4 + fixations,"149,00 €",Année : 2020,,149 cm,,Piste,Loisir sport,Disponible
8,14324_a14,Ski occasion junior Rossignol Hero Junior Pro ...,"89,00 €",Année : 2019,Prix neuf : 259 €,110 cm,Ski occasion junior performance,Piste,Performant,Disponible
9,16466_p16_bleurouge,Ski occasion Rossignol React 6 Compact + fixat...,"119,00 €",Année: 2019,,149 cm,Ski occasion adulte performance,Piste,Loisir sport,Disponible
