In [1]:
# libraries

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [2]:
# Define the URL to scrape
base_url = 'https://www.naturabuy.fr/Munitions-Balles-22LR-cat-884.html'
page_number = 1

# Create an empty list to store the scraped data
data = []

# Loop through the first two pages of the website
while page_number <= 2:

    # Construct the URL for the current page
    url = base_url + f'?PAGE={page_number}'

    # Make a GET request to the URL
    response = requests.get(url)

    # Parse the HTML content of the response using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the item cards on the page
    cards = soup.find_all('a', class_='itemcard')

    # Loop through the item cards and scrape the information
    for card in cards[:5]:

        # Get the href attribute of the item card and construct the URL for the product page
        product_url = 'https://www.naturabuy.fr/' + card['href'].lstrip('/')

        # Make a GET request to the product page
        response = requests.get(product_url)

        # Parse the HTML content of the response using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Scrape the product name from the title tag
        try:
            product_name = soup.find('title').text.strip()
        except:
            product_name = 'N/A'
            
        ##### 
        ## Scrape manufacturers name
        #try:
        #    manufacturer_element = soup.select_one("html body div#contall div#body_container div#body_container_in div#PAGE div#Columns div#mainProduct div#productWrapper div#blocGallery div#productCriteres div.critere:contains('Marque') div.criterevalue")
        #    manufacturer = manufacturer_element.text.strip()
        #except:
        #    manufacturer = 'N/A'
        #####
        
        # Scrape the manufacturer
        try:
            manufacturer_element = soup.select_one("html:-soup-contains('Marque :') body div#contall div#body_container div#body_container_in div#PAGE div#Columns div#mainProduct div#productWrapper div#blocGallery div#productCriteres div.critere div.criterevalue")
            if manufacturer_element:
                manufacturer = manufacturer_element.text.strip().replace("Marque :", "")
            else:
                manufacturer = "N/A"
        except:
            manufacturer = "N/A"


        # Scrape whether the item is new or used
        try:
            item_is_new = soup.find('span', id='availabilityCondition').text.strip()
        except:
            item_is_new = 'N/A'

        # Scrape the price
        try:
            price = soup.find('div', id='priceContainer').text.strip()
        except:
            price = 'N/A'

        # Scrape the shipping cost
        try:
            shipping_cost = soup.find('div', id='shippingsContainer').find('b').text.strip()
        except:
            shipping_cost = 'N/A'

        # Add the scraped data to the list
        data.append({
            'ProductName': product_name,
            'Manufacturer': manufacturer,
            'IsNew': item_is_new,
            'Price': price,
            'ShippingCost': shipping_cost
        })

        # Wait for a short time to avoid getting blocked
        time.sleep(1)

    # Increment the page number
    page_number += 1

# Convert the list of dictionaries to a pandas DataFrame and save it to a CSV file
df = pd.DataFrame(data)

df

Unnamed: 0,ProductName,Manufacturer,IsNew,Price,ShippingCost
0,Lot de 2 boîtes de 22lr (2) - Munitions - Ball...,Armscor,Occasion,"20,00 €","6,50 €"
1,Cartouches 22 LR Aguila Solid Point Subsonique...,Aguila,Neuf,"10,80 €","8,00 €"
2,"CCI 22LR 45GR LRN SEMI-AUTO ""QUIET 22"" boite 5...",CCI,Neuf,"99,00 €","17,55 €"
3,( 22Lr Win Super X Lhp Copper)Munitions Super-...,Winchester,Neuf,"25,07 €","6,90 €"
4,500 MUNITIONS CCI STANDARD 22LR - Munitions - ...,CCI,Neuf,"73,00 €","15,00 €"
5,Balles Winchester T22 - Cal. 22LR - 40 gr LRN ...,Winchester,Neuf,"98,00 €","9,90 €"
6,Balles Remington Golden Pointe Creuse High Vel...,Remington,Neuf,"629,00 €","9,90 €"
7,Balles Remington Golden Pointe Creuse High Vel...,Remington,Neuf,"1 258,00 €","9,90 €"
8,Balles Remington Golden Bullet Pointe Cuivre H...,Remington,Neuf,"144,00 €","9,90 €"
9,Balles Remington Golden Bullet Pointe Cuivre H...,Remington,Neuf,"288,00 €","9,90 €"


In [3]:
# print data types of columns before inference
print(df.dtypes)

# infer data types of columns
df = df.infer_objects()

# print data types of columns after inference
print(df.dtypes)

ProductName     object
Manufacturer    object
IsNew           object
Price           object
ShippingCost    object
dtype: object
ProductName     object
Manufacturer    object
IsNew           object
Price           object
ShippingCost    object
dtype: object


In [4]:
# change dtypes of columns for easier manipulation
df['ProductName'] = df['ProductName'].astype(str)
df['Manufacturer'] = df['Manufacturer'].astype(str)
df['IsNew'] = df['IsNew'].astype(str)
df['Price'] = df['Price'].astype(str)
df['ShippingCost'] = df['ShippingCost'].astype(str)

# change formatting of prices, remove currency, set as float
df['Price'] = df['Price'].str.replace(',', '.').str.extract('(\d+\.\d+)', expand=False).astype(float)
df['ShippingCost'] = df['ShippingCost'].str.replace(',', '.').str.extract('(\d+\.\d+)', expand=False).fillna(0).astype(float)

df['TotalPrice'] = df['Price'] + df['ShippingCost']

# Convert TotalPrice column back to string with euro symbol and commas
#df['TotalPrice'] = df['TotalPrice'].apply(lambda x: '{:,.2f} €'.format(x))

# Save updated DataFrame to CSV
df.to_csv('naturabuy_scraped_data.csv', index=False)

In [5]:
df

Unnamed: 0,ProductName,Manufacturer,IsNew,Price,ShippingCost,TotalPrice
0,Lot de 2 boîtes de 22lr (2) - Munitions - Ball...,Armscor,Occasion,20.0,6.5,26.5
1,Cartouches 22 LR Aguila Solid Point Subsonique...,Aguila,Neuf,10.8,8.0,18.8
2,"CCI 22LR 45GR LRN SEMI-AUTO ""QUIET 22"" boite 5...",CCI,Neuf,99.0,17.55,116.55
3,( 22Lr Win Super X Lhp Copper)Munitions Super-...,Winchester,Neuf,25.07,6.9,31.97
4,500 MUNITIONS CCI STANDARD 22LR - Munitions - ...,CCI,Neuf,73.0,15.0,88.0
5,Balles Winchester T22 - Cal. 22LR - 40 gr LRN ...,Winchester,Neuf,98.0,9.9,107.9
6,Balles Remington Golden Pointe Creuse High Vel...,Remington,Neuf,629.0,9.9,638.9
7,Balles Remington Golden Pointe Creuse High Vel...,Remington,Neuf,258.0,9.9,267.9
8,Balles Remington Golden Bullet Pointe Cuivre H...,Remington,Neuf,144.0,9.9,153.9
9,Balles Remington Golden Bullet Pointe Cuivre H...,Remington,Neuf,288.0,9.9,297.9


In [None]:
df.columns

In [None]:
# tomorrow
# add scrape target - qtty of boxes of ammo, or amount of rounds. Box of 50, box of 500 or 50 rounds, etc