In [3]:
import re
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup
from google.colab import files

In [4]:
#requesting html code from Heureka with paddleboards
#the timeout keeps the notebook from hanging on a stalled connection and
#raise_for_status stops an HTTP error page from being parsed as an empty listing
response = requests.get('https://paddleboardy.heureka.cz/f:24551:41990821/', timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

#finding all the product blocks in html based on h3 tag
products = soup.find_all('h3', {'class' : 'c-product__title'})

#finding all the titles and related links in html
#each heading is searched once; headings without a link are skipped so that
#titles and hrefs stay aligned index by index
links = [product.find('a', href=True) for product in products]
links = [link for link in links if link is not None]
titles = [link.text for link in links]
hrefs = [link['href'] for link in links]
all_products = []

#amount followed by the Czech crown suffix, e.g. "1 299 Kč"
#(\s also matches the non-breaking spaces used as thousands separators)
czk_amount = re.compile(r'(\d[\d\s]*)\s*Kč')


def parse_int_amount(text):
  """Return the first run of digits in text as an int.

  Whitespace inside the run (regular or non-breaking) is ignored, so
  '12 990 Kč' gives 12990. Anything after the run, such as a currency
  suffix, is ignored. Raises ValueError when text contains no digit.
  """
  match = re.search(r'\d[\d\s]*', text)
  if match is None:
    raise ValueError('no amount found in {!r}'.format(text))
  return int(''.join(match.group().split()))


def parse_shipping_cost(tag):
  """Return the crown amount mentioned in tag's text, or 0 when there is none.

  tag may be None (the element is missing from the offer), which also gives 0.
  NOTE(review): assumes the delivery element states the cost as '<amount> Kč'
  - confirm against the live page markup.
  """
  if tag is None:
    return 0
  match = czk_amount.search(tag.get_text())
  return parse_int_amount(match.group(1)) if match else 0


#one date stamp per run, so every row of a scrape carries the same date
scrape_date = datetime.now().strftime('%Y-%m-%d')

#iterating through titles and hrefs to get into the detail of each product and find which stores are selling it
for href, title in zip(hrefs, titles):
  #a product page that cannot be fetched is reported and skipped instead of aborting the whole scrape
  try:
    response2 = requests.get(href, timeout=30)
    response2.raise_for_status()
  except requests.RequestException as e:
    print('Skipping product {!r}: {}'.format(title, e))
    continue
  soup2 = BeautifulSoup(response2.text, 'html.parser')

  #picking from the html the shops which are selling the product
  shops = soup2.find_all('div', {'class' : 'c-offer__inner'})

  for shop in shops:
    try:
      #creating an offer with title, price, shop and shipping cost as plain values
      offer = {
          'date' : scrape_date,
          'title' : title,
          'price' : parse_int_amount(shop.find('span', {'class':'c-offer__price u-bold u-delta'}).text),
          'shop' : shop.find('img', {'class':'c-offer__shop-logo e-image-with-fallback'})['alt'],
          'shop_link' : shop.find('a', {'class': 'c-offer__shop-logo-cont'})['href'],
          'shipping_cost' : parse_shipping_cost(shop.find('div', {'class':'c-offer__delivery-availability'}))
          }
    except (AttributeError, KeyError, TypeError, ValueError) as e:
      #a missing element (find returned None), a missing attribute or an unparsable price:
      #the offer is reported and left out rather than stored as an empty row
      print('Skipping malformed offer for {!r}: {!r}'.format(title, e))
      continue

    #list of all products, extended in place
    all_products.append(offer)

#transferring list of all products into dataframe
#the columns are fixed so the frame is well-formed even when no offer was scraped
offer_columns = ['date', 'title', 'price', 'shop', 'shop_link', 'shipping_cost']
df = pd.DataFrame(all_products, columns = offer_columns)

#a missing shipping cost, or one that cannot be read as a number, counts as zero
df['shipping_cost'] = pd.to_numeric(df['shipping_cost'], errors = 'coerce').fillna(0)

#dropping incomplete offers (e.g. empty placeholder rows) and changing data types
df = df.dropna()
df = df.astype({
    'shipping_cost' : 'int32',
    'price' : 'int32'
})

#sum of price and shipping cost as a total price
df['total_price'] = df['price'] + df['shipping_cost']

#downloading the data in csv format
#utf-8-sig writes a byte order mark at the start of the file
df.to_csv('paddleboard_scrap.csv', encoding = 'utf-8-sig')
files.download('paddleboard_scrap.csv')

#use the code below for continuous update of the csv file
#(appends the rows without a header to the existing file)

#df.to_csv('paddleboard_scrap.csv', mode = 'a', header = False)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>