In [28]:
import json
import re
from concurrent.futures import ThreadPoolExecutor

import requests
from bs4 import BeautifulSoup

In [29]:
# Pricing-table pages passed to get_prices_from_url in the cells and tests below.
URL_PAGE2 = "https://kim.fspot.org/cours/page2.html"
URL_PAGE3 = "https://kim.fspot.org/cours/page3.html"

def get_prices_from_url(url, timeout=10):
    """Scrape a pricing-table page and return its offers keyed by plan name.

    Every ``<h2>`` heading is taken as a plan name and paired, by position,
    with the ``pricing-table-price`` span and the ``pricing-table-list``
    list found at the same index in the document.

    Args:
        url: Address of the HTML page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict mapping each heading text to
        ``{'price': str, 'storage': str, 'databases': int}``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If there are more headings than price spans or lists.
        AttributeError: If a list has no "storage" or "database" entry.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Name the parser explicitly so the tree does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.content.decode("utf-8"), "html.parser")

    # Query the document once per element kind rather than once per heading.
    headings = soup.find_all('h2')
    price_spans = soup.find_all('span', attrs={'class': 'pricing-table-price'})
    feature_lists = soup.find_all('ul', attrs={'class': 'pricing-table-list'})

    storage_pattern = re.compile("storage")
    database_pattern = re.compile("database")

    prices = {}
    # enumerate() gives the true position of each heading; looking it up with
    # list.index() would return the first *equal* tag for repeated headings.
    for idx, heading in enumerate(headings):
        features = feature_lists[idx]
        prices[heading.text] = {
            # First whitespace-separated token of each text is the value.
            'price': price_spans[idx].text.split()[0],
            'storage': features.find(string=storage_pattern).split()[0],
            'databases': int(features.find(string=database_pattern).split()[0]),
        }
    return prices


In [30]:
# Scrape the first pricing page and display the resulting dict.
prices = get_prices_from_url(URL_PAGE2)
prices

{'Personal': {'price': '$5', 'storage': '1GB', 'databases': 1},
 'Small Business': {'price': '$25', 'storage': '10GB', 'databases': 5},
 'Enterprise': {'price': '$45', 'storage': '100GB', 'databases': 25}}

In [31]:
# Same scrape on the second pricing page; note that this rebinds `prices`.
prices = get_prices_from_url(URL_PAGE3)
prices

{'Personal': {'price': '$5', 'storage': '1GB', 'databases': 1},
 'Small Business': {'price': '$25', 'storage': '10GB', 'databases': 5},
 'Enterprise': {'price': '$45', 'storage': '100GB', 'databases': 25},
 'Privilege': {'price': '$99', 'storage': '1TB', 'databases': 100}}

In [32]:
# Single product page used to try out extract_beer_infos.
URL = "https://www.beerwulf.com/fr-fr/p/bieres/melusine-bio.33"

def extract_beer_infos(url, timeout=10):
    """Scrape one product page and return the beer's main attributes.

    Args:
        url: Address of the product page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict with the keys ``'name'`` (str), ``'note'`` (float),
        ``'price'`` (float) and ``'volume'`` (int).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        AttributeError: If one of the expected elements is not in the page.
        ValueError: If a scraped text cannot be converted to a number.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Name the parser explicitly so the tree does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.content.decode("utf-8"), "html.parser")

    header = soup.find('div', class_='product-detail-info-row mobile-header-details')
    rating_text = soup.find('span', class_="label-stars").text
    price_text = soup.find('span', attrs={'class': "price"}).text
    subtext = soup.find('div', attrs={'class': "product-subtext"}).text

    return {
        'name': header.h1.text,
        # Drop the first and last characters (presumably surrounding
        # parentheses - confirm against the page) and use a dot decimal.
        'note': float(rating_text[1:-1].replace(',', '.')),
        # First token is the amount, written with a decimal comma.
        'price': float(price_text.split()[0].replace(',', '.')),
        # Second-to-last token of the subtext is the numeric volume.
        'volume': int(subtext.split()[-2]),
    }

In [33]:
# Scrape the sample product page and display the extracted fields.
infos = extract_beer_infos(URL)
infos

{'name': 'Mélusine Bio', 'note': 3.29, 'price': 38.99, 'volume': 33}

In [34]:
# Search endpoint queried with country=France and container=Bouteille;
# extract_beer_list_infos parses its response as JSON.
URL_BEERLIST_FRANCE = "https://www.beerwulf.com/fr-FR/api/search/searchProducts?country=France&container=Bouteille"

In [35]:
def extract_beer_list_infos(url, max_workers=1, timeout=10):
    """Fetch a product search result and scrape every listed beer page.

    The JSON answer is expected to hold an ``'items'`` list whose entries
    carry a ``'contentReference'`` path; each path is appended to the site
    root and handed to ``extract_beer_infos``.

    Args:
        url: Address of the JSON search endpoint.
        max_workers: Number of pages scraped concurrently. The default of 1
            scrapes them one after the other; a larger value uses a thread
            pool so the network waits overlap.
        timeout: Seconds to wait for the search endpoint before giving up.

    Returns:
        list of the dicts returned by ``extract_beer_infos``, in the same
        order as the items of the search result.

    Raises:
        requests.HTTPError: If the search endpoint answers with an error status.
        KeyError: If the JSON has no 'items' or an item has no 'contentReference'.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    items = json.loads(response.content)['items']

    # Collect the beer page URLs from the JSON items.
    beer_pages = ["https://www.beerwulf.com" + item['contentReference'] for item in items]

    if max_workers > 1:
        # Executor.map yields results in input order, so the returned list
        # matches the sequential version.
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            return list(pool.map(extract_beer_infos, beer_pages))

    return [extract_beer_infos(page_url) for page_url in beer_pages]

In [36]:

# Scrape every beer returned by the search endpoint and display the list.
beers = extract_beer_list_infos(URL_BEERLIST_FRANCE)
beers

[{'name': 'Gallia East IPA', 'note': 3.8, 'price': 42.99, 'volume': 33},
 {'name': 'La Lager Sans Gluten de Vézelay',
  'note': 2.78,
  'price': 38.99,
  'volume': 25},
 {'name': 'Brasserie De Sutter Brin de Folie',
  'note': 3.42,
  'price': 44.99,
  'volume': 33},
 {'name': 'La Cristal IPA du Mont Blanc',
  'note': 3.44,
  'price': 44.99,
  'volume': 33},
 {'name': 'Mélusine Bio', 'note': 3.29, 'price': 38.99, 'volume': 33},
 {'name': 'La Parisienne Le Titi Parisien',
  'note': 3.5,
  'price': 38.99,
  'volume': 33},
 {'name': 'Gallia Session IPA', 'note': 3.61, 'price': 42.99, 'volume': 33},
 {'name': 'Ninkasi Brut IPA', 'note': 3.36, 'price': 44.99, 'volume': 33},
 {'name': 'Pietra', 'note': 3.17, 'price': 38.99, 'volume': 33},
 {'name': 'Desperados', 'note': 3.15, 'price': 35.99, 'volume': 33},
 {'name': 'Gallia West IPA', 'note': 3.56, 'price': 42.99, 'volume': 33}]

In [37]:
import unittest

class Lesson4Tests(unittest.TestCase):
    def test_01_get_prices_from_url_page2(self):
        prices = get_prices_from_url(URL_PAGE2)
        # We should have found 3 products:
        self.assertIsInstance(prices, dict)
        self.assertEqual(len(prices), 3)
        self.assertIn('Personal', prices)
        self.assertIn('Small Business', prices)
        self.assertIn('Enterprise', prices)
        
        personal = prices['Personal']
        self.assertIn('price', personal)
        self.assertIn('storage', personal)
        self.assertIn('databases', personal)
        self.assertEqual(personal['price'], '$5')
        self.assertEqual(personal['storage'], '1GB')
        self.assertEqual(personal['databases'], 1)
        
    def test_02_get_prices_from_url_page3(self):
        prices = get_prices_from_url(URL_PAGE3)
        self.assertIsInstance(prices, dict)
        self.assertEqual(len(prices), 4)
        self.assertEqual(
            prices['Privilege'],
            {'databases': 100, 'price': '$99', 'storage': '1TB'}
        )
    
    def test_03_extract_beer_list_infos(self):
        infos = extract_beer_list_infos(URL_BEERLIST_FRANCE)
        # We should have 11 French beers:
        self.assertIsInstance(infos, list)
        self.assertEqual(len(infos), 11)
        # All of them are 25cl or 33cl:
        for beer in infos:
            self.assertIn(beer['volume'], [25, 33])

            
def run_tests():
    """Run every test of Lesson4Tests and print a verbose report.

    The suite is built with the default test loader because
    ``unittest.makeSuite`` is deprecated since Python 3.11 and no longer
    exists in Python 3.13.
    """
    test_suite = unittest.defaultTestLoader.loadTestsFromTestCase(Lesson4Tests)
    # verbosity=2 prints one line per test method with its outcome.
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(test_suite)

In [38]:

# Run the suite only when this code executes as the __main__ module.
if __name__ == '__main__':
    run_tests()

test_01_get_prices_from_url_page2 (__main__.Lesson4Tests) ... ok
test_02_get_prices_from_url_page3 (__main__.Lesson4Tests) ... ok
test_03_extract_beer_list_infos (__main__.Lesson4Tests) ... ok

----------------------------------------------------------------------
Ran 3 tests in 2.974s

OK
