In [None]:
from bs4 import BeautifulSoup
import requests as req
import pandas as pd
from decimal import *

In [196]:
def _digits_to_int(text: str) -> int:
    """Return the integer spelled by the digit characters found in ``text``.

    All other characters (spaces, currency signs, separators) are dropped.
    Raises ``ValueError`` when ``text`` contains no digits at all.
    """
    return int(''.join(ch for ch in text if ch.isdigit()))


def parse(source: str, source_link=False) -> pd.DataFrame:
    """Collect the offers of a catalogue page into a DataFrame.

    Parameters
    ----------
    source : str
        Raw HTML markup, or a URL to download the markup from when
        ``source_link`` is true.
    source_link : bool, default False
        Treat ``source`` as a URL and fetch it over HTTP before parsing.

    Returns
    -------
    pd.DataFrame
        One row per ``<div class="offer_item">`` element, with columns:

        * ``name`` - text of the link inside the ``name`` div;
        * ``price`` - current price as ``int`` (the ``spec-price`` span when
          an ``old-price`` span is present, otherwise the first span of the
          ``price`` div);
        * ``old_price`` - previous price as ``int``, or ``None`` when the
          offer has no ``old-price`` span;
        * ``sizes_available`` - list of size labels reduced to their
          alphanumeric characters, or ``None`` when the offer has no
          ``sizes_available`` div.

    Raises
    ------
    requests.RequestException
        When ``source_link`` is true and the download times out, fails, or
        returns a 4xx/5xx status.
    AttributeError
        When an offer lacks the expected name/price markup.
    ValueError
        When a price text contains no digits.
    """
    if source_link:
        # The timeout (seconds) keeps a stalled server from hanging the kernel;
        # raise_for_status stops an error page from being parsed as a
        # catalogue and silently producing an empty frame.
        web = req.get(source, timeout=30)
        web.raise_for_status()
        markup = web.text
    else:
        markup = source

    # Name the parser explicitly for both inputs: without it bs4 picks
    # whichever parser happens to be installed and emits a warning.
    soup = BeautifulSoup(markup, 'html.parser')

    df = pd.DataFrame([], columns=['name', 'price', 'old_price', 'sizes_available'])

    for i, el in enumerate(soup.find_all('div', class_='offer_item')):
        name = el.find('div', class_='name').a.text
        price = el.find('div', class_='price')

        sizes_block = el.find('div', class_='sizes_available')
        if sizes_block is not None:
            # Keep only letters/digits of every size label (drops whitespace
            # and punctuation around it).
            sizes = [''.join(ch for ch in anchor.text if ch.isalnum())
                     for anchor in sizes_block.find('div').find_all('a')]
        else:
            sizes = None

        old_price_tag = price.find('span', class_='old-price')
        if old_price_tag is not None:
            # Discounted offer: the current price lives in the spec-price span.
            current = _digits_to_int(price.find('span', class_='spec-price').text)
            previous = _digits_to_int(old_price_tag.text)
        else:
            current = _digits_to_int(price.find('span').text)
            previous = None

        # Rows are still added one by one through .loc so the column dtypes
        # of the returned frame stay the same as before.
        df.loc[i] = [name, current, previous, sizes]

    return df

In [187]:
# Catalogue page to scrape.
link = 'https://21-shop.ru/catalog/muzhskoe/odezhda/bryuki/?PAGEN_1=3&more=1'
# Alternative catalogue URL to try instead:
# link = 'https://21-shop.ru/catalog/muzhskoe/aksessuary/ochki/'

# The second argument marks `link` as a URL to download rather than raw HTML.
df = parse(link, source_link=True)
print(df.dtypes)

name               object
price               int64
old_price          object
sizes_available    object
dtype: object


In [None]:
!pip install dominate

Collecting dominate
  Downloading dominate-2.9.1-py2.py3-none-any.whl (29 kB)
Installing collected packages: dominate
Successfully installed dominate-2.9.1


In [169]:
import unittest
from dominate import document
from dominate.tags import *
import numpy as np
from pandas.testing import *

In [197]:
def _is_missing(value) -> bool:
  """Return True for ``None`` and for scalar NaN/NA placeholders.

  Non-scalar values such as lists are never reported as missing, which also
  avoids calling ``pd.isna`` on a list (it would return an array).
  """
  return value is None or (pd.api.types.is_scalar(value) and bool(pd.isna(value)))


def generate_html(df: pd.DataFrame) -> str:
  """Render a frame of offers as an HTML page with the markup ``parse`` reads.

  Parameters
  ----------
  df : pd.DataFrame
      Must provide the columns ``name``, ``price``, ``old_price`` and
      ``sizes_available``. A missing ``old_price`` (``None``/NaN) produces a
      single plain price span; a missing ``sizes_available`` omits the sizes
      block entirely. Otherwise ``sizes_available`` is iterated and every
      item becomes one link.

  Returns
  -------
  str
      The rendered HTML document, titled ``Shop``, with one
      ``<div class="offer_item">`` per row in row order.
  """
  with document(title='Shop') as doc:
    # Walk rows by position so frames whose index is not 0..n-1 (for example
    # after filtering) are rendered too.
    for position in range(len(df)):
      row = df.iloc[position]

      name = row['name']
      spec_price = row['price']
      old_price = row['old_price']
      sizes_available = row['sizes_available']

      with div(_class='offer_item'):
        with div(_class='name'):
          a(name)
        with div(_class='price'):
          if not _is_missing(old_price):
            # Discounted offer: old price first, then the current one.
            span(old_price, _class='old-price')
            span(spec_price, _class='spec-price')
          else:
            span(spec_price)
        if not _is_missing(sizes_available):
          with div(_class='sizes_available'):
            # The inner div is the container the size links are placed in.
            with div():
              for size in sizes_available:
                a(size)
  return doc.render()

class TestParser(unittest.TestCase):
  def test_parser(self):

    columns = ['name', 'price', 'old_price', 'sizes_available']
    expected = pd.DataFrame([], columns=columns)

    expected.loc[0] = pd.Series(['Брюки', 1111, 2222, ['XX', 'SX']], index=columns)
    expected.loc[1] = pd.Series(['Юбки', 3333, None, ['XX', 'SX', 'S', 'M']], index=columns)
    expected.loc[2] = pd.Series(['Очки', 4444, None, None], index=columns)
    expected[['price']] = expected[['price']].astype('int')

    html_page = generate_html(expected)
    actual = parse(html_page)

    assert_frame_equal(actual, expected)

# Run the tests inside the notebook kernel: argv=[''] stops unittest from
# reading the kernel's own command-line arguments, and exit=False keeps it
# from calling sys.exit() once the run is finished.
unittest.main(argv=[''], verbosity=2, exit=False)

test_parser (__main__.TestParser) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.054s

OK


<unittest.main.TestProgram at 0x7d2fd118ac80>