In [1]:
import pandas as pd
import requests
from lxml import html

In [2]:
authors = {
  "Достоевский Ф. М.": 9150,
  "Роллинс Дж.": 59396,
  "Фицджеральд Ф. С.": 28727,
  "Глуховский Д. А.": 53427,
  "Стругацкий А. Н.": 26268,
  "Лукьяненко С. В.": 16626,
  "Фрай М.": 28927,
  "Хантер Э.": 37969,
  "Роулинг Дж. К.": 104832
}

In [3]:
#getting book ids

book_urls = []

for author in authors:
    url = "https://www.moscowbooks.ru/catalog/author/" + str(authors[author]) + '/'
    print("Checking: {} ({})".format(author, url))
    res = requests.get(url)
    tree = html.fromstring(res.text)
    
    pages_number = 0
    for i in tree.xpath('.//a[@class="pager__text" and @data-ajaxpage]'):
        pages_number += 1
    if pages_number == 0: #if no buttons for pages
        pages_number = 1
    print(pages_number)
    
    #first page is also main page
    for book in tree.xpath('.//a[@href and @target="_blank" and @title]'):
        if book.attrib['href'].startswith('/book/'):
            book_urls.append(book.attrib['href'])
    print('parsed page', 1)
        
    for i in range(1, pages_number): #from 1 because already parsed 1st page
        pageurl = "https://www.moscowbooks.ru/catalog/author/" + str(authors[author]) + '/?page=' + str(i+1)
        res = requests.get(url)
        tree = html.fromstring(res.text)
        for book in tree.xpath('.//a[@href and @target="_blank" and @title]'):
            if book.attrib['href'].startswith('/book/'):
                book_urls.append(book.attrib['href'])
        print('parsed page', i+1)
    print('\n')
print('All authors checked')

Checking: Достоевский Ф. М. (https://www.moscowbooks.ru/catalog/author/9150/)
4
parsed page 1
parsed page 2
parsed page 3
parsed page 4


Checking: Роллинс Дж. (https://www.moscowbooks.ru/catalog/author/59396/)
1
parsed page 1


Checking: Фицджеральд Ф. С. (https://www.moscowbooks.ru/catalog/author/28727/)
2
parsed page 1
parsed page 2


Checking: Глуховский Д. А. (https://www.moscowbooks.ru/catalog/author/53427/)
1
parsed page 1


Checking: Стругацкий А. Н. (https://www.moscowbooks.ru/catalog/author/26268/)
2
parsed page 1
parsed page 2


Checking: Лукьяненко С. В. (https://www.moscowbooks.ru/catalog/author/16626/)
1
parsed page 1


Checking: Фрай М. (https://www.moscowbooks.ru/catalog/author/28927/)
1
parsed page 1


Checking: Хантер Э. (https://www.moscowbooks.ru/catalog/author/37969/)
2
parsed page 1
parsed page 2


Checking: Роулинг Дж. К. (https://www.moscowbooks.ru/catalog/author/104832/)
2
parsed page 1
parsed page 2


All authors checked


In [4]:
len(book_urls)

281

In [5]:
import functools

def counter(func):
    @functools.wraps(func)
    def wrapper(*args, **argv):
        wrapper.calls += 1
        if wrapper.calls % 10 == 0:
            print(wrapper.calls, 'books were processed.')
        result = func(*args, **argv)
        return result
    wrapper.calls = 0
    return wrapper

@counter
def extract_book(book):
    if book.startswith('/book/'):
        bookdict = {}
        bookdict['Код товара'] = int(book[6:-1])
        
        res = requests.get('https://www.moscowbooks.ru' + book)
        tree = html.fromstring(res.text)
                
        bookdict['Автор'] = tree.xpath('.//div[@class="page-header__author"]')[0].xpath('.//a')[0].text_content()
        
        bookdict['Название'] = tree.xpath('.//meta[@property="og:title" and @content]')[0].attrib['content']
        
        bookdict['Обложка'] = 'https://moscowbooks.ru' + tree.xpath(
            './/meta[@property="og:image" and @content]')[0].attrib['content']
        
        bookdict['Рейтинг'] = int(tree.xpath(
            './/div[@class="book___rating-stars rating-stars rating-stars_lg" and @data-rate]')[0].attrib['data-rate'])
        
        bookdict['Стикеры'] = ', '.join(sticker.text_content().strip()
                                for sticker in tree.xpath('.//div[@class="book__stickers stickers stickers_lg"]'))
        
        bookdict['Наличие'] = len(tree.xpath('.//span[@class="instock1"]')) > 0 #[0].text_content() == '\xa0В наличии')
        
        bookdict['Цена'] = tree.xpath('.//div[@class="book__price"]')[0].text_content().strip()
        
        big_desc = tree.xpath('.//div[@class="book__description collapsed js-book-description"]')[0]
        anno = big_desc.xpath('.//b')[0].text_content()
        desc = big_desc.text_content().strip()[len(anno):]
        desc = desc[:desc.find('Читать дальше...')-len('Читать дальше...')]
        bookdict['Описание'] = desc.strip()
        
        for detail in tree.xpath('.//div[@class="book__details-left"]')[0]:
            dt = detail.xpath('.//dt')
            bookdict[dt[0].text_content().strip()] = dt[1].text_content().strip()
        for detail in tree.xpath('.//div[@class="book__details-right"]')[0]:
            dt = detail.xpath('.//dt')
            bookdict[dt[0].text_content().strip()] = dt[1].text_content().strip()
        
        return bookdict
    
    else:
        print('incorrect book id format')
        return

In [6]:
books = list(map(extract_book, book_urls))

10 books were processed.
20 books were processed.
30 books were processed.
40 books were processed.
50 books were processed.
60 books were processed.
70 books were processed.
80 books were processed.
90 books were processed.
100 books were processed.
110 books were processed.
120 books were processed.
130 books were processed.
140 books were processed.
150 books were processed.
160 books were processed.
170 books were processed.
180 books were processed.
190 books were processed.
200 books were processed.
210 books were processed.
220 books were processed.
230 books were processed.
240 books were processed.
250 books were processed.
260 books were processed.
270 books were processed.
280 books were processed.


In [11]:
cols = []
for book in books:
    for attr in book:
        if attr not in cols:
            cols.append(attr)
df = pd.DataFrame(books, columns=cols)
df

Unnamed: 0,Код товара,Автор,Название,Обложка,Рейтинг,Стикеры,Наличие,Цена,Описание,Издательство:,...,Тиснение:,Бумага:,Обрез:,Иллюстрации:,Язык оригинала:,Перевод:,Производитель:,Год производства:,Место производства:,Иллюстраторы:
0,1012089,Достоевский Ф. М.,Преступление и наказание,https://moscowbooks.ru/image/book/675/w259/i67...,0,,True,136 руб.,"""Преступление и наказание"" — высочайший образе...",Эксмо,...,,,,,,,,,,
1,1004205,Достоевский Ф. М.,Игрок,https://moscowbooks.ru/image/book/668/w259/i66...,0,,False,220 руб.,"В романе ""Игрок"" писатель изображает прекрасно...",Искателькнига,...,,,,,,,,,,
2,1000435,Достоевский Ф. М.,Чужая жена и муж под кроватью,https://moscowbooks.ru/image/book/664/w259/i66...,0,,True,144 руб.,Очень необычный сборник. Очень непривычный Дос...,АСТ,...,,,,,,,,,,
3,997191,Достоевский Ф. М.,Полное собрание романов в двух томах. В 2 книгах,https://moscowbooks.ru/image/book/661/w259/i66...,0,,True,1 880 руб.,"В первый том вошли романы ""Бедные люди"", ""Игро...",АЛЬФА-КНИГА,...,,,,,,,,,,
4,995794,Достоевский Ф. М.,Преступление и наказание,https://moscowbooks.ru/image/book/659/w259/i65...,0,,True,12 654 руб.,"""Преступление и наказание"" — одно из самых изв...",,...,Углубленное тиснение (красочное плоскоуглублен...,Офсетная,Рисованный,С иллюстрациями,,,,,,
5,994923,Достоевский Ф. М.,Село Степанчиково и его обитатели. Из записок ...,https://moscowbooks.ru/image/book/658/w259/i65...,0,,True,778 руб.,"В своей повести ""Село Степанчиково и его обита...",Издательский дом Мещерякова,...,,,,,,,,,,
6,992455,Достоевский Ф. М.,Собрание сочинений. В 10 томах,https://moscowbooks.ru/image/book/656/w259/i65...,0,,True,6 400 руб.,Федор Михайлович Достоевский (1821—1881) — вел...,Книжный Клуб Книговек,...,,,,,,,,,,
7,989763,Достоевский Ф. М.,Малая проза. Книга 2. Скверный анекдот. Крокод...,https://moscowbooks.ru/image/book/654/w259/i65...,0,,True,4 880 руб.,"Настоящее издание ""малой прозы"" Ф. М. Достоевс...",,...,,,,,,,,,,
8,988509,Достоевский Ф. М.,Братья Карамазовы,https://moscowbooks.ru/image/book/653/w259/i65...,0,,True,6 318 руб.,"«Братья Карамазовы» (1879–1880), последний, са...",,...,Углубленное тиснение (красочное плоскоуглублен...,Офсетная,Рисованный,,,,,,,
9,987119,Достоевский Ф. М.,Преступление и наказание,https://moscowbooks.ru/image/book/652/w259/i65...,0,,True,630 руб.,"""Преступление и наказание"" — одно из самых изв...",СЗКЭО,...,,,,,,,,,,


In [12]:
df.to_csv('hw_3.csv', encoding='utf-8', index=False)