In [None]:
import requests
from bs4 import BeautifulSoup
import time
import json
from urllib.parse import urljoin
import re
import pandas as pd
import logging

In [None]:
class ReviewParser:
    def __init__(self, base_url):
        """Remember the site root and open a reusable HTTP session.

        Args:
            base_url: Root URL of the site; other methods use it to resolve
                relative links with ``urljoin``.
        """
        browser_agent = (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/91.0.4472.124 Safari/537.36'
        )
        http_session = requests.Session()
        # Every request sent through this session carries a desktop-browser User-Agent.
        http_session.headers['User-Agent'] = browser_agent

        self.base_url = base_url
        self.session = http_session
    
    def get_page(self, url, retries=3, delay=2):
        for attempt in range(retries):
            try:
                response = self.session.get(url)
                
                if response.status_code == 521:
                    logging.info(f"–°–µ—Ä–≤–µ—Ä –≤–µ—Ä–Ω—É–ª –æ—à–∏–±–∫—É 521 –¥–ª—è {url}. –ü–æ–ø—ã—Ç–∫–∞ {attempt + 1}/{retries}")
                    if attempt < retries - 1:
                        time.sleep(delay)
                        continue
                    else:
                        response.raise_for_status()
                
                response.raise_for_status()
                return response.text
                
            except requests.RequestException as e:
                logging.error(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–æ–ª—É—á–µ–Ω–∏–∏ —Å—Ç—Ä–∞–Ω–∏—Ü—ã {url} (–ø–æ–ø—ã—Ç–∫–∞ {attempt + 1}/{retries}): {e}")
                if attempt < retries - 1:
                    time.sleep(delay)
                else:
                    return None
        return None
    
    def parse_reviews_list(self, html):
        """Build preview records for every review teaser in a listing page.

        Args:
            html: Markup of a review listing page.

        Returns:
            A list with one preview dict per ``<div class="smTeaser">`` element
            that ``parse_review_preview`` managed to parse; teasers for which
            it returned a falsy value are left out.
        """
        logging.info("–ü–∞—Ä—Å–∏–Ω–≥ —Å–ø–∏—Å–∫–∞ –æ—Ç–∑—ã–≤–æ–≤ –Ω–∞ –≥–ª–∞–≤–Ω–æ–π —Å—Ç—Ä–∞–Ω–∏—Ü–µ")
        document = BeautifulSoup(html, 'html.parser')

        # Locate every teaser block that wraps a single review.
        teaser_nodes = document.find_all('div', class_='smTeaser')
        parsed_previews = (self.parse_review_preview(node) for node in teaser_nodes)
        return [preview for preview in parsed_previews if preview]
    
    def extract_datetime_from_text(self, date_text):
        """Reformat a textual review timestamp as ``DD.MM.YY HH:MM``.

        The expected input looks like ``"14 Октябрь, 2025 - 15:53"``: day,
        Russian month name followed by a comma, year, then `` - `` and the time.
        The result for that example is ``"14.10.25 15:53"``.

        Args:
            date_text: Raw timestamp text taken from the review page.

        Returns:
            ``""`` for empty input; the reformatted string when the text has
            the expected shape and a recognised month name; otherwise
            ``date_text`` unchanged.
        """
        logging.info("–ò–∑–≤–ª–µ—á–µ–Ω–∏–µ –¥–∞—Ç—ã –∏ –≤—Ä–µ–º–µ–Ω–∏ –∏–∑ —Ç–µ–∫—Å—Ç–∞ (–ø—Ä–∏–º–µ—Ä: '14 –û–∫—Ç—è–±—Ä—å, 2025 - 15:53')")
        if not date_text:
            return ""

        # Lower-case Russian month names -> two-digit month number.
        # The lookup key is produced from page text with str.lower(), so the
        # keys must be real Cyrillic; mis-encoded keys can never match.
        # Both nominative ("октябрь") and genitive ("октября") forms are accepted.
        months = {
            'январь': '01', 'января': '01',
            'февраль': '02', 'февраля': '02',
            'март': '03', 'марта': '03',
            'апрель': '04', 'апреля': '04',
            'май': '05', 'мая': '05',
            'июнь': '06', 'июня': '06',
            'июль': '07', 'июля': '07',
            'август': '08', 'августа': '08',
            'сентябрь': '09', 'сентября': '09',
            'октябрь': '10', 'октября': '10',
            'ноябрь': '11', 'ноября': '11',
            'декабрь': '12', 'декабря': '12',
        }

        try:
            parts = date_text.split(' - ')
        except (AttributeError, TypeError) as e:
            # Non-string input: keep the best-effort contract and hand it back.
            logging.warning(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–∏ –¥–∞—Ç—ã '{date_text}': {e}")
            return date_text

        if len(parts) != 2:
            return date_text

        date_part, time_part = (part.strip() for part in parts)
        date_parts = date_part.split()
        if len(date_parts) < 3:
            return date_text

        month = months.get(date_parts[1].rstrip(',').lower())
        if month is None:
            # An unrecognised month used to be reported as January, which
            # silently produced a wrong date; return the raw text instead.
            return date_text

        day = date_parts[0].zfill(2)
        year = date_parts[2]
        # Shorten a four-digit year to its last two digits.
        year_short = year[2:] if len(year) == 4 else year

        return f"{day}.{month}.{year_short} {time_part}"

    def parse_review_preview(self, block):
        """Turn one teaser element from a listing page into a preview dict.

        Args:
            block: Parsed teaser element; it is queried with ``find`` and
                ``get``.

        Returns:
            A dict with the keys ``product_name``, ``author``, ``rating``,
            ``date``, ``time``, ``published_datetime``, ``title``,
            ``teaser_text``, ``full_review_url``, ``comments_count``,
            ``photos_count`` and ``scraped_at``, or ``None`` when anything
            goes wrong while reading the element (the error is logged).
        """
        logging.info("–ü–∞—Ä—Å–∏–Ω–≥ –ø—Ä–µ–≤—å—é –æ—Ç–∑—ã–≤–∞")

        def required_text(tag, css_class):
            # find() yields None for a missing element, so the attribute access
            # raises and the handler below rejects the whole preview.
            return block.find(tag, class_=css_class).get_text(strip=True)

        try:
            # Product and author shown in the teaser.
            product = required_text('div', 'productName')
            reviewer = required_text('div', 'authorName')

            # Star rating.
            stars = self.extract_rating(block.find('div', class_='starsRating'))

            # Publication date and time, also combined into a single field.
            created_on = required_text('span', 'date-created')
            created_at = required_text('span', 'time-created')

            # Headline and teaser text.
            headline = required_text('div', 'reviewTitle')
            snippet = required_text('span', 'reviewTeaserText')

            # Link to the full review, made absolute against the site root.
            relative_link = block.find('a', class_='reviewTextSnippet')['href']

            # Comment counter is optional and defaults to "0".
            comments_node = block.find('div', class_='comments')
            if comments_node:
                comments_total = comments_node.get_text(strip=True)
            else:
                comments_total = "0"

            return {
                'product_name': product,
                'author': reviewer,
                'rating': stars,
                'date': created_on,
                'time': created_at,
                'published_datetime': f"{created_on} {created_at}",
                'title': headline,
                'teaser_text': snippet,
                'full_review_url': urljoin(self.base_url, relative_link),
                'comments_count': comments_total,
                # Photo counter is read from an attribute of the teaser itself.
                'photos_count': block.get('data-photos-count', '0'),
                'scraped_at': time.strftime('%Y-%m-%d %H:%M:%S')
            }

        except Exception as e:
            logging.error(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–∞—Ä—Å–∏–Ω–≥–µ –ø—Ä–µ–≤—å—é –æ—Ç–∑—ã–≤–∞: {e}")
            return None
    
    def parse_full_review(self, url):
        logging.info("–ü–∞—Ä—Å–∏–Ω–≥ –ø–æ–ª–Ω–æ–π –≤–µ—Ä—Å–∏–∏ –æ—Ç–∑—ã–≤–∞")
        html = self.get_page(url)
        if not html:
            return None
            
        soup = BeautifulSoup(html, 'html.parser')
        review_block = soup.find('div', class_='reviewBlock')
        
        if not review_block:
            return None
            
        try:
            # –û—Å–Ω–æ–≤–Ω–∞—è –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—è
            author_elem = review_block.find('span', itemprop='name')
            author = author_elem.get_text(strip=True) if author_elem else ""
            
            # –†–µ–π—Ç–∏–Ω–≥
            rating_meta = review_block.find('meta', itemprop='ratingValue')
            rating = rating_meta['content'] if rating_meta else ""
            
            # –î–∞—Ç–∞ –ø—É–±–ª–∏–∫–∞—Ü–∏–∏
            date_meta = review_block.find('meta', itemprop='datePublished')
            published_date = date_meta['content'] if date_meta else ""
            
            # –î–∞—Ç–∞ –∏ –≤—Ä–µ–º—è –∏–∑ —Ç–µ–∫—Å—Ç–∞ (–¥–ª—è published_datetime)
            date_text_elem = review_block.find('span', class_='dtreviewed')
            date_text = date_text_elem.get_text(strip=True) if date_text_elem else ""
            
            # –ò–∑–≤–ª–µ–∫–∞–µ–º –¥–∞—Ç—É –∏ –≤—Ä–µ–º—è –∏–∑ —Ç–µ–∫—Å—Ç–∞ (–ø—Ä–∏–º–µ—Ä: "14 –û–∫—Ç—è–±—Ä—å, 2025 - 15:53")
            published_datetime = self.extract_datetime_from_text(date_text)
            
            # –ó–∞–≥–æ–ª–æ–≤–æ–∫
            title_elem = review_block.find('h2', class_='reviewTitle')
            title = title_elem.get_text(strip=True) if title_elem else ""
            
            # –°—Ç–æ–∏–º–æ—Å—Ç—å
            price_elem = review_block.find('div', class_='item-data')
            price = price_elem.get_text(strip=True) if price_elem else ""
            
            # –ü–æ–ª–Ω—ã–π —Ç–µ–∫—Å—Ç –æ—Ç–∑—ã–≤–∞
            review_body = review_block.find('div', itemprop='reviewBody')
            full_text = self.clean_review_text(review_body) if review_body else ""
            
            # –ò–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è (—Å–æ—Ö—Ä–∞–Ω—è–µ–º –∫–∞–∫ —Å—Ç—Ä–æ–∫—É —Å —Ä–∞–∑–¥–µ–ª–∏—Ç–µ–ª–µ–º –¥–ª—è CSV)
            images = []
            img_elements = review_block.find_all('img')
            for img in img_elements:
                src = img.get('src') or img.get('data-original')
                if src and 'user-images' in src:
                    images.append(urljoin(self.base_url, src))
            
            # –í–µ—Ä–¥–∏–∫—Ç (—Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç/–Ω–µ —Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç)
            verdict_elem = review_block.find('span', class_='verdict')
            verdict = verdict_elem.get_text(strip=True) if verdict_elem else ""
            
            return {
                'author': author,
                'rating': rating,
                'published_date': published_date,
                'published_datetime': published_datetime,
                'title': title,
                'price': price,
                'full_text': full_text,
                'images': ' | '.join(images),  # –°–æ—Ö—Ä–∞–Ω—è–µ–º –∫–∞–∫ —Å—Ç—Ä–æ–∫—É –¥–ª—è CSV
                'verdict': verdict,
                'review_url': url
            }
            
        except Exception as e:
            logging.error(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –ø–∞—Ä—Å–∏–Ω–≥–µ –ø–æ–ª–Ω–æ–≥–æ –æ—Ç–∑—ã–≤–∞ {url}: {e}")
            return None
    
    def extract_rating(self, rating_elem):
        """Read the star rating from a rating element.

        The rating is taken from the numeric suffix of the first CSS class
        containing ``fivestarWidgetStatic-`` whose suffix parses as an integer.
        If there is none, child ``<div class="star">`` elements that contain a
        ``<div class="on">`` are counted instead.

        Args:
            rating_elem: Rating element, or a falsy value when it is absent.

        Returns:
            The rating as an ``int``; ``0`` when ``rating_elem`` is falsy.
        """
        logging.info("–ò–∑–≤–ª–µ—á–µ–Ω–∏–µ —Ä–µ–π—Ç–∏–Ω–≥–∞ –∏–∑ –∑–≤–µ–∑–¥")
        if not rating_elem:
            return 0

        # First choice: a class name that carries the rating as its suffix.
        marker = 'fivestarWidgetStatic-'
        for css_class in rating_elem.get('class', []):
            if marker not in css_class:
                continue
            suffix = css_class.split('-')[-1]
            try:
                return int(suffix)
            except ValueError:
                # Suffix is not a number; try the next class name.
                pass

        # Fallback: count the stars that are switched on.
        star_nodes = rating_elem.find_all('div', class_='star')
        return sum(1 for node in star_nodes if node.find('div', class_='on'))
    
    def clean_review_text(self, review_body):
        """Return the plain text of a review body, one non-empty line per row.

        ``<br>`` elements inside ``review_body`` are replaced in place with
        newline characters before the text is extracted.

        Args:
            review_body: Element holding the review text, or a falsy value.

        Returns:
            The text with every line stripped and blank lines removed, joined
            by ``"\\n"``; ``""`` when ``review_body`` is falsy.
        """
        logging.info("–û—á–∏—Å—Ç–∫–∞ —Ç–µ–∫—Å—Ç–∞ –æ—Ç HTML —Ç–µ–≥–æ–≤ –∏ –ª–∏—à–Ω–∏—Ö –ø—Ä–æ–±–µ–ª–æ–≤")

        if not review_body:
            return ""

        # Keep explicit line breaks: swap each <br> for a newline character.
        for line_break in review_body.find_all("br"):
            line_break.replace_with("\n")

        raw_text = review_body.get_text(separator='\n')

        # Trim every line and discard the ones left empty.
        trimmed = (piece.strip() for piece in raw_text.split('\n'))
        return '\n'.join(piece for piece in trimmed if piece)
    
    def get_reviews_from_page(self, page_url):
        logging.info("–ü–æ–ª—É—á–∏—Ç—å –æ—Ç–∑—ã–≤—ã —Å –æ–¥–Ω–æ–π —Å—Ç—Ä–∞–Ω–∏—Ü—ã")
        logging.info(f"–ü–∞—Ä—Å–∏–Ω–≥ —Å—Ç—Ä–∞–Ω–∏—Ü—ã: {page_url}")
        
        html = self.get_page(page_url)
        if not html:
            logging.error(f"–ù–µ —É–¥–∞–ª–æ—Å—å –∑–∞–≥—Ä—É–∑–∏—Ç—å —Å—Ç—Ä–∞–Ω–∏—Ü—É: {page_url}")
            return []
            
        # –ü–∞—Ä—Å–∏–º –æ—Ç–∑—ã–≤—ã —Å —Ç–µ–∫—É—â–µ–π —Å—Ç—Ä–∞–Ω–∏—Ü—ã
        page_reviews = self.parse_reviews_list(html)
        logging.info(f"–ù–∞–π–¥–µ–Ω–æ –æ—Ç–∑—ã–≤–æ–≤ –Ω–∞ —Å—Ç—Ä–∞–Ω–∏—Ü–µ: {len(page_reviews)}")
        
        all_reviews = []
        
        # –î–ª—è –∫–∞–∂–¥–æ–≥–æ –æ—Ç–∑—ã–≤–∞ –ø–æ–ª—É—á–∞–µ–º –ø–æ–ª–Ω—É—é –≤–µ—Ä—Å–∏—é
        for i, review_preview in enumerate(page_reviews, 1):
            logging.info(f"–ü–∞—Ä—Å–∏–Ω–≥ –ø–æ–ª–Ω–æ–≥–æ –æ—Ç–∑—ã–≤–∞ {i}/{len(page_reviews)}: {review_preview['title'][:50]}...")
            
            full_review = self.parse_full_review(review_preview['full_review_url'])
            if full_review:
                # –û–±—ä–µ–¥–∏–Ω—è–µ–º –¥–∞–Ω–Ω—ã–µ –∏–∑ –ø—Ä–µ–≤—å—é –∏ –ø–æ–ª–Ω–æ–≥–æ –æ—Ç–∑—ã–≤–∞
                complete_review = {**review_preview, **full_review}
                all_reviews.append(complete_review)
            else:
                # –ï—Å–ª–∏ –Ω–µ —É–¥–∞–ª–æ—Å—å –ø–æ–ª—É—á–∏—Ç—å –ø–æ–ª–Ω—ã–π –æ—Ç–∑—ã–≤, —Å–æ—Ö—Ä–∞–Ω—è–µ–º —Ö–æ—Ç—è –±—ã –ø—Ä–µ–≤—å—é
                logging.error(f"–ù–µ —É–¥–∞–ª–æ—Å—å –ø–æ–ª—É—á–∏—Ç—å –ø–æ–ª–Ω—ã–π –æ—Ç–∑—ã–≤, —Å–æ—Ö—Ä–∞–Ω—è–µ–º –ø—Ä–µ–≤—å—é: {review_preview['title'][:50]}...")
                # –î–æ–±–∞–≤–ª—è–µ–º –æ–±—è–∑–∞—Ç–µ–ª—å–Ω—ã–µ –ø–æ–ª—è –∏–∑ –ø–æ–ª–Ω–æ–≥–æ –æ—Ç–∑—ã–≤–∞ —Å –ø—É—Å—Ç—ã–º–∏ –∑–Ω–∞—á–µ–Ω–∏—è–º–∏
                review_preview['published_date'] = ""
                review_preview['review_url'] = review_preview['full_review_url']
                review_preview['price'] = ""
                review_preview['full_text'] = ""
                review_preview['images'] = ""
                review_preview['verdict'] = ""
                all_reviews.append(review_preview)
            
            # –ó–∞–¥–µ—Ä–∂–∫–∞ –º–µ–∂–¥—É –∑–∞–ø—Ä–æ—Å–∞–º–∏
            time.sleep(5)
        
        return all_reviews
    
    def save_to_json(self, reviews, filename):
        """Write ``reviews`` to ``filename`` as indented UTF-8 JSON.

        Non-ASCII characters are written as-is rather than escaped.

        Args:
            reviews: JSON-serialisable data to store.
            filename: Path of the file to create or overwrite.
        """
        logging.info("–°–æ—Ö—Ä–∞–Ω–∏—Ç—å –æ—Ç–∑—ã–≤—ã –≤ JSON —Ñ–∞–π–ª")

        with open(filename, 'w', encoding='utf-8') as out_file:
            out_file.write(json.dumps(reviews, ensure_ascii=False, indent=2))

        logging.info(f"–û—Ç–∑—ã–≤—ã —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ {filename}")
    
    def save_to_csv(self, reviews, filename):
        """Write ``reviews`` to ``filename`` as a UTF-8 CSV file with a header row.

        The column set is fixed; keys outside it are ignored. Nothing is
        written when ``reviews`` is empty (an error is logged instead).

        Args:
            reviews: Sequence of review dicts.
            filename: Path of the file to create or overwrite.
        """
        logging.info("–°–æ—Ö—Ä–∞–Ω–∏—Ç—å –æ—Ç–∑—ã–≤—ã –≤ CSV —Ñ–∞–π–ª")
        import csv

        if not reviews:
            logging.error("–ù–µ—Ç –¥–∞–Ω–Ω—ã—Ö –¥–ª—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è –≤ CSV")
            return

        # Fixed column order: fields taken from the preview first ...
        preview_columns = (
            'product_name', 'author', 'rating', 'date', 'time',
            'published_datetime', 'title', 'teaser_text', 'full_review_url',
            'comments_count', 'photos_count', 'scraped_at',
        )
        # ... then the fields that only the full review supplies.
        full_review_columns = (
            'published_date', 'review_url', 'price', 'full_text',
            'images', 'verdict',
        )
        columns = [*preview_columns, *full_review_columns]

        with open(filename, 'w', encoding='utf-8', newline='') as out_file:
            table = csv.DictWriter(out_file, fieldnames=columns, extrasaction='ignore')
            table.writeheader()
            for review in reviews:
                table.writerow(review)

        logging.info(f"–û—Ç–∑—ã–≤—ã —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ {filename}")

# Example usage: walk the listing pages of one catalogue section, fetch every
# review, checkpoint after each page and write the final JSON and CSV files.
if __name__ == "__main__":
    # The root logger drops INFO records unless a handler is configured, which
    # would hide all progress messages. This call does nothing when logging
    # has already been configured.
    logging.basicConfig(level=logging.INFO)

    # Site root used to resolve relative links.
    BASE_URL = "https://irecommend.ru"

    parser = ReviewParser(BASE_URL)

    all_reviews = []
    total_pages = 20

    logging.info("–ù–∞—á–∏–Ω–∞–µ–º —Å–±–æ—Ä –æ—Ç–∑—ã–≤–æ–≤...")

    successful_pages = 0

    for page_num in range(0, total_pages):
        # The first page has no query string; later pages use ?page=<n>.
        if page_num == 0:
            page_url = "https://irecommend.ru/catalog/reviews/939-13393"
        else:
            page_url = f"https://irecommend.ru/catalog/reviews/939-13393?page={page_num}"

        logging.info(f"\n=== –°—Ç—Ä–∞–Ω–∏—Ü–∞ {page_num + 1}/{total_pages} ===")

        page_reviews = parser.get_reviews_from_page(page_url)

        if page_reviews:
            all_reviews.extend(page_reviews)
            successful_pages += 1
            logging.info(f"–£—Å–ø–µ—à–Ω–æ —Å–æ–±—Ä–∞–Ω–æ –æ—Ç–∑—ã–≤–æ–≤ —Å–æ —Å—Ç—Ä–∞–Ω–∏—Ü—ã: {len(page_reviews)}")
        else:
            # Was ``logging.inferroro``, which raised AttributeError and
            # aborted the run on the first page without reviews.
            logging.error(f"–ù–µ —É–¥–∞–ª–æ—Å—å —Å–æ–±—Ä–∞—Ç—å –æ—Ç–∑—ã–≤—ã —Å–æ —Å—Ç—Ä–∞–Ω–∏—Ü—ã {page_num + 1}")

        print(f"–í—Å–µ–≥–æ —Å–æ–±—Ä–∞–Ω–æ –æ—Ç–∑—ã–≤–æ–≤: {len(all_reviews)}")

        # Checkpoint: after each page that produced reviews, store everything
        # collected so far.
        if page_reviews:
            parser.save_to_json(all_reviews, f'reviews_page_{page_num + 1}.json')

        # Pause between listing pages.
        time.sleep(5)

    logging.info("\n=== –°–±–æ—Ä –∑–∞–≤–µ—Ä—à–µ–Ω ===")
    logging.info(f"–û–±—Ä–∞–±–æ—Ç–∞–Ω–æ —Å—Ç—Ä–∞–Ω–∏—Ü: {successful_pages}/{total_pages}")
    logging.info(f"–í—Å–µ–≥–æ —Å–æ–±—Ä–∞–Ω–æ –æ—Ç–∑—ã–≤–æ–≤: {len(all_reviews)}")

    # Final results.
    if all_reviews:
        parser.save_to_json(all_reviews, 'reviews_final.json')
        parser.save_to_csv(all_reviews, 'reviews_final.csv')
        logging.info("–§–∏–Ω–∞–ª—å–Ω—ã–µ —Ñ–∞–π–ª—ã —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã: reviews_final.json –∏ reviews_final.csv")
    else:
        logging.error("–ù–µ —É–¥–∞–ª–æ—Å—å —Å–æ–±—Ä–∞—Ç—å –æ—Ç–∑—ã–≤—ã")

Начинаем сбор отзывов...

=== Страница 1/20 ===
Парсинг страницы: https://irecommend.ru/catalog/reviews/939-13393
Сервер вернул ошибку 521 для https://irecommend.ru/catalog/reviews/939-13393. Попытка 1/3
Сервер вернул ошибку 521 для https://irecommend.ru/catalog/reviews/939-13393. Попытка 2/3
Сервер вернул ошибку 521 для https://irecommend.ru/catalog/reviews/939-13393. Попытка 3/3
Ошибка при получении страницы https://irecommend.ru/catalog/reviews/939-13393 (попытка 3/3): 521 Server Error:  for url: https://irecommend.ru/catalog/reviews/939-13393
Не удалось загрузить страницу: https://irecommend.ru/catalog/reviews/939-13393
Не удалось собрать отзывы со страницы 1
Всего собрано отзывов: 0

=== Страница 2/20 ===
Парсинг страницы: htt

KeyboardInterrupt: 

In [39]:
# Load the JSON checkpoint named for page 2 into a DataFrame for inspection.
df = pd.read_json('reviews_page_2.json')
# Display the first eight rows.
df.head(8)

Unnamed: 0,product_name,author,rating,date,time,published_datetime,title,teaser_text,full_review_url,comments_count,photos_count,scraped_at,published_date,review_url,price,full_text,images,verdict
0,–ß–∏–ø—Å—ã –∫–∞—Ä—Ç–æ—Ñ–µ–ª—å–Ω—ã–µ –û–û–û –§—Ä—ç—à —ç–Ω–¥ –°–Ω—ç–∫ –ö–æ–º–ø–∞–Ω–∏ G...,–ú–∞—Å—è–Ω—è –ú–∞–Ω—è—à–∞,4,2025-07-10,22:47,07.10.2025 22:47,"–ó–∞ —Ç–∞–∫—É—é —Ü–µ–Ω—É –∏ —Ç–∞–∫–æ–µ –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ —Å—á–∏—Ç–∞—é, —á—Ç–æ –≤...",–í—Å–µ–º –ø—Ä–∏–≤–µ—Ç–∏–∫–∏-–ø–∏—Å—Ç–æ–ª–µ—Ç–∏–∫–∏ . –°–µ–≥–æ–¥–Ω—è –≤ —Å–≤–æ–µ–º –æ...,https://irecommend.ru/content/za-takuyu-tsenu-...,1,5,2025-10-15 13:00:21,,https://irecommend.ru/content/za-takuyu-tsenu-...,,,,
1,–ß–∏–ø—Å—ã –∫–∞—Ä—Ç–æ—Ñ–µ–ª—å–Ω—ã–µ Lays –†–∞–∫–∏ –≤ –ø—Ä—è–Ω–æ–º —Å–æ—É—Å–µ,Piupiue,3,2025-07-10,21:17,07.10.25 21:17,–¢–∞–∫–æ–µ —Å–µ–±–µ,"–ù–∞ –ø—Ä–æ–±—É –≤–∑—è–ª, –ø–æ–≤—ë–ª—Å—è –Ω–∞ –º–æ—Ä—Å–∫–æ–π –º–æ—Ç–∏–≤. –û–∂–∏–¥–∞...",https://irecommend.ru/content/takoe-sebe-n1100...,1,3,2025-10-15 13:00:21,2025-10-07T20:17:14+02:00,https://irecommend.ru/content/takoe-sebe-n1100...,,"–ù–∞ –ø—Ä–æ–±—É –≤–∑—è–ª, –ø–æ–≤—ë–ª—Å—è –Ω–∞ –º–æ—Ä—Å–∫–æ–π –º–æ—Ç–∏–≤. –û–∂–∏–¥–∞...",https://irecommend.ru/sites/default/files/imag...,–Ω–µ —Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç
2,–ß–∏–ø—Å—ã –∫–∞—Ä—Ç–æ—Ñ–µ–ª—å–Ω—ã–µ Lays –†—ë–±—Ä—ã—à–∫–∏ –≥—Ä–∏–ª—å,summer_166,4,2025-07-10,20:10,07.10.2025 20:10,–ü—Ä–∏–∫–æ–ª—å–Ω—ã–π –≤–∫—É—Å —Ä–µ–±—Ä—ã—à–µ–∫ —Å–æ —Å–ª–∞–¥–∏–Ω–∫–æ–π –≤–æ –≤–∫—É—Å–µ...,–î–æ–±—Ä—ã–π –¥–µ–Ω—å –≤—Å–µ–º!–•–æ—á—É –ø–æ–¥–µ–ª–∏—Ç—å—Å—è –æ—Ç–∑—ã–≤–æ–º –Ω–∞ –ß–∏...,https://irecommend.ru/content/prikolnyi-vkus-r...,3,3,2025-10-15 13:00:21,,https://irecommend.ru/content/prikolnyi-vkus-r...,,,,
3,–ß–∏–ø—Å—ã –∫–∞—Ä—Ç–æ—Ñ–µ–ª—å–Ω—ã–µ –ß–µ–±–æ –ò–∑ –Ω–∞—Ç—É—Ä–∞–ª—å–Ω–æ–≥–æ –∫–∞—Ä—Ç–æ—Ñ...,VikAny,5,2025-06-10,22:59,06.10.25 22:59,"–ï—Å–ª–∏ –∑–∞—Ö–æ—á–µ—Ç—Å—è –ø–æ—Ö—Ä—É—Å—Ç–µ—Ç—å, —Ç–æ –≤—ã–±–µ—Ä—É - –ß–µ–±–æ‚ù§Ô∏è‚Äç...",–î–æ–±—Ä–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏ —Å—É—Ç–æ–∫! –Ø –Ω–µ –ª—é–±–ª—é —á–∏–ø—Å—ã.,https://irecommend.ru/content/esli-zakhochetsy...,6,14,2025-10-15 13:00:21,2025-10-06T21:59:11+02:00,https://irecommend.ru/content/esli-zakhochetsy...,–Ω–µ—Å–∫–æ–ª—å–∫–æ —Ä–∞–∑,–î–æ–±—Ä–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏ —Å—É—Ç–æ–∫! ‚òïÔ∏è\n–Ø –Ω–µ –ª—é–±–ª—é —á–∏–ø—Å—ã. –ï...,https://irecommend.ru/sites/default/files/imag...,—Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç
4,"–ß–∏–ø—Å—ã –∫–∞—Ä—Ç–æ—Ñ–µ–ª—å–Ω—ã–µ Lays ""–°—ã—Ä""",Ruti_Root,5,2025-06-10,21:38,06.10.2025 21:38,"üßÄ–†–∞–Ω—å—à–µ –±—ã–ª–∏ —Å–∞–º—ã–º–∏ –Ω–µ–ª—é–±–∏–º—ã–º–∏, –Ω–æ –Ω–µ —Å–µ–π—á–∞—Å! ...",–í—Å–µ–º –¥–æ–±—Ä–æ–≥–æ –≤—Ä–µ–º–µ–Ω–∏ —Å—É—Ç–æ–∫! ¬† –í –¥–∞–Ω–Ω–æ–º –æ—Ç–∑—ã–≤–µ ...,https://irecommend.ru/content/ranshe-byli-samy...,5,7,2025-10-15 13:00:21,,https://irecommend.ru/content/ranshe-byli-samy...,,,,
5,–ß–∏–ø—Å—ã –∫–∞—Ä—Ç–æ—Ñ–µ–ª—å–Ω—ã–µ –†–æ–±–ª–æ–∫—Å–µ—Ä—ã –°–æ –≤–∫—É—Å–æ–º —Å–º–µ—Ç–∞–Ω...,Grafusha,4,2025-05-10,17:33,05.10.25 17:33,"–†–æ–±–ª–æ–∫—Å —Ç–∞–º, —Ä–æ–±–ª–æ–∫—Å —Ç—É—Ç, —Ä–æ–±–ª–æ–∫—Å –≤–µ–∑–¥–µüòµ–∑–∞–º–∞–Ω—á...","–í—Å–µ–º –ø—Ä–∏–≤–µ—Ç –†–æ–±–ª–æ–∫—Å –∑–∞—Ö–≤–∞—Ç–∏–ª –¥–µ—Ç—Å–∫–∏–π –º–∏—Ä, –≤—Å–µ ...",https://irecommend.ru/content/robloks-tam-robl...,78,3,2025-10-15 13:00:21,2025-10-05T16:33:25+02:00,https://irecommend.ru/content/robloks-tam-robl...,–æ–¥–∏–Ω —Ä–∞–∑,"–í—Å–µ–º –ø—Ä–∏–≤–µ—ÇüéÄ\n–†–æ–±–ª–æ–∫—Å –∑–∞—Ö–≤–∞—Ç–∏–ª –¥–µ—Ç—Å–∫–∏–π –º–∏—Ä, –≤—Å...",https://irecommend.ru/sites/default/files/imag...,—Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç
6,–ß–∏–ø—Å—ã –∫–∞—Ä—Ç–æ—Ñ–µ–ª—å–Ω—ã–µ Lorenz Crunchips X-cut –ß–∏–º–∏...,–ê—Ä—Å–∫—Ä–∏–≥–∏—Ü–∏–æ–Ω–∏–µ—Ü,5,2025-05-10,03:48,05.10.25 03:48,–í–∫—É—Å –∏–∑ 00-—Ö: —è—Ä–∫–∏–π —É–∫—Ä–æ–ø –∏ —á—ë—Ä–Ω—ã–π –ø–µ—Ä–µ—Ü,–í —ç—Ç–æ–º –æ—Ç–∑—ã–≤–µ –ø–æ–¥–µ–ª—é—Å—å —Å–≤–æ–∏–º–∏ –≤–ø–µ—á–∞—Ç–ª–µ–Ω–∏—è–º–∏ –æ—Ç...,https://irecommend.ru/content/vkus-iz-00-kh-ya...,1,5,2025-10-15 13:00:21,2025-10-05T02:48:29+02:00,https://irecommend.ru/content/vkus-iz-00-kh-ya...,149 —Ä—É–±–ª–µ–π,–í —ç—Ç–æ–º –æ—Ç–∑—ã–≤–µ –ø–æ–¥–µ–ª—é—Å—å —Å–≤–æ–∏–º–∏ –≤–ø–µ—á–∞—Ç–ª–µ–Ω–∏—è–º–∏ –æ—Ç...,https://irecommend.ru/sites/default/files/imag...,—Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç
7,–ß–∏–ø—Å—ã Binggrae –∫—Ä–∞–±–æ–≤—ã–µ,V1ku1ya.7,2,2025-04-10,15:05,04.10.25 15:05,"–Ø—Ä–∫–∞—è —É–ø–∞–∫–æ–≤–∫–∏ –ø—à–µ–Ω–∏—á–Ω—ã—Ö —á–∏–ø—Å–æ–≤, –∞ —è—Ä–∫–æ—Å—Ç–∏ –≤–æ ...","–î–æ–±—Ä–æ–≥–æ –¥–µ–Ω—å–∫–∞, –∫—Ç–æ –∑–∞–≥–ª—è–Ω—É–ª –∫–æ –º–Ω–µ –Ω–∞ –æ—á–µ—Ä–µ–¥–Ω...",https://irecommend.ru/content/yarkaya-upakovki...,14,11,2025-10-15 13:00:21,2025-10-04T14:05:24+02:00,https://irecommend.ru/content/yarkaya-upakovki...,–æ–¥–∏–Ω —Ä–∞–∑,"–î–æ–±—Ä–æ–≥–æ –¥–µ–Ω—å–∫–∞, –∫—Ç–æ –∑–∞–≥–ª—è–Ω—É–ª –∫–æ –º–Ω–µ –Ω–∞ –æ—á–µ—Ä–µ–¥–Ω...",https://irecommend.ru/sites/default/files/imag...,–Ω–µ —Ä–µ–∫–æ–º–µ–Ω–¥—É–µ—Ç


In [40]:
# Print the complete review text stored under index label 1.
print(df['full_text'][1])

–ù–∞ –ø—Ä–æ–±—É –≤–∑—è–ª, –ø–æ–≤—ë–ª—Å—è –Ω–∞ –º–æ—Ä—Å–∫–æ–π –º–æ—Ç–∏–≤. –û–∂–∏–¥–∞–Ω–∏—è –Ω–µ –æ–ø—Ä–∞–≤–¥–∞–ª–∏—Å—å
–°–æ—Å—Ç–∞–≤ —Ç–∞–∫–æ–π –∂–µ –ø–∞—Ä—à–∏–≤—ã–π –∫–∞–∫ –∏ –æ–±—ã—á–Ω–æ, –Ω–æ —Ç—É—Ç —É–¥–∏–≤–ª–µ–Ω–∏—è –Ω–µ –∏—Å–ø—ã—Ç–∞–ª.
–í–º–µ—Å—Ç–æ –Ω–∞—Å—ã—â–µ–Ω–Ω–æ–≥–æ –≤–∫—É—Å–∞ —Ä–∞–∫–æ–≤ –≤ —Å–ø–µ—Ü–∏—è—Ö –ø–æ–ª—É—á–∏–ª –∫–∞–∫–æ–π-—Ç–æ –ø–æ—Ä–æ—à–∫–æ–≤—ã–π –∫–æ–Ω—Ü–µ–Ω—Ç—Ä–∞—Ç —Å –≤—ã—Ä–∞–∂–µ–Ω–Ω—ã–º –∞—Ä–æ–º–∞—Ç–∏–∑–∞—Ç–æ—Ä–æ–º –º–æ—Ä–µ–ø—Ä–æ–¥—É–∫—Ç–æ–≤. –í–∫—É—Å –æ–¥–Ω–æ–±–æ–∫–∏–π, –Ω–µ —è—Ä–∫–∏–π, –Ω–µ—Ç –º–Ω–æ–≥–æ—Å–ª–æ–π–Ω–æ—Å—Ç–∏, —á—Ç–æ –æ–±–µ—â–∞–µ—Ç "–ø—Ä—è–Ω—ã–π —Å–æ—É—Å". –ü—Ä–æ—Å—Ç–æ —Å–æ–ª–µ–Ω–æ—Å—Ç—å –∏ –æ—Ç—Ç–µ–Ω–æ–∫ –∞—Ä–æ–º–∞—Ç–∏–∑–∞—Ç–æ—Ä–∞ –¥–ª—è —Å—É—Ö–∞—Ä–∏–∫–æ–≤.
–í –æ–±—â–µ–º —Å –Ω–∞—Å—Ç–æ—è—â–∏–º –≤–∫—É—Å–æ–º –º–∞–ª–æ –æ–±—â–µ–≥–æ, –±–æ–ª–µ–µ –º–µ–Ω–µ–µ –∫–∞—á–µ—Å—Ç–≤–æ —Å–∞–º–æ–π –∫–∞—Ä—Ç–æ—à–∫–∏ –µ—â—ë —Å—Ç–∞—Ä–∞—é—Ç—Å—è —Å–æ–±–ª—é–¥–∞—Ç—å, –Ω–æ —Å –∫–∞–∂–¥—ã–º –Ω–æ–≤—ã–º –æ–ø—Ä–æ–±–æ–≤–∞–Ω–Ω—ã–º –≤–∫—É—Å–æ–º –¥–æ–≤–µ—Ä–∏–µ –∫ —ç—Ç–æ–º—É –ø—Ä–æ–∏–∑–≤