In [11]:
import asyncio
import re
import pandas as pd
from pyppeteer import launch # type: ignore

In [None]:
async def _text_of(element, default='N/A'):
    """Return the stripped ``textContent`` of *element*, or *default*.

    *default* is returned when the element is missing (``None``) or when its
    text is empty / whitespace-only, so callers always get a usable sentinel.
    """
    if not element:
        return default
    handle = await element.getProperty('textContent')
    text = await handle.jsonValue()
    return (text or '').strip() or default


def _parse_additional_info(text):
    """Split an additional-info line into ``(country, genre, director)``.

    The expected shape is ``"<country> • <genre> Режиссёр: <director>"``
    ("Режиссёр:" is Russian for "Director:"). Any part that is absent or
    empty is reported as ``'N/A'``.
    """
    head = text.strip()
    director = 'N/A'
    if "Режиссёр:" in head:
        head, director_part = head.split("Режиссёр:", 1)
        director = director_part.strip() or 'N/A'
    # partition() yields an empty genre when there is no "•" separator,
    # which the `or` fallback below turns into the 'N/A' sentinel.
    country, _, genre = head.partition("•")
    return country.strip() or 'N/A', genre.strip() or 'N/A', director


async def scrape_page(page, page_number):
    """Scrape one page of the Kinopoisk top-250 list.

    Args:
        page: A browser page object exposing ``goto``, ``waitForSelector``
            and ``querySelectorAll`` coroutines (a pyppeteer page in this
            notebook).
        page_number: Value substituted into the ``?page=`` query parameter.

    Returns:
        A list of dicts, one per movie card, with the keys 'Название',
        'Год', 'Рейтинг', 'Страна', 'Жанр' and 'Режиссёр'. Every value is a
        string; fields that cannot be found are set to ``'N/A'``.
    """
    url = f"https://www.kinopoisk.ru/lists/movies/top250/?page={page_number}"
    print(f"Scraping: {url}")
    await page.goto(url)

    # Wait until at least one movie card exists before collecting them.
    await page.waitForSelector('[data-test-id="movie-list-item"]')
    movie_elements = await page.querySelectorAll('[data-test-id="movie-list-item"]')

    # NOTE(review): the class names below look like build-generated hashes
    # and may change when the site is redeployed -- confirm before relying on them.
    movies_data = []
    for movie in movie_elements:
        title = await _text_of(await movie.querySelector('.styles_mainTitle__IFQyZ'))

        secondary_text = await _text_of(
            await movie.querySelector('.desktop-list-main-info_secondaryText__M_aus'))
        # The year is the first standalone 19xx/20xx number in the secondary line.
        match = re.search(r'\b(19|20)\d{2}\b', secondary_text)
        year = match.group(0) if match else 'N/A'

        rating = await _text_of(
            await movie.querySelector('.styles_kinopoiskValuePositive__7AAZG'))

        additional_info_elems = await movie.querySelectorAll(
            '.desktop-list-main-info_additionalInfo__Hqzof')
        country = genre = director = 'N/A'
        if additional_info_elems:
            # Only the first additional-info element is parsed.
            info_text = await _text_of(additional_info_elems[0], default='')
            country, genre, director = _parse_additional_info(info_text)

        movies_data.append({
            'Название': title,
            'Год': year,
            'Рейтинг': rating,
            'Страна': country,
            'Жанр': genre,
            'Режиссёр': director
        })

    return movies_data

In [13]:
async def scraper():
    """Scrape list pages 1-5, build a typed DataFrame and save it as CSV.

    A non-headless browser is launched from the Microsoft Edge executable at
    a hard-coded Windows path, each page is scraped with ``scrape_page``,
    and the combined rows are written tab-separated to ``kinopoisk250.csv``
    in the current working directory.

    Returns:
        pandas.DataFrame with the columns 'Название', 'Год', 'Рейтинг',
        'Страна', 'Жанр' and 'Режиссёр'. 'Год' is integer-typed and
        'Рейтинг' is float-typed; values that could not be scraped are
        stored as missing instead of raising during conversion.
    """
    browser = await launch({
        "headless": False,
        "executablePath": r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe"
    })
    # Close the browser even when a page fails to load or parse, so a failed
    # run does not leave a browser process behind.
    try:
        page = await browser.newPage()

        all_movies = []
        for page_number in range(1, 6):
            all_movies.extend(await scrape_page(page, page_number))
    finally:
        await browser.close()

    # Explicit columns keep the frame well-formed when nothing was scraped.
    columns = ["Название", "Год", "Рейтинг", "Страна", "Жанр", "Режиссёр"]
    df = pd.DataFrame(all_movies, columns=columns)

    for text_column in ("Название", "Страна", "Жанр", "Режиссёр"):
        df[text_column] = df[text_column].astype(str)

    # scrape_page reports unknown values as the string 'N/A', which a plain
    # astype(int) / astype(float) cannot convert; coerce those to missing.
    years = pd.to_numeric(df["Год"], errors="coerce")
    if years.notna().all():
        df["Год"] = years.astype(int)
    else:
        # Nullable integer dtype keeps known years integral next to the gaps.
        df["Год"] = years.astype("Int64")
    df["Рейтинг"] = pd.to_numeric(df["Рейтинг"], errors="coerce").astype(float)

    df.to_csv("kinopoisk250.csv", sep='\t', index=False, header=True)
    return df

In [14]:
# Run the full scrape. `await` is used at cell top level here; in a plain
# Python script this would need asyncio.run(scraper()) instead.
df = await scraper()

Scraping: https://www.kinopoisk.ru/lists/movies/top250/?page=1
Scraping: https://www.kinopoisk.ru/lists/movies/top250/?page=2
Scraping: https://www.kinopoisk.ru/lists/movies/top250/?page=3
Scraping: https://www.kinopoisk.ru/lists/movies/top250/?page=4
Scraping: https://www.kinopoisk.ru/lists/movies/top250/?page=5


In [15]:
# Plain-text dump of the scraped frame; pandas elides the middle rows and
# wraps the last column onto a second section, as seen in the output below.
print(df)

                       Название   Год  Рейтинг          Страна        Жанр  \
0                           1+1  2011      8.4         Франция       драма   
1                  Интерстеллар  2014      8.3             США  фантастика   
2             Побег из Шоушенка  1994      8.2             США       драма   
3              Остров проклятых  2009      8.1             США     триллер   
4                  Зеленая миля  1999      8.1             США       драма   
..                          ...   ...      ...             ...         ...   
245             Сплетение судеб  2023      7.2         Франция       драма   
246               Летят журавли  1957      7.2            СССР     военный   
247                     Мементо  2000      7.2             США     триллер   
248  Мальчик в полосатой пижаме  2008      7.2  Великобритания       драма   
249         Ford против Ferrari  2019      7.2             США   биография   

              Режиссёр  
0         Оливье Накаш  
1      Кристо