In [1]:
import re
from pyppeteer import launch # type: ignore
from random import randint

import seaborn as sns
import matplotlib as plt


In [2]:
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_7_10) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15',
]
CAPTCHA_MARKER = "captcha"
LIST_ITEM = '[data-test-id="movie-list-item"]'

In [3]:
async def wait_captcha(page):
  print('🛑 Captcha shown')
  while CAPTCHA_MARKER in page.url:
    await page.waitForNavigation({'waitUntil': 'domcontentloaded'})
  # await page.waitForSelector(LIST_ITEM, timeout=0)
  print('✅ Captcha solved')

In [4]:
async def safe_goto(page, url):
  await page.goto(url, {'waitUntil': 'domcontentloaded'})
  if CAPTCHA_MARKER in page.url:
    await wait_captcha(page)
  # await page.waitForSelector(LIST_ITEM, timeout=0)

In [5]:
async def scrape_movie(page, page_number):
    url = f"https://www.kinopoisk.ru/lists/movies/top250/?page={page_number}"
    print(f"Scraping: {url}")
    await safe_goto(page, url)

    movie_elements = await page.querySelectorAll('[data-test-id="movie-list-item"]')

    movies_data = []
    for movie in movie_elements:
        href_elem = await movie.querySelector('.base-movie-main-info_link__K161e')
        href = await (await href_elem.getProperty('href')).jsonValue() if href_elem else 'None'
        movie_id = re.search(r'(\d{3,})', href).group(1) if href != None else 'None'


        title_elem = await movie.querySelector('[class^="styles_mainTitle__"]')
        title = await (await title_elem.getProperty('textContent')).jsonValue() if title_elem else 'None'
        title = title.strip()

        secondary_elem = await movie.querySelector('[class^="desktop-list-main-info_secondaryTitleSlot__"]')
        secondary_text = await (
            await secondary_elem.getProperty('textContent')).jsonValue() if secondary_elem else 'None'
        secondary_text = secondary_text.strip()

        match = re.search(r'\b(19|20)\d{2}\b', secondary_text)
        year = match.group(0) if match else 'None'

        rating_elem = await movie.querySelector('[class^="styles_kinopoiskValuePositive__"]')
        rating = await (await rating_elem.getProperty('textContent')).jsonValue() if rating_elem else 'None'
        rating = rating.strip()

        additional_info_elems = await movie.querySelectorAll('[class^="desktop-list-main-info_additionalInfo__"]')
        country = genre = director = None

        if additional_info_elems:
            text1 = await (await additional_info_elems[0].getProperty('textContent')).jsonValue()
            text1 = text1.strip()
            if "Режиссёр:" in text1:
                left, director_text = text1.split("Режиссёр:", 1)
                director = director_text.strip()
                if "•" in left:
                    country, genre = [part.strip() for part in left.split("•", 1)]
                else:
                    country = left.strip()
                    genre = None
            else:
                if "•" in text1:
                    country, genre = [part.strip() for part in text1.split("•", 1)]
                else:
                    country = text1.strip()
                    genre = None
        

        movies_data.append({
            'id': movie_id,
            'Название': title,
            'Год': year,
            'Рейтинг': rating,
            'Страна': country,
            'Жанр': genre,  
            'Режиссёр': director
        })

    return movies_data

In [6]:
async def getMovieVotes(browser, movieId):
    newPage = await browser.newPage()
    await newPage.setUserAgent(USER_AGENTS[randint(0, len(USER_AGENTS)-1)])
    await safe_goto(newPage, f"https://www.kinopoisk.ru/film/{movieId}/votes/")

    votes = []
    votes_elements = await newPage.querySelectorAll('tr.rating_item')
    for row in votes_elements:
        href_elem = await row.querySelector('.profile_name a[href^="/user/"]')
        href = await (await href_elem.getProperty('href')).jsonValue() if href_elem else None
        
        user_id = None
        if href:
            match = re.search(r'/user/(\d+)/', href)
            user_id = match.group(1) if match else None

        rating_elem = await row.querySelector('.comm-title table td')
        rating = await (await rating_elem.getProperty('textContent')).jsonValue() if rating_elem else None
        rating = rating.strip() if rating else None
        
        votes.append({
            'user_id': user_id,
            'rating': rating
        })
        
    await newPage.close()

    print(votes)
    return votes

In [7]:
async def getProfileVotes(browser, profileId):
    newPage = await browser.newPage()
    await newPage.setUserAgent(USER_AGENTS[randint(0, len(USER_AGENTS)-1)])
    await safe_goto(newPage, f"https://www.kinopoisk.ru/user/{profileId}/votes/list/vs/vote/perpage/10/#list")

    votes = []

    items = await newPage.querySelectorAll('.profileFilmsList .item')

    for item in items:
        href_elem = await item.querySelector('.nameRus a')
        href = await (await href_elem.getProperty('href')).jsonValue() if href_elem else None
        
        # Extract movie ID from href (handles both /film/ and /movie/ URLs)
        movie_id = None
        if href:
            match = re.search(r'/film/(\d+)/?', href)
            movie_id = match.group(1) if match else None
        
        vote_elem = await item.querySelector('.vote')
        rating = await (await vote_elem.getProperty('textContent')).jsonValue() if vote_elem else None
        rating = rating.strip() if rating else None
        
        if movie_id and rating:
            votes.append({
                'movie_id': movie_id,
                'rating': rating
            })

    await newPage.close()
    
    print(votes)
    return votes

In [8]:
async def scrapeOtherStuff(browser, movies):
    parsed_movie_ids = set(movie['id'] for movie in movies)
    
    for item in movies:
        movieVotes = await getMovieVotes(browser, item['id'])
        movieVotes = movieVotes[:5] # Limit for testing

        for movieVotesList in movieVotes:
            profileVotes = await getProfileVotes(browser, movieVotesList['user_id'])
            
            # Find new movies that need to be parsed
            new_movie_ids = []
            for profileVote in profileVotes:
                movie_id = profileVote['movie_id']
                if movie_id not in parsed_movie_ids:
                    new_movie_ids.append(movie_id)
                    parsed_movie_ids.add(movie_id)
            
            # Parse new movies
            if new_movie_ids:
                print(f"Found {len(new_movie_ids)} new movies to parse")
                for movie_id in new_movie_ids:
                    new_movie = await scrape_single_movie(browser, movie_id)
                    if new_movie:
                        movies.append(new_movie)
                        print(f"Parsed new movie: {new_movie['Название']}")

        movieUniqueIds = [movie['id'] for movie in movies]
        print(f"Total unique movies: {len(set(movieUniqueIds))}")

In [9]:
async def scrape_single_movie(browser, movie_id):
    try:
        newPage = await browser.newPage()
        await newPage.setUserAgent(USER_AGENTS[randint(0, len(USER_AGENTS)-1)])
        await safe_goto(newPage, f"https://www.kinopoisk.ru/film/{movie_id}/")
        
        # Extract movie information
        title_elem = await newPage.querySelector('[class^="styles_title__"]')
        title = await (await title_elem.getProperty('textContent')).jsonValue() if title_elem else 'None'

        # Extract year using data-test-id selector
        year_elem = await newPage.querySelector('div[data-test-id="year"] .styles_valueDark__jsGKY a')
        year = await (await year_elem.getProperty('textContent')).jsonValue() if year_elem else 'None'
        year = year.strip() if year != 'None' else 'None'
        
        # If year not found, try extracting from title format "Film (2000)"
        if year == 'None' and title and title != 'None':
            match = re.search(r'\((\d{4})\)$', title)
            year = match.group(1) if match else 'None'
            
        # Remove year from title to get clean movie name
        if title and title != 'None':
            title = re.sub(r'\s*\(\d{4}\)$', '', title).strip()
        
        # Get rating
        rating_elem = await newPage.querySelector('[class*="styles_ratingKpTop__"] [class*="styles_value__"]')
        rating = await (await rating_elem.getProperty('textContent')).jsonValue() if rating_elem else 'None'
        rating = rating.strip()
        
        # Get genre using data-test-id selector
        genre_elem = await newPage.querySelector('div[data-test-id="genres"] .styles_valueDark__jsGKY a')
        genre = await (await genre_elem.getProperty('textContent')).jsonValue() if genre_elem else 'None'
        genre = genre.strip() if genre != 'None' else 'None'
        
        # Get country using data-test-id selector
        country_elem = await newPage.querySelector('div[data-test-id="countries"] .styles_valueDark__jsGKY a')
        country = await (await country_elem.getProperty('textContent')).jsonValue() if country_elem else 'None'
        country = country.strip() if country != 'None' else 'None'
        
        # Get director using data-test-id selector
        director_elem = await newPage.querySelector('div[data-test-id="directors"] .styles_valueDark__jsGKY a')
        director = await (await director_elem.getProperty('textContent')).jsonValue() if director_elem else 'None'
        director = director.strip() if director != 'None' else 'None'
        
        await newPage.close()
        
        return {
            'id': movie_id,
            'Название': title,
            'Год': year,
            'Рейтинг': rating,
            'Страна': country,
            'Жанр': genre,
            'Режиссёр': director
        }
    except Exception as e:
        print(f"Error scraping movie {movie_id}: {e}")
        if 'newPage' in locals():
            await newPage.close()
        return None

In [10]:
async def scraper():
    browser = await launch({
        "headless": False,
        "executablePath": r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
        "args": ["--disable-blink-features=AutomationControlled"]
    })
    page = await browser.newPage()
    await page.setUserAgent(USER_AGENTS[randint(0, len(USER_AGENTS)-1)])

    all_movies = []
    for page_number in range(1, 2):
        movies_on_page = await scrape_movie(page, page_number)
        all_movies.extend(movies_on_page)

    all_movies = all_movies[:2]  # Limit for testing

    await scrapeOtherStuff(browser, all_movies)

    # newPage = await browser.newPage()
    # await newPage.setUserAgent(USER_AGENTS[randint(0, len(USER_AGENTS)-1)])
    # await getProfileVotes(newPage, 16195960)  # Example movie ID

    return
    
    all_movies = []
    for page_number in range(1, 6):
        movies_on_page = await scrape_movie(page, page_number)
        all_movies.extend(movies_on_page)

    await browser.close()

    df = pd.DataFrame(all_movies)
    df["id"] = df["id"].astype(int)
    df["Название"] = df["Название"].astype(str)
    df["Год"] = df["Год"].astype(int)
    df["Рейтинг"] = df["Рейтинг"].astype(float)
    df["Страна"] = df["Страна"].astype(str)
    df["Жанр"] = df["Жанр"].astype(str)
    df["Режиссёр"] = df["Режиссёр"].astype(str)

    df.to_csv("movies.csv", index=False, header=True)
    return df

In [None]:
movies = await scraper()

Scraping: https://www.kinopoisk.ru/lists/movies/top250/?page=1
[{'user_id': '73414739', 'rating': '10'}, {'user_id': '35403457', 'rating': '9'}, {'user_id': '159391171', 'rating': '10'}, {'user_id': '198349381', 'rating': '10'}, {'user_id': '207545723', 'rating': '10'}, {'user_id': '40492414', 'rating': '8'}, {'user_id': '171426451', 'rating': '8'}, {'user_id': '96333201', 'rating': '8'}, {'user_id': '54103275', 'rating': '7'}, {'user_id': '72127065', 'rating': '10'}, {'user_id': '70768787', 'rating': '10'}, {'user_id': '199726760', 'rating': '10'}, {'user_id': '160912514', 'rating': '10'}, {'user_id': '207239672', 'rating': '10'}, {'user_id': '186316864', 'rating': '10'}, {'user_id': '200543940', 'rating': '10'}, {'user_id': '175864776', 'rating': '10'}, {'user_id': '144575965', 'rating': '10'}, {'user_id': '84803186', 'rating': '10'}, {'user_id': '16788099', 'rating': '10'}]
[{'movie_id': '535341', 'rating': '10'}, {'movie_id': '5417693', 'rating': '5'}, {'movie_id': '6440213', 'rati

Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error Target.detachFromTarget: Target closed.')>
pyppeteer.errors.NetworkError: Protocol error Target.detachFromTarget: Target closed.
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id


Parsed new movie: Трейлер (русский язык)
Parsed new movie: ТВ-ролик
Parsed new movie: Тизер (русский язык)
[{'movie_id': '958722', 'rating': '9'}, {'movie_id': '5456450', 'rating': '10'}, {'movie_id': '5446941', 'rating': '8'}, {'movie_id': '5450038', 'rating': '8'}, {'movie_id': '4626783', 'rating': '10'}, {'movie_id': '1047492', 'rating': '10'}, {'movie_id': '501172', 'rating': '10'}, {'movie_id': '195319', 'rating': '10'}, {'movie_id': '5138530', 'rating': '6'}]
Found 4 new movies to parse
Parsed new movie: Трейлер №2 (дублированный)
Parsed new movie: Трейлер №2 (дублированный)
Parsed new movie: Трейлер (дублированный)
Parsed new movie: Трейлер
Total unique movies: 532
[{'user_id': '166653650', 'rating': '9'}, {'user_id': '13629768', 'rating': '7'}, {'user_id': '21395558', 'rating': '10'}, {'user_id': '88996610', 'rating': '3'}, {'user_id': '105800846', 'rating': '9'}, {'user_id': '137775113', 'rating': '10'}, {'user_id': '99770889', 'rating': '6'}, {'user_id': '164324587', 'rating'

Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error Target.detachFromTarget: Target closed.')>
pyppeteer.errors.NetworkError: Protocol error Target.detachFromTarget: Target closed.
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id


Parsed new movie: Международный трейлер (дублированный)
Parsed new movie: Трейлер к 50-летию фильма (дублированный)
Parsed new movie: Трейлер
Parsed new movie: Трейлер (русский язык)
Parsed new movie: Трейлер (русский язык)
[{'movie_id': '819101', 'rating': '10'}, {'movie_id': '2494', 'rating': '9'}, {'movie_id': '195434', 'rating': '9'}, {'movie_id': '104938', 'rating': '10'}, {'movie_id': '838', 'rating': '9'}, {'movie_id': '939411', 'rating': '8'}, {'movie_id': '7466', 'rating': '8'}, {'movie_id': '835086', 'rating': '8'}, {'movie_id': '350', 'rating': '10'}, {'movie_id': '43970', 'rating': '7'}]
[{'movie_id': '671', 'rating': '7'}, {'movie_id': '391085', 'rating': '7'}, {'movie_id': '4606', 'rating': '7'}, {'movie_id': '2011', 'rating': '7'}, {'movie_id': '87789', 'rating': '7'}, {'movie_id': '3948', 'rating': '6'}, {'movie_id': '102510', 'rating': '6'}, {'movie_id': '580661', 'rating': '7'}, {'movie_id': '2602', 'rating': '7'}, {'movie_id': '468958', 'rating': '6'}]
Found 9 new mo

Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error Target.detachFromTarget: Target closed.')>
pyppeteer.errors.NetworkError: Protocol error Target.detachFromTarget: Target closed.
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id


[{'movie_id': '652833', 'rating': '8'}, {'movie_id': '5456450', 'rating': '8'}, {'movie_id': '2494', 'rating': '9'}, {'movie_id': '3491', 'rating': '8'}, {'movie_id': '420224', 'rating': '4'}, {'movie_id': '401177', 'rating': '5'}, {'movie_id': '1290942', 'rating': '5'}, {'movie_id': '376446', 'rating': '10'}, {'movie_id': '12090', 'rating': '8'}, {'movie_id': '261636', 'rating': '6'}]
Found 5 new movies to parse
Parsed new movie: Трейлер
Parsed new movie: Трейлер (дублированный)
Parsed new movie: Трейлер
Parsed new movie: Трейлер №3 (русский язык)
Parsed new movie: Трейлер №2 (дублированный)
[{'movie_id': '819101', 'rating': '10'}, {'movie_id': '2494', 'rating': '9'}, {'movie_id': '195434', 'rating': '9'}, {'movie_id': '104938', 'rating': '10'}, {'movie_id': '838', 'rating': '9'}, {'movie_id': '939411', 'rating': '8'}, {'movie_id': '7466', 'rating': '8'}, {'movie_id': '835086', 'rating': '8'}, {'movie_id': '350', 'rating': '10'}, {'movie_id': '43970', 'rating': '7'}]
[{'movie_id': '24

## 1. Popularity-Based Recommendation System

This system recommends movies based on their popularity using weighted ratings, similar to the book recommendation system.

In [None]:
def create_popularity_recommendations(movies_df, ratings_df):
    """
    Create popularity-based recommendations using weighted ratings
    """
    # Merge movies with ratings
    movie_ratings = ratings_df.merge(movies_df, left_on='movie_id', right_on='id', how='inner')
    
    # Calculate average ratings
    avg_ratings = movie_ratings.groupby('Название')['rating'].mean().reset_index()
    avg_ratings.columns = ['Название', 'avg_rating']
    
    # Calculate rating counts
    cnt_ratings = movie_ratings.groupby('Название')['rating'].count().reset_index()
    cnt_ratings.columns = ['Название', 'count_rating']
    
    # Merge average and count
    popularity = avg_ratings.merge(cnt_ratings, on='Название')
    
    # Calculate weighted score using IMDB formula
    v = popularity['count_rating']
    R = popularity['avg_rating']
    m = v.quantile(0.70)  # Movies must have at least 70th percentile of votes
    c = R.mean()  # Average rating across all movies
    
    # Calculate weighted score
    popularity['w_score'] = ((v * R) + (m * c)) / (v + m)
    
    # Sort by weighted score
    popular_movies = popularity.sort_values('w_score', ascending=False)
    
    return popular_movies

def plot_top_movies(popular_movies, top_n=10):
    """
    Plot top N movies by weighted score
    """
    top_movies = popular_movies.head(top_n)
    
    plt.figure(figsize=(15, 8))
    sns.barplot(data=top_movies, x='w_score', y='Название')
    plt.xlabel('Weighted Score')
    plt.ylabel('Movie Title')
    plt.title(f'Top {top_n} Movies by Weighted Score')
    plt.tight_layout()
    plt.show()
    
    return top_movies

## 2. Collaborative Filtering Recommendation System

This system finds similar users or similar movies based on rating patterns.

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

def prepare_collaborative_filtering_data(movies_df, ratings_df, min_ratings=5):
    """
    Prepare data for collaborative filtering
    """
    # Merge ratings with movie titles
    ratings_with_movies = ratings_df.merge(movies_df[['id', 'Название']], 
                                         left_on='movie_id', right_on='id', how='inner')
    
    # Filter users and movies with minimum ratings
    user_counts = ratings_with_movies['user_id'].value_counts()
    movie_counts = ratings_with_movies['Название'].value_counts()
    
    active_users = user_counts[user_counts >= min_ratings].index
    popular_movies = movie_counts[movie_counts >= min_ratings].index
    
    filtered_ratings = ratings_with_movies[
        ratings_with_movies['user_id'].isin(active_users) & 
        ratings_with_movies['Название'].isin(popular_movies)
    ]
    
    # Create user-movie matrix
    user_movie_matrix = filtered_ratings.pivot_table(
        index='user_id', 
        columns='Название', 
        values='rating'
    ).fillna(0)
    
    return user_movie_matrix, filtered_ratings

def find_similar_movies_collaborative(user_movie_matrix, movie_name, num_recommendations=10):
    """
    Find similar movies using collaborative filtering
    """
    if movie_name not in user_movie_matrix.columns:
        return f"Movie '{movie_name}' not found in the dataset"
    
    # Get movie ratings vector
    movie_ratings = user_movie_matrix[movie_name]
    
    # Calculate cosine similarity with all other movies
    movie_matrix = user_movie_matrix.T  # Transpose to get movies as rows
    similarity_scores = cosine_similarity([movie_ratings], movie_matrix.values)[0]
    
    # Create similarity dataframe
    movies_similarity = pd.DataFrame({
        'Название': movie_matrix.index,
        'similarity_score': similarity_scores
    }).sort_values('similarity_score', ascending=False)
    
    # Remove the input movie itself and return top recommendations
    recommendations = movies_similarity[movies_similarity['Название'] != movie_name].head(num_recommendations)
    
    return recommendations

def find_user_recommendations(user_movie_matrix, user_id, num_recommendations=10):
    """
    Find movie recommendations for a specific user
    """
    if user_id not in user_movie_matrix.index:
        return f"User {user_id} not found in the dataset"
    
    # Get user's ratings
    user_ratings = user_movie_matrix.loc[user_id]
    
    # Find movies the user hasn't rated
    unrated_movies = user_ratings[user_ratings == 0].index
    
    if len(unrated_movies) == 0:
        return "User has rated all movies in the dataset"
    
    # Calculate similarity with other users
    user_similarity = cosine_similarity([user_ratings], user_movie_matrix.values)[0]
    
    # Create similarity dataframe
    user_sim_df = pd.DataFrame({
        'user_id': user_movie_matrix.index,
        'similarity': user_similarity
    }).sort_values('similarity', ascending=False)
    
    # Remove the target user
    similar_users = user_sim_df[user_sim_df['user_id'] != user_id].head(10)
    
    # Calculate weighted average ratings for unrated movies
    recommendations = []
    for movie in unrated_movies:
        weighted_sum = 0
        similarity_sum = 0
        
        for _, similar_user in similar_users.iterrows():
            sim_user_id = similar_user['user_id']
            similarity = similar_user['similarity']
            rating = user_movie_matrix.loc[sim_user_id, movie]
            
            if rating > 0:  # Only consider movies rated by similar users
                weighted_sum += similarity * rating
                similarity_sum += similarity
        
        if similarity_sum > 0:
            predicted_rating = weighted_sum / similarity_sum
            recommendations.append({
                'Название': movie,
                'predicted_rating': predicted_rating
            })
    
    # Sort by predicted rating
    recommendations_df = pd.DataFrame(recommendations).sort_values('predicted_rating', ascending=False)
    
    return recommendations_df.head(num_recommendations)

## 3. Content-Based Recommendation System

This system recommends movies based on movie features like genre, director, and country.

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

def prepare_content_based_data(movies_df):
    """
    Prepare data for content-based recommendations
    """
    # Clean the data
    movies_clean = movies_df.copy()
    movies_clean = movies_clean.fillna('Unknown')
    
    # Create content features by combining genre, director, and country
    movies_clean['content_features'] = (
        movies_clean['Жанр'].astype(str) + ' ' + 
        movies_clean['Режиссёр'].astype(str) + ' ' + 
        movies_clean['Страна'].astype(str)
    )
    
    return movies_clean

def find_similar_movies_content_based(movies_df, movie_name, num_recommendations=10):
    """
    Find similar movies using content-based filtering
    """
    # Prepare data
    movies_clean = prepare_content_based_data(movies_df)
    
    # Check if movie exists
    if movie_name not in movies_clean['Название'].values:
        return f"Movie '{movie_name}' not found in the dataset"
    
    # Create TF-IDF matrix
    tfidf = TfidfVectorizer(stop_words='english', lowercase=True)
    tfidf_matrix = tfidf.fit_transform(movies_clean['content_features'])
    
    # Calculate cosine similarity
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
    
    # Get index of the movie
    movie_idx = movies_clean[movies_clean['Название'] == movie_name].index[0]
    
    # Get similarity scores for all movies
    sim_scores = list(enumerate(cosine_sim[movie_idx]))
    
    # Sort movies by similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get top similar movies (excluding the input movie)
    movie_indices = [i[0] for i in sim_scores[1:num_recommendations+1]]
    
    # Return recommended movies with their features
    recommendations = movies_clean.iloc[movie_indices][['Название', 'Год', 'Жанр', 'Режиссёр', 'Страна', 'Рейтинг']]
    
    # Add similarity scores
    similarity_scores = [i[1] for i in sim_scores[1:num_recommendations+1]]
    recommendations = recommendations.copy()
    recommendations['similarity_score'] = similarity_scores
    
    return recommendations

def recommend_by_genre(movies_df, genre, num_recommendations=10):
    """
    Recommend top movies from a specific genre
    """
    # Filter movies by genre
    genre_movies = movies_df[movies_df['Жанр'].str.contains(genre, case=False, na=False)]
    
    if genre_movies.empty:
        return f"No movies found for genre '{genre}'"
    
    # Convert rating to numeric and sort
    genre_movies = genre_movies.copy()
    genre_movies['Рейтинг_numeric'] = pd.to_numeric(genre_movies['Рейтинг'], errors='coerce')
    
    # Sort by rating and return top recommendations
    top_movies = genre_movies.sort_values('Рейтинг_numeric', ascending=False).head(num_recommendations)
    
    return top_movies[['Название', 'Год', 'Жанр', 'Режиссёр', 'Страна', 'Рейтинг']]

## 4. Complete Recommendation System Implementation

Now let's create a comprehensive function that combines all recommendation methods.

In [None]:
class MovieRecommendationSystem:
    def __init__(self, movies_df, ratings_df):
        self.movies_df = movies_df
        self.ratings_df = ratings_df
        self.user_movie_matrix = None
        self.popular_movies = None
        self.setup_system()
    
    def setup_system(self):
        """Initialize all recommendation components"""
        # Prepare popularity-based recommendations
        self.popular_movies = create_popularity_recommendations(self.movies_df, self.ratings_df)
        
        # Prepare collaborative filtering data
        if not self.ratings_df.empty:
            try:
                self.user_movie_matrix, _ = prepare_collaborative_filtering_data(
                    self.movies_df, self.ratings_df, min_ratings=3
                )
            except Exception as e:
                print(f"Could not prepare collaborative filtering: {e}")
                self.user_movie_matrix = None
    
    def get_popular_movies(self, n=10):
        """Get top popular movies"""
        return self.popular_movies.head(n)
    
    def get_similar_movies(self, movie_name, method='content', n=10):
        """Get similar movies using specified method"""
        if method == 'content':
            return find_similar_movies_content_based(self.movies_df, movie_name, n)
        elif method == 'collaborative' and self.user_movie_matrix is not None:
            return find_similar_movies_collaborative(self.user_movie_matrix, movie_name, n)
        else:
            return "Collaborative filtering not available or method not recognized"
    
    def get_user_recommendations(self, user_id, n=10):
        """Get recommendations for a specific user"""
        if self.user_movie_matrix is not None:
            return find_user_recommendations(self.user_movie_matrix, user_id, n)
        else:
            return "Collaborative filtering not available"
    
    def get_genre_recommendations(self, genre, n=10):
        """Get top movies from a specific genre"""
        return recommend_by_genre(self.movies_df, genre, n)
    
    def get_comprehensive_recommendations(self, movie_name=None, user_id=None, genre=None, n=5):
        """Get comprehensive recommendations using multiple methods"""
        recommendations = {}
        
        # Popular movies
        recommendations['popular'] = self.get_popular_movies(n)
        
        # Movie-based recommendations
        if movie_name:
            recommendations['similar_content'] = self.get_similar_movies(movie_name, 'content', n)
            if self.user_movie_matrix is not None:
                recommendations['similar_collaborative'] = self.get_similar_movies(movie_name, 'collaborative', n)
        
        # User-based recommendations
        if user_id:
            recommendations['user_based'] = self.get_user_recommendations(user_id, n)
        
        # Genre-based recommendations
        if genre:
            recommendations['genre_based'] = self.get_genre_recommendations(genre, n)
        
        return recommendations