In [None]:
! nvidia-smi

In [1]:
!pip install implicit
!pip install implicit[gpu]



In [2]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from google.colab import drive
from scipy.sparse import coo_matrix, csr_matrix
from scipy.sparse.linalg import svds
from sklearn.metrics.pairwise import cosine_similarity
import os
import implicit
from implicit.als import AlternatingLeastSquares
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.preprocessing import MultiLabelBinarizer

In [50]:
# @title Функции

def top_to_parametr(k: int, n=10) -> pd.DataFrame:
    movie_id = k
    correlations_for_movie = movie_correlation_matrix[movie_id]
    top_10_movies = correlations_for_movie.sort_values(ascending=False)[1:n+1]
    top_10_movies = pd.merge(movie_details[['movieId', 'title']], top_10_movies, on='movieId', how='inner').sort_values(ascending=False, by=k)
    top_10_movies['relevance'] = top_10_movies[k]
    top_10_movies = top_10_movies.reset_index()
    del top_10_movies[k]
    del top_10_movies['index']
    return top_10_movies

def priority(X: str, y: int) -> None:
    X = str(X)
    movie_ratings.loc[movie_ratings['title'] == X, ['flag']] = y

def top_IMDb_score(n=10) -> pd.DataFrame:
    C = movie_ratings['average_rating'].mean()
    m = movie_ratings['vote_count'].quantile(0.99)
    def weighted_rating(x, m=m, C=C):
        flag = x['flag']
        v = x['vote_count']
        R = x['average_rating']
        if flag == 1:
            return 5.0
        elif flag == -1:
            return 0.0
        else:
            return ((v/(v+m) * R) + (m/(m+v) * C))
    q_movies = movie_ratings.copy().loc[(movie_ratings['vote_count'] >= m) & (movie_ratings['flag'] != -1) | (movie_ratings['flag'] == 1)]
    q_movies['score'] = q_movies.apply(weighted_rating, axis=1)
    q_movies = q_movies.sort_values('score', ascending=False)
    q_movies = q_movies[['movieId', 'vote_count', 'average_rating', 'score']][0:n]
    q_movies = pd.merge(movie_details[['movieId', 'title', 'first_genre']], q_movies, on='movieId', how='inner').sort_values(ascending=False, by='score')
    q_movies = q_movies.reset_index()
    del q_movies['index']
    return q_movies

def top_in_genre_IMDb_score(n=10) -> pd.DataFrame:
    C = movie_ratings['average_rating'].mean()
    m = movie_ratings['vote_count'].quantile(0.99)
    def weighted_rating(x, m=m, C=C):
        flag = x['flag']
        v = x['vote_count']
        R = x['average_rating']
        if flag == 1:
            return 5.0
        elif flag == -1:
            return 0.0
        else:
            return ((v/(v+m) * R) + (m/(m+v) * C))
    q_movies = pd.merge(movie_ratings[['movieId', 'vote_count', 'average_rating', 'flag']],
                        movie_details[['movieId', 'title', 'first_genre']],
                        on='movieId', how='inner')
    q_movies = q_movies.loc[(q_movies['vote_count'] >= m) & (q_movies['flag'] != -1) | (q_movies['flag'] == 1)]
    q_movies['score'] = q_movies.apply(weighted_rating, axis=1)
    q_movies = q_movies.sort_values('score', ascending=False)
    q_movies = q_movies[['movieId', 'title', 'first_genre', 'vote_count', 'average_rating', 'score']]
    q_movies = q_movies.groupby('first_genre').head(1)
    q_movies = q_movies.reset_index(drop=True)
    q_movies = q_movies[0:n]
    return q_movies

def get_top_n_recommendations(user_id, n=20, min_avg_rating=3.0):
    if user_id not in user_id_to_index:
        print(f"User {user_id} not found in filtered dataset.")
        return pd.DataFrame(columns=['movieId', 'predicted_rating'])

    user_idx = user_id_to_index[user_id]
    user_ratings = filtered_ratings[filtered_ratings['userId'] == user_id]
    seen_movies = user_ratings['movieId'].values

    unseen_movie_ids = np.setdiff1d(movie_ids, seen_movies)
    movie_avg_ratings = ratings.groupby('movieId')['rating'].mean()

    valid_movie_ids = [
        movie_id for movie_id in unseen_movie_ids
        if movie_id in movie_avg_ratings and movie_avg_ratings[movie_id] >= min_avg_rating
    ]

    top_n_movie_ids = []
    top_n_ratings = []

    for movie_id in valid_movie_ids:
        movie_idx = movie_id_to_index.get(movie_id)
        if movie_idx is None:
            continue
        predicted_rating = np.dot(np.dot(U[movie_idx, :], sigma), Vt[:, user_idx])
        top_n_movie_ids.append(movie_id)
        top_n_ratings.append(predicted_rating)

    if not top_n_movie_ids:
        return pd.DataFrame(columns=['movieId', 'predicted_rating'])

    top_n_movies = pd.DataFrame({
        'movieId': top_n_movie_ids,
        'predicted_rating': top_n_ratings
    })

    top_n_movies = pd.merge(movie_details[['movieId', 'title']],
                        top_n_movies[['movieId', 'predicted_rating']],
                        on='movieId', how='inner')

    if not top_n_movies.sort_values(by='predicted_rating', ascending=False).head(n).empty:
        print(f"Top 20 recommended movieIds for user {user_id}:")
        return top_n_movies.sort_values(by='predicted_rating', ascending=False).head(n)
    else:
        print("No recommendations found.")

def get_als_recommendations(user_id, n=20, min_avg_rating=3.0):
    if user_id not in user_id_to_index:
        print(f"User {user_id} not found.")
        pd.DataFrame(columns=['movieId', 'predicted_rating'])
    user_idx = user_id_to_index[user_id]

    if user_idx >= als_ratings.T.shape[0]:
        print(f"user_idx {user_idx} out of bounds for ALS matrix with shape {als_ratings.T.shape}")
        pd.DataFrame(columns=['movieId', 'predicted_rating'])
    user_rated_movies = filtered_ratings[filtered_ratings['userId'] == user_id]['movieId'].values
    movie_avg_ratings = ratings.groupby('movieId')['rating'].mean()
    user_items_csr = als_ratings.T.tocsr()
    user_items_for_recommendation = user_items_csr[user_idx]
    recommended_movie_indices, recommended_scores = als_model.recommend(
        user_idx,
        user_items_for_recommendation,
        N=100000,
        filter_items=[
            movie_id_to_index[mid] for mid in user_rated_movies if mid in movie_id_to_index
        ]
    )
    results = []

    index_to_movie_id = {index: movie_id for movie_id, index in movie_id_to_index.items()}
    results = []
    for movie_idx, score in zip(recommended_movie_indices, recommended_scores):
        if movie_idx in index_to_movie_id:
            movie_id = index_to_movie_id[movie_idx]
            if movie_id in movie_avg_ratings and movie_avg_ratings[movie_id] >= min_avg_rating:
                results.append((movie_id, score))
        if len(results) >= n:
            break
    df = pd.DataFrame(results, columns=['movieId', 'predicted_rating'])
    df = pd.merge(movie_details[['movieId', 'title']], df, on='movieId', how='inner')
    return df

def hybrid_user_recommendations(user_id, n=20, min_avg_rating=3.0, alpha=0.8):
    svd_df = get_top_n_recommendations(user_id, n=100000, min_avg_rating=min_avg_rating)
    als_df = get_als_recommendations(user_id, n=100000, min_avg_rating=min_avg_rating)
    hybrid_df = pd.merge(svd_df, als_df[['movieId', 'predicted_rating']], on='movieId', suffixes=('_svd', '_als'))
    hybrid_df['predicted_rating'] = alpha * hybrid_df['predicted_rating_svd'] + \
                                     (1 - alpha) * hybrid_df['predicted_rating_als']
    hybrid_df = pd.merge(hybrid_df, movie_ratings[['movieId', 'vote_count', 'average_rating']], on='movieId', how='inner')
    hybrid_df = hybrid_df[hybrid_df['vote_count'] > 500]
    return hybrid_df.sort_values(by='predicted_rating', ascending=False).head(n)

def get_genre_recommended_movies(userId, n=10):
    user_ratings = ratings[ratings['userId'] == userId]
    user_movie_ids = user_ratings['movieId'].unique()

    user_genres = []
    for movie_id in user_movie_ids:
        genres_string = movie_details[movie_details['movieId'] == movie_id]['genres'].values[0]
        user_genres.extend(genres_string.split())
    user_genres = list(set(user_genres))

    movie_details['genres_list'] = movie_details['genres'].apply(lambda x: x.split())
    mlb = MultiLabelBinarizer()
    genre_matrix = mlb.fit_transform(movie_details['genres_list'])

    user_vector = mlb.transform([user_genres])
    similarity_scores = cosine_similarity(user_vector, genre_matrix).flatten()
    temp_movie_details = movie_details.copy()
    temp_movie_details['genre_similarity'] = similarity_scores
    temp_movie_details = temp_movie_details[~temp_movie_details['movieId'].isin(user_movie_ids)]
    recommended_movies = temp_movie_details[['movieId', 'title', 'genre_similarity']].sort_values(by='genre_similarity', ascending=False).head(n)

    return recommended_movies

def genre_collab_hybrid(user_id, n=20, alpha=0.7):
    user_ratings = ratings[ratings['userId'] == user_id]
    user_movie_ids = user_ratings['movieId'].unique()

    genre_recs = get_genre_recommended_movies(user_id, n=50)

    top_rated_movies = user_ratings.sort_values(by='rating', ascending=False)['movieId'].head(3).tolist()
    collab_recs = pd.DataFrame()
    for movie_id in top_rated_movies:
        try:
            recs = top_to_parametr(movie_id, n=20)
            recs['source_movie'] = movie_id
            collab_recs = pd.concat([collab_recs, recs], ignore_index=True)
        except Exception:
            continue

    hybrid = pd.merge(collab_recs, genre_recs, on='movieId', how='outer')

    hybrid['relevance'] = hybrid['relevance'].fillna(0.0)
    hybrid['genre_similarity'] = hybrid['genre_similarity'].fillna(0.0)

    hybrid['final_score'] = alpha * hybrid['relevance'] + (1 - alpha) * hybrid['genre_similarity']
    hybrid = hybrid[['movieId', 'final_score']].drop_duplicates()
    hybrid = pd.merge(hybrid[['movieId', 'final_score']], movie_details[['movieId', 'title']], on='movieId', how='inner')
    hybrid = hybrid[['movieId', 'title', 'final_score']]

    return hybrid.sort_values(by='final_score', ascending=False).head(n)

def recommend_movies_after_first_choice(viewed_movie_ids, n=15):
    if isinstance(viewed_movie_ids, int):
        viewed_movie_ids = [viewed_movie_ids]

    top_movies = top_in_genre_IMDb_score(n=n+5)
    top_movies = top_movies[~top_movies['movieId'].isin(viewed_movie_ids)]

    similar_movies = pd.DataFrame()
    for movie_id in viewed_movie_ids:
        try:
            recs = top_to_parametr(movie_id, n=7)
            recs['source_movie'] = movie_id
            similar_movies = pd.concat([similar_movies, recs], ignore_index=True)
        except Exception:
            continue

    combined_movies = pd.concat([top_movies[['movieId', 'title']], similar_movies[['movieId', 'title']]], ignore_index=True)
    combined_movies = combined_movies.drop_duplicates(subset='movieId')
    combined_movies = combined_movies.sample(frac=1).reset_index(drop=True)

    return combined_movies.head(n)

def recommend_for_user(user_id):
    user_ratings = ratings[ratings['userId'] == user_id]
    rated_movies = user_ratings['movieId'].tolist()

    if len(rated_movies) == 0:
        return top_in_genre_IMDb_score()
    elif len(rated_movies) <= 2:
        return recommend_movies_after_first_choice(rated_movies)
    elif len(rated_movies) < 20:
        return genre_collab_hybrid(user_id)
    else:
        return hybrid_user_recommendations(user_id)

def create_tables(n=999999, k=999999, force_recompute=False):
    drive.mount('/content/drive')

    cache_path = '/content/drive/MyDrive/ml-latest/movie_corr.parquet'

    movie_tags = pd.read_csv('/content/drive/MyDrive/ml-latest/genome-scores.csv', low_memory=False)
    movie_tags['relevance'] = movie_tags['relevance'].astype(np.float32)
    movie_tags = movie_tags[movie_tags['movieId'] < n]

    top_movie_ids = movie_tags.groupby('movieId')['relevance'].sum().nlargest(5000).index
    movie_tags = movie_tags[movie_tags['movieId'].isin(top_movie_ids)]

    if os.path.exists(cache_path) and not force_recompute:
        print("Загружаем кэшированную матрицу сходства фильмов...")
        movie_correlation_matrix = pd.read_parquet(cache_path)
    else:
        print("Рассчитываем матрицу сходства фильмов...")
        pivot_df = movie_tags.pivot(index='tagId', columns='movieId', values='relevance').fillna(0)
        similarity = cosine_similarity(pivot_df.T)
        movie_correlation_matrix = pd.DataFrame(similarity, index=pivot_df.columns, columns=pivot_df.columns).round(5)
        movie_correlation_matrix.to_parquet(cache_path)
        print(f"Матрица сохранена в {cache_path}")

    tags = pd.read_csv('/content/drive/MyDrive/ml-latest/genome-tags.csv', low_memory=False)
    movie_tags = pd.merge(movie_tags, tags, on='tagId', how='left')

    movies = pd.read_csv('/content/drive/MyDrive/ml-latest/links.csv', low_memory=False)
    movies = movies[movies['movieId'] < n]

    movie_details = pd.read_csv('/content/drive/MyDrive/ml-latest/movies.csv', low_memory=False)
    movie_details = movie_details[movie_details['movieId'] < n]
    movie_details['genres'] = movie_details['genres'].str.replace('|', ' ', regex=False)
    movie_details['first_genre'] = movie_details['genres'].str.split().str[0]
    movie_details = movie_details[['movieId', 'title', 'genres', 'first_genre']]

    ratings = pd.read_csv('/content/drive/MyDrive/ml-latest/ratings.csv', low_memory=False)
    #ratings['date'] = pd.to_datetime(ratings['timestamp'], unit='s').dt.strftime('%Y-%m-%d')
    ratings = ratings[ratings['movieId'] < n]
    ratings = ratings[ratings['userId'] < k]
    #ratings = pd.merge(movie_details[['movieId', 'title']], ratings, on='movieId', how='inner')
    #ratings = ratings[['userId', 'movieId', 'title', 'rating', 'timestamp', 'date']]

    movie_ratings = ratings.groupby('movieId').agg(
        average_rating=('rating', 'mean'),
        vote_count=('rating', 'count')
    ).reset_index()
    movie_ratings = pd.merge(movie_details[['movieId', 'title']], movie_ratings, on='movieId', how='inner')
    movie_ratings['flag'] = 0

    user_tags = pd.read_csv('/content/drive/MyDrive/ml-latest/tags.csv', low_memory=False)
    #user_tags['date'] = pd.to_datetime(user_tags['timestamp'], unit='s').dt.strftime('%Y-%m-%d')
    user_tags = user_tags[user_tags['movieId'] < n]
    user_tags = user_tags[user_tags['userId'] < k]

    return movie_correlation_matrix, tags, movie_tags, movies, movie_details, ratings, movie_ratings, user_tags

In [4]:
movie_correlation_matrix, tags, movie_tags, movies, movie_details, ratings, movie_ratings, user_tags = create_tables()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Загружаем кэшированную матрицу сходства фильмов...


In [5]:
# @title SVD

user_rating_counts = ratings.groupby('userId').size()
filtered_ratings = ratings[ratings['userId'].isin(user_rating_counts[user_rating_counts >= 20].index)].copy()

movie_ids = filtered_ratings['movieId'].unique()
user_ids = filtered_ratings['userId'].unique()

movie_id_to_index = {movie_id: idx for idx, movie_id in enumerate(movie_ids)}
user_id_to_index = {user_id: idx for idx, user_id in enumerate(user_ids)}
index_to_user_id = {idx: user_id for user_id, idx in user_id_to_index.items()}

filtered_ratings['movie_idx'] = filtered_ratings['movieId'].map(movie_id_to_index)
filtered_ratings['user_idx'] = filtered_ratings['userId'].map(user_id_to_index)

ratings_sparse = coo_matrix((
    filtered_ratings['rating'],
    (filtered_ratings['movie_idx'], filtered_ratings['user_idx'])
))

user_means = filtered_ratings.groupby('user_idx')['rating'].mean()
filtered_ratings['normalized_rating'] = filtered_ratings.apply(
    lambda row: row['rating'] - user_means[row['user_idx']], axis=1
)

ratings_sparse_normalized = coo_matrix((
    filtered_ratings['normalized_rating'],
    (filtered_ratings['movie_idx'], filtered_ratings['user_idx'])
))

n_components = 50
U, sigma, Vt = svds(ratings_sparse_normalized, k=n_components)
sigma = np.diag(sigma)

In [5]:
# @title Попытка накрутить подбор гиперпараметров ALS (пока неудача)

import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix
from implicit.als import AlternatingLeastSquares
from sklearn.model_selection import train_test_split, ParameterGrid
from collections import defaultdict

train_val, test = train_test_split(filtered_ratings, test_size=0.2, random_state=42)
train, val = train_test_split(train_val, test_size=0.25, random_state=42)

def build_interaction_matrix(df, shape, use_normalized=False):
    values = df['normalized_rating'] if use_normalized else df['rating']
    return csr_matrix((values, (df['user_idx'], df['movie_idx'])), shape=shape)

num_users = filtered_ratings['user_idx'].max() + 1
num_items = filtered_ratings['movie_idx'].max() + 1
matrix_shape = (num_users, num_items)

train_matrix = build_interaction_matrix(train, matrix_shape)
val_matrix = build_interaction_matrix(val, matrix_shape)
test_matrix = build_interaction_matrix(test, matrix_shape)

def precision_at_k(model, train_mat, test_df, K=10):
    test_dict = defaultdict(set)
    for _, row in test_df.iterrows():
        test_dict[row['user_idx']].add(row['movie_idx'])

    precisions = []
    for user in test_dict:
        if user >= train_mat.shape[0]:
            continue

        try:
            recommended = model.recommend(
                userid=user,
                user_items=train_mat[user],
                N=K,
                filter_already_liked_items=True
            )
        except Exception:
            continue

        rec_items = set([item_id for item_id, _ in recommended])
        true_items = test_dict[user]
        if not true_items:
            continue
        precision = len(rec_items & true_items) / K
        precisions.append(precision)

    return np.mean(precisions) if precisions else 0.0

param_grid = {
    'factors': [25, 50],
    'regularization': [0.001, 0.01],
    'iterations': [25, 50]
}

best_score = -1
best_model = None
best_params = None

print("Поиск лучших параметров...")

for params in ParameterGrid(param_grid):
    print(f"\nПараметры: {params}")
    model = AlternatingLeastSquares(
        factors=params['factors'],
        regularization=params['regularization'],
        iterations=params['iterations'],
        use_gpu=False
    )

    train_matrix_T = train_matrix.T.tocsr()
    model.fit(train_matrix_T)

    score = precision_at_k(model, train_matrix, val)
    print(f"Precision@10: {score:.4f}")

    if score > best_score:
        best_score = score
        best_model = model
        best_params = params

print(f"\nЛучшая конфигурация: {best_params}, Precision@10 = {best_score:.4f}")

final_train_df = pd.concat([train, val])
final_train_matrix = build_interaction_matrix(final_train_df, matrix_shape)
final_train_matrix_T = final_train_matrix.T.tocsr()

final_model = AlternatingLeastSquares(
    factors=best_params['factors'],
    regularization=best_params['regularization'],
    iterations=best_params['iterations'],
    use_gpu=False
)
final_model.fit(final_train_matrix_T)

final_score = precision_at_k(final_model, final_train_matrix, test)
print(f"\nФинальный Precision@10 на тесте: {final_score:.4f}")

Поиск лучших параметров...

Параметры: {'factors': 25, 'iterations': 25, 'regularization': 0.001}


  check_blas_config()


  0%|          | 0/25 [00:00<?, ?it/s]

Precision@10: 0.0000

Параметры: {'factors': 25, 'iterations': 25, 'regularization': 0.01}


  0%|          | 0/25 [00:00<?, ?it/s]

Precision@10: 0.0000

Параметры: {'factors': 25, 'iterations': 50, 'regularization': 0.001}


  0%|          | 0/50 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [6]:
# @title ALS

cache_model_path = '/content/drive/MyDrive/ml-latest/als_model.pkl'

force_recompute = False

if os.path.exists(cache_model_path) and not force_recompute:
    print("Загружаем кэшированную ALS модель...")

    als_ratings = csr_matrix((
        filtered_ratings['rating'],
        (filtered_ratings['movie_idx'], filtered_ratings['user_idx'])
    ))

    als_model = joblib.load(cache_model_path)

else:
    print("Рассчитываем ALS...")

    als_ratings = csr_matrix((
        filtered_ratings['rating'],
        (filtered_ratings['movie_idx'], filtered_ratings['user_idx'])
    ))

    als_model = AlternatingLeastSquares(
        factors=50,
        regularization=0.01,
        iterations=20,
        use_gpu=False
    )
    als_model.fit(als_ratings)

    joblib.dump(als_model, cache_model_path)

    print(f"Модель сохранена в {cache_model_path}")

Рассчитываем ALS...


  check_blas_config()


  0%|          | 0/20 [00:00<?, ?it/s]

Модель сохранена в /content/drive/MyDrive/ml-latest/als_model.pkl


In [51]:
recommend_for_user(1)

Top 20 recommended movieIds for user 1:


Unnamed: 0,movieId,title,predicted_rating_svd,predicted_rating_als,predicted_rating,vote_count,average_rating
0,1198,Raiders of the Lost Ark (Indiana Jones and the...,0.395715,1.159981,0.548568,75248,4.101039
2,6377,Finding Nemo (2003),0.227251,0.647466,0.311294,48124,3.820724
14,1923,There's Something About Mary (1998),0.064988,1.289573,0.309905,31185,3.515456
1,3147,"Green Mile, The (1999)",0.258113,0.511453,0.308781,41222,4.042696
23426,7700,"Wages of Fear, The (Salaire de la peur, Le) (1...",-0.001467,1.454725,0.289772,1321,4.013626
249,1201,"Good, the Bad and the Ugly, The (Buono, il bru...",0.005599,1.41862,0.288203,23823,4.13191
23514,1014,Pollyanna (1960),-0.001612,1.440113,0.286733,1815,3.377135
5,2115,Indiana Jones and the Temple of Doom (1984),0.12715,0.916558,0.285032,30436,3.698663
122,81788,"Next Three Days, The (2010)",0.012613,1.351828,0.280456,2269,3.739974
19,63082,Slumdog Millionaire (2008),0.054619,1.17756,0.279207,26797,3.828694


In [None]:
priority("Shawshank Redemption, The (1994)", 0)

In [None]:
top_IMDb_score()

Unnamed: 0,movieId,title,first_genre,vote_count,average_rating,score
0,318,"Shawshank Redemption, The (1994)",Crime,122296,4.416792,4.309984
1,858,"Godfather, The (1972)",Crime,75004,4.326603,4.171094
2,50,"Usual Suspects, The (1995)",Crime,72893,4.267865,4.115529
3,527,Schindler's List (1993),Drama,84232,4.242337,4.111043
4,2959,Fight Club (1999),Action,86207,4.236019,4.10808
5,296,Pulp Fiction (1994),Comedy,108756,4.191778,4.091866
6,2571,"Matrix, The (1999)",Action,107056,4.160631,4.061946
7,1221,"Godfather: Part II, The (1974)",Crime,47271,4.26951,4.048814
8,593,"Silence of the Lambs, The (1991)",Crime,101802,4.150287,4.047899
9,58559,"Dark Knight, The (2008)",Action,65349,4.187539,4.030689


In [None]:
top_in_genre_IMDb_score()

Unnamed: 0,movieId,title,first_genre,vote_count,average_rating,score
0,318,"Shawshank Redemption, The (1994)",Crime,122296,4.416792,4.309984
1,527,Schindler's List (1993),Drama,84232,4.242337,4.111043
2,2959,Fight Club (1999),Action,86207,4.236019,4.10808
3,296,Pulp Fiction (1994),Comedy,108756,4.191778,4.091866
4,4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure,79940,4.099331,3.977774
5,4226,Memento (2000),Mystery,55649,4.143713,3.970426
6,109487,Interstellar (2014),Sci-Fi,40603,4.146972,3.921583
7,1214,Alien (1979),Horror,46572,4.069505,3.88164
8,457,"Fugitive, The (1993)",Thriller,61732,3.976876,3.841666
9,1148,Wallace & Gromit: The Wrong Trousers (1993),Animation,18597,4.111201,3.725353


In [None]:
top_to_parametr(260)

Unnamed: 0,movieId,title,relevance
0,1196,Star Wars: Episode V - The Empire Strikes Back...,0.97113
1,1210,Star Wars: Episode VI - Return of the Jedi (1983),0.96792
2,122886,Star Wars: Episode VII - The Force Awakens (2015),0.90718
3,1198,Raiders of the Lost Ark (Indiana Jones and the...,0.89863
4,166528,Rogue One: A Star Wars Story (2016),0.89616
5,1200,Aliens (1986),0.89116
6,33493,Star Wars: Episode III - Revenge of the Sith (...,0.88665
7,480,Jurassic Park (1993),0.88403
8,2628,Star Wars: Episode I - The Phantom Menace (1999),0.87765
9,1270,Back to the Future (1985),0.87325


In [None]:
recommend_movies_after_first_choice(260)

Unnamed: 0,movieId,title
0,318,"Shawshank Redemption, The (1994)"
1,527,Schindler's List (1993)
2,2959,Fight Club (1999)
3,296,Pulp Fiction (1994)
4,109487,Interstellar (2014)
5,1148,Wallace & Gromit: The Wrong Trousers (1993)
6,1198,Raiders of the Lost Ark (Indiana Jones and the...
7,166528,Rogue One: A Star Wars Story (2016)
8,1210,Star Wars: Episode VI - Return of the Jedi (1983)
9,4993,"Lord of the Rings: The Fellowship of the Ring,..."


In [7]:
get_top_n_recommendations(1)

Top 10 recommended movieIds for user 1:


Unnamed: 0,movieId,title,predicted_rating
842,1198,Raiders of the Lost Ark (Indiana Jones and the...,0.395715
2063,3147,"Green Mile, The (1999)",0.258113
4020,6377,Finding Nemo (2003),0.227251
37,47,Seven (a.k.a. Se7en) (1995),0.208475
2620,4022,Cast Away (2000),0.159786
3189,4963,Ocean's Eleven (2001),0.153115
417,608,Fargo (1996),0.146575
3786,5989,Catch Me If You Can (2002),0.141054
1410,2115,Indiana Jones and the Temple of Doom (1984),0.12715
2466,3753,"Patriot, The (2000)",0.117596


In [16]:
get_als_recommendations(1)

Unnamed: 0,movieId,title,predicted_rating
0,5042,Forbidden Zone (1980),1.622929
1,7039,Thunderheart (1992),1.560926
2,8522,My Little Chickadee (1940),1.554901
3,41769,Mozart and the Whale (2005),1.557553
4,109243,Joe (2013),1.521112
5,171339,Bear Island (1979),1.874038
6,177331,Ping Pong (2002),1.495003
7,185107,Schubert in Love (2016),1.497572
8,201298,Karmouz War (2018),1.583997
9,271793,Badhaai Do (2022),1.527865


In [19]:
hybrid_user_recommendations(1)

Top 10 recommended movieIds for user 1:


Unnamed: 0,movieId,title_svd,predicted_rating_svd,title_als,predicted_rating_als,predicted_rating
0,1198,Raiders of the Lost Ark (Indiana Jones and the...,0.395715,Raiders of the Lost Ark (Indiana Jones and the...,1.207861,0.476929
1,3147,"Green Mile, The (1999)",0.258113,"Green Mile, The (1999)",0.580509,0.290353
2,6377,Finding Nemo (2003),0.227251,Finding Nemo (2003),0.534494,0.257975
4,608,Fargo (1996),0.146575,Fargo (1996),0.774279,0.209345
5,2115,Indiana Jones and the Temple of Doom (1984),0.12715,Indiana Jones and the Temple of Doom (1984),0.910411,0.205476
3,4022,Cast Away (2000),0.159786,Cast Away (2000),0.594721,0.203279
7185,171339,Bear Island (1979),1.7e-05,Bear Island (1979),1.874038,0.187419
12,288,Natural Born Killers (1994),0.069481,Natural Born Killers (1994),1.190148,0.181548
14,1923,There's Something About Mary (1998),0.064988,There's Something About Mary (1998),1.1833,0.176819
7,364,"Lion King, The (1994)",0.10945,"Lion King, The (1994)",0.766488,0.175154


In [196]:
get_genre_recommended_movies(1)

Unnamed: 0,movieId,title,genre_similarity
4615,4719,Osmosis Jones (2001),0.68313
2895,2987,Who Framed Roger Rabbit? (1988),0.68313
1818,1907,Mulan (1998),0.68313
454,459,"Getaway, The (1994)",0.632456
4851,4956,"Stunt Man, The (1980)",0.632456
2526,2617,"Mummy, The (1999)",0.632456
2323,2414,Young Sherlock Holmes (1985),0.632456
196,198,Strange Days (1995),0.632456
661,673,Space Jam (1996),0.632456
540,546,Super Mario Bros. (1993),0.632456


In [208]:
genre_collab_hybrid(1)

Unnamed: 0,movieId,title,final_score
56,3114,Toy Story 2 (1999),0.743995
0,1,Toy Story (1995),0.47321
39,2355,"Bug's Life, A (1998)",0.465415
38,2000,Lethal Weapon (1987),0.4647
67,4306,Shrek (2001),0.45677
3,318,"Shawshank Redemption, The (1994)",0.45351
34,1704,Good Will Hunting (1997),0.451705
57,3147,"Green Mile, The (1999)",0.44693
65,4187,Lilies of the Field (1963),0.44372
4,377,Speed (1994),0.441625


In [None]:
movie_tags

Unnamed: 0,movieId,tagId,relevance,tag
0,1,1,0.03200,007
1,1,2,0.02225,007 (series)
2,1,3,0.07000,18th century
3,1,4,0.05900,1920s
4,1,5,0.12300,1930s
...,...,...,...,...
5639995,288167,1124,0.09875,writing
5639996,288167,1125,0.02950,wuxia
5639997,288167,1126,0.02275,wwii
5639998,288167,1127,0.11225,zombie


In [None]:
tags

Unnamed: 0,tagId,tag
0,1,007
1,2,007 (series)
2,3,18th century
3,4,1920s
4,5,1930s
...,...,...
1123,1124,writing
1124,1125,wuxia
1125,1126,wwii
1126,1127,zombie


In [None]:
movies

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
...,...,...,...
86532,288967,14418234,845861.0
86533,288971,11162178,878958.0
86534,288975,70199,150392.0
86535,288977,23050520,1102551.0


In [187]:
movie_details

Unnamed: 0,movieId,title,genres,first_genre,genres_list,genre_similarity
1,2,Jumanji (1995),Adventure Children Fantasy,Adventure,"[Adventure, Children, Fantasy]",0.447214
2,3,Grumpier Old Men (1995),Comedy Romance,Comedy,"[Comedy, Romance]",0.365148
3,4,Waiting to Exhale (1995),Comedy Drama Romance,Comedy,"[Comedy, Drama, Romance]",0.447214
4,5,Father of the Bride Part II (1995),Comedy,Comedy,[Comedy],0.258199
5,6,Heat (1995),Action Crime Thriller,Action,"[Action, Crime, Thriller]",0.447214
...,...,...,...,...,...,...
4889,4994,"Majestic, The (2001)",Comedy Drama Romance,Comedy,"[Comedy, Drama, Romance]",0.447214
4891,4996,Little Otik (Otesánek) (2000),Comedy Drama Fantasy,Comedy,"[Comedy, Drama, Fantasy]",0.447214
4892,4997,"Convent, The (2000)",Horror Sci-Fi,Horror,"[Horror, Sci-Fi]",0.365148
4893,4998,"Defiant Ones, The (1958)",Adventure Crime Drama Thriller,Adventure,"[Adventure, Crime, Drama, Thriller]",0.516398


In [None]:
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,1225734739
1,1,110,4.0,1225865086
2,1,158,4.0,1225733503
3,1,260,4.5,1225735204
4,1,356,5.0,1225735119
...,...,...,...,...
33832157,330975,8340,2.0,1091583256
33832158,330975,8493,2.5,1091585709
33832159,330975,8622,4.0,1091581777
33832160,330975,8665,3.0,1091581765


In [None]:
user_tags

Unnamed: 0,userId,movieId,tag,timestamp
0,10,260,good vs evil,1430666558
1,10,260,Harrison Ford,1430666505
2,10,260,sci-fi,1430666538
3,14,1221,Al Pacino,1311600756
4,14,1221,mafia,1311600746
...,...,...,...,...
2328310,330923,176599,politically correct,1507547491
2328311,330933,3317,coming of age,1351279384
2328312,330933,3317,sexuality,1351279389
2328313,330947,5782,Not Luc Besson,1154110902


In [172]:
movie_ratings

Unnamed: 0,movieId,title,average_rating,vote_count,flag
0,1,Toy Story (1995),3.843670,1177,0
1,2,Jumanji (1995),3.292952,454,0
2,3,Grumpier Old Men (1995),3.120172,233,0
3,4,Waiting to Exhale (1995),3.177419,31,0
4,5,Father of the Bride Part II (1995),3.150826,242,0
...,...,...,...,...,...
4451,4995,"Beautiful Mind, A (2001)",3.887073,673,0
4452,4996,Little Otik (Otesánek) (2000),2.700000,5,0
4453,4997,"Convent, The (2000)",2.500000,1,0
4454,4998,"Defiant Ones, The (1958)",3.666667,6,0
