In [1]:
import numpy as np
import pandas as pd
from google.colab import drive

In [69]:
# @title Функции

def top_to_parametr(k: int, n=10) -> pd.DataFrame:
    """Return the ``n`` movies most correlated with movie ``k``.

    Reads two module-level tables: ``correlation_matrix`` (column ``k`` is
    looked up, so its labels must be movie ids) and ``movie_details`` (source
    of the ``title`` column).

    Args:
        k: Id of the reference movie; must be a column label of
            ``correlation_matrix``.
        n: Maximum number of neighbours to return.

    Returns:
        DataFrame with columns ``movieId``, ``title`` and ``relevance``
        (the correlation with ``k``), sorted by ``relevance`` descending and
        indexed 0..len-1. Movies missing from ``movie_details`` are omitted.

    Raises:
        KeyError: if ``k`` is not a column of ``correlation_matrix``.
    """
    neighbours = (
        correlation_matrix[k]
        # Remove the reference movie by label. Skipping "the first row after
        # sorting" is only correct when the self-correlation sorts strictly
        # first, which ties (or a NaN diagonal) do not guarantee.
        .drop(labels=k, errors='ignore')
        .sort_values(ascending=False)
        .head(max(n, 0))
        .rename('relevance')
        .rename_axis('movieId')
        .reset_index()
    )
    return (
        movie_details[['movieId', 'title']]
        .merge(neighbours, on='movieId', how='inner')
        # The merge follows the order of movie_details, so rank again.
        .sort_values('relevance', ascending=False)
        .reset_index(drop=True)
    )

def top_to_user(k: int, n=10) -> pd.Series:
    """Return the ``n`` entries most correlated with user ``k``.

    Looks up column ``k`` of the module-level ``correlation_matrix``; for the
    result to describe users, that matrix must be labelled by user id.

    Args:
        k: Id of the reference user; must be a column label of
            ``correlation_matrix``.
        n: Maximum number of neighbours to return.

    Returns:
        Series (named ``k``) of correlation values indexed by the matrix's row
        labels, sorted descending, with ``k`` itself excluded.

    Raises:
        KeyError: if ``k`` is not a column of ``correlation_matrix``.
    """
    return (
        correlation_matrix[k]
        # Exclude the reference user by label; relying on it being the first
        # row after sorting breaks when another user ties with it.
        .drop(labels=k, errors='ignore')
        .sort_values(ascending=False)
        .head(max(n, 0))
    )

def top_IMDb_score(n=10, vote_quantile=0.99) -> pd.DataFrame:
    """Rank movies by a vote-weighted rating and return the best ``n``.

    The score of a movie with ``v`` votes and mean rating ``R`` is
    ``v/(v+m) * R + m/(m+v) * C`` where ``C`` is the mean of
    ``average_rating`` over all movies and ``m`` is the ``vote_quantile``
    quantile of ``vote_count``. Only movies with at least ``m`` votes are
    ranked.

    Reads the module-level ``movie_ratings`` (columns ``movieId``,
    ``average_rating``, ``vote_count``) and ``movie_details`` (columns
    ``movieId``, ``title``, ``first_genre``).

    Args:
        n: Maximum number of movies to return.
        vote_quantile: Quantile of ``vote_count`` used as the vote threshold
            ``m``; the default keeps the previously hard-coded 0.99.

    Returns:
        DataFrame with columns ``movieId``, ``title``, ``first_genre``,
        ``vote_count``, ``average_rating``, ``score``, sorted by ``score``
        descending and indexed 0..len-1. Empty when no movie qualifies.
    """
    C = movie_ratings['average_rating'].mean()
    m = movie_ratings['vote_count'].quantile(vote_quantile)

    qualified = movie_ratings.loc[movie_ratings['vote_count'] >= m].copy()
    v = qualified['vote_count']
    R = qualified['average_rating']
    # Column arithmetic instead of a row-wise apply: the apply returns a
    # DataFrame (not a Series) when no row qualifies, which made the
    # assignment below fail.
    qualified['score'] = (v / (v + m) * R) + (m / (m + v) * C)

    best = (
        qualified
        .sort_values('score', ascending=False)
        .loc[:, ['movieId', 'vote_count', 'average_rating', 'score']]
        .iloc[:n]
    )
    return (
        movie_details[['movieId', 'title', 'first_genre']]
        .merge(best, on='movieId', how='inner')
        # The merge follows the order of movie_details, so rank again.
        .sort_values('score', ascending=False)
        .reset_index(drop=True)
    )

def top_in_genre_IMDb_score(n=10, vote_quantile=0.99) -> pd.DataFrame:
    """Return the best-scoring movie of each ``first_genre``, at most ``n`` rows.

    Movies are scored with ``v/(v+m) * R + m/(m+v) * C`` where ``v`` is the
    movie's ``vote_count``, ``R`` its ``average_rating``, ``C`` the mean of
    ``average_rating`` over all movies and ``m`` the ``vote_quantile``
    quantile of ``vote_count``. Only movies with at least ``m`` votes are
    considered; the highest-scoring one per genre is kept.

    Reads the module-level ``movie_ratings`` and ``movie_details`` tables.

    Args:
        n: Maximum number of rows (i.e. genres) to return.
        vote_quantile: Quantile of ``vote_count`` used as the vote threshold
            ``m``; the default keeps the previously hard-coded 0.99.

    Returns:
        DataFrame with columns ``movieId``, ``title``, ``first_genre``,
        ``vote_count``, ``average_rating``, ``score``, sorted by ``score``
        descending and indexed 0..len-1. Empty when no movie qualifies.
    """
    C = movie_ratings['average_rating'].mean()
    m = movie_ratings['vote_count'].quantile(vote_quantile)

    candidates = pd.merge(
        movie_ratings[['movieId', 'vote_count', 'average_rating']],
        movie_details[['movieId', 'title', 'first_genre']],
        on='movieId', how='inner',
    )
    # Copy so that adding a column does not write into a slice of the merge
    # result (SettingWithCopyWarning).
    candidates = candidates.loc[candidates['vote_count'] >= m].copy()

    v = candidates['vote_count']
    R = candidates['average_rating']
    # Column arithmetic instead of a row-wise apply, which returns a DataFrame
    # rather than a Series when no row qualifies.
    candidates['score'] = (v / (v + m) * R) + (m / (m + v) * C)

    return (
        candidates
        .sort_values('score', ascending=False)
        .loc[:, ['movieId', 'title', 'first_genre', 'vote_count', 'average_rating', 'score']]
        # Rows are already ranked, so the first row of each genre is its best.
        .groupby('first_genre').head(1)
        .reset_index(drop=True)
        .iloc[:n]
    )

def recommend_movies_after_first_choice(viewed_movie_id: int, n=10, random_state=None) -> pd.DataFrame:
    """Build a recommendation list after the user has watched one movie.

    The list combines the per-genre top movies from
    ``top_in_genre_IMDb_score(n=n)`` (minus the watched movie) with movies
    similar to the watched one from ``top_to_parametr``. The first four rows
    keep their ranked order; the following rows (up to position 15) are
    shuffled.

    Args:
        viewed_movie_id: Id of the movie the user has just watched.
        n: Number of per-genre top movies to start from.
        random_state: Optional seed / RandomState forwarded to
            ``DataFrame.sample`` so the shuffle can be reproduced. ``None``
            (default) keeps the shuffle random.

    Returns:
        DataFrame with columns ``movieId`` and ``title``, indexed 0..len-1,
        with no ``movieId`` repeated.
    """
    pinned_rows = 4   # leading rows whose order is preserved
    total_rows = 15   # candidates beyond this position are discarded

    top_movies = top_in_genre_IMDb_score(n=n)
    # Test membership directly: comparing the remaining length with 9 only
    # detects the removal when exactly 10 top movies were returned.
    viewed_in_top = bool(top_movies['movieId'].eq(viewed_movie_id).any())
    top_movies = top_movies[top_movies['movieId'] != viewed_movie_id]

    similar_movies = top_to_parametr(viewed_movie_id, n=6 if viewed_in_top else 7)

    candidates = pd.concat([top_movies, similar_movies], ignore_index=True)
    # A similar movie can also be a per-genre top movie; keep the first
    # (higher-placed) occurrence so no title is recommended twice.
    candidates = candidates.drop_duplicates(subset='movieId', keep='first').reset_index(drop=True)

    pinned = candidates.iloc[:pinned_rows]
    shuffled = candidates.iloc[pinned_rows:total_rows].sample(frac=1, random_state=random_state)
    result = pd.concat([pinned, shuffled], ignore_index=True)
    return result[['movieId', 'title']]


def create_tables(n=999999, k=999999,
                  data_dir='/content/drive/MyDrive/ml-latest',
                  with_user_correlation=False) -> tuple:
    """Load the CSV tables from ``data_dir`` and derive the analysis tables.

    Args:
        n: Exclusive upper bound on ``movieId``; rows with larger ids are
            dropped from every table that has a ``movieId`` column.
        k: Exclusive upper bound on ``userId`` for ``ratings`` and
            ``user_tags``.
        data_dir: Directory holding ``genome-scores.csv``, ``genome-tags.csv``,
            ``links.csv``, ``movies.csv``, ``ratings.csv`` and ``tags.csv``.
            Google Drive is mounted only when this path lies under
            ``/content/drive``.
        with_user_correlation: When True, additionally compute the
            user-by-user correlation of ratings and append it to the result.
            Off by default: the matrix has one row and column per user and is
            expensive to build, so restrict ``k`` before enabling it.

    Returns:
        ``(correlation_matrix, tags, movie_tags, movies, movie_details,
        ratings, movie_ratings, user_tags)`` where ``correlation_matrix`` is
        the movie-by-movie correlation of tag relevance vectors, labelled by
        ``movieId``. With ``with_user_correlation=True`` a ninth element, the
        user-by-user rating correlation, is appended.
    """
    drive_root = '/content/drive'
    if str(data_dir).startswith(drive_root):
        drive.mount(drive_root)

    def read_table(file_name):
        # Every source table is a CSV in the same directory.
        return pd.read_csv(f'{data_dir}/{file_name}', low_memory=False)

    movie_tags = read_table('genome-scores.csv')
    movie_tags = movie_tags[movie_tags['movieId'] < n]

    # Movie-by-movie similarity: correlate the tag-relevance columns.
    # This is the matrix returned as ``correlation_matrix``; it must not be
    # overwritten by the user-level matrix, because the movie lookup
    # functions index it by movieId.
    correlation_matrix = (
        movie_tags
        .pivot_table(index='tagId', columns='movieId', values='relevance')
        .fillna(0)
        .corr()
    )

    tags = read_table('genome-tags.csv')
    movie_tags = pd.merge(movie_tags, tags, on='tagId', how='left')

    movies = read_table('links.csv')
    movies = movies[movies['movieId'] < n]

    movie_details = read_table('movies.csv')
    # Copy the filtered rows so the column assignments below do not write
    # into a slice of the raw table.
    movie_details = movie_details[movie_details['movieId'] < n].copy()
    # Take the first genre from the '|'-separated source value, so a genre
    # label containing spaces is not cut at its first word.
    movie_details['first_genre'] = movie_details['genres'].str.split('|', regex=False).str[0]
    movie_details['genres'] = movie_details['genres'].str.replace('|', ' ', regex=False)
    movie_details = movie_details[['movieId', 'title', 'genres', 'first_genre']]

    ratings = read_table('ratings.csv')
    # Filter before formatting dates so only the kept rows are converted.
    ratings = ratings[(ratings['movieId'] < n) & (ratings['userId'] < k)].copy()
    ratings['date'] = pd.to_datetime(ratings['timestamp'], unit='s').dt.strftime('%Y-%m-%d')
    ratings = pd.merge(movie_details[['movieId', 'title']], ratings, on='movieId', how='inner')
    ratings = ratings[['userId', 'movieId', 'title', 'rating', 'timestamp', 'date']]

    movie_ratings = ratings.groupby('movieId').agg(
        average_rating=('rating', 'mean'),
        vote_count=('rating', 'count')
    ).reset_index()
    movie_ratings = pd.merge(movie_details[['movieId', 'title']], movie_ratings, on='movieId', how='inner')

    user_tags = read_table('tags.csv')
    user_tags = user_tags[(user_tags['movieId'] < n) & (user_tags['userId'] < k)].copy()
    user_tags['date'] = pd.to_datetime(user_tags['timestamp'], unit='s').dt.strftime('%Y-%m-%d')

    tables = (correlation_matrix, tags, movie_tags, movies, movie_details,
              ratings, movie_ratings, user_tags)

    if with_user_correlation:
        # User-by-user similarity of rating vectors (unrated movies count as 0).
        user_correlation_matrix = (
            ratings
            .pivot_table(index='movieId', columns='userId', values='rating')
            .fillna(0)
            .corr()
        )
        return tables + (user_correlation_matrix,)

    return tables

In [3]:
# Load every table once and bind the results to module-level names; the helper
# functions defined above read correlation_matrix, movie_details and
# movie_ratings as globals, so this cell must run before any of them is called.
correlation_matrix, tags, movie_tags, movies, movie_details, ratings, movie_ratings, user_tags = create_tables()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
top_in_genre_IMDb_score()

Unnamed: 0,movieId,title,first_genre,vote_count,average_rating,score
0,318,"Shawshank Redemption, The (1994)",Crime,122296,4.416792,4.309984
1,527,Schindler's List (1993),Drama,84232,4.242337,4.111043
2,2959,Fight Club (1999),Action,86207,4.236019,4.10808
3,296,Pulp Fiction (1994),Comedy,108756,4.191778,4.091866
4,4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure,79940,4.099331,3.977774
5,4226,Memento (2000),Mystery,55649,4.143713,3.970426
6,109487,Interstellar (2014),Sci-Fi,40603,4.146972,3.921583
7,1214,Alien (1979),Horror,46572,4.069505,3.88164
8,457,"Fugitive, The (1993)",Thriller,61732,3.976876,3.841666
9,1148,Wallace & Gromit: The Wrong Trousers (1993),Animation,18597,4.111201,3.725353


In [28]:
top_to_parametr(318)

Unnamed: 0,movieId,title,relevance
0,3147,"Green Mile, The (1999)",0.895397
1,1704,Good Will Hunting (1997),0.846869
2,1193,One Flew Over the Cuckoo's Nest (1975),0.841241
3,356,Forrest Gump (1994),0.837761
4,1276,Cool Hand Luke (1967),0.836964
5,182723,Cosmos: A Spacetime Odissey,0.834826
6,203375,Ford v. Ferrari (2019),0.830897
7,208703,1917 (2019),0.830277
8,30707,Million Dollar Baby (2004),0.826535
9,63082,Slumdog Millionaire (2008),0.825927


In [72]:
recommend_movies_after_first_choice(318)

Unnamed: 0,movieId,title
0,527,Schindler's List (1993)
1,2959,Fight Club (1999)
2,296,Pulp Fiction (1994)
3,4993,"Lord of the Rings: The Fellowship of the Ring,..."
4,1193,One Flew Over the Cuckoo's Nest (1975)
5,1214,Alien (1979)
6,457,"Fugitive, The (1993)"
7,1148,Wallace & Gromit: The Wrong Trousers (1993)
8,109487,Interstellar (2014)
9,3147,"Green Mile, The (1999)"


In [6]:
movie_tags

Unnamed: 0,movieId,tagId,relevance,tag
0,1,1,0.03200,007
1,1,2,0.02225,007 (series)
2,1,3,0.07000,18th century
3,1,4,0.05900,1920s
4,1,5,0.12300,1930s
...,...,...,...,...
18472123,288167,1124,0.09875,writing
18472124,288167,1125,0.02950,wuxia
18472125,288167,1126,0.02275,wwii
18472126,288167,1127,0.11225,zombie


In [7]:
tags

Unnamed: 0,tagId,tag
0,1,007
1,2,007 (series)
2,3,18th century
3,4,1920s
4,5,1930s
...,...,...
1123,1124,writing
1124,1125,wuxia
1125,1126,wwii
1126,1127,zombie


In [8]:
movies

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
...,...,...,...
86532,288967,14418234,845861.0
86533,288971,11162178,878958.0
86534,288975,70199,150392.0
86535,288977,23050520,1102551.0


In [9]:
movie_details

Unnamed: 0,movieId,title,genres,first_genre
0,1,Toy Story (1995),Adventure Animation Children Comedy Fantasy,Adventure
1,2,Jumanji (1995),Adventure Children Fantasy,Adventure
2,3,Grumpier Old Men (1995),Comedy Romance,Comedy
3,4,Waiting to Exhale (1995),Comedy Drama Romance,Comedy
4,5,Father of the Bride Part II (1995),Comedy,Comedy
...,...,...,...,...
86532,288967,State of Siege: Temple Attack (2021),Action Drama,Action
86533,288971,Ouija Japan (2021),Action Horror,Action
86534,288975,The Men Who Made the Movies: Howard Hawks (1973),Documentary,Documentary
86535,288977,Skinford: Death Sentence (2023),Crime Thriller,Crime


In [10]:
ratings

Unnamed: 0,userId,movieId,rating,timestamp,date
0,1,1,4.0,1225734739,2008-11-03
1,1,110,4.0,1225865086,2008-11-05
2,1,158,4.0,1225733503,2008-11-03
3,1,260,4.5,1225735204,2008-11-03
4,1,356,5.0,1225735119,2008-11-03
...,...,...,...,...,...
33832157,330975,8340,2.0,1091583256,2004-08-04
33832158,330975,8493,2.5,1091585709,2004-08-04
33832159,330975,8622,4.0,1091581777,2004-08-04
33832160,330975,8665,3.0,1091581765,2004-08-04


In [11]:
user_tags

Unnamed: 0,userId,movieId,tag,timestamp,date
0,10,260,good vs evil,1430666558,2015-05-03
1,10,260,Harrison Ford,1430666505,2015-05-03
2,10,260,sci-fi,1430666538,2015-05-03
3,14,1221,Al Pacino,1311600756,2011-07-25
4,14,1221,mafia,1311600746,2011-07-25
...,...,...,...,...,...
2328310,330923,176599,politically correct,1507547491,2017-10-09
2328311,330933,3317,coming of age,1351279384,2012-10-26
2328312,330933,3317,sexuality,1351279389,2012-10-26
2328313,330947,5782,Not Luc Besson,1154110902,2006-07-28


In [12]:
movie_ratings

Unnamed: 0,movieId,title,average_rating,vote_count
0,1,Toy Story (1995),3.893508,76813
1,2,Jumanji (1995),3.278179,30209
2,3,Grumpier Old Men (1995),3.171271,15820
3,4,Waiting to Exhale (1995),2.868395,3028
4,5,Father of the Bride Part II (1995),3.076957,15801
...,...,...,...,...
83234,288967,State of Siege: Temple Attack (2021),3.500000,1
83235,288971,Ouija Japan (2021),0.500000,1
83236,288975,The Men Who Made the Movies: Howard Hawks (1973),4.000000,1
83237,288977,Skinford: Death Sentence (2023),3.000000,1
