In [1]:
import numpy as np
import pandas as pd
import torch
from collections import defaultdict

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the interaction log used by every cell below. Judging by the displayed
# array and by how load_model_and_recommend reads each row, column 0 holds the
# user id and column 1 the item index.
ratings = np.load('./data/movielens_ratings.npy')
ratings

array([[   1, 2970],
       [   1, 1179],
       [   1, 1575],
       ...,
       [6040, 1619],
       [6040,  156],
       [6040, 1132]])

In [3]:
def load_model_and_recommend(u, N, model_path, max_len=50, item_num=3706):
    """Load a saved model and return its top-N not-yet-rated items for one user.

    The user's history is taken from the notebook-level ``ratings`` array
    (column 0 = user id, column 1 = item index), in the order the rows appear.

    Parameters
    ----------
    u : int
        User id to recommend for.
    N : int
        Number of items to return.
    model_path : str
        Path of a model object saved in full (it is loaded with ``torch.load``
        and must expose ``eval()`` and ``predict(user_ids, seqs, item_indices)``).
    max_len : int, optional
        Length of the input sequence handed to the model. The most recent
        ``max_len`` items are right-aligned and the remaining leading slots are
        left at 0. Defaults to 50.
        NOTE(review): presumably this has to match the length the model was
        trained with - confirm before changing it.
    item_num : int, optional
        Largest item index; candidates are ``1..item_num`` minus the items the
        user already rated. Defaults to 3706.

    Returns
    -------
    list of int
        Up to ``N`` item indices, best first. Empty when the user has no rows
        in ``ratings`` (a message is printed) or when nothing is left to rank.

    Raises
    ------
    ValueError
        If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")

    # NOTE(review): torch.load unpickles arbitrary Python objects, so only
    # point this at checkpoints you trust.
    model = torch.load(model_path, map_location=torch.device('cpu'))
    model.eval()

    # Select only this user's rows instead of building a history dict for
    # every user on each call; boolean masking keeps the original row order.
    interactions = np.asarray(ratings)
    user_mask = interactions[:, 0].astype(int) == u
    history = interactions[user_mask, 1].astype(int).tolist()

    if not history:
        print(f"User {u} has no interactions.")
        return []

    # Right-align the most recent items; unused leading positions stay 0.
    recent = history[-max_len:]
    seq = np.zeros([max_len], dtype=np.int32)
    seq[max_len - len(recent):] = recent

    rated_items = set(history)
    candidates = [i for i in range(1, item_num + 1) if i not in rated_items]
    if not candidates:
        return []

    # Scores are negated so an ascending argsort yields the best items first.
    # Gradients are never needed for inference.
    with torch.no_grad():
        scores = -model.predict(np.array([u]), np.array([seq]), candidates)
    top_indices = scores[0].argsort()[:N]
    recommended_items = [candidates[i] for i in top_indices]

    return recommended_items

In [4]:
def get_movie_information(movie_indices, index_to_id_path='./data/movie_id_to_index.csv', movies_path='./data/movies.csv'):
    """Translate item indices into titles and genres and tally the genres.

    Parameters
    ----------
    movie_indices : iterable of int
        Item indices to look up.
    index_to_id_path : str, optional
        Header-less CSV whose two columns are read as ``movieId`` and ``index``.
    movies_path : str, optional
        CSV with a header providing ``movieId``, ``title`` and ``genres``
        columns; ``genres`` is a ``|``-separated string.

    Returns
    -------
    titles : list of str
        Titles in the order of ``movie_indices``.
    titles_with_genres : list of (str, str)
        ``(title, genres)`` pairs in the same order, genres left unsplit.
    genre_count : collections.defaultdict
        Number of looked-up movies carrying each individual genre.

    Raises
    ------
    KeyError
        If an index is absent from the index file, or its movie id is absent
        from the movies file.
    """
    index_to_id_df = pd.read_csv(index_to_id_path, header=None, names=['movieId', 'index'])
    index_to_id = dict(zip(index_to_id_df['index'], index_to_id_df['movieId']))

    movies_df = pd.read_csv(movies_path)
    movieid_to_info = dict(zip(movies_df['movieId'], zip(movies_df['title'], movies_df['genres'])))

    titles = []
    titles_with_genres = []
    genre_count = defaultdict(int)

    for idx in movie_indices:
        # Fail with the offending key instead of the opaque "cannot unpack
        # non-iterable NoneType" that chained .get() calls would produce.
        if idx not in index_to_id:
            raise KeyError(f"movie index {idx} not found in {index_to_id_path}")
        movie_id = index_to_id[idx]
        if movie_id not in movieid_to_info:
            raise KeyError(f"movieId {movie_id} (index {idx}) not found in {movies_path}")

        title, genres = movieid_to_info[movie_id]
        titles.append(title)
        titles_with_genres.append((title, genres))
        for genre in genres.split('|'):
            genre_count[genre] += 1
    return titles, titles_with_genres, genre_count


In [5]:
def get_all_user_watched_movies(user_id):
    """Return the sorted, de-duplicated column-1 values of the user's rows in ``ratings``."""
    # Boolean mask over column 0 picks out the rows belonging to this user.
    belongs_to_user = ratings[:, 0] == user_id
    item_column = ratings[belongs_to_user][:, 1]
    # np.unique both removes repeats and sorts ascending.
    return np.unique(item_column.astype(int))

In [6]:
# Titles and genres of everything user 1 has interacted with.
# genre_count from this call is what get_top_genres consumes further down;
# the same three names are reassigned by the per-model cells later on, so
# top_genres has to be computed before any of those cells run.
all_movie_indices = get_all_user_watched_movies(1)
titles, titles_with_genres, genre_count = get_movie_information(all_movie_indices)

In [7]:
titles

['Toy Story (1995)',
 'Pocahontas (1995)',
 'Apollo 13 (1995)',
 'Star Wars: Episode IV - A New Hope (1977)',
 "Schindler's List (1993)",
 'Secret Garden, The (1993)',
 'Aladdin (1992)',
 'Snow White and the Seven Dwarfs (1937)',
 'Beauty and the Beast (1991)',
 'Fargo (1996)',
 'James and the Giant Peach (1996)',
 'Wallace & Gromit: The Best of Aardman Animation (1996)',
 'Close Shave, A (1995)',
 'Hunchback of Notre Dame, The (1996)',
 'My Fair Lady (1964)',
 'Wizard of Oz, The (1939)',
 'Gigi (1958)',
 'Cinderella (1950)',
 'Mary Poppins (1964)',
 'Dumbo (1941)',
 'Sound of Music, The (1965)',
 'E.T. the Extra-Terrestrial (1982)',
 "One Flew Over the Cuckoo's Nest (1975)",
 'Princess Bride, The (1987)',
 'To Kill a Mockingbird (1962)',
 'Dead Poets Society (1989)',
 'Back to the Future (1985)',
 'Ben-Hur (1959)',
 'Ponette (1996)',
 'Hercules (1997)',
 'Titanic (1997)',
 'Last Days of Disco, The (1998)',
 'Mulan (1998)',
 'Rain Man (1988)',
 'Driving Miss Daisy (1989)',
 'Bambi (194

In [8]:
titles_with_genres

[('Toy Story (1995)', "Animation|Children's|Comedy"),
 ('Pocahontas (1995)', "Animation|Children's|Musical|Romance"),
 ('Apollo 13 (1995)', 'Drama'),
 ('Star Wars: Episode IV - A New Hope (1977)',
  'Action|Adventure|Fantasy|Sci-Fi'),
 ("Schindler's List (1993)", 'Drama|War'),
 ('Secret Garden, The (1993)', "Children's|Drama"),
 ('Aladdin (1992)', "Animation|Children's|Comedy|Musical"),
 ('Snow White and the Seven Dwarfs (1937)', "Animation|Children's|Musical"),
 ('Beauty and the Beast (1991)', "Animation|Children's|Musical"),
 ('Fargo (1996)', 'Crime|Drama|Thriller'),
 ('James and the Giant Peach (1996)', "Animation|Children's|Musical"),
 ('Wallace & Gromit: The Best of Aardman Animation (1996)', 'Animation'),
 ('Close Shave, A (1995)', 'Animation|Comedy|Thriller'),
 ('Hunchback of Notre Dame, The (1996)', "Animation|Children's|Musical"),
 ('My Fair Lady (1964)', 'Musical|Romance'),
 ('Wizard of Oz, The (1939)', "Adventure|Children's|Drama|Musical"),
 ('Gigi (1958)', 'Musical'),
 ('Ci

In [9]:
genre_count

defaultdict(int,
            {'Animation': 18,
             "Children's": 20,
             'Comedy': 14,
             'Musical': 14,
             'Romance': 6,
             'Drama': 21,
             'Action': 5,
             'Adventure': 5,
             'Fantasy': 3,
             'Sci-Fi': 3,
             'War': 2,
             'Crime': 2,
             'Thriller': 3})

In [10]:
len(all_movie_indices)

53

In [11]:
def get_top_genres(genre_count, top_k=5):
    """Return the names of the ``top_k`` most frequent genres, most frequent first.

    ``genre_count`` maps genre name to count. Genres with equal counts keep
    the order in which they appear in the mapping.
    """
    ranked = list(genre_count.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return [pair[0] for pair in ranked[:top_k]]


In [12]:
# Five most frequent genres (get_top_genres' default top_k) in the current
# genre_count; passed to find_relevancy as the reference set for every model.
top_genres = get_top_genres(genre_count)

In [13]:
def calculate_relevancy_score(genres_string, top_genres):
    """Score a movie by how many of its genres appear in ``top_genres``.

    ``genres_string`` is a ``|``-separated genre list. Matching ignores case
    and surrounding whitespace. The score is the number of matching genres,
    capped at 3, and maps to a colour label:

        3 -> "Green"  (very high relevance)
        2 -> "Blue"   (high relevance)
        1 -> "Brown"  (mid relevance)
        0 -> "Red"    (outlier)

    Returns a ``(score, label)`` tuple.
    """
    # Position in this tuple is the score the label belongs to.
    labels_by_score = ("Red", "Brown", "Blue", "Green")

    movie_genres = [part.strip().lower() for part in genres_string.split('|')]
    preferred = [name.strip().lower() for name in top_genres]

    overlap = 0
    for name in movie_genres:
        if name in preferred:
            overlap += 1

    score = min(overlap, 3)
    return score, labels_by_score[score]


In [14]:
def find_relevancy(titles_with_genres, top_genres):
    """Print one line per movie with its genres, relevancy label and score.

    ``titles_with_genres`` is an iterable of ``(title, genres)`` pairs; each
    is scored against ``top_genres`` with calculate_relevancy_score. Titles
    are padded to 30 characters and genre strings to 40; longer values are
    printed in full and push the rest of the line to the right.
    """
    for pair in titles_with_genres:
        title, genres = pair
        score, label = calculate_relevancy_score(genres, top_genres)
        line = f"{title:30s} {genres:40s} -> {label} (score={score})"
        print(line)

##### TMCP

In [15]:
## TMCP_SASRec
# NOTE(review): SASRec and PointWiseFeedForward are imported again from other
# modules in the MCP, GES and SASRec cells below, so each import rebinds the
# names. Presumably the classes have to be importable for torch.load to
# unpickle the saved model - confirm before moving these imports to the top.
from tmcp import TemporalGCNLayer, PointWiseFeedForward, SASRec
# Top-10 not-yet-rated items for user 1 according to the TMCP model.
tmcp_recommended_indices = load_model_and_recommend(1, 10, model_path='./models/tmcp_sasrec_model_full.pth')
# Reassigns titles / titles_with_genres / genre_count from the user-history cell.
titles, titles_with_genres, genre_count = get_movie_information(tmcp_recommended_indices)


In [16]:
titles

['Lion King, The (1994)',
 'Anastasia (1997)',
 'Fantasia (1940)',
 'Alice in Wonderland (1951)',
 'Babe (1995)',
 'Nightmare Before Christmas, The (1993)',
 'Return of Jafar, The (1993)',
 'Peter Pan (1953)',
 'Little Mermaid, The (1989)',
 'Lady and the Tramp (1955)']

In [17]:
titles_with_genres

[('Lion King, The (1994)', "Animation|Children's|Musical"),
 ('Anastasia (1997)', "Animation|Children's|Musical"),
 ('Fantasia (1940)', "Animation|Children's|Musical"),
 ('Alice in Wonderland (1951)', "Animation|Children's|Musical"),
 ('Babe (1995)', "Children's|Comedy|Drama"),
 ('Nightmare Before Christmas, The (1993)', "Children's|Comedy|Musical"),
 ('Return of Jafar, The (1993)', "Animation|Children's|Musical"),
 ('Peter Pan (1953)', "Animation|Children's|Fantasy|Musical"),
 ('Little Mermaid, The (1989)', "Animation|Children's|Comedy|Musical|Romance"),
 ('Lady and the Tramp (1955)', "Animation|Children's|Comedy|Musical|Romance")]

In [18]:
genre_count

defaultdict(int,
            {'Animation': 8,
             "Children's": 10,
             'Musical': 9,
             'Comedy': 4,
             'Drama': 1,
             'Fantasy': 1,
             'Romance': 2})

In [19]:
find_relevancy(titles_with_genres, top_genres)

Lion King, The (1994)          Animation|Children's|Musical             -> Green (score=3)
Anastasia (1997)               Animation|Children's|Musical             -> Green (score=3)
Fantasia (1940)                Animation|Children's|Musical             -> Green (score=3)
Alice in Wonderland (1951)     Animation|Children's|Musical             -> Green (score=3)
Babe (1995)                    Children's|Comedy|Drama                  -> Green (score=3)
Nightmare Before Christmas, The (1993) Children's|Comedy|Musical                -> Green (score=3)
Return of Jafar, The (1993)    Animation|Children's|Musical             -> Green (score=3)
Peter Pan (1953)               Animation|Children's|Fantasy|Musical     -> Green (score=3)
Little Mermaid, The (1989)     Animation|Children's|Comedy|Musical|Romance -> Green (score=3)
Lady and the Tramp (1955)      Animation|Children's|Comedy|Musical|Romance -> Green (score=3)


##### MCP

In [20]:
## MCP_SASRec
# NOTE(review): rebinds SASRec / PointWiseFeedForward (also imported from
# tmcp, ges and sasrec in other cells). Presumably needed so torch.load can
# resolve the pickled classes - confirm.
from mcp import GCNLayer, PointWiseFeedForward, SASRec
# Top-10 not-yet-rated items for user 1 according to the MCP model.
mcp_recommended_indices = load_model_and_recommend(1, 10, model_path='./models/mcp_sasrec_model_full.pth')
# Reassigns titles / titles_with_genres / genre_count with this model's results.
titles, titles_with_genres, genre_count = get_movie_information(mcp_recommended_indices)

In [21]:
titles

['Lion King, The (1994)',
 'Anastasia (1997)',
 'Nightmare Before Christmas, The (1993)',
 '101 Dalmatians (1961)',
 'South Park: Bigger, Longer and Uncut (1999)',
 'Beavis and Butt-head Do America (1996)',
 'Balto (1995)',
 'Alice in Wonderland (1951)',
 'Prince of Egypt, The (1998)',
 'Return of Jafar, The (1993)']

In [22]:
titles_with_genres

[('Lion King, The (1994)', "Animation|Children's|Musical"),
 ('Anastasia (1997)', "Animation|Children's|Musical"),
 ('Nightmare Before Christmas, The (1993)', "Children's|Comedy|Musical"),
 ('101 Dalmatians (1961)', "Animation|Children's"),
 ('South Park: Bigger, Longer and Uncut (1999)', 'Animation|Comedy'),
 ('Beavis and Butt-head Do America (1996)', 'Animation|Comedy'),
 ('Balto (1995)', "Animation|Children's"),
 ('Alice in Wonderland (1951)', "Animation|Children's|Musical"),
 ('Prince of Egypt, The (1998)', 'Animation|Musical'),
 ('Return of Jafar, The (1993)', "Animation|Children's|Musical")]

In [23]:
genre_count

defaultdict(int, {'Animation': 9, "Children's": 7, 'Musical': 6, 'Comedy': 3})

In [24]:
find_relevancy(titles_with_genres, top_genres)

Lion King, The (1994)          Animation|Children's|Musical             -> Green (score=3)
Anastasia (1997)               Animation|Children's|Musical             -> Green (score=3)
Nightmare Before Christmas, The (1993) Children's|Comedy|Musical                -> Green (score=3)
101 Dalmatians (1961)          Animation|Children's                     -> Blue (score=2)
South Park: Bigger, Longer and Uncut (1999) Animation|Comedy                         -> Blue (score=2)
Beavis and Butt-head Do America (1996) Animation|Comedy                         -> Blue (score=2)
Balto (1995)                   Animation|Children's                     -> Blue (score=2)
Alice in Wonderland (1951)     Animation|Children's|Musical             -> Green (score=3)
Prince of Egypt, The (1998)    Animation|Musical                        -> Blue (score=2)
Return of Jafar, The (1993)    Animation|Children's|Musical             -> Green (score=3)


##### GES

In [25]:
# NOTE(review): rebinds GCNLayer / SASRec / PointWiseFeedForward (also
# imported from other model modules in neighbouring cells). Presumably needed
# so torch.load can resolve the pickled classes - confirm.
from ges import GCNLayer, PointWiseFeedForward, SASRec
# Top-10 not-yet-rated items for user 1 according to the GES model.
ges_recommended_indices = load_model_and_recommend(1, 10, model_path='./models/ges_sasrec_model_full.pth')
# Reassigns titles / titles_with_genres / genre_count with this model's results.
titles, titles_with_genres, genre_count = get_movie_information(ges_recommended_indices)

In [26]:
titles

['Lion King, The (1994)',
 'Prince of Egypt, The (1998)',
 'Shakespeare in Love (1998)',
 'Babe (1995)',
 'Santa Clause, The (1994)',
 'Alice in Wonderland (1951)',
 'Mask, The (1994)',
 'Sleeping Beauty (1959)',
 'Jungle Book, The (1967)',
 'Lady and the Tramp (1955)']

In [27]:
titles_with_genres

[('Lion King, The (1994)', "Animation|Children's|Musical"),
 ('Prince of Egypt, The (1998)', 'Animation|Musical'),
 ('Shakespeare in Love (1998)', 'Comedy|Romance'),
 ('Babe (1995)', "Children's|Comedy|Drama"),
 ('Santa Clause, The (1994)', "Children's|Comedy|Fantasy"),
 ('Alice in Wonderland (1951)', "Animation|Children's|Musical"),
 ('Mask, The (1994)', 'Comedy|Crime|Fantasy'),
 ('Sleeping Beauty (1959)', "Animation|Children's|Musical"),
 ('Jungle Book, The (1967)', "Animation|Children's|Comedy|Musical"),
 ('Lady and the Tramp (1955)', "Animation|Children's|Comedy|Musical|Romance")]

In [28]:
genre_count

defaultdict(int,
            {'Animation': 6,
             "Children's": 7,
             'Musical': 6,
             'Comedy': 6,
             'Romance': 2,
             'Drama': 1,
             'Fantasy': 2,
             'Crime': 1})

In [29]:
find_relevancy(titles_with_genres, top_genres)

Lion King, The (1994)          Animation|Children's|Musical             -> Green (score=3)
Prince of Egypt, The (1998)    Animation|Musical                        -> Blue (score=2)
Shakespeare in Love (1998)     Comedy|Romance                           -> Brown (score=1)
Babe (1995)                    Children's|Comedy|Drama                  -> Green (score=3)
Santa Clause, The (1994)       Children's|Comedy|Fantasy                -> Blue (score=2)
Alice in Wonderland (1951)     Animation|Children's|Musical             -> Green (score=3)
Mask, The (1994)               Comedy|Crime|Fantasy                     -> Brown (score=1)
Sleeping Beauty (1959)         Animation|Children's|Musical             -> Green (score=3)
Jungle Book, The (1967)        Animation|Children's|Comedy|Musical      -> Green (score=3)
Lady and the Tramp (1955)      Animation|Children's|Comedy|Musical|Romance -> Green (score=3)


##### SASRec

In [30]:
## SASRec
# NOTE(review): rebinds SASRec / PointWiseFeedForward (also imported from
# tmcp, mcp and ges in earlier cells). Presumably needed so torch.load can
# resolve the pickled classes - confirm.
from sasrec import PointWiseFeedForward, SASRec
# Top-10 not-yet-rated items for user 1 according to the plain SASRec model.
sasrec_recommended_indices = load_model_and_recommend(1, 10, model_path='./models/sasrec_model.pth')
# Reassigns titles / titles_with_genres / genre_count with this model's results.
titles, titles_with_genres, genre_count = get_movie_information(sasrec_recommended_indices)

In [31]:
titles

['Tarzan and the Lost City (1998)',
 'Sheltering Sky, The (1990)',
 'Rebel Without a Cause (1955)',
 'Phat Beach (1996)',
 'Red Rock West (1992)',
 'Talk of Angels (1998)',
 'Best Years of Our Lives, The (1946)',
 'Faces (1968)',
 'Siege, The (1998)',
 'Days of Thunder (1990)']

In [32]:
titles_with_genres

[('Tarzan and the Lost City (1998)', 'Action|Adventure'),
 ('Sheltering Sky, The (1990)', 'Drama'),
 ('Rebel Without a Cause (1955)', 'Drama'),
 ('Phat Beach (1996)', 'Comedy'),
 ('Red Rock West (1992)', 'Thriller'),
 ('Talk of Angels (1998)', 'Drama'),
 ('Best Years of Our Lives, The (1946)', 'Drama|War'),
 ('Faces (1968)', 'Drama'),
 ('Siege, The (1998)', 'Action|Thriller'),
 ('Days of Thunder (1990)', 'Action|Romance')]

In [33]:
genre_count

defaultdict(int,
            {'Action': 3,
             'Adventure': 1,
             'Drama': 5,
             'Comedy': 1,
             'Thriller': 2,
             'War': 1,
             'Romance': 1})

In [34]:
find_relevancy(titles_with_genres, top_genres)

Tarzan and the Lost City (1998) Action|Adventure                         -> Red (score=0)
Sheltering Sky, The (1990)     Drama                                    -> Brown (score=1)
Rebel Without a Cause (1955)   Drama                                    -> Brown (score=1)
Phat Beach (1996)              Comedy                                   -> Brown (score=1)
Red Rock West (1992)           Thriller                                 -> Red (score=0)
Talk of Angels (1998)          Drama                                    -> Brown (score=1)
Best Years of Our Lives, The (1946) Drama|War                                -> Brown (score=1)
Faces (1968)                   Drama                                    -> Brown (score=1)
Siege, The (1998)              Action|Thriller                          -> Red (score=0)
Days of Thunder (1990)         Action|Romance                           -> Red (score=0)
