In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.layers import Input, Embedding, Flatten, Dot, Dense
from keras.models import Model
from keras.models import load_model
from sklearn.metrics import mean_absolute_error

In [3]:
ratings = pd.read_csv('../base_dataset/ml-100k/u.data', sep='\t', names=['user_id', 'movie_id', 'rating', 'timestamp'])

In [4]:
# Encoding users and movies as integer indices.
user_ids = ratings['user_id'].unique().tolist()
movie_ids = ratings['movie_id'].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}

ratings['user'] = ratings['user_id'].map(user2user_encoded)
ratings['movie'] = ratings['movie_id'].map(movie2movie_encoded)

num_users = len(user2user_encoded)
num_movies = len(movie2movie_encoded)

In [5]:
# Split the data into training and test datasets.
X = ratings[['user', 'movie']].values
y = ratings['rating'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [6]:
# Define the NCF model using the Functional API.
user_input = Input(shape=(1,), dtype='int32', name='user')
movie_input = Input(shape=(1,), dtype='int32', name='movie')

user_embedding = Embedding(output_dim=50, input_dim=num_users, input_length=1, name='user_embedding')(user_input)
movie_embedding = Embedding(output_dim=50, input_dim=num_movies, input_length=1, name='movie_embedding')(movie_input)

user_flatten = Flatten()(user_embedding)
movie_flatten = Flatten()(movie_embedding)

dot_product = Dot(axes=1)([user_flatten, movie_flatten])

In [7]:
dense1 = Dense(128, activation='relu')(dot_product)
dense2 = Dense(1)(dense1)

model = Model(inputs=[user_input, movie_input], outputs=dense2)

# Compile the model.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [8]:
# Train the model.
history = model.fit([X_train[:, 0], X_train[:, 1]], y_train, epochs=5, verbose=1, validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
# Save the model
model.save('C:\\Users\\dobis\\OneDrive\\itrytostudy\\nn\\app\\NN_CourseProject\\src\\model\\ncf_model')



INFO:tensorflow:Assets written to: C:\Users\dobis\OneDrive\itrytostudy\nn\app\NN_CourseProject\src\model\ncf_model\assets


INFO:tensorflow:Assets written to: C:\Users\dobis\OneDrive\itrytostudy\nn\app\NN_CourseProject\src\model\ncf_model\assets


In [10]:
# Load the movie metadata
movie_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url',
              'unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy',
              'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
              'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

movies = pd.read_csv('../base_dataset/ml-100k/u.item', sep='|', names=movie_cols, encoding='latin-1')

In [24]:
# Make predictions on the test data
y_pred = model.predict([X_test[:, 0], X_test[:, 1]])

# Compute MAE
mae = mean_absolute_error(y_test, y_pred)

print(f'Test MAE: {mae}')

Test MAE: 0.8285050887227059


In [43]:
def get_genres(row):
    # Here we're returning a string that combines all the genres of a movie
    return ', '.join([col for col in movie_cols[5:] if row[col]])

def transform_genres(movie):
    genres = []
    genre_mapping = {
        'unknown': 'unknown',
        'Action': 'Action',
        'Adventure': 'Adventure',
        'Animation': 'Animation',
        "Children's": '_10',
        'Comedy': 'Comedy',
        'Crime': 'Crime',
        'Documentary': 'Documentary',
        'Drama': 'Drama',
        'Fantasy': 'Fantasy',
        'Film-Noir': '_16',
        'Horror': 'Horror',
        'Musical': 'Musical',
        'Mystery': 'Mystery',
        'Romance': 'Romance',
        'Sci-Fi': '_21',
        'Thriller': 'Thriller',
        'War': 'War',
        'Western': 'Western'
    }
    for old_genre, new_genre in genre_mapping.items():
        if getattr(movie, new_genre):
            genres.append(old_genre)
    return ', '.join(genres)

def recommend_movies(user_id, num_recommendations):
    user = user2user_encoded[user_id]
    movies_not_watched = ratings[ratings['user_id']!=user_id]['movie_id'].unique()
    movies_not_watched = [movie2movie_encoded.get(x) for x in movies_not_watched]
    user_encoder = [user] * len(movies_not_watched)

    user_encoder = np.array(user_encoder).reshape(-1,1)
    movies_not_watched = np.array(movies_not_watched).reshape(-1,1)

    ratings_predicted = model.predict([user_encoder, movies_not_watched])

    top_ratings_indices = ratings_predicted.flatten().argsort()[-num_recommendations:][::-1]
    recommended_movie_ids = [movie_ids[i] for i in top_ratings_indices]
    recommended_ratings = [ratings_predicted[i][0] for i in top_ratings_indices]

    print("Showing recommendations for user: {}".format(user_id))
    print("====" * 9)
    print("Movies with high ratings from user")
    print("----" * 8)
    top_movies_user = (
        ratings[ratings['user_id'] == user_id]
        .sort_values(by="rating", ascending=False)
        .head(5)
        .movie_id.values
    )

    movie_df_rows = movies[movies["movie_id"].isin(top_movies_user)]

    for row in movie_df_rows.itertuples():
        print(row.title, ":", transform_genres(row))

    print("----" * 8)
    print("Top movie recommendations")
    print("----" * 8)
    recommended_movies = movies[movies["movie_id"].isin(recommended_movie_ids)]
    for idx, row in enumerate(recommended_movies.itertuples()):
        print(row.title, ": Predicted rating - ", recommended_ratings[idx], ", Genres - ", transform_genres(row))

In [None]:
ratings_new = ratings.filter()

In [58]:
recommend_movies(user_id=2, num_recommendations=10)

Showing recommendations for user: 2
Movies with high ratings from user
--------------------------------
Godfather, The (1972) : Action, Crime, Drama
Kolya (1996) : Comedy
Good Will Hunting (1997) : Drama
Emma (1996) : Drama, Romance
Wings of the Dove, The (1997) : Drama, Romance, Thriller
--------------------------------
Top movie recommendations
--------------------------------
Usual Suspects, The (1995) : Predicted rating -  4.795822 , Genres -  Crime, Thriller
Fargo (1996) : Predicted rating -  4.782403 , Genres -  Crime, Drama, Thriller
2001: A Space Odyssey (1968) : Predicted rating -  4.7804933 , Genres -  Drama, Mystery, Sci-Fi, Thriller
Godfather: Part II, The (1974) : Predicted rating -  4.7764606 , Genres -  Action, Crime, Drama
Good Will Hunting (1997) : Predicted rating -  4.7748413 , Genres -  Drama
One Flew Over the Cuckoo's Nest (1975) : Predicted rating -  4.771671 , Genres -  Drama
Adventures of Robin Hood, The (1938) : Predicted rating -  4.77125 , Genres -  Action, A

In [57]:
def print_user_ratings(user_id):
    # Get all movies rated by the user
    user_movies = ratings[ratings['user_id'] == user_id].movie_id.values

    # Get details of these movies from the movies DataFrame
    movie_df_rows = movies[movies["movie_id"].isin(user_movies)]

    # For each movie, print the title, genres, and rating
    for row in movie_df_rows.itertuples():
        user_rating = ratings[(ratings['user_id'] == user_id) & (ratings['movie_id'] == row.movie_id)].rating.values[0]
        print(row.title, ":", transform_genres(row), ":", user_rating)

user_id = 2
print(f"User {user_id}'s ratings:")
print_user_ratings(user_id)

User 2's ratings:
Toy Story (1995) : Animation, Children's, Comedy : 4
Richard III (1995) : Drama, War : 2
Mighty Aphrodite (1995) : Comedy : 4
Postino, Il (1994) : Drama, Romance : 4
Antonia's Line (1995) : Drama : 3
Birdcage, The (1996) : Comedy : 4
Star Wars (1977) : Action, Adventure, Romance, Sci-Fi, War : 5
Fargo (1996) : Crime, Drama, Thriller : 5
Truth About Cats & Dogs, The (1996) : Comedy, Romance : 4
Godfather, The (1972) : Action, Crime, Drama : 5
Jerry Maguire (1996) : Drama, Romance : 4
Kolya (1996) : Comedy : 5
Shall We Dance? (1996) : Comedy : 5
My Best Friend's Wedding (1997) : Comedy, Romance : 4
Men in Black (1997) : Action, Adventure, Comedy, Sci-Fi : 4
Contact (1997) : Drama, Sci-Fi : 3
Full Monty, The (1997) : Comedy : 4
Good Will Hunting (1997) : Drama : 5
Heat (1995) : Action, Crime, Thriller : 4
Sabrina (1995) : Comedy, Romance : 3
Sense and Sensibility (1995) : Drama, Romance : 5
Leaving Las Vegas (1995) : Drama, Romance : 4
Restoration (1995) : Drama : 4
Bed 