In [19]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Dense, Dropout
from sklearn.model_selection import train_test_split

In [20]:
# Load dataset
df_ratings = pd.read_csv('../data/ratings_filtered.csv')


In [21]:
# Display dataset information
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11016557 entries, 0 to 11016556
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userId     int64  
 1   movieId    int64  
 2   rating     float64
 3   timestamp  int64  
dtypes: float64(1), int64(3)
memory usage: 336.2 MB


In [22]:
# Display first 5 rows of the dataset
df_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1246,5.0,1425941556
4,1,1968,4.0,1425942148


In [23]:
# Check for missing values
df_ratings.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [24]:
# Create a test dataset
df_test = df_ratings[:1000000]

In [25]:
# Create user-item matrix
user_item_matrix = df_test.pivot(index='userId', columns='movieId', values='rating')

In [26]:
# Binarize the user-item matrix
user_item_matrix_binary = (user_item_matrix > 0).astype(int)

In [27]:
# Split the data into training and test sets
train_data, test_data = train_test_split(user_item_matrix_binary, test_size=0.2, random_state=42)

In [28]:
# Define the DNN model
def build_dnn_model(input_dim):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=input_dim))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(input_dim, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy')
    return model

In [29]:
# Build the DNN model
num_visible = user_item_matrix_binary.shape[1]
dnn = build_dnn_model(num_visible)

In [30]:
# Train the DNN model
dnn.fit(train_data, train_data, batch_size=64, epochs=10, shuffle=True, validation_data=(test_data, test_data))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x25006364908>

In [31]:
# Extract the embeddings
user_embedding = dnn.layers[0].get_weights()[0].T
item_embedding = dnn.layers[2].get_weights()[0]

In [32]:
# Parameters for prediction of the top N movies for a user
user_id = 1
n_movies = 10

In [33]:
# Function to recommend the top N movies for a user
def n_recommendations(user_id, n):
    user_movies = user_embedding[user_id - 1]
    predicted_ratings = np.dot(user_embedding, user_movies)
    recommended_movies = np.argsort(predicted_ratings)[::-1]
    return recommended_movies[:n]

In [34]:
# Print the top recommended movies for a user
print(f'Top recommended movies for user {user_id} are {n_recommendations(user_id, n_movies)}')

Top recommended movies for user 1 are [  0   8  20 109 115  75  31  76  72 108]


In [35]:
# Save the DNN model
dnn.save('../dnn.h5')