In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from tensorflow.python.keras.layers import Embedding, Dot, Flatten, Input, Dense, Dropout
from tensorflow.python.keras.models import  Model

  'Matplotlib is building the font cache using fc-list. '


In [2]:
# Load the tab-separated ratings file. Column names are supplied explicitly,
# so pandas treats the first line of the file as data rather than a header.
columns = ['UserID', 'MovieID', 'Rating', 'Timestamp']
data = pd.read_csv('u.data', sep='\t', names=columns)
data.head()

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [3]:
# Per-column count of missing values (all zeros means nothing to impute).
data.isna().sum()

UserID       0
MovieID      0
Rating       0
Timestamp    0
dtype: int64

In [4]:
# Get rid of data we don't need.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second execution is a no-op rather than a
# KeyError on the already-removed 'Timestamp' column.
data = data.drop(columns='Timestamp', errors='ignore')
data.head(10)

Unnamed: 0,UserID,MovieID,Rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1
5,298,474,4
6,115,265,2
7,253,465,5
8,305,451,3
9,6,86,3


In [5]:
# Count the distinct user IDs and movie IDs present in the ratings table.
n_users = data['UserID'].unique().size
n_movies = data['MovieID'].unique().size
print('number of users:{}'.format(n_users), 'number of movies:{}'.format(n_movies))

number of users:943 number of movies:1682


In [6]:
# Hold out 20% of the ratings for evaluation. A fixed random_state makes the
# shuffle deterministic, so the same rows land in train/test on every
# Restart & Run All and results are comparable between runs.
train, test = train_test_split(data, test_size=0.2, random_state=42)

In [7]:
# The model architecture: embed user and movie IDs, combine the two embeddings
# with a dot product, then pass the result through dense layers to a single
# rating prediction.
dim = 30  # size of each embedding vector

# Each sample is a single integer ID per input.
user_input = Input(shape=[1], name='user_input')
movie_input = Input(shape=[1], name='movie_input')

# The embedding tables have n + 1 rows so that index n itself is a valid
# lookup (presumably because the IDs are 1-based -- confirm against the data).
# Flatten turns the (batch, 1, dim) lookup result into (batch, dim).
user_embedding = Flatten()(Embedding(n_users+1, dim, input_length=1, name='user_embedding')(user_input))
movie_embedding = Flatten()(Embedding(n_movies+1, dim, input_length=1, name='movie_embedding')(movie_input))

# Dot product along the embedding axis: one scalar per (user, movie) pair.
matrix = Dot(axes=1)([user_embedding, movie_embedding])

# Small MLP on top of the interaction score, with dropout for regularisation.
dense_1 = Dense(50, activation='relu', name = "Dense1")(matrix)
dense_1 = Dropout(0.2)(dense_1)
dense_2 = Dense(20, activation="relu", name = "Dense2")(dense_1)
dense_2 = Dropout(0.2)(dense_2)

# Linear output for the regression target. A ReLU here has zero gradient
# whenever its pre-activation is negative, so an unlucky initialisation can
# leave the model stuck predicting 0 for every rating.
output = Dense(1, activation='linear', name = "Output")(dense_2)

nn_model = Model([user_input, movie_input], output)

nn_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
movie_input (InputLayer)        [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 30)        28320       user_input[0][0]                 
__________________________________________________________________________________________________
movie_embedding (Embedding)     (None, 1, 30)        50490       movie_input[0][0]                
______________________________________________________________________________________________

In [8]:
# Train with Adam to minimise mean squared error between predicted and
# observed ratings; `history` keeps the per-epoch loss for later inspection.
nn_model.compile(loss='mse', optimizer='adam')
history = nn_model.fit(
    x=[train['UserID'], train['MovieID']],
    y=train['Rating'],
    epochs=5,
    batch_size=128,
)

Train on 80000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
