In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'
column_names = ['user_id','item_id','rating','timestamp']
ratings = pd.read_csv(url, sep='\t', names=column_names)
ratings.head()





Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.item'
column_names = ['movie_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL', 
                'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 
                'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 
                'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

movies = pd.read_csv(url, sep='|', names=column_names, encoding='latin-1')
movies.head()

Unnamed: 0,movie_id,title,release_date,video_release_date,IMDb_URL,unknown,Action,Adventure,Animation,Children,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [22]:
data = pd.merge(ratings,movies,left_on= 'item_id', right_on = 'movie_id')
features =['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 
            'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 
            'Thriller', 'War', 'Western']

X = data[features].values
y= data['rating'].values

X_train, X_test, y_train, y_test, train_idx, test_idx = train_test_split(X, y, data.index, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [23]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mean_squared_error')

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - loss: 2.0928 - val_loss: 1.2850
Epoch 2/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 1.2497 - val_loss: 1.2477
Epoch 3/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 1.2316 - val_loss: 1.2229
Epoch 4/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 1.2186 - val_loss: 1.2177
Epoch 5/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 1.2280 - val_loss: 1.2155
Epoch 6/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 1.2217 - val_loss: 1.2117
Epoch 7/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 1.2146 - val_loss: 1.2180
Epoch 8/50
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 1.2164 - val_loss: 1.2117
Epoch 9/50
[1m2000/2000

In [24]:
test_loss= model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss}')

predictions = model.predict(X_test)
test_movie_names = data.iloc[test_idx]['title'].values


for i in range(10):
    print(f'Movie: {test_movie_names[i]}, Predicted Rating: {predictions[i][0]}')

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.1702
Test Loss: 1.1846116781234741
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Movie: Muriel's Wedding (1994), Predicted Rating: 3.528111696243286
Movie: American in Paris, An (1951), Predicted Rating: 3.683572769165039
Movie: Highlander (1986), Predicted Rating: 3.5782830715179443
Movie: She's So Lovely (1997), Predicted Rating: 3.557537317276001
Movie: GoodFellas (1990), Predicted Rating: 3.7773220539093018
Movie: Reality Bites (1994), Predicted Rating: 3.5528972148895264
Movie: English Patient, The (1996), Predicted Rating: 3.9719796180725098
Movie: It's a Wonderful Life (1946), Predicted Rating: 3.766275405883789
Movie: Mr. Holland's Opus (1995), Predicted Rating: 3.766275405883789
Movie: Army of Darkness (1993), Predicted Rating: 3.528111696243286
