# Deep learning project with TensorFlow and the MovieLens dataset

First, the imports.

In [None]:
import tensorflow as tf
import math
import matplotlib.pyplot as plt
from tensorflow import keras
import pandas as pd
import pathlib as Path
import zipfile as ZipFile
from tensorflow.keras import layers
import numpy as np

In [None]:
import pathlib
import zipfile

# Location of the MovieLens 100k archive.
movielens_data_file_url = (
    "http://files.grouplens.org/datasets/movielens/ml-100k.zip"
)

# Fetch the archive with Keras' download helper; extraction is done manually
# below so it can be skipped when the directory already exists.
movielens_zipped_file = keras.utils.get_file(
    "ml-100k.zip", movielens_data_file_url, extract=False
)

keras_datasets_path = pathlib.Path(movielens_zipped_file).parents[0]
movielens_dir = keras_datasets_path / "ml-100k"


# Only extract the data the first time the script is run.
if not movielens_dir.exists():
    # Use the class from the locally imported module: the file-level
    # `import zipfile as ZipFile` binds the *module* to the name `ZipFile`,
    # so calling `ZipFile(...)` raises TypeError.
    with zipfile.ZipFile(movielens_zipped_file, "r") as archive:
        print("Extracting all the files now...")
        archive.extractall(path=keras_datasets_path)
        print("Done!")

u_data = movielens_dir / "u.data"

# u.data is tab-separated and has no header row, so name the columns while
# reading instead of round-tripping through an intermediate CSV.
headerList = ['user_id', 'item_id', 'rating', 'timestamp']
data = pd.read_csv(u_data, delimiter="\t", header=None, names=headerList)

# Keep writing the headered CSV copy that this cell has always produced.
data.to_csv("movie_lens_data", index=False)

# The timestamp is not used by the recommender.
data = data.drop(columns='timestamp')

print(data)

In [None]:
# Users: keep the full id column for inspection, and map every *distinct*
# raw id to a contiguous index 0 .. num_users - 1 (order of first appearance).
# Enumerating the full column instead would store row numbers as values.
user_id = data["user_id"].to_numpy()
print("user id")
print(user_id)
user2user_encoded = {x: i for i, x in enumerate(data["user_id"].unique())}
num_users = len(user2user_encoded)
print("num useres: ", num_users)

# Items: same scheme as for the users.
item_id = data["item_id"].to_numpy()
print("item id")
print(item_id)
item2item_encoded = {x: i for i, x in enumerate(data["item_id"].unique())}
num_movies = len(item2item_encoded)
print("num movies: ", num_movies)


In [None]:
min_rating = 1
max_rating = 5

# Features are contiguous 0-based indices rather than raw ids: an embedding
# table sized by the number of distinct ids only accepts indices
# 0 .. n - 1, and the raw ids are not guaranteed to lie in that range
# (MovieLens ids are presumably 1-based -- TODO confirm).
# pd.factorize assigns codes in order of first appearance.
user_index, _ = pd.factorize(data["user_id"])
item_index, _ = pd.factorize(data["item_id"])
x = np.stack([user_index, item_index], axis=1)

# Normalize the targets between 0 and 1. Makes it easy to train.
y = ((data["rating"] - min_rating) / (max_rating - min_rating)).values

# Random ~90/10 split into train and test rows via a boolean mask.
n_rand = np.random.rand(len(x)) <= 0.9
x_train = x[n_rand]
x_test = x[~n_rand]
y_train = y[n_rand]
y_test = y[~n_rand]
print(np.shape(x_test), np.shape(x_train), np.shape(y_test), np.shape(y_train))

print("x_train: ")
print(x_train)
print(tf.shape(x))

print("y_train: ")
print(y_train)

In [None]:
embedding_size = 50

def RecommenderV1(n_users, n_movies, n_factors):
    """Build and compile a dot-product matrix-factorization recommender.

    Args:
        n_users: Size of the user embedding table (number of user indices).
        n_movies: Size of the movie embedding table (number of movie indices).
        n_factors: Dimensionality of each embedding vector.

    Returns:
        A compiled ``keras`` model taking ``[user, movie]`` inputs of shape
        ``(batch, 1)`` and producing the dot product of the two embeddings.
        It is compiled with mean squared error and Adam (learning rate 0.001).
    """
    user = keras.Input(shape=(1,))
    u = keras.layers.Embedding(n_users, n_factors, embeddings_initializer='he_normal',
                               embeddings_regularizer=keras.regularizers.l2(1e-6))(user)
    # Drop the length-1 sequence axis: (batch, 1, n_factors) -> (batch, n_factors).
    u = keras.layers.Reshape((n_factors,))(u)

    movie = keras.Input(shape=(1,))
    m = keras.layers.Embedding(n_movies, n_factors, embeddings_initializer='he_normal',
                               embeddings_regularizer=keras.regularizers.l2(1e-6))(movie)
    m = keras.layers.Reshape((n_factors,))(m)

    # Predicted score is the inner product of user and movie factors.
    x = keras.layers.Dot(axes=1)([u, m])

    model = keras.models.Model(inputs=[user, movie], outputs=x)
    opt = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)

    return model

model = RecommenderV1(num_users, num_movies, embedding_size)

In [None]:
# The model has two separate inputs (user, movie), so the (N, 2) feature
# matrix is split into two (N, 1) columns. Validation data is supplied so
# that the history also records "val_loss".
history = model.fit(
    x=[x_train[:, 0:1], x_train[:, 1:2]],
    y=y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_data=([x_test[:, 0:1], x_test[:, 1:2]], y_test),
)

In [None]:

# Plot only the curves that were actually recorded during training:
# "accuracy" exists only if it was compiled as a metric and "val_loss" only
# if validation data was passed to fit(); indexing a missing key raises
# KeyError.
plotted_curves = []
for curve_name in ("loss", "accuracy", "val_loss"):
    if curve_name in history.history:
        plt.plot(history.history[curve_name])
        plotted_curves.append(curve_name)
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(plotted_curves, loc="best")
plt.show()