<a href="https://colab.research.google.com/github/Mtlukasik/PyroNotebooks/blob/main/Recommender_system_with_matrix_factorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install numpy pandas surprise
from surprise import Dataset
from surprise import Reader
import pandas as pd

# Load the data
data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
from surprise import SVD
from surprise import accuracy

# Apply SVD
algo = SVD()
algo.fit(trainset)

# If you want to make predictions for a specific user and item, you can do:
# user_id = str(196)
# item_id = str(302)
# actual_rating = 4
# prediction = algo.predict(user_id, item_id, actual_rating)
testset = trainset.build_testset()
predictions = algo.test(testset)

# Calculate RMSE
rmse = accuracy.rmse(predictions)
def get_top_n_recommendations(predictions, n=10):
    """Group predictions by user and keep each user's best-scoring items.

    Args:
        predictions: Iterable of 5-element records that unpack as
            ``(uid, iid, true_r, est, details)``. Only ``uid``, ``iid`` and
            ``est`` are used.
        n: Position at which each user's ranked list is cut (default 10).

    Returns:
        dict mapping every ``uid`` seen in ``predictions`` to a list of
        ``(iid, est)`` tuples ordered by ``est`` from highest to lowest.
        Records with equal ``est`` keep their input order.
    """
    by_user = {}
    for user_id, item_id, _actual, estimate, _details in predictions:
        if user_id not in by_user:
            by_user[user_id] = []
        by_user[user_id].append((item_id, estimate))

    # sorted() is stable, so ties come out in the order they went in.
    return {
        user_id: sorted(scored, key=lambda pair: pair[1], reverse=True)[:n]
        for user_id, scored in by_user.items()
    }

# Recommendations must come from items a user has NOT rated yet. Ranking the
# evaluation predictions would only re-order movies the user has already seen.
# build_anti_testset() yields every (user, item) pair that is absent from the
# training set; expect this to take a while, since it covers all such pairs.
anti_testset = trainset.build_anti_testset()
top_n = get_top_n_recommendations(algo.test(anti_testset), n=10)
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3156235 sha256=a205c03cb448021c5a660a70e4203b0ec3414bef1599b45aa0ef2e86b13bd9f2
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1
Dataset ml-100k could not be found. Do you want to download it? [Y/n] y
Trying to do

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Seed shared by the train/test split and TensorFlow so a fresh
# Restart & Run All gives repeatable numbers.
SEED = 42

# Load the tab-separated MovieLens 100k ratings; the timestamp column is not
# used by the model, so it is dropped straight away.
data = pd.read_csv('http://files.grouplens.org/datasets/movielens/ml-100k/u.data', sep='\t', names=['user', 'item', 'rating', 'timestamp'])
data = data.drop(columns='timestamp')

# 80/20 split with a fixed random_state so the test MSE is comparable across runs.
train, test = train_test_split(data, test_size=0.2, random_state=SEED)

import tensorflow as tf
import tensorflow_probability as tfp

# Seed TensorFlow's global RNG (used for the random embedding initialisation).
tf.random.set_seed(SEED)

latent_dim = 10  # Number of latent features
# Raw ids are used directly as embedding indices, so the tables must be sized
# by the largest id rather than by the number of distinct ids. The two agree
# only when ids are contiguous; with gaps, nunique() would under-size the
# table and the largest ids would index out of range.
num_users = int(data['user'].max())
num_items = int(data['item'].max())

# Model definition
class PMF(tf.keras.Model):
    """Matrix-factorisation scorer: one latent vector per user and per item.

    The score for a (user, item) pair is the dot product of the two vectors.

    Args:
        num_users: Used to size the user table, which gets ``num_users + 1`` rows.
        num_items: Used to size the item table, which gets ``num_items + 1`` rows.
        latent_dim: Length of every latent vector.
    """

    def __init__(self, num_users, num_items, latent_dim):
        super().__init__()

        def make_table(rows):
            # Both tables share the same width and initializer.
            return tf.keras.layers.Embedding(rows, latent_dim, embeddings_initializer='random_normal')

        # The extra row presumably lets 1-based ids index the table directly
        # (row 0 unused) -- confirm against the id scheme of the data.
        self.user_embedding = make_table(num_users + 1)
        self.item_embedding = make_table(num_items + 1)

    def call(self, inputs):
        """Score each row of ``inputs``.

        Column 0 of ``inputs`` is looked up in the user table and column 1 in
        the item table; the result holds one dot product per input row.
        """
        user_ids, item_ids = inputs[:, 0], inputs[:, 1]
        user_factors = self.user_embedding(user_ids)
        item_factors = self.item_embedding(item_ids)
        return tf.reduce_sum(user_factors * item_factors, axis=1)

model = PMF(num_users, num_items, latent_dim)

# Loss and optimizer
def prob_loss(y_true, y_pred):
    """Element-wise negative log-likelihood of ``y_true`` under Normal(y_pred, 1).

    Args:
        y_true: Observed values.
        y_pred: Predicted values, used as the mean of the distribution.

    Returns:
        The negated log-probability of each ``y_true`` entry; no reduction is
        applied here.
    """
    rating_dist = tfp.distributions.Normal(loc=y_pred, scale=1.)
    log_likelihood = rating_dist.log_prob(y_true)
    return -log_likelihood

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Training
batch_size = 512
epochs = 10
# Ratings are cast to float32 once here instead of on every batch.
dataset = tf.data.Dataset.from_tensor_slices((
    train[['user', 'item']].values,
    train['rating'].values.astype('float32'),
)).batch(batch_size)
num_batches = len(dataset)

for epoch in range(epochs):
    epoch_loss = 0.0
    for x, y in dataset:
        with tf.GradientTape() as tape:
            pred_ratings = model(x)
            loss = tf.reduce_mean(prob_loss(y, pred_ratings))
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        epoch_loss += float(loss.numpy())
    # Report once per epoch (mean of the per-batch losses) instead of one line
    # per batch, which floods the cell output with epochs * num_batches lines.
    print(f"Epoch {epoch + 1}/{epochs}, Mean loss: {epoch_loss / max(num_batches, 1):.4f}")

# Evaluate on the held-out split: predict a rating for every (user, item) pair
# in `test` and compare against the observed ratings.
predictions = model.predict(test[['user', 'item']].values)
mse = tf.keras.losses.MeanSquaredError()
loss_value = mse(test['rating'].values, predictions).numpy()
print(f"Test MSE: {loss_value}")
# Also report RMSE so the number is on the same scale as the RMSE printed for
# the Surprise SVD model earlier in the notebook.
print(f"Test RMSE: {loss_value ** 0.5}")


Epoch 1/10, Batch 1/157, Loss: 7.607816696166992
Epoch 1/10, Batch 2/157, Loss: 7.909287452697754
Epoch 1/10, Batch 3/157, Loss: 7.896164417266846
Epoch 1/10, Batch 4/157, Loss: 7.673124313354492
Epoch 1/10, Batch 5/157, Loss: 7.776801586151123
Epoch 1/10, Batch 6/157, Loss: 7.553722381591797
Epoch 1/10, Batch 7/157, Loss: 8.032243728637695
Epoch 1/10, Batch 8/157, Loss: 7.80620002746582
Epoch 1/10, Batch 9/157, Loss: 7.661154747009277
Epoch 1/10, Batch 10/157, Loss: 7.714544296264648
Epoch 1/10, Batch 11/157, Loss: 7.842784881591797
Epoch 1/10, Batch 12/157, Loss: 7.986782073974609
Epoch 1/10, Batch 13/157, Loss: 8.191555976867676
Epoch 1/10, Batch 14/157, Loss: 7.914278984069824
Epoch 1/10, Batch 15/157, Loss: 8.23818588256836
Epoch 1/10, Batch 16/157, Loss: 7.85217809677124
Epoch 1/10, Batch 17/157, Loss: 7.488943099975586
Epoch 1/10, Batch 18/157, Loss: 7.718843460083008
Epoch 1/10, Batch 19/157, Loss: 7.830364227294922
Epoch 1/10, Batch 20/157, Loss: 7.833542823791504
Epoch 1/10, 