In [None]:
import torch
import torch.optim as optim
import pandas as pd
import os
import numpy as np
from tqdm import tqdm

In [None]:
# First ratings shard: a headerless CSV of which only the first four columns
# are read and given explicit names; the date column is parsed to datetime.
df_1 = pd.read_csv(
    './dataset/archive/data1.csv',
    usecols=range(4),
    header=None,
    names=['movie_id', 'customer_id', 'rating', 'date'],
    parse_dates=['date'],
)
# df_2 = pd.read_csv('./dataset/archive/data2.csv',
#                    header=None,
#                    names=['movie_id', 'customer_id', 'rating', 'date'],
#                    parse_dates=['date'],
#                    usecols=range(4)
#                    )
# df_3 = pd.read_csv('./dataset/archive/data3.csv',
#                    header=None,
#                    names=['movie_id', 'customer_id', 'rating', 'date'],
#                    parse_dates=['date'],
#                    usecols=range(4)
#                    )
# df_4 = pd.read_csv('./dataset/archive/data4.csv',
#                    header=None,
#                    names=['movie_id', 'customer_id', 'rating', 'date'],
#                    parse_dates=['date'],
#                    usecols=range(4)
#                    )

In [None]:
# Full-data variant (needs the other shards loaded first):
# df = pd.concat([df_1, df_2, df_3, df_4])
# Work on the first 10000 rows of the first shard, without the date column.
df = df_1[['movie_id', 'customer_id', 'rating']].head(10000)

In [None]:
# Distinct ids in order of first appearance, plus reverse lookups that map
# each raw id to its row position in `movies` / `customers`.
movies = df['movie_id'].unique()
customers = df['customer_id'].unique()
r_movies = dict(zip(movies, range(len(movies))))
r_customers = dict(zip(customers, range(len(customers))))
print(len(movies), len(customers))

In [None]:
# Dense (movie x customer) rating matrix; cells never written stay 0, which
# later code treats as "no rating".
dataset = df.to_numpy()
ratings = np.zeros((len(movies), len(customers)), dtype=np.float32)
for movie_id, customer_id, rating in tqdm(dataset):
    ratings[r_movies[movie_id], r_customers[customer_id]] = rating

In [None]:
# Number of latent factors learned per movie and per customer.
latent_size = 3

# Fixed seed so the random factor initialisation is reproducible across
# kernel restarts.
torch.manual_seed(0)

# Prefer Apple's Metal backend when it is available, but fall back to CUDA
# or CPU so the notebook also runs on machines without it (a hard-coded
# "mps" device raises there).
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Trainable factor matrices, initialised uniformly in [0, 1):
#   movies_tensor    -> (len(movies), latent_size)
#   customers_tensor -> (len(customers), latent_size)
movies_tensor = torch.rand(
    len(movies), latent_size, requires_grad=True, device=device)
customers_tensor = torch.rand(
    len(customers), latent_size, requires_grad=True, device=device)
# Observed ratings (0 where unrated) on the same device as the factors.
ratings_tensor = torch.tensor(ratings, device=device)

In [None]:
def loss(pred, real):
    """Sum of squared errors over the observed entries only.

    Args:
        pred: tensor of predicted ratings.
        real: tensor of true ratings, broadcast-compatible with ``pred``;
            entries that are not strictly positive are treated as missing
            and contribute nothing.

    Returns:
        A scalar tensor holding the masked sum of squared differences.
    """
    observed = real > 0
    squared_error = (pred - real) ** 2
    return (observed * squared_error).sum()


# Plain full-batch gradient descent on the two factor matrices.
lr = 1e-4      # step size
iters = 1000   # number of gradient steps
for i in tqdm(range(iters)):
    # Predicted rating for every (movie, customer) pair.
    pred = movies_tensor @ customers_tensor.T
    loss_value = loss(pred, ratings_tensor)
    loss_value.backward()
    # Update the parameters in place under no_grad so autograd does not
    # record the step; this replaces writing through `.data`, which
    # bypasses autograd's tracking of in-place modifications.
    with torch.no_grad():
        movies_tensor -= lr * movies_tensor.grad
        customers_tensor -= lr * customers_tensor.grad
    # Gradients accumulate across backward() calls, so clear them each step.
    movies_tensor.grad.zero_()
    customers_tensor.grad.zero_()


def predict(movie_id, customer_id):
    """Predict the rating a customer would give a movie, rounded to an int.

    Args:
        movie_id: raw movie id; must be a key of ``r_movies``.
        customer_id: raw customer id; must be a key of ``r_customers``.

    Returns:
        The dot product of the two latent vectors, rounded with ``round``.

    Raises:
        KeyError: if either id is missing from its lookup table.
    """
    movie_row = r_movies[movie_id]
    customer_row = r_customers[customer_id]
    # List indexing keeps a leading dimension, so both operands are
    # (1, latent) matrices and the product is a 1x1 matrix.
    movie_vec = movies_tensor[[movie_row]]
    customer_vec = customers_tensor[[customer_row]]
    score = movie_vec @ customer_vec.T
    return round(score.item())


# Show the learned movie factors (one row of `latent_size` values per movie).
print(movies_tensor)

In [None]:
correct = 0
sum = 0
for d in tqdm(dataset):
    movie_id, customer_id, rating = d
    if rating != 0:
        sum += 1
        pred = predict(movie_id, customer_id)
        if rating == pred:
            correct += 1

In [None]:
accuracy = correct / sum
print(f'accuracy: {round(accuracy * 100, 2)}%')