# Disentangled Multimodal Representation Learning for Recommendation (DMRL)

## Imports

In [1]:
import pandas as pd
from cornac.data import Dataset

from utils import load_data, preprocessing_content_data


# Show every column when a wide DataFrame is displayed.
# Uncomment the next line to lift the row limit as well.
# pd.options.display.max_rows = None
pd.options.display.max_columns = None

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from cornac.models.dmrl.recom_dmrl import DMRL

## Load and process data

In [3]:
# load_data comes from the local `utils` module. The three results are used as
# pandas DataFrames in the cells below: `ratings` (UserId / ItemId / Rating /
# Timestamp columns), `content` (per-item metadata) and `targets` (UserId /
# ItemId pairs to predict) — TODO confirm against utils.load_data.
ratings, content, targets = load_data()

In [4]:
# Keep only the calendar date of each timestamp so events can be grouped by day.
event_dates = ratings["Timestamp"].dt.date
ratings["TimestampDate"] = event_dates

In [5]:
# Build the cornac training set from (user, item, rating) triplets.
uir_columns = ["UserId", "ItemId", "Rating"]
uir_triplets = ratings[uir_columns].values.tolist()
ratings_cornac_dataset = Dataset.build(uir_triplets, fmt="UIR")

## Basic analysis

In [None]:
# Peek at the first rows of the interaction table.
ratings.head(n=5)

In [None]:
# Number of distinct users and distinct items in the interaction table.
ratings["UserId"].nunique(), ratings["ItemId"].nunique()

In [None]:
# Distribution of the number of distinct items per (user, timestamp) pair,
# i.e. how many items each individual purchase contains.
items_per_purchase = ratings.groupby(["UserId", "Timestamp"])["ItemId"].nunique()
items_per_purchase.value_counts()

In [None]:
# Distribution of the number of distinct items each user purchased per calendar day.
items_per_user_day = ratings.groupby(["UserId", "TimestampDate"])["ItemId"].nunique()
items_per_user_day.value_counts()

In [None]:
# Distribution of the number of distinct purchase timestamps per user.
purchases_per_user = ratings.groupby("UserId")["Timestamp"].nunique()
purchases_per_user.value_counts()

In [None]:
# Distribution of the number of distinct purchase days per user.
purchase_days_per_user = ratings.groupby("UserId")["TimestampDate"].nunique()
purchase_days_per_user.value_counts()

In [7]:
# Count of missing values in each column of the item metadata.
content.isnull().sum()

ItemId              0
Title               0
Year                0
Rated               0
Released            0
Runtime             0
Genre               0
Director            0
Writer              0
Actors              0
Plot                0
Language            0
Country             0
Awards              0
Poster              0
Ratings             0
Metascore           0
imdbRating          0
imdbVotes           0
Type                0
DVD                24
BoxOffice          24
Production         24
Website            24
Response            0
totalSeasons    37989
Season          38011
Episode         38011
seriesID        38011
dtype: int64

## Train model

In [16]:
# Matrix-factorization recommender, constructed with cornac's default arguments.
# MF is imported in this cell because nothing else in the notebook brings it
# into scope (only DMRL is imported), so `MF()` would raise NameError.
from cornac.models import MF

matrix_factorization = MF()

In [None]:
# Train the factorization model on the full interaction dataset.
matrix_factorization.fit(train_set=ratings_cornac_dataset)

In [None]:
# NOTE(review): `a` is not defined in any cell visible here, so this line raises
# NameError when run. It looks like leftover debugging (or a deliberate stop for
# "Run All") — confirm and remove.
print(a)

In [None]:
# Peek at the first (user, item) pairs that need a prediction.
targets.head(n=5)

In [20]:
# Work on a copy of the targets with a "Rating" column pre-filled with -1,
# the sentinel for "no prediction yet".
target_prediction = targets.assign(Rating=-1)

In [None]:
# Predict a rating for every (user, item) pair in `targets` and store it in
# `target_prediction`. Earlier revisions only visited `targets.head()`, scored a
# hard-coded item list and never wrote the result back, leaving every row at -1.

# Predictions are floats; cast the sentinel column up front so the per-row
# assignment below does not mix dtypes.
target_prediction["Rating"] = target_prediction["Rating"].astype(float)

for index_line, line in targets.iterrows():
    # Translate raw ids into the internal indices the trained model works with.
    user_index = ratings_cornac_dataset.uid_map.get(line["UserId"])
    item_index = ratings_cornac_dataset.iid_map.get(line["ItemId"])

    # Users or items absent from the training data cannot be scored;
    # they keep the -1 sentinel.
    if (user_index is None) or (item_index is None):
        continue

    target_prediction.loc[index_line, "Rating"] = matrix_factorization.score(
        user_idx=user_index, item_idx=item_index
    )


In [None]:
# Peek at the first predicted rows.
target_prediction.head(n=5)

In [None]:
# Number of distinct predicted values; a count of 1 means only the sentinel is present.
target_prediction["Rating"].nunique()

In [None]:
# Number of rows still holding the -1 sentinel, i.e. without a prediction.
target_prediction["Rating"].eq(-1).sum()

In [None]:
# (rows, columns) of the prediction frame.
target_prediction.shape

In [None]:
# Rank each user's candidate items from highest to lowest predicted rating.
# A DataFrameGroupBy object has no `sort_values` method, so the frame is sorted
# on both keys instead: users ascending, ratings descending within each user.
target_prediction = target_prediction.sort_values(
    ["UserId", "Rating"], ascending=[True, False]
)

In [15]:
# Persist the predictions (rating column included) without the index.
submission_path = "submissao_1_fatorizacao_de_matriz.csv"
target_prediction.to_csv(submission_path, index=False)

In [19]:
# Drop the predicted rating column; only the row order is kept.
target_prediction = target_prediction.drop(columns=["Rating"])

In [20]:
# Persist the rating-free version of the predictions without the index.
submission_without_rating_path = "submissao_1_fatorizacao_de_matriz_sem_rating.csv"
target_prediction.to_csv(submission_without_rating_path, index=False)

In [16]:
# Reload the previously saved predictions (with ratings) from disk.
saved_submission_path = "submissao_1_fatorizacao_de_matriz.csv"
target_prediction = pd.read_csv(saved_submission_path)

In [None]:
# Inspect the first 200 rows of the predictions.
target_prediction.head(n=200)

In [None]:
# Interactive lookup of the DataFrame.to_csv documentation; nothing later in
# the notebook depends on this cell.
help(pd.DataFrame.to_csv)

In [None]:
# matrix_factorization.get_item_vectors()

In [None]:
# matrix_factorization.get_user_vectors().shape