# Matrix Factorization

## Imports

In [1]:
import pandas as pd
from cornac.data import Dataset

from utils import load_data, preprocessing_content_data


# Disable pandas' row truncation so long results (e.g. value_counts) are shown
# in full. NOTE(review): the option is global, so any large DataFrame that is
# displayed directly later in the notebook will also render all of its rows.
pd.set_option('display.max_rows', None)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from cornac.models.mf.recom_mf import MF

## Load and process data

In [3]:
# load_data is imported from the project-local `utils` module; its three
# return values are unpacked as the ratings, content and targets objects.
ratings, content, targets = load_data()

In [4]:
# Add a date-only column derived from Timestamp (requires a datetime-like
# column, since it goes through the .dt accessor); used for per-day grouping.
ratings["TimestampDate"] = ratings['Timestamp'].dt.date

In [5]:
# Build the Cornac dataset from (user, item, rating) rows, matching fmt='UIR'.
ratings_cornac_dataset = Dataset.build(
    ratings.loc[:, ['UserId', 'ItemId', 'Rating']].values.tolist(),
    fmt='UIR',
)

In [6]:
# targets_cornac_dataset = Dataset.build(targets[['UserId', 'ItemId']].values.tolist(), fmt='UIR')

## Basic analysis

In [None]:
# Preview the first rows of the ratings frame.
ratings.head()

In [None]:
# Distinct user count and distinct item count, shown as a (users, items) tuple
ratings["UserId"].nunique(), ratings["ItemId"].nunique()

In [None]:
# Distribution of the number of distinct items per (user, exact timestamp)
# group, i.e. how many items each user has in a single timestamped event.
ratings.groupby(["UserId", 'Timestamp'])["ItemId"].nunique().value_counts()

In [None]:
# Distribution of the number of distinct items per (user, calendar date) group.
ratings.groupby(["UserId", 'TimestampDate'])["ItemId"].nunique().value_counts()

In [None]:
# Distribution of the number of distinct timestamps per user
# (how many separate timestamped events each user has).
ratings.groupby("UserId")['Timestamp'].nunique().value_counts()

In [None]:
# Distribution of the number of distinct calendar dates per user
# (on how many different days each user appears in the ratings).
ratings.groupby("UserId")['TimestampDate'].nunique().value_counts()

In [None]:
# Show a truncated view of the frame (first/last rows plus the shape footer).
# display.max_rows is set to None globally in the imports cell, so a bare
# `ratings` expression would render every single row of the frame.
with pd.option_context('display.max_rows', 10):
    display(ratings)

## Train model

In [14]:
# Pass an explicit seed so the model's random parameter initialisation is the
# same on every Restart & Run All. Every other hyper-parameter keeps Cornac's
# default value.
RANDOM_SEED = 42
matrix_factorization = MF(seed=RANDOM_SEED)

In [None]:
# Train on the full ratings dataset; no separate validation set is passed.
matrix_factorization.fit(ratings_cornac_dataset)

In [16]:
# Sanity probe: look up a user id that is presumably absent from the training
# data ("teste") to see what uid_map.get returns for unknown users.
# NOTE(review): exploratory leftover; the scoring loop relies on .get
# returning None for unknown ids.
a = ratings_cornac_dataset.uid_map.get("teste")

In [None]:
# Print the result of the unknown-user lookup stored in `a`.
print(a)

In [None]:
# Preview the first rows of the (user, item) pairs that need a prediction.
targets.head()

In [19]:
# Start from a copy of the target pairs with a sentinel rating of -1 meaning
# "not scored". The sentinel is a float on purpose: the scoring loop later
# writes float scores into this column, and writing floats into an int64
# column is deprecated in recent pandas (FutureWarning, slated to become an
# error).
target_prediction = targets.copy()
target_prediction["Rating"] = -1.0

In [20]:
# Score every (UserId, ItemId) pair in `targets` with the trained model.
#
# Raw ids are translated to the dataset's internal indices through
# uid_map / iid_map. A pair whose user or item has no index cannot be scored
# and receives the -1 sentinel instead.
#
# Scores are collected in a list and written to the frame once, instead of
# using iterrows() plus a .loc write per row, which is much slower.
uid_map = ratings_cornac_dataset.uid_map
iid_map = ratings_cornac_dataset.iid_map

predicted_ratings = []
for user_id, item_id in zip(targets["UserId"], targets["ItemId"]):
    user_index = uid_map.get(user_id)
    item_index = iid_map.get(item_id)

    if (user_index is None) or (item_index is None):
        predicted_ratings.append(-1.0)
        continue

    predicted_ratings.append(
        matrix_factorization.score(user_idx=user_index, item_idx=item_index)
    )

# Wrap in a Series carrying targets' index so the assignment is aligned by row
# label (as the original per-row .loc writes were), not by position.
target_prediction["Rating"] = pd.Series(predicted_ratings, index=targets.index)


In [None]:
# Preview the first scored rows.
target_prediction.head()

In [None]:
# Number of distinct values in the Rating column (the -1 sentinel counts as one).
target_prediction.Rating.nunique()

In [None]:
# Count rows still holding the -1 sentinel, i.e. pairs the scoring loop skipped
# because the user or the item had no index in the training dataset.
(target_prediction.Rating == -1).sum()

In [None]:
# (rows, columns) of the prediction frame, for comparison with the sentinel count.
target_prediction.shape

In [None]:
# Rank each user's candidate items from highest to lowest predicted rating.
# DataFrameGroupBy has no sort_values method, so the previous
# groupby("UserId").sort_values(...) raised AttributeError. Sorting on both
# keys keeps rows grouped by user and ordered by descending rating within
# each user.
target_prediction = target_prediction.sort_values(
    ["UserId", "Rating"], ascending=[True, False]
)

In [15]:
# Export the predictions including the Rating column; index=False keeps the
# row index out of the file.
target_prediction.to_csv("submissao_1_fatorizacao_de_matriz.csv", index=False)

In [19]:
# Drop the score column; the remaining columns keep their current row order.
# NOTE(review): this rebinds target_prediction, so the Rating values are no
# longer available from this variable afterwards.
target_prediction = target_prediction.drop(columns="Rating")

In [20]:
# Export the same rows without the Rating column (and without the row index).
target_prediction.to_csv("submissao_1_fatorizacao_de_matriz_sem_rating.csv", index=False)

In [16]:
# Re-load the exported file that still contains the Rating column.
# NOTE(review): the rendered output of the next cell shows an "Unnamed: 0"
# column, which suggests the file on disk was written by an earlier run that
# included the index. Re-run the export cell to refresh it.
target_prediction = pd.read_csv("submissao_1_fatorizacao_de_matriz.csv")

In [18]:
# Inspect the first 200 rows of the re-loaded predictions.
target_prediction.head(200)

Unnamed: 0,UserId,ItemId,Rating
0,0006246bee,1e5bdbcb76,7.811275
1,0006246bee,ea200e9d98,7.76161
2,0006246bee,31c1c7d3bb,7.610354
3,0006246bee,f8ddef048f,7.510563
4,0006246bee,82ac587988,7.473237
5,0006246bee,3e354893f6,7.405563
6,0006246bee,16ec9e966c,7.354983
7,0006246bee,c1ee6829f5,7.193252
8,0006246bee,80d1dae630,7.191081
9,0006246bee,0a5d7dd6f6,7.096943


In [13]:
# Leftover interactive lookup of to_csv's signature; it does not contribute to
# the analysis and is a candidate for removal.
help(pd.DataFrame.to_csv)

Help on function to_csv in module pandas.core.generic:

to_csv(self, path_or_buf: 'FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None' = None, sep: 'str' = ',', na_rep: 'str' = '', float_format: 'str | Callable | None' = None, columns: 'Sequence[Hashable] | None' = None, header: 'bool_t | list[str]' = True, index: 'bool_t' = True, index_label: 'IndexLabel | None' = None, mode: 'str' = 'w', encoding: 'str | None' = None, compression: 'CompressionOptions' = 'infer', quoting: 'int | None' = None, quotechar: 'str' = '"', lineterminator: 'str | None' = None, chunksize: 'int | None' = None, date_format: 'str | None' = None, doublequote: 'bool_t' = True, escapechar: 'str | None' = None, decimal: 'str' = '.', errors: 'str' = 'strict', storage_options: 'StorageOptions' = None) -> 'str | None'
    Write object to a comma-separated values (csv) file.
    
    Parameters
    ----------
    path_or_buf : str, path object, file-like object, or None, default None
        String, path object (imp

In [None]:
# matrix_factorization.get_item_vectors()

In [None]:
# matrix_factorization.get_user_vectors().shape