# __Neural Matrix Factorization model results visualizations__

##### Initial setup

In [None]:
# Load the IPython extensions used in the next cell: `watermark` (environment
# report) and `autoreload` (re-import of edited modules).
%load_ext watermark
%load_ext autoreload

In [None]:
# Mode 2 reloads all modules before each cell is executed, so edits to the
# project's `src` package are picked up without restarting the kernel.
%autoreload 2
# Record the Python version, date, machine info and the listed package versions.
%watermark -v -n -m -p numpy,pandas,tensorflow,seaborn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import recmetrics
import sys
import os
import pickle
from scipy.stats import spearmanr
from pathlib import Path
sys.path.append('../../../../')
from src.settings import DATA_DIR, PROJECT_DIR, RESULT_DIR

# Resolve the parent of the working directory and echo the three locations the
# notebook relies on, one per line.
CURRENT_PATH = os.path.abspath(os.pardir)
print(CURRENT_PATH, DATA_DIR, RESULT_DIR, sep='\n')

In [None]:
import logging

# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)

In [None]:
from src.data_processing.visualization.plot_utils import plot_counts, plot_violin, plot_line, plot_distribution

from src.data_processing.dataframe_utils import (start_pipeline, drop_unnecessary_cols, expand_column, unpivot_dataframe,
                                                remove_nan, rename_cols, reset_index)

from src.data_processing.process.ratings_df_utils import (prepare_user_unrated_movies_table, prepare_user_predict_rated_movies,
                                                          prepare_user_rated_genres, merge_user_rating_with_movies, prepare_user_rated_movies_table)

from src.experiments.collaborative_filtering.neumf_params import get_params

from src.models.collaborative_filtering.neural_mf import F1Score, Precision, Recall, create_model

##### Load data 

In [None]:
# Folder with the merged dataset; the ratings and movies CSVs are read from it.
MERGED_DATA_PATH = os.path.join(DATA_DIR, 'processed', 'merged_ml25m_kaggle')

In [None]:
# Read the ratings and movies tables, in that order, from the merged-data folder.
ratings, movies = (
    pd.read_csv(os.path.join(MERGED_DATA_PATH, csv_name))
    for csv_name in ('ratings_merged.csv', 'movies_merged.csv')
)

In [None]:
# Distinct movie / user id counts; they are passed to get_params further down
# when the model is rebuilt.
num_movies, num_users = (
    np.unique(ratings[id_col].values).size for id_col in ('movieId', 'userId')
)

In [None]:
# Output folder for this notebook's figures; created (with any missing parents)
# up front so the savefig calls cannot fail on a missing directory.
REPORTS_CF_DIR = os.path.join(PROJECT_DIR, 'reports/figures/sec4_cf/neuMF')
os.makedirs(REPORTS_CF_DIR, exist_ok=True)
REPORTS_CF_DIR

In [None]:
# Root folders for saved checkpoints and for the neural-MF training logs; a
# run-specific sub-folder is appended once MODEL_NAME is chosen.
MODEL_DIR = os.path.join(RESULT_DIR, 'checkpoints')
LOGS_DIR = os.path.join(RESULT_DIR, 'logs/neural_mf')

## __NeuMF model (301 epochs)__

In [None]:
# Identifier of the training run to analyse; it names the sub-folder inside both
# the checkpoint and the log directories.
MODEL_NAME = '2020-05-31_17-18_NeuFM'

In [None]:
# Checkpoint file holding the trained weights, and the log folder of the same run.
MODEL_WEIGHTS_PATH = os.path.join(MODEL_DIR, MODEL_NAME, 'model_weights.ckpt')
LOGS_PATH = os.path.join(LOGS_DIR, MODEL_NAME)

In [None]:
# Load the evaluation results stored with the run's logs; the 'pred_rates' and
# 'true_rates' entries are read from this object below.
# The path is built with os.path.join, like every other path in this notebook,
# instead of concatenating a hard-coded '/' separator.
# NOTE: pickle.load can execute arbitrary code - only open files produced by
# your own training runs.
with open(os.path.join(LOGS_PATH, 'spearman.pkl'), 'rb') as f:
    results = pickle.load(f)

#### __Spearman and true/pred rates distribution comparison__

In [None]:
# Ground-truth ratings are used as stored; the model outputs are flattened to a
# 1-D vector and bounded to [0.5, 5.0].
true_rates = results['true_rates']
pred_rates = results['pred_rates'].flatten().clip(0.5, 5.0)

In [None]:
# Number of predictions in the loaded results.
len(pred_rates)

In [None]:
# Mean predicted rating after clipping.
np.mean(pred_rates)

In [None]:
# Mean squared error between true and predicted ratings.
recmetrics.mse(true_rates, pred_rates)

In [None]:
# Root mean squared error between true and predicted ratings.
recmetrics.rmse(true_rates, pred_rates)

In [None]:
# Spearman rank correlation (and its p-value) between true and predicted ratings.
spearmanr(true_rates, pred_rates)

In [None]:
# Violin plot with the true ratings on x and the predicted ratings on y; the
# returned axes object is kept so the figure can be saved in the next cell.
# The title is rendered into the saved report figure, so its spelling
# ("comparison") is fixed here.
ax = plot_violin(x=true_rates, y=pred_rates, title='True and predicted rates comparison on test data', data=None, palette='viridis')

In [None]:
# Write the violin plot into the report folder, cropping the surrounding
# whitespace.
fig = ax.get_figure()
fig.savefig(
    os.path.join(REPORTS_CF_DIR, 'true_pred_test_compare.png'),
    bbox_inches="tight",
)

#### __Example user recommendations__

In [None]:
# Rebuild the network, sized by the user and movie counts of the ratings table.
# NOTE(review): get_params must still describe the architecture the checkpoint
# was trained with, otherwise loading the weights will not match - confirm.
params = get_params(num_users=num_users, num_items=num_movies)
model = create_model(params)

In [None]:
# Restore the trained weights of the selected run into the fresh model.
model.load_weights(MODEL_WEIGHTS_PATH)

##### User 847 with 2701 ratings and an average rating of 3.458719

In [None]:
# User whose recommendations are inspected in this section.
USER_ID = 847

In [None]:
# Candidate table for this user - presumably the movies the user has not rated
# yet (inferred from the helper's name; confirm in ratings_df_utils).
user_unrated_df = prepare_user_unrated_movies_table(ratings, USER_ID)

In [None]:
user_unrated_df.head()

In [None]:
# Column overview; the 'u_id' and 'i_id' columns are the ones fed to the model.
user_unrated_df.info()

In [None]:
predict_rates = model.predict([user_unrated_df['u_id'].values, user_unrated_df['i_id'].values])

In [None]:
predict_rates = np.clip(predict_rates, 0.5, 5.0)

In [None]:
np.max(predict_rates)

In [None]:
np.min(predict_rates)

In [None]:
# Combine the candidate rows, the movie table and the clipped predictions; the
# result exposes a 'predict_rate' column that is filtered on below.
user_pred_df = prepare_user_predict_rated_movies(user_unrated_df, movies, predict_rates)

In [None]:
user_pred_df.head()

In [None]:
TOP_VAL = 3.75

user_top_list = user_pred_df[user_pred_df['predict_rate'] >= TOP_VAL]

In [None]:
len(user_top_list)

In [None]:
# Reshape the recommended movies into long form with a single 'genre' column:
# the 'genres' column is expanded and unpivoted while the id/title/date columns
# are kept, the 'variable' helper column is dropped, rows without a value are
# removed and 'value' is renamed to 'genre'.
# NOTE(review): the exact behaviour of each step lives in dataframe_utils; this
# summary is inferred from the helper names and arguments - confirm there.
user_pred_genres = (user_top_list
                      .pipe(start_pipeline)
                      .pipe(drop_unnecessary_cols, columns=['plot_keywords', 'predict_rate'])
                      .pipe(expand_column,
                            keep_cols=['userId', 'movieId', 'title', 'release_date'], expand_col='genres')
                      .pipe(unpivot_dataframe,
                            keep_cols=['userId', 'movieId', 'title', 'release_date'])
                      .pipe(drop_unnecessary_cols, columns=['variable'])
                      .pipe(remove_nan, columns=['value'])
                      .pipe(rename_cols, colmap_dict={'value': 'genre'})
                      .pipe(reset_index))

In [None]:
# The user's own ratings, joined with the movie table by the project helpers.
user_rated_movies = merge_user_rating_with_movies(
    prepare_user_rated_movies_table(ratings, USER_ID),
    movies,
)

# Genre-level view of those rated movies (it carries 'rating', 'timestamp' and
# 'genre' columns, which the following cells rely on).
user_rated_genres = prepare_user_rated_genres(user_rated_movies)

In [None]:
# Keep only the rows rated at or above the threshold, then discard the
# timestamp and rating columns.
user_rated_genres = (
    user_rated_genres
    .loc[user_rated_genres['rating'] >= TOP_VAL]
    .drop(columns=['timestamp', 'rating'])
)
# Tag each table with its origin; the scalar label is broadcast to every row.
user_rated_genres['status'] = f'rated above {TOP_VAL}'
user_pred_genres['status'] = f'predicted rate above {TOP_VAL}'

In [None]:
# Number of genre rows among the user's highly rated movies.
len(user_rated_genres)

In [None]:
# Stack the rated and the predicted tables; the 'status' column tells them apart
# and is used as the facet row in the plot.
pred_rated_concat = pd.concat([user_rated_genres, user_pred_genres])

In [None]:
# One count plot per 'status' value (rated vs. predicted), stacked as rows.
# Genres are ordered by their frequency among the user's highly rated movies so
# both facets share the same x order; the y axes are independent (sharey=False).
with sns.axes_style("darkgrid"):
    g = sns.FacetGrid(pred_rated_concat, row="status", height=4, aspect=2.5, sharey=False, sharex=True)
    g.map_dataframe(sns.countplot, x='genre', order=user_rated_genres['genre'].value_counts().index, palette='copper_r', saturation=0.8)
    
    g.fig.suptitle(f'Genre count between movies highly rated and recommended for user {USER_ID}', y=1.005, fontsize=14.0)
    
    for ax in g.axes.flat:
        # Slant the genre labels and right-align them under their ticks.
        ax.set_xticklabels(ax.get_xticklabels(),
                               rotation=45,
                               horizontalalignment='right')
        # Annotate every bar with its count, centred 5 points above the bar top.
        # NOTE(review): a genre missing from a facet may yield a bar without a
        # finite height, which would be labelled 'nan' - confirm on the output.
        for p in ax.patches:
            ax.annotate('{:.0f}'.format(p.get_height()),
                        xy=(p.get_x() + p.get_width() / 2., p.get_height()),
                        ha='center', va='center', xytext=(0, 5), textcoords='offset points')



In [None]:
# Write the faceted genre comparison into the report folder, cropping the
# surrounding whitespace.
fig = g.fig
fig.savefig(
    os.path.join(REPORTS_CF_DIR, 'high_rate_recommend_compare.png'),
    bbox_inches="tight",
)