# __Matrix factorization with Funk SVD method (netflix prize) - predictions for users__

##### Initial setup

In [None]:
# Load the IPython extensions configured in the next cell: `watermark`
# (environment/version report) and `autoreload` (re-import edited modules).
%load_ext watermark
%load_ext autoreload

In [None]:
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2
# Report the Python version (-v), date (-n), machine info (-m) and the
# versions of the listed packages (-p).
%watermark -v -n -m -p numpy,pandas,numba,funk_svd

In [None]:
import pandas as pd
import numpy as np
import sys
import os
import pickle
from pathlib import Path
from pandas import option_context
sys.path.append('../../../../')
from src.settings import DATA_DIR, RESULT_DIR, PROJECT_DIR

# Despite its name, CURRENT_PATH is the absolute path of the *parent* of the
# working directory (os.pardir is '..'). Both locations are echoed so the
# resolved paths can be checked by eye.
CURRENT_PATH = os.path.abspath(os.pardir)
print(CURRENT_PATH, DATA_DIR, sep='\n')

In [None]:
import logging

logging.basicConfig(level=logging.INFO)

In [None]:
from src.data_processing.process.ratings_df_utils import prepare_user_unrated_movies_table, prepare_user_predict_rated_movies

##### Load data

In [None]:
MERGED_DATA_PATH = os.path.join(DATA_DIR, 'processed', 'merged_ml25m_kaggle')
MERGED_DATA_PATH

In [None]:
# Load the merged movie table and the merged ratings table. The ratings
# columns are read with explicit 32-bit dtypes instead of pandas' 64-bit
# defaults (presumably to keep memory use down on the large ratings file).
movies_csv = os.path.join(MERGED_DATA_PATH, 'movies_merged.csv')
ratings_csv = os.path.join(MERGED_DATA_PATH, 'ratings_merged.csv')
ratings_dtypes = {
    'userId': np.int32,
    'movieId': np.int32,
    'rating': np.float32,
    'timestamp': np.int32,
}

movies = pd.read_csv(movies_csv)
ratings = pd.read_csv(ratings_csv, dtype=ratings_dtypes)

In [None]:
USERS_PREDICTION_DIR = os.path.join(DATA_DIR, 'predictions/CF')
USERS_PREDICTION_DIR

### __Prepare dataframes of unrated movies by chosen users__

In [None]:
user_ids = [162349, 72315, 542, 12949, 12002, 131800]

##### User 162349 - who rated familiar movies

In [None]:
user_162349_unrated_df = prepare_user_unrated_movies_table(ratings, 162349)

In [None]:
user_162349_unrated_df.head()

In [None]:
user_162349_unrated_df.info()

##### User 72315 - max rate amount

In [None]:
user_72315_unrated_df = prepare_user_unrated_movies_table(ratings, 72315)

In [None]:
user_72315_unrated_df.head()

In [None]:
user_72315_unrated_df.info()

##### User 542 - min rate amount

In [None]:
user_542_unrated_df = prepare_user_unrated_movies_table(ratings, 542)

In [None]:
user_542_unrated_df.head()

In [None]:
user_542_unrated_df.info()

##### User 12949 - mean rate amount, mean rate avg

In [None]:
user_12949_unrated_df = prepare_user_unrated_movies_table(ratings, 12949)

In [None]:
user_12949_unrated_df.head()

In [None]:
user_12949_unrated_df.info()

##### User 12002 - max rate avg

In [None]:
user_12002_unrated_df = prepare_user_unrated_movies_table(ratings, 12002)

In [None]:
user_12002_unrated_df.head()

In [None]:
user_12002_unrated_df.info()

##### User 131800 - min rate avg

In [None]:
user_131800_unrated_df = prepare_user_unrated_movies_table(ratings, 131800)

In [None]:
user_131800_unrated_df.head()

In [None]:
user_131800_unrated_df.info()

### __Predict selected users top recommended movies with trained model FunkSVD__

Read model trained on full data

In [None]:
FUNK_SVD_MODEL_PATH = os.path.join(RESULT_DIR, 'models', 'funk_svd', '30-04-2020 01:06:04_FunkSVD-15-factors-0.001-lr-0.005-reg-100-epochs-1-train_split.pkl')

In [None]:
# Read the saved model state; it is accessed by string keys when the weights
# are attached to the SVD object further down.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# were produced by this project.
with Path(FUNK_SVD_MODEL_PATH).open('rb') as model_file:
    model_weights = pickle.load(model_file)

In [None]:
from funk_svd import SVD
model = SVD()

In [None]:
# Attach every stored entry to the freshly constructed SVD instance under the
# same attribute name (presumably factor matrices, biases, the global mean and
# the user/item id mappings). The order matches the original assignments.
for weight_name in ('pu', 'qi', 'bu', 'bi', 'global_mean', 'user_dict', 'item_dict'):
    setattr(model, weight_name, model_weights[weight_name])

##### User 162349 - who rated familiar movies

In [None]:
# NOTE: `predict_rates` is reassigned in every user's section; run this cell
# immediately before the next one so the values belong to user 162349.
predict_rates = model.predict(user_162349_unrated_df)

In [None]:
user_162349_predictions = prepare_user_predict_rated_movies(user_162349_unrated_df, movies, predict_rates)

In [None]:
user_162349_predictions.head(10)

In [None]:
user_162349_predictions.info()

In [None]:
# Output folder for user 162349's predictions; created together with any
# missing parent folders, and left untouched if it already exists.
USER_162349_DIR = os.path.join(USERS_PREDICTION_DIR, 'user_162349')
os.makedirs(USER_162349_DIR, exist_ok=True)

In [None]:
user_162349_predictions.to_csv(os.path.join(USER_162349_DIR, 'usr162349_predict_rate_movies.csv'), index=False)

##### User 72315 - max rate amount

In [None]:
# NOTE: `predict_rates` is reassigned in every user's section; run this cell
# immediately before the next one so the values belong to user 72315.
predict_rates = model.predict(user_72315_unrated_df)

In [None]:
user_72315_predictions = prepare_user_predict_rated_movies(user_72315_unrated_df, movies, predict_rates)

In [None]:
user_72315_predictions.head(10)

In [None]:
user_72315_predictions.info()

In [None]:
# Output folder for user 72315's predictions; created together with any
# missing parent folders, and left untouched if it already exists.
USER_72315_DIR = os.path.join(USERS_PREDICTION_DIR, 'user_72315')
os.makedirs(USER_72315_DIR, exist_ok=True)

In [None]:
user_72315_predictions.to_csv(os.path.join(USER_72315_DIR, 'usr72315_predict_rate_movies.csv'), index=False)

##### User 542 - min rate amount

In [None]:
# NOTE: `predict_rates` is reassigned in every user's section; run this cell
# immediately before the next one so the values belong to user 542.
predict_rates = model.predict(user_542_unrated_df)

In [None]:
user_542_predictions = prepare_user_predict_rated_movies(user_542_unrated_df, movies, predict_rates)

In [None]:
user_542_predictions.head(10)

In [None]:
user_542_predictions.info()

In [None]:
# Output folder for user 542's predictions; created together with any
# missing parent folders, and left untouched if it already exists.
USER_542_DIR = os.path.join(USERS_PREDICTION_DIR, 'user_542')
os.makedirs(USER_542_DIR, exist_ok=True)

In [None]:
user_542_predictions.to_csv(os.path.join(USER_542_DIR, 'usr542_predict_rate_movies.csv'), index=False)

##### User 12949 - mean rate amount, mean rate avg

In [None]:
# NOTE: `predict_rates` is reassigned in every user's section; run this cell
# immediately before the next one so the values belong to user 12949.
predict_rates = model.predict(user_12949_unrated_df)

In [None]:
user_12949_predictions = prepare_user_predict_rated_movies(user_12949_unrated_df, movies, predict_rates)

In [None]:
user_12949_predictions.head(10)

In [None]:
user_12949_predictions.info()
user_12949_predictions.describe()

In [None]:
# Output folder for user 12949's predictions; created together with any
# missing parent folders, and left untouched if it already exists.
USER_12949_DIR = os.path.join(USERS_PREDICTION_DIR, 'user_12949')
os.makedirs(USER_12949_DIR, exist_ok=True)

In [None]:
user_12949_predictions.to_csv(os.path.join(USER_12949_DIR, 'usr12949_predict_rate_movies.csv'), index=False)

##### User 12002 - max rate avg

In [None]:
# NOTE: `predict_rates` is reassigned in every user's section; run this cell
# immediately before the next one so the values belong to user 12002.
predict_rates = model.predict(user_12002_unrated_df)

In [None]:
user_12002_predictions = prepare_user_predict_rated_movies(user_12002_unrated_df, movies, predict_rates)

In [None]:
user_12002_predictions.head(10)

In [None]:
user_12002_predictions.info()
user_12002_predictions.describe()

In [None]:
# Output folder for user 12002's predictions; created together with any
# missing parent folders, and left untouched if it already exists.
USER_12002_DIR = os.path.join(USERS_PREDICTION_DIR, 'user_12002')
os.makedirs(USER_12002_DIR, exist_ok=True)

In [None]:
user_12002_predictions.to_csv(os.path.join(USER_12002_DIR, 'usr12002_predict_rate_movies.csv'), index=False)

##### User 131800 - min rate avg

In [None]:
# NOTE: `predict_rates` is reassigned in every user's section; run this cell
# immediately before the next one so the values belong to user 131800.
predict_rates = model.predict(user_131800_unrated_df)

In [None]:
user_131800_predictions = prepare_user_predict_rated_movies(user_131800_unrated_df, movies, predict_rates)

In [None]:
user_131800_predictions.head(10)

In [None]:
user_131800_predictions.info()
user_131800_predictions.describe()

In [None]:
# Output folder for user 131800's predictions; created together with any
# missing parent folders, and left untouched if it already exists.
USER_131800_DIR = os.path.join(USERS_PREDICTION_DIR, 'user_131800')
os.makedirs(USER_131800_DIR, exist_ok=True)

In [None]:
user_131800_predictions.to_csv(os.path.join(USER_131800_DIR, 'usr131800_predict_rate_movies.csv'), index=False)

### __Filmweb added user__

In [None]:
# USER = "arktos8"
# USER = "LorethRex"
USER = "Nikki97"

In [None]:
USER_DATA_PATH = os.path.join(DATA_DIR, 'processed', f'user_{USER}')

In [None]:
# Output folder for the selected Filmweb user's predictions; created together
# with any missing parent folders, and left untouched if it already exists.
USERS_PRED_DIR = os.path.join(DATA_DIR, 'predictions/CF', f'user_{USER}')
os.makedirs(USERS_PRED_DIR, exist_ok=True)

In [None]:
concat_rates = pd.read_csv(os.path.join(USER_DATA_PATH, 'concat_ratings.csv'))

In [None]:
concat_rates.head()

In [None]:
concat_rates.info()

In [None]:
# 162542 is not one of the ids listed in `user_ids`; presumably it is the id
# given to the Filmweb user inside concat_ratings.csv — TODO confirm it is
# still the right id whenever USER is changed.
user_unrated_df = prepare_user_unrated_movies_table(concat_rates, 162542)

In [None]:
user_unrated_df.info()

In [None]:
import logging

# Saved FunkSVD weights for a Filmweb user; the last part of the file name
# (before ".pkl") names the user the file was saved for.
USR_FUNK_SVD_MODEL_PATH = os.path.join(RESULT_DIR, 'models/funk_svd/filmweb_users', '05-05-2020 23:20:19_FunkSVD-45-factors-0.005-lr-0.02-reg-100-epochs-arktos8.pkl')

# Guard against a stale path: the file name is hard-coded while USER is
# switched by hand, so the two can drift apart and the predictions below would
# then presumably come from a model fitted with a different user's ratings.
if not USR_FUNK_SVD_MODEL_PATH.endswith(f'-{USER}.pkl'):
    logging.warning(
        'Model file "%s" does not match USER=%s; update USR_FUNK_SVD_MODEL_PATH.',
        os.path.basename(USR_FUNK_SVD_MODEL_PATH), USER,
    )

In [None]:
# Read the saved model state for the Filmweb user; it is accessed by string
# keys when the weights are attached to the SVD object further down.
# NOTE: unpickling can execute arbitrary code, so only load model files that
# were produced by this project.
with Path(USR_FUNK_SVD_MODEL_PATH).open('rb') as model_file:
    model_weights = pickle.load(model_file)

In [None]:
from funk_svd import SVD

model = SVD()

In [None]:
# Attach every stored entry to the freshly constructed SVD instance under the
# same attribute name (presumably factor matrices, biases, the global mean and
# the user/item id mappings). The order matches the original assignments.
for weight_name in ('pu', 'qi', 'bu', 'bi', 'global_mean', 'user_dict', 'item_dict'):
    setattr(model, weight_name, model_weights[weight_name])

In [None]:
# NOTE: both `model` and `predict_rates` are reused from the earlier sections;
# make sure the Filmweb model cells above have been run before this one.
predict_rates = model.predict(user_unrated_df)

In [None]:
user_predictions = prepare_user_predict_rated_movies(user_unrated_df, movies, predict_rates)

In [None]:
user_predictions.head(30)

In [None]:
user_predictions.to_csv(os.path.join(USERS_PRED_DIR, f'usr{USER}_predict_rate_movies.csv'), index=False)