# Parameters

In [7]:
# Name of the dataset subset; it is interpolated into every data file name
# read by the loader helpers in this notebook.
subset_name = 'CDs_and_Vinyl'

# Users to inspect — presumably `reviewerID` values from the dataset
# (TODO confirm against the cells that consume this list).
user_ids = [
    'A1IKOYZVFHO1XP',
    'A2PV6GK1HV54Y9',
    'A1HCCW38EQQBTY',
    'A2C7BOQVFH1HLE',
    'A3O8YT41TDXL0B',
    'A26NLSTT75FMJM',
    'AWPODHOB4GFWL',
]

# NOTE(review): these look like minimum review-count thresholds for products
# and users; they are not used in the visible cells — confirm where they apply.
min_amount_product_mentions = 150
min_amount_user_mentions = 100

# Helper Methods

In [8]:
import numpy as np
import os
import pandas as pd

from sklearn.metrics import mean_squared_error, mean_absolute_error

In [9]:
def print_score(y1, y2):
    """Print the RMSE and MAE between two sequences of values.

    Args:
        y1: First array-like of values, passed as the first argument to the
            scikit-learn metric functions.
        y2: Second array-like of values, passed as the second argument.

    The two metric lines are followed by one empty line.
    """
    rmse = np.sqrt(mean_squared_error(y1, y2))
    print(f'RMSE: {rmse}')
    mae = mean_absolute_error(y1, y2)
    print(f'MAE: {mae}')
    print('')

In [10]:
def load_train(subset_name):
    """Load the training features and ratings for a subset.

    Reads `X_train_<subset_name>.txt` and `y_train_<subset_name>.txt` as CSV
    files; the paths are relative, so they resolve against the current
    working directory.

    Args:
        subset_name: Suffix identifying the dataset subset in the file names.

    Returns:
        A `(features, ratings)` tuple: the full feature DataFrame and the
        `'overall'` column of the label file as a Series.
    """
    features = pd.read_csv(f'X_train_{subset_name}.txt')
    labels = pd.read_csv(f'y_train_{subset_name}.txt')
    return features, labels['overall']

In [11]:
def load_train_sa(subset_name):
    """Load the `_sa` variant of the training features and ratings.

    Reads `X_train_sa_<subset_name>.txt` and `y_train_sa_<subset_name>.txt`
    as CSV files; the paths are relative, so they resolve against the current
    working directory.

    Args:
        subset_name: Suffix identifying the dataset subset in the file names.

    Returns:
        A `(features, ratings)` tuple: the full feature DataFrame and the
        `'overall'` column of the label file as a Series.
    """
    features = pd.read_csv(f'X_train_sa_{subset_name}.txt')
    labels = pd.read_csv(f'y_train_sa_{subset_name}.txt')
    return features, labels['overall']

In [12]:
def load_test(subset_name):
    """Load the test features and ratings for a subset.

    Reads `X_test_<subset_name>.txt` and `y_test_<subset_name>.txt` as CSV
    files; the paths are relative, so they resolve against the current
    working directory.

    Args:
        subset_name: Suffix identifying the dataset subset in the file names.

    Returns:
        A `(features, ratings)` tuple: the full feature DataFrame and the
        `'overall'` column of the label file as a Series.
    """
    features = pd.read_csv(f'X_test_{subset_name}.txt')
    labels = pd.read_csv(f'y_test_{subset_name}.txt')
    return features, labels['overall']

In [13]:
def load_meta(subset_name):
    """Load the product metadata table for a subset.

    Reads `meta_<subset_name>.txt` as a CSV file; the path is relative, so it
    resolves against the current working directory.

    Args:
        subset_name: Suffix identifying the dataset subset in the file name.

    Returns:
        The metadata as a DataFrame, exactly as parsed by `pd.read_csv`.
    """
    meta_path = f'meta_{subset_name}.txt'
    return pd.read_csv(meta_path)

In [14]:
def save_preds(model, subset_name, pred, f='test'):
    """Write predictions to a CSV file inside the `predictions` directory.

    The file is named
    `predictions_<f>-<model class name>-<subset_name>.txt` and is written
    relative to the current working directory.

    Args:
        model: Object whose class name is embedded in the file name.
        subset_name: Dataset subset identifier embedded in the file name.
        pred: Predictions; anything accepted by the `pd.DataFrame` constructor.
        f: Label for the data split the predictions belong to
            (defaults to `'test'`).
    """
    output_dir = 'predictions'
    # Create the target directory on first use; without it `to_csv` fails
    # because pandas does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    file_name = f'predictions_{f}-{model.__class__.__name__}-{subset_name}.txt'
    pd.DataFrame(pred).to_csv(os.path.join(output_dir, file_name))

In [15]:
def get_user_pred_data(user_id, subset_name):
    """Build the (product, user) pairs a user has not rated in the training set.

    Candidate products are the unique `asin` values across the train, `_sa`
    train, and test feature files. Products that appear for `user_id` in the
    regular training file are removed, and the remainder is joined with the
    product metadata.

    Args:
        user_id: Value matched against the `reviewerID` column.
        subset_name: Dataset subset identifier passed to the loader helpers.

    Returns:
        DataFrame with columns `asin`, `reviewerID` and `metadata`. The join
        uses `pd.merge`'s default inner join, so candidates without a row in
        the metadata table are dropped.
    """
    # Only the feature frames are needed here; the rating series are discarded.
    X_train, _ = load_train(subset_name)
    X_train_sa, _ = load_train_sa(subset_name)
    X_test, _ = load_test(subset_name)
    meta = load_meta(subset_name)

    product_ids = pd.concat([X_train['asin'], X_train_sa['asin'], X_test['asin']]).unique()

    # Build the set of already-rated products once, instead of re-filtering
    # the training frame for every candidate product.
    rated_product_ids = set(X_train.loc[X_train['reviewerID'] == user_id, 'asin'].values)
    not_rated_product_ids = [x for x in product_ids if x not in rated_product_ids]

    user_product_pairs = pd.DataFrame({
        'asin': not_rated_product_ids,
        'reviewerID': [user_id] * len(not_rated_product_ids)
    })
    return pd.merge(user_product_pairs, meta[['asin', 'metadata']], on='asin')

In [20]:
def show_recs(user_id, subset_name, predicted_ratings, k=5):
    """Print a user's top-rated training products and top-k recommendations.

    Args:
        user_id: Value matched against the `reviewerID` column.
        subset_name: Dataset subset identifier passed to the loader helpers.
        predicted_ratings: Predicted ratings assigned as the `overall` column
            of the frame returned by `get_user_pred_data`, so it has to be
            compatible with that frame's rows.
        k: Number of products to show in each of the two lists.
    """
    candidates = get_user_pred_data(user_id, subset_name)
    candidates['overall'] = predicted_ratings
    top_recs = candidates.sort_values(by='overall', ascending=False).head(k)

    meta = load_meta(subset_name)
    train_features, train_ratings = load_train(subset_name)
    train_features['overall'] = train_ratings

    print(f'User {user_id} has previously enjoyed:')
    # The user's own training reviews, highest rating first.
    user_history = train_features.loc[train_features['reviewerID'] == user_id]
    favourites = user_history.sort_values(by='overall', ascending=False).head(k)
    print_product_titles(meta, favourites['asin'].values)
    print('')

    print('We now recommend him:')
    print_product_titles(meta, top_recs['asin'].values)
    print('')
    
def print_product_titles(meta, ids):
    """Print "<title> by <brand>" for each product in `ids`, in `ids` order.

    Args:
        meta: DataFrame with `asin`, `title` and `brand` columns.
        ids: Iterable of `asin` values, ordered the way they should be
            printed (for example best-ranked first).

    Ids without a row in `meta` are skipped. If `meta` holds several rows for
    one id, every row is printed, in `meta` order.
    """
    # Position of each id's first occurrence, used as the sort key below.
    rank = {}
    for position, asin in enumerate(ids):
        rank.setdefault(asin, position)

    matches = meta[meta['asin'].isin(list(rank))]
    rows = [row for _, row in matches.iterrows()]
    # Filtering `meta` yields rows in `meta` order, which would discard the
    # caller's ranking; the stable sort restores the order of `ids`.
    rows.sort(key=lambda row: rank.get(row['asin'], len(rank)))

    for row in rows:
        print(f"{row['title']} by {row['brand']}")