In [1]:
import msePipeline as mp
import pandas as pd
import os

# Deployment switch, passed to mp.MSEPipeline(deploy=...) in the training cell.
# False tells our class to not pull from AWS: the run targets the local postgres
# database, and the branch that commits recommendations to RDS is skipped.
DEPLOY = False

In [7]:
# Announce which database backend this run is going to use.
print('Establishing connection with RDS...' if DEPLOY
      else 'Establishing connection with local postgres database...')

# Build the pipeline (it pulls all the ratings from the database) and run its
# cleaning steps.
pipeline = mp.MSEPipeline(ratingsThresh=4, deploy=DEPLOY)
pipeline.preprocess()

print("Splitting up test, train, val...")
train, test, val = pipeline.split_test_train()
print(
    f"As a check, the largest user_id in the train set is {train.uid.max()},  "
    f"and the length of the unique uids in the train set is {len(train.uid.unique())}."
)

# Fit a model on the training split.
print("Training a model...")
model = mp.MSErec(df=train)

# trainModel() hands back a value stored as `error` (presumably the training
# error -- confirm in msePipeline); the test split is then scored using the
# model's user/item features. Neither value is displayed by this cell.
error = model.trainModel()
test_error = mp.meanSquareError(
    df=test,
    user_features=model.user_features,
    item_features=model.item_features,
)

if not DEPLOY:
    # Local run: predictions are generated for the rows of the train split and
    # kept in `recs` for the inspection cells further down.
    recs = model.getPredictions(train)
else:
    print("Performing and commiting recommendations...")
    # BUG (known issue, carried over): right now this only works for deployed models
    pipeline.user_predictions = model.getPredictions(pipeline.user_predictions)

    # commit these recommendations to the RDS server
    pipeline.commit_recommendations()

Establishing connection with local postgres database...
Splitting up test, train, val...
The number of books in the train set: 10000, test set: 9993, val set: 9992. The number of users in the train set: 53406, test set: 26697, val set: 26684.
As a check, the largest user_id in the train set is 53405,  and the length of the unique uids in the train set is 53406.
Training a model...
standard: 

          iid    uid  rating  prediction
73648      35      0       4    4.330617
318770    232      0       4    4.180569
641148     80      0       5    4.162891
1961917   900      0       4    4.084551
73650      12      0       4    4.026497
...       ...    ...     ...         ...
2057085  3831  53405       4    4.459996
2057086  3873  53405       5    4.385815
2057069  3864  53405       5    4.354998
2057087  4655  53405       4    4.278124
2057084  3607  53405       4    4.195562

[533114 rows x 4 columns]

reverse: 

          iid    uid  rating  prediction
73646    1520      0       5    

In [8]:
# Pull the book metadata table from the local postgres database so that the
# example ratings / recommendations below can be shown with authors and titles.
from urllib.parse import quote

from sqlalchemy import create_engine

DB_USER = os.environ.get("DB_USER")
DB_PASS = os.environ.get("DB_PASS")
DB_NAME = os.environ.get("DB_NAME")

# Fail early with a readable message: an unset variable would otherwise be
# formatted into the URL as the literal text "None" and only surface later as
# an opaque connection error.
missing_vars = [
    name for name, value in (("DB_USER", DB_USER), ("DB_NAME", DB_NAME)) if not value
]
if missing_vars:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_vars)}"
    )

# Percent-encode the credentials. Characters such as '@', ':' or '/' inside a
# user name or password would otherwise be parsed as URL structure.
# safe='' also encodes '/', and spaces become %20 rather than '+'.
credentials = quote(DB_USER, safe='')
if DB_PASS is not None:
    # The password is optional so that password-less local setups keep working.
    credentials += ':' + quote(DB_PASS, safe='')

engine = create_engine(f"postgresql://{credentials}@localhost/{DB_NAME}")
with engine.connect() as connection:
    books = pd.read_sql_table('book', connection)

In [9]:
# let's get a sample set of ratings
user_id = 1

# Give the book table the same key as the ratings frames: rename book_id -> iid
# and shift it down by one (presumably because the ratings use 0-based iids --
# confirm against the pipeline).
# The guard keeps this cell idempotent: once book_id has been renamed, running
# the cell again must not subtract 1 a second time.
if 'book_id' in books.columns:
    books = books.rename(columns={'book_id': 'iid'})
    books['iid'] = books['iid'] - 1

# the user's favorite books: their archived ratings, highest first, joined with
# the book metadata; the first 10 rows are displayed
(pipeline.archived_ratings[pipeline.archived_ratings.uid == user_id]
 .sort_values(by='rating', ascending=False)
 .merge(books[['iid', 'authors', 'original_title']], on='iid')
 .head(10))

Unnamed: 0,iid,uid,rating,authors,original_title
0,235,1,5,Jon Krakauer,Into Thin Air: A Personal Account of the Mt. E...
1,629,1,5,"Sheryl Sandberg, Nell Scovell","Lean In: Women, Work, and the Will to Lead"
2,259,1,5,Dale Carnegie,How to Win Friends and Influence People
3,8854,1,5,Richard Branson,"Screw It, Let's Do It"
4,1445,1,5,"Travis Bradberry, Jean Greaves, Patrick Lencioni",Emotional Intelligence 2.0
5,7401,1,5,Shawn Achor,The Happiness Advantage: The Seven Principles ...
6,6383,1,5,Jonathan Haidt,The Happiness Hypothesis: Finding Modern Truth...
7,8013,1,5,Maxwell Maltz,Psycho-Cybernetics. A New Way to Get More Livi...
8,81,1,5,Jon Krakauer,Into the Wild
9,7232,1,5,"Adele Faber, Elaine Mazlish, Kimberly Ann Coe",How to Talk So Teens Will Listen and Listen So...


In [10]:
# the user's top 10 recommendations: rows of `recs` for this user, ordered by
# predicted rating (highest first) and joined with the book metadata.
# The limit is 10 to match the "top 10" intent and the favorites cell above
# (the slice was previously [:15]).
# Note: `recs` is only assigned when DEPLOY is False.
(recs[recs.uid == user_id]
 .sort_values(by='prediction', ascending=False)
 .merge(books[['iid', 'authors', 'original_title']], on='iid')
 .head(10))

Unnamed: 0,iid,uid,rating,prediction,authors,original_title
0,663,1,4,5.013175,Truman Capote,Breakfast at Tiffany's
1,4058,1,4,4.799262,Seth Godin,Purple Cow: Transform Your Business by Being R...
2,300,1,5,4.667403,Joseph Conrad,Heart of Darkness
3,22,1,5,4.604523,"J.K. Rowling, Mary GrandPré",Harry Potter and the Chamber of Secrets
4,81,1,5,4.464435,Jon Krakauer,Into the Wild
5,4,1,5,4.444457,F. Scott Fitzgerald,The Great Gatsby
6,283,1,5,4.430583,Jack Kerouac,On the Road
7,8518,1,5,4.39218,"Samuel Shem, John Updike",The House of God
8,17,1,5,4.268602,"J.K. Rowling, Mary GrandPré, Rufus Beck",Harry Potter and the Prisoner of Azkaban
9,6383,1,5,4.224743,Jonathan Haidt,The Happiness Hypothesis: Finding Modern Truth...
