In [15]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

import msePipeline as mp

# Execution-target switch read by the cells below:
#   True  -> connect to RDS and commit the generated recommendations back to it
#   False -> connect to the local postgres database and keep predictions in the notebook
DEPLOY = False

In [10]:
# --- Connect and preprocess -------------------------------------------------
# The pipeline targets RDS when DEPLOY is True, the local postgres database otherwise.
if DEPLOY:
    print('Establishing connection with RDS...')
else:
    print('Establishing connection with local postgres database...')
pipeline = mp.MSEPipeline(deploy=DEPLOY, ratingsThresh=4)
pipeline.preprocess()

# --- Split ------------------------------------------------------------------
print("Splitting up test, train, val...")
train, test, val = pipeline.split_test_train()
print(f"As a check, the largest user_id in the train set is {train.uid.max()},  and the length of the unique uids in the train set is {len(train.uid.unique())}.")

# --- Train and evaluate -----------------------------------------------------
print("Training a model...")
# train a model and then predict for the site users
model = mp.MSErec(df = train)

# update the gradient based on new feature matrices
error = model.trainModel()
test_error = mp.meanSquareError(df = test, user_features=model.user_features, item_features=model.item_features)
# Surface the held-out error instead of silently discarding it.
print(f"Test-set error (mp.meanSquareError): {test_error}")

# --- Predict ----------------------------------------------------------------
# `recs` is the name the inspection cells further down read. It is bound in
# BOTH branches so the notebook survives Restart Kernel -> Run All regardless
# of DEPLOY (previously only `new_df` was defined, leaving `recs` undefined).
if DEPLOY:
    print("Performing and commiting recommendations...")
    # BUG: right now this only works for deployed models
    pipeline.user_predictions = model.getPredictions(pipeline.user_predictions)

    # commit these recommendations to the RDS server
    pipeline.commit_recommendations()

    recs = pipeline.user_predictions
else:
    new_df = model.getPredictions(train)
    # Keep `new_df` for any existing references; `recs` is the canonical name.
    recs = new_df

Establishing connection with local postgres database...
Splitting up test, train, val...
The number of books in the train set: 10000, test set: 9993, val set: 9992. The number of users in the train set: 53406, test set: 26697, val set: 26684.
As a check, the largest user_id in the train set is 53405,  and the length of the unique uids in the train set is 53406.
Training a model...
standard: 

          iid    uid  rating  prediction
1662228   436      0       4    4.109422
1709763   420      0       5    3.976743
59       1795      0       5    3.840770
73638      10      0       5    3.835940
318749    868      0       4    3.762738
...       ...    ...     ...         ...
2057083  3509  53405       4    4.465484
2056911  9746  53405       4    4.387425
2056913  9487  53405       4    4.382054
2057067  2158  53405       5    4.324840
2057084  3607  53405       4    4.193172

[533114 rows x 4 columns]

reverse: 

          iid    uid  rating  prediction
318751    413      0       4    

In [17]:
# Connection settings come from the environment so that no credentials are
# stored in the notebook. (`create_engine` and `quote_plus` are imported in the
# notebook's top import cell.)
DB_USER = os.environ.get("DB_USER")
DB_PASS = os.environ.get("DB_PASS")
DB_NAME = os.environ.get("DB_NAME")

# Fail with a readable message instead of building a URL that contains the
# literal text "None" for the user or database name.
missing_vars = [
    var_name
    for var_name, var_value in (("DB_USER", DB_USER), ("DB_NAME", DB_NAME))
    if not var_value
]
if missing_vars:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_vars)}")

# Percent-encode the credentials: characters such as '@', ':' or '/' in a
# user name or password would otherwise be parsed as URL structure.
# The password part is omitted entirely when DB_PASS is not set.
db_credentials = quote_plus(DB_USER)
if DB_PASS:
    db_credentials += f":{quote_plus(DB_PASS)}"

engine = create_engine(
    f"postgresql://{db_credentials}@localhost/{DB_NAME}")

# I have plenty of RAM locally, no need for chunks.
with engine.connect() as connection:
    books = pd.read_sql_table('book', connection)

In [18]:
# Preview the first five rows of the loaded book table (5 is head()'s default).
books.head(n=5)

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [24]:
# Align the book table with the recommender's item ids: rename `book_id` to
# `iid` and shift it down by one (the previewed book table starts book_id at 1,
# while recs.iid.min() is 0).
# The guard makes this cell idempotent: once `book_id` has been renamed, a
# re-run is a no-op instead of subtracting 1 a second time.
if 'book_id' in books.columns:
    books = (
        books
        .rename(columns={'book_id': 'iid'})
        .assign(iid=lambda frame: frame['iid'] - 1)
    )

# User whose predictions are inspected below.
uid_to_inspect = 1

# That user's rows ordered by predicted rating (highest first), labelled with
# author and title. The default inner merge keeps the left frame's key order,
# so the sort order survives the join.
(
    recs[recs.uid == uid_to_inspect]
    .sort_values(by='prediction', ascending=False)
    .merge(books[['iid', 'authors', 'original_title']], on='iid')
)

Unnamed: 0,iid,uid,rating,prediction,authors,original_title
0,7,1,4,5.878945,J.D. Salinger,The Catcher in the Rye
1,629,1,5,5.845388,"Sheryl Sandberg, Nell Scovell","Lean In: Women, Work, and the Will to Lead"
2,9761,1,4,5.507955,"Robert T. Kiyosaki, Sharon L. Lechter",Rich Dad's Guide to Investing: What the Rich I...
3,192,1,5,5.387105,Malcolm Gladwell,Outliers: The Story of Success
4,25,1,4,5.289284,Dan Brown,The Da Vinci Code
5,14,1,5,5.215686,"Anne Frank, Eleanor Roosevelt, B.M. Mooyaart-D...",Het Achterhuis: Dagboekbrieven 14 juni 1942 - ...
6,7232,1,5,5.210978,"Adele Faber, Elaine Mazlish, Kimberly Ann Coe",How to Talk So Teens Will Listen and Listen So...
7,20,1,5,5.156292,"J.K. Rowling, Mary GrandPré",Harry Potter and the Order of the Phoenix
8,79,1,5,5.131312,"Antoine de Saint-Exupéry, Richard Howard, Dom ...",Le Petit Prince
9,8033,1,4,5.109622,Seth Godin,All Marketers Are Liars: The Power of Telling ...


In [22]:
# Display-only preview: the renamed frame is shown but not assigned, so
# `books` itself is left unchanged by this cell.
books.rename(columns={'book_id': 'iid'})

Unnamed: 0,iid,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9.780439e+12,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9.780440e+12,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9.780316e+12,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9.780061e+12,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9.780743e+12,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,7130616,7130616,7392860,19,441019455,9.780441e+12,Ilona Andrews,2010.0,Bayou Moon,...,17204,18856,1180,105,575,3538,7860,6778,https://images.gr-assets.com/books/1307445460m...,https://images.gr-assets.com/books/1307445460s...
9996,9997,208324,208324,1084709,19,067973371X,9.780680e+12,Robert A. Caro,1990.0,Means of Ascent,...,12582,12952,395,303,551,1737,3389,6972,https://s.gr-assets.com/assets/nophoto/book/11...,https://s.gr-assets.com/assets/nophoto/book/50...
9997,9998,77431,77431,2393986,60,039330762X,9.780393e+12,Patrick O'Brian,1977.0,The Mauritius Command,...,9421,10733,374,11,111,1191,4240,5180,https://images.gr-assets.com/books/1455373531m...,https://images.gr-assets.com/books/1455373531s...
9998,9999,8565083,8565083,13433613,7,61711527,9.780062e+12,Peggy Orenstein,2011.0,Cinderella Ate My Daughter: Dispatches from th...,...,11279,11994,1988,275,1002,3765,4577,2375,https://images.gr-assets.com/books/1279214118m...,https://images.gr-assets.com/books/1279214118s...


In [23]:
# Smallest item id present in the recommendations frame.
recs['iid'].min()

0