In [1]:
import os
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from scipy.sparse import coo_matrix as cm
import lightfm as lf
import numpy as np

In [29]:
db_un = os.environ.get("DB_USER")
db_pw = os.environ.get("DB_PASS")

dbString = 'postgresql://'+db_un+':'+db_pw+'@localhost/bookapp'
db = create_engine(dbString)

dbConnect = db.connect()

archived_ratings = pd.read_sql("select * from \"archive_rating\"", dbConnect)
user_ratings = pd.read_sql("select * from \"user_rating\"", dbConnect)

In [30]:
# I want to filter out user's with only a few ratings
grouped_users = user_ratings.groupby('site_id').count()

for idx, group in grouped_users.iterrows():
    if group.rating < 6:
        user_ratings = user_ratings[user_ratings['site_id'] != idx]
        
user_ratings

Unnamed: 0,book_id,site_id,rating
0,2,1,3
1,18,1,4
2,23,1,3
3,24,1,4
4,25,1,5
5,21,1,4
6,27,1,5
7,7,1,5
8,19,1,5
9,155,1,5


In [31]:
last_archived_user = archived_ratings.user_id.max()
ulist = user_ratings.site_id.unique().tolist()
user_archive_ids = {}
for i in range(len(ulist)):
    arcid = last_archived_user + i
    temp = {ulist[i]:arcid}
    user_archive_ids.update(temp)

In [32]:
user_archive_ids

{1: 53424, 3: 53425}

In [33]:
user_ratings["user_id"] = user_ratings.site_id.map(user_archive_ids)

In [34]:
combined = archived_ratings.append(user_ratings[['user_id', 'book_id', 'rating']])

In [35]:
combined.rename(columns={'user_id': 'uid', 'book_id': 'iid'}, inplace=True)

In [36]:
combined.uid = combined.uid - 1

In [38]:
combined.uid = combined.uid.astype(int)
combined.iid = combined.iid.astype(int)

In [39]:
numUsers = combined.uid.max() + 1
numBooks = combined.iid.max() + 1

ratSparse = cm((combined.rating, (combined.uid, combined.iid)),shape=(numUsers, numBooks))

In [40]:
model = lf.LightFM(loss='warp')
model.fit(ratSparse, epochs = 20)

<lightfm.lightfm.LightFM at 0x7fe1e55ff2e0>

In [41]:
userRecs = pd.DataFrame(columns = ['uid', 'iid', 'score'])
items = np.array(combined.iid.unique().tolist())
items.sort()
num_recs = 20
for site_uid, model_uid in user_archive_ids.items():
    previous_ratings = user_ratings[user_ratings['site_id']!=site_uid].book_id.tolist()
    previous_ratings_fixed = [x-1 for x in previous_ratings]
    user_items = [x for x in items if x not in previous_ratings_fixed]
    scores = model.predict(np.int(model_uid-1), user_items)
    ordered_scores = scores[np.argsort(-scores)]
    recIds = np.argsort(-scores)
    for i in range(num_recs):
        userRecs.loc[len(userRecs)] = [site_uid, recIds[i], ordered_scores[i]]

In [42]:
userRecs

Unnamed: 0,uid,iid,score
0,1.0,34.0,1.145921
1,1.0,1.0,0.704231
2,1.0,16.0,0.694065
3,1.0,49.0,0.637501
4,1.0,286.0,0.390235
5,1.0,111.0,0.366673
6,1.0,38.0,0.303757
7,1.0,55.0,0.229443
8,1.0,0.0,0.226713
9,1.0,3.0,0.180584


In [11]:
userRecs.iid = userRecs.iid  + 1

In [12]:
userRecs['site_id'] = userRecs.uid.map({val:key for key, val in user_archive_ids.items()})

In [13]:
userRecs.drop('uid',  axis = 1, inplace=True)

In [14]:
userRecs = userRecs[['iid', 'site_id', 'score']].rename(columns = {'iid':'book_id'})

In [60]:
dbConnect.execute('DELETE FROM user_recs')
dbConnect.execute('ALTER SEQUENCE user_recs_id_seq RESTART')
for rec in userRecs.iterrows():
    iid = int(rec[1].book_id)
    sid = rec[1].site_id
    score = rec[1].score
    # just delete all the recommendations since we are making fresh ones now
    dbConnect.execute(f'INSERT INTO user_recs(book_id, site_id, score) VALUES ({iid}, {sid}, {score})')
    print('a')


a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a


In [56]:
userRecs

Unnamed: 0,book_id,site_id,score
0,2.0,2,3.612852
1,1.0,2,3.409104
2,7.0,2,3.354856
3,21.0,2,3.270059
4,23.0,2,3.263534
5,18.0,2,3.25138
6,25.0,2,3.227434
7,24.0,2,3.21585
8,27.0,2,3.199617
9,19.0,2,3.185457


In [14]:
l1 = [1,2,3,4,5,6]
l1-1
    

TypeError: unsupported operand type(s) for -: 'list' and 'int'