In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder, RobustScaler, StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error 
import tensorflow.compat.v1 as tf
from lightfm.data import Dataset
from lightfm import LightFM
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k, reciprocal_rank
from lightfm import cross_validation
import csv



In [2]:
def get_data():
    return csv.DictReader(
            (x for x in open('Data/Cleaned_Data/book_user_explicit_rating_cleaned.csv','r'))
        )

In [60]:
dataset = Dataset()

In [61]:
dataset.fit(users=(x['User_ID'] for x in get_data()),
            items=(x['Unique_ISBN'] for x in get_data()),
            item_features=(x['Book_Author'] for x in get_data()),
            user_features=(x['Age_Range'] for x in get_data())
            )

In [62]:
num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items {}.'.format(num_users, num_items))

Num users: 17511, num_items 13607.


In [63]:
(interactions, weights) = dataset.build_interactions(((x['User_ID'], x['Unique_ISBN'])
                                                      for x in get_data()))

In [64]:
print(repr(interactions))

<17511x13607 sparse matrix of type '<class 'numpy.int32'>'
	with 151324 stored elements in COOrdinate format>


In [65]:
item_features = dataset.build_item_features(((x['Unique_ISBN'], [x['Book_Author']])
                                              for x in get_data()))
print(repr(item_features))

<13607x18702 sparse matrix of type '<class 'numpy.float32'>'
	with 27407 stored elements in Compressed Sparse Row format>


In [66]:
user_features = dataset.build_user_features(((x['User_ID'], [x['Age_Range']])
                                              for x in get_data()))

In [76]:
train_interactions, test_interactions = cross_validation.random_train_test_split(interactions, random_state=np.random.RandomState(seed=11232))

train_weights, test_weights = cross_validation.random_train_test_split(weights, random_state=np.random.RandomState(seed=11232))

In [92]:
model = LightFM(loss='warp',learning_rate=0.1, random_state=np.random.RandomState(seed=11232))

In [93]:
model.fit_partial(
    interactions=train_interactions,
    item_features=item_features,
    user_features=user_features, sample_weight=train_weights,
    epochs=50, verbose=True)

Epoch: 100%|██████████| 50/50 [00:08<00:00,  6.20it/s]


<lightfm.lightfm.LightFM at 0x7fd3659cb310>

In [94]:
train_precision = precision_at_k(model, train_interactions, k=10,item_features=item_features,user_features=user_features).mean()
test_precision = precision_at_k(model, test_interactions, k=10,item_features=item_features,user_features=user_features).mean()

In [95]:
print(train_precision)
print(test_precision)

0.018625397
0.0058852523


In [96]:
score = auc_score( 
        model, interactions, 
        item_features=item_features, 
        user_features=user_features).mean()

In [97]:
print(score)

0.8536347
