# Imports and class definitions

In [151]:
# standard
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# model imports
import lightfm as lf
from scipy.sparse import coo_matrix as cm
import random

# table formatting
from tabulate import tabulate
from IPython.display import display, HTML

class Pipeline():
    '''
    Small recommender pipeline around the goodbooks-10k CSV files.

    Typical use:
        rec = Pipeline()
        rec.preprocess()           # read the CSVs and re-base the ids to zero
        rec.get_model()            # build the sparse matrix and fit LightFM
        rec.recommend_random(seed) # show known items and suggestions for one user
    '''

    # Fitted LightFM model. Stays None until get_model() has been called.
    model = None

    def __init__(self):
        '''
        Creates empty placeholder frames so that every attribute exists on the
        instance even before read_data() has been called.
        '''
        self.books = pd.DataFrame()
        self.ratings = pd.DataFrame()
        self.book_tags = pd.DataFrame()
        self.tags = pd.DataFrame()
        self.to_read = pd.DataFrame()

    def preprocess(self):
        '''
        Reads the raw data and normalises the id columns.

        input:
            self - just class object

        output:
            null - results are stored on the object
        '''
        self.read_data()
        self.fix_ids()

    def get_model(self, epochs = 20, num_threads = 2, loss = 'warp', random_state = None):
        '''
        Builds the sparse rating matrix and fits a model on it. The fitted
        model is stored in self.model.

        input:
            self - just class object
            epochs - number of training epochs passed to fit
            num_threads - number of threads passed to fit
            loss - name of the LightFM loss function
            random_state - optional seed forwarded to LightFM; None keeps the
                           library default (unseeded)

        output:
            null - the model is stored in self.model
        '''
        interactions = self.get_sparse()
        self.model = self.fit_model(interactions, epochs, num_threads, loss,
                                    random_state = random_state)

    def read_data(self):
        '''
        This function just reads in the goodbooks-10k data. Later should have the
        functionality to read in scraped data.

        input:
            self - just class object

        output:
            null - no need to return anything
        '''
        self.books = pd.read_csv("goodbooks-10k/books.csv")
        self.ratings = pd.read_csv("goodbooks-10k/ratings.csv")

        # may not use these at first.
        self.book_tags = pd.read_csv("goodbooks-10k/book_tags.csv")
        self.tags = pd.read_csv("goodbooks-10k/tags.csv")
        self.to_read = pd.read_csv("goodbooks-10k/to_read.csv")

    def fix_ids(self):
        '''
        This function sets the book and user ids to start at zero. It also changes
        the name of headers from user_id and book_id to uid and iid.

        Every call subtracts one again, so it must run exactly once per
        read_data() call (preprocess() does this).

        input:
            self - just class object

        output:
            null - no need to return anything
        '''
        # standard feature names, then start user and book indices from 0
        self.ratings = (
            self.ratings
            .rename(columns={'user_id': 'uid', 'book_id': 'iid'})
            .assign(uid=lambda df: df['uid'] - 1,
                    iid=lambda df: df['iid'] - 1)
        )
        self.books = (
            self.books
            .rename(columns={'book_id': 'iid'})
            .assign(iid=lambda df: df['iid'] - 1)
        )

        # to_read keeps its original header; only book_id is shifted here.
        # NOTE(review): if to_read also carries a user id column it is presumably
        # still 1-based after this step - confirm before joining it on uid.
        self.to_read = self.to_read.assign(book_id=lambda df: df['book_id'] - 1)

    def get_sparse(self):
        '''
        Builds the user x book rating matrix from self.ratings.

        input:
            self - just class object (ratings must have uid, iid and rating)

        output:
            scipy COO matrix of shape (max uid + 1, max iid + 1) holding the ratings
        '''
        numUsers = self.ratings.uid.max() + 1
        numBooks = self.ratings.iid.max() + 1

        ratSparse = cm((self.ratings.rating, (self.ratings.uid, self.ratings.iid)),
                       shape = (numUsers, numBooks))
        return ratSparse

    def fit_model(self, ratSparse, epochs, num_threads, loss, random_state = None):
        '''
        Creates a LightFM model with the given loss and fits it.

        input:
            self - just class object
            ratSparse - sparse interaction matrix (see get_sparse)
            epochs - number of training epochs
            num_threads - number of threads used for fitting
            loss - name of the LightFM loss function
            random_state - optional seed forwarded to LightFM

        output:
            the fitted LightFM model
        '''
        model = lf.LightFM(loss = loss, random_state = random_state)
        model.fit(ratSparse, epochs = epochs, num_threads = num_threads)

        return model

    def recommend_random(self, seed):
        '''
        Picks one user at random (reproducibly, through seed), then displays up to
        10 of the user's highest rated known books and the 10 books with the
        highest predicted score. Books the user has already rated are not
        filtered out of the suggestions.

        input:
            self - just class object (needs a fitted model, ratings and books)
            seed - seed for the random module, decides which user is picked

        output:
            null - the tables are printed / displayed
        '''
        if self.model is None:
            raise RuntimeError('No fitted model: call get_model() before recommend_random().')

        random.seed(seed)
        user = random.choices(self.ratings.uid.unique().tolist())[0]

        # every book id that occurs in the ratings, in ascending order
        itemList = np.sort(np.asarray(self.ratings.iid.unique()))

        knownRatings = pd.merge(self.ratings.query('uid == @user'),
                    self.books[['iid', 'title', 'authors']], on='iid', how='left')

        # now let's predict them on our trained model
        score = self.model.predict(user, itemList)

        # argsort returns positions inside itemList, not book ids: translate the
        # positions back to ids and look the books up by iid, not by row label.
        topItems = itemList[np.argsort(-score)[:10]]
        suggested = (self.books.set_index('iid')
                     .loc[topItems, ['title', 'authors']]
                     .rename_axis(None))

        print(color.BOLD + 'User {} known items: '.format(user) + color.END, end = '\n')
        display(HTML(knownRatings[['title', 'authors', 'rating']]
                .sort_values(by='rating', ascending=False).iloc[:10].to_html()))
        print(color.BOLD + 'Top 10 suggested items:' + color.END, end = '\n')
        display(HTML(suggested[:10].to_html()))
        
        
        
class color:
    '''ANSI escape sequences used to style text written with print().'''

    # foreground colours
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # resets all styling set by the codes above
    END = '\033[0m'
    
# Shared keyword arguments for plot text: axis labels, titles and tick labels.
label_kwargs = dict(fontfamily='sans-serif', fontsize=15)

title_kwargs = dict(fontfamily='sans-serif', fontsize=25, fontweight='bold')

tick_kwargs = dict(rotation='vertical')

# Testing ground

In [152]:
# Build the pipeline: preprocess() reads the goodbooks-10k CSVs and re-bases the
# ids to zero; get_model() then fits the model with its default epochs and loss,
# using 12 threads.
rec = Pipeline()
rec.preprocess()
rec.get_model(num_threads = 12)

In [153]:
# The seed fixes which user is picked; the call displays that user's known
# books and the top 10 suggested books.
rec.recommend_random(seed = 1001)

[1mUser 3898 known items: [0m


Unnamed: 0,title,authors,rating
44,The Green Mile,Stephen King,5
70,Room,Emma Donoghue,5
100,"Eligible: A Modern Retelling of Pride and Prejudice (The Austen Project, #4)",Curtis Sittenfeld,5
99,A Little Life,Hanya Yanagihara,5
91,Homegoing,Yaa Gyasi,5
50,All the Light We Cannot See,Anthony Doerr,5
35,Case Histories (Jackson Brodie #1),Kate Atkinson,5
64,Rebecca,"Daphne du Maurier, Sally Beauman",5
18,Beautiful Ruins,Jess Walter,5
86,"March: Book One (March, #1)","John Lewis, Andrew Aydin, Nate Powell",5


[1mTop 10 suggested items:[0m


Unnamed: 0,title,authors
60,The Girl on the Train,Paula Hawkins
142,All the Light We Cannot See,Anthony Doerr
145,The Goldfinch,Donna Tartt
29,Gone Girl,Gillian Flynn
111,"Me Before You (Me Before You, #1)",Jojo Moyes
457,Station Eleven,Emily St. John Mandel
667,Everything I Never Told You,Celeste Ng
5,The Fault in Our Stars,John Green
1197,A Little Life,Hanya Yanagihara
672,Americanah,Chimamanda Ngozi Adichie
