In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np

# Read Data and Data Preprossicing



In [2]:
books = pd.read_csv('Books.csv', header=0)
#drop image columns
books = books.drop(books.columns[[-1, -2, -3]], axis=1)

#drop rows that contain null value
books.dropna(inplace=True)

#drop rows with same book title 
dup=books[books.duplicated(['Book-Title'], keep=False)].sort_values(by=['Book-Title'])
books=books[~books['Book-Title'].isin(dup['Book-Title'])]

#convert Year-Of-Publication from string to integer 
books['Year-Of-Publication']=pd.to_numeric(books['Year-Of-Publication'],downcast ='signed',errors='coerce')
books.dropna(inplace=True)
books.isnull().sum()
books['Year-Of-Publication'] = books['Year-Of-Publication'].apply(int)
books['Year-Of-Publication']=pd.to_numeric(books['Year-Of-Publication'],downcast ='integer',errors='coerce')

#choose 5000 sample randomly 
books=books.sample(n = 5000, random_state = 456).reset_index()
books

Unnamed: 0,index,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,130873,0030181062,"Fly, You Stupid Kite, Fly",Charles M. Schulz,1990,"Henry Holt &amp; Company, Inc."
1,25843,038533558X,Gone For Good,HARLAN COBEN,2002,Delacorte Press
2,62698,0449243214,Dread Companion,Adnre Norton,1984,Ballantine Books (Mm)
3,220280,0395735319,Fruitful: A Real Mother in the Modern World,Anne Richardson Roiphe,1996,Houghton Mifflin Company
4,190420,0727855956,Shadow Watch (Tom Clancy's Power Plays),Tom Clancy,2000,Severn House Publishers
...,...,...,...,...,...,...
4995,64762,037302827X,"Time To Trust (Harlequin Romance, No. 2827)",Rosemary Badger,1987,Harlequin
4996,256173,3442141516,Licht- Arbeit. Das Standardwerk der Heilung mi...,Barbara Ann Brennan,1998,Goldmann
4997,211320,3518384759,"Hermann Hesse Lesebuch. ErzÃ?Â¤hlungen, Betrac...",Hermann Hesse,1992,Suhrkamp
4998,120143,0099094800,The lair of the white worm,Bram Stoker,1974,Arrow Books


In [3]:
ratings = pd.read_csv('Ratings.csv', header=0)

#choose only the ratings of the choosen books
ratings=ratings[ratings['ISBN'].isin(books['ISBN'])]

ratings

Unnamed: 0,User-ID,ISBN,Book-Rating
11,276746,0449006522,0
168,276837,0380005239,5
201,276847,349912176X,0
242,276853,8448034023,7
251,276856,207033015X,0
...,...,...,...
1149248,276641,0375414290,8
1149249,276641,0375507493,6
1149348,276664,0747205051,7
1149370,276670,0316848670,9


In [4]:
users = pd.read_csv('Users.csv', header=0)
users

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",
...,...,...,...
278853,278854,"portland, oregon, usa",
278854,278855,"tacoma, washington, united kingdom",50.0
278855,278856,"brampton, ontario, canada",
278856,278857,"knoxville, tennessee, usa",


In [5]:
from sklearn.model_selection import train_test_split

train_rating, test_rating = train_test_split(ratings, test_size=0.2,random_state=0)

In [6]:
train_rating

Unnamed: 0,User-ID,ISBN,Book-Rating
798525,193560,1857230302,9
805722,196018,0671894390,0
721792,174596,0440414806,0
1087710,260699,0684196271,4
247688,56526,0553282476,9
...,...,...,...
1058044,252695,0446405329,0
270469,62891,0679723161,10
794638,192166,0373257058,0
875484,211971,3257224176,0


# Model Evaluator 

In [7]:
def get_favorite_movies(user_id, ratings_df):
    favorites = ratings_df[(ratings_df['User-ID'] == user_id) & (ratings_df['Book-Rating'] >= 7)].sort_values(by='Book-Rating', ascending=False)['ISBN']
    return set(favorites if type(favorites) == pd.Series else [favorites])

In [8]:
class ModelEvaluator:
    def __init__(self, training_data, testing_data, threshold=5):
        self.training_data = training_data
        self.testing_data = testing_data
        self.threshold = threshold

    def evaluate_model_for_user(self, model, user_id):
        #Getting the items in test set
        favorites_in_test = get_favorite_movies(user_id, self.testing_data)

        #Getting a ranked recommendation list from a model for a given user 
        person_recs_df = model.recommend_items(user_id, 
                                                  items_to_ignore=get_favorite_movies(user_id, self.training_data))

        # get only movies with predicated rating >= 5
        person_recs_df = person_recs_df[person_recs_df['predicted_rating'] >= self.threshold].sort_values(by='predicted_rating', ascending=False)
        true_relevent = person_recs_df[person_recs_df['ISBN'].isin(favorites_in_test)].shape[0]

        top_5_recommended = person_recs_df.head(5)
        top_10_recommended = person_recs_df.head(10)

        # Number of relevant and recommended items in top k
        hits_at_5_count = top_5_recommended[top_5_recommended['ISBN'].isin(favorites_in_test)].shape[0]
        hits_at_10_count = top_10_recommended[top_10_recommended['ISBN'].isin(favorites_in_test)].shape[0]

        precision_at_5 = hits_at_5_count / top_5_recommended.shape[0] if top_5_recommended.shape[0] != 0 else 0
        recall_at_5 = hits_at_5_count / true_relevent if true_relevent != 0 else 0

        precision_at_10 = hits_at_10_count / top_10_recommended.shape[0] if top_10_recommended.shape[0] != 0 else 0
        recall_at_10 = hits_at_10_count / true_relevent if true_relevent != 0 else 0

        person_metrics = {'hits@5_count':hits_at_5_count, 
                        'hits@10_count':hits_at_10_count,
                        'recommended@5_count':top_5_recommended.shape[0], 
                        'recommended@10_count':top_10_recommended.shape[0],  
                        'relevents': true_relevent,
                        'recall@5': recall_at_5,
                        'recall@10': recall_at_10,
                        'precision@5': precision_at_5,
                        'precision@10': precision_at_10}
        return person_metrics


    def evaluate_model(self, model):
        #print('Running evaluation for users')
        users_metrics = []
        users_ids = list(set(self.testing_data['User-ID'].values))
#         print(len(users_ids))
        for idx, user_id in enumerate(users_ids):
#             print('%d users processed' % idx ,' id=',user_id)
            metrics = self.evaluate_model_for_user(model, user_id)  
            metrics['User-ID'] = user_id
            users_metrics.append(metrics)

        detailed_results_df = pd.DataFrame(users_metrics) \
                          .sort_values('hits@5_count', ascending=False)

        global_recall_at_5 = detailed_results_df['hits@5_count'].sum() / float(detailed_results_df['relevents'].sum())
        global_recall_at_10 = detailed_results_df['hits@10_count'].sum() / float(detailed_results_df['relevents'].sum())

        global_precision_at_5 = detailed_results_df['hits@5_count'].sum() / float(detailed_results_df['recommended@5_count'].sum())
        global_precision_at_10 = detailed_results_df['hits@10_count'].sum() / float(detailed_results_df['recommended@10_count'].sum())

        global_metrics = {'modelName': model.get_model_name(),
                        'recall@5': global_recall_at_5,
                        'recall@10': global_recall_at_10,
                        'precision@5': global_precision_at_5,
                        'precision@10': global_precision_at_10}    
        return global_metrics, detailed_results_df
    
model_evaluator = ModelEvaluator(train_rating, test_rating)

# Populartiy Recommender Model

### recommend the most Popular and newest books 

In [9]:
# get the populartiy model predicted_rating of a Book populartiy model
def weighted_rating( x, C):
        v = x['ratings_count']
        R = x['ratings_mean']
        m = 5
        # Calculation based on the IMDB formula
        return  (v/(v+m) * R) + (m/(m+v) * C)

In [10]:
# intialize populartiy data frame
populartiy = ratings.groupby('ISBN').agg({'Book-Rating': ['mean', 'count']}).reset_index()
populartiy.columns = ['ISBN', 'ratings_mean', 'ratings_count']

# add Year-Of-Publication column to populartiy data frame
populartiy = pd.merge(populartiy, books, on="ISBN")  
populartiy = populartiy.drop(populartiy.columns[[-1, -3, -4]], axis=1)

# calculate predicted_rating for each book
C = populartiy['ratings_mean'].mean()
populartiy['predicted_rating'] = populartiy.apply(lambda x: weighted_rating(x, C), axis=1)

populartiy.sort_values(by='ratings_mean', ascending=False)

Unnamed: 0,ISBN,ratings_mean,ratings_count,index,Year-Of-Publication,predicted_rating
2317,0670838985,10.0,1,98902,1992,4.045937
439,0192821091,10.0,2,130402,1988,4.896517
3355,0831732288,10.0,1,218729,1991,4.045937
462,0195413814,10.0,1,62275,1999,4.045937
3348,0826457673,10.0,1,230819,2002,4.045937
...,...,...,...,...,...,...
3279,0819873942,0.0,1,74343,1997,2.379270
1552,0439538483,0.0,1,197722,2004,2.379270
1551,0439521386,0.0,1,198075,2003,2.379270
3283,0821220012,0.0,1,262052,1994,2.379270


In [11]:
class PopularityRecommender:
  
    MODEL_NAME = 'Popularity'

    def __init__(self, popularities_df,threshold=5):
        self.popularities_df = popularities_df
        self.threshold = threshold

    def get_model_name(self):
        return self.MODEL_NAME



    def recommend_items(self, user_id, items_to_ignore=[], topn=10, verbose=False):
        C = self.popularities_df['ratings_mean'].mean()
        
        ### sort by the predicted_rating and Year-Of-Publication
        recommendations_df = self.popularities_df[~self.popularities_df['ISBN'].isin(items_to_ignore)] \
                              .sort_values(by = ['predicted_rating','Year-Of-Publication',], ascending = [False,False]) \
                              .head(topn)

        return recommendations_df
    
popularity_model = PopularityRecommender(populartiy)

In [12]:
print('Evaluating Popularity recommendation model...')
pop_global_metrics, pop_detailed_results_df = model_evaluator.evaluate_model(popularity_model)
print('\nGlobal metrics:\n%s' % pop_global_metrics)
pop_detailed_results_df.head(10)

Evaluating Popularity recommendation model...

Global metrics:
{'modelName': 'Popularity', 'recall@5': 0.5714285714285714, 'recall@10': 1.0, 'precision@5': 0.0004197271773347324, 'precision@10': 0.00036726128016789086}


Unnamed: 0,hits@5_count,hits@10_count,recommended@5_count,recommended@10_count,relevents,recall@5,recall@10,precision@5,precision@10,User-ID
1040,1,1,5,10,1,1.0,1.0,0.2,0.1,192895
1498,1,1,5,10,1,1.0,1.0,0.2,0.1,227705
1055,1,1,5,10,1,1.0,1.0,0.2,0.1,61901
1888,1,1,5,10,1,1.0,1.0,0.2,0.1,188369
0,0,0,5,10,0,0.0,0.0,0.0,0.0,237568
1268,0,0,5,10,0,0.0,0.0,0.0,0.0,169310
1278,0,0,5,10,0,0.0,0.0,0.0,0.0,5543
1277,0,0,5,10,0,0.0,0.0,0.0,0.0,112026
1276,0,0,5,10,0,0.0,0.0,0.0,0.0,136584
1275,0,0,5,10,0,0.0,0.0,0.0,0.0,243077


In [13]:
pop_detailed_results_df[pop_detailed_results_df['User-ID']==0]

Unnamed: 0,hits@5_count,hits@10_count,recommended@5_count,recommended@10_count,relevents,recall@5,recall@10,precision@5,precision@10,User-ID


In [14]:
popularity_model.recommend_items(user_id=10,topn=10)

Unnamed: 0,ISBN,ratings_mean,ratings_count,index,Year-Of-Publication,predicted_rating
112,60609176,9.6,5,117095,1995,6.227562
5,3300277,8.6,5,141667,1985,5.727562
3785,931432820,6.909091,11,10686,1991,5.642226
3699,895941414,9.0,4,193938,1984,5.58618
4836,8420636940,9.0,4,75412,1971,5.58618
3679,894808559,10.0,3,117057,1990,5.534452
4568,2277330140,7.666667,6,45261,0,5.479602
4240,1582614687,8.0,5,105005,2001,5.427562
553,307987655,8.0,5,35718,1996,5.427562
1323,385495315,6.875,8,35030,1999,5.328894


In [15]:
popularity_model.recommend_items(10)

Unnamed: 0,ISBN,ratings_mean,ratings_count,index,Year-Of-Publication,predicted_rating
112,60609176,9.6,5,117095,1995,6.227562
5,3300277,8.6,5,141667,1985,5.727562
3785,931432820,6.909091,11,10686,1991,5.642226
3699,895941414,9.0,4,193938,1984,5.58618
4836,8420636940,9.0,4,75412,1971,5.58618
3679,894808559,10.0,3,117057,1990,5.534452
4568,2277330140,7.666667,6,45261,0,5.479602
4240,1582614687,8.0,5,105005,2001,5.427562
553,307987655,8.0,5,35718,1996,5.427562
1323,385495315,6.875,8,35030,1999,5.328894


In [16]:
ids = popularity_model.recommend_items(10)['ISBN'].values
books[books['ISBN'].isin(ids)]['Book-Title'].values

array(["America's Heroes: Inspiring Stories of Courage, Sacrifice and Patriotism",
       'El extranjero',
       'Sister Outsider: Essays and Speeches (Crossing Press Feminist Series)',
       'Cascades - \\Fahrenheit 451\\" (Collins Cascades)"',
       'The Complete Book of Essential Oils and Aromatherapy',
       "Eeyore, You're the Best (Disney's Pooh)",
       'Forget-Me-Nots: A Victorian Book of Love',
       'Meeting Jesus Again for the First Time : The Historical Jesus and the Heart of Contemporary Faith',
       'The Code Book : The Evolution Of Secrecy From Mary, To Queen Of Scots To Quantum Crytography',
       'Rencontre Du 3e Age'], dtype=object)

# Content Base Models
 I'm working with 5000 books and there is a 1750 uniqe Publishers and 4450 Authors and 5000 Titles.
 
 the Publishers and Authors are less than the books which mean there are common values between the samples and
 we will get a GOOD result if we apply Content Base on these two.
 
 but the Titles are equal to the books which mean that no common values between the samples and
 we will get a BAD result if we apply Content Base on Titles.
 
### yet, I think that the Publishers and Authors are not the perfect data to use it for content base model
### if there was a column contain summary or a column contain geners for each book , it will do better.
 
## I made 3 Models




In [17]:
publishers = list(set([x for titles in books['Publisher'].values for x in titles.split('|')]))
len(publishers)

1750

In [18]:
authors = list(set([x for titles in books['Book-Author'].values for x in titles.split('|')]))
len(authors)

4450

In [19]:
titles = list(set([x for titles in books['Book-Title'].values for x in titles.split('|')]))
len(titles)

5000

## 1- Content Base Model by Publishers values

In [20]:
expanded_books_df = books.copy()
for g in publishers:
    expanded_books_df[g] = [0 if not g in publishers.split('|') else 1 for publishers in books['Publisher'].values]

expanded_books_df

Unnamed: 0,index,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Associated Pub Group,Autumn Press,Grupo Oceano,Edicions de l'Eixample,...,Signet Book,Weiser Books,Pan Books Ltd,Double Dragon Publishing,Seven Stories Press,Natl Womens Christian,Transworld Publishers Limited,Rigby,Fischer Taschenbuch Verlag,World Publishing
0,130873,0030181062,"Fly, You Stupid Kite, Fly",Charles M. Schulz,1990,"Henry Holt &amp; Company, Inc.",0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,25843,038533558X,Gone For Good,HARLAN COBEN,2002,Delacorte Press,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,62698,0449243214,Dread Companion,Adnre Norton,1984,Ballantine Books (Mm),0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,220280,0395735319,Fruitful: A Real Mother in the Modern World,Anne Richardson Roiphe,1996,Houghton Mifflin Company,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,190420,0727855956,Shadow Watch (Tom Clancy's Power Plays),Tom Clancy,2000,Severn House Publishers,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,64762,037302827X,"Time To Trust (Harlequin Romance, No. 2827)",Rosemary Badger,1987,Harlequin,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4996,256173,3442141516,Licht- Arbeit. Das Standardwerk der Heilung mi...,Barbara Ann Brennan,1998,Goldmann,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4997,211320,3518384759,"Hermann Hesse Lesebuch. ErzÃ?Â¤hlungen, Betrac...",Hermann Hesse,1992,Suhrkamp,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4998,120143,0099094800,The lair of the white worm,Bram Stoker,1974,Arrow Books,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def normalize(value, old_max, old_min, new_max=10.0, new_min=0.0):
    old_range = (old_max - old_min)  
    new_range = (new_max - new_min)  
    return (((value - old_min) * new_range) / old_range) + new_min

In [22]:
class ContentBasedRecommender:
    
    MODEL_NAME = 'Content-Based'
    
    def __init__(self, items_df, training_data, testing_data):
        self.items_df = items_df
        self.features_names = items_df.columns[6:]
        self.training_data = training_data
        self.testing_data = testing_data
        
    def get_model_name(self):
        return self.MODEL_NAME

    def get_item_profile(self, item_id):
        item_profile = self.items_df[self.items_df['ISBN'] == item_id][self.features_names]
        return item_profile

    def get_items_profiles(self, ids):
        item_profiles = self.items_df[self.items_df['ISBN'].isin(ids)][self.features_names].values
        return item_profiles

    def build_users_profile(self, user_id):
        user_df = self.training_data[self.training_data['User-ID'] == user_id]
        user_items_profiles = self.get_items_profiles(user_df['ISBN'].values)
        
        user_items_ratings = np.array(user_df['Book-Rating'].values).reshape(-1,1)
        ####### if there is no ratings for this user
        if np.sum(user_items_ratings) != 0:
            user_profile = np.sum(np.multiply(user_items_profiles, user_items_ratings), axis=0) / np.sum(user_items_ratings)
        else:
            user_profile = np.sum(np.multiply(user_items_profiles, user_items_ratings), axis=0) / 0.000001
        return user_profile

    def get_similar_items_to_user_profile(self, user_id, topn=1000):
        user_profile = self.build_users_profile(user_id).reshape(1, -1)
        #Computes the cosine similarity between the user profile and all item profiles
        cosine_similarities = cosine_similarity(user_profile, self.items_df[self.features_names].values)
        #Gets the top similar items
        similar_indices = cosine_similarities.argsort().flatten()[-topn:]
        #Sort the similar items by similarity
        similar_items = sorted([(self.items_df.loc[i, 'ISBN'], cosine_similarities[0,i]) for i in similar_indices], key=lambda x: -x[1])
        return similar_items
        
    def recommend_items(self, user_id, items_to_ignore=[], topn=10):
        similar_items = self.get_similar_items_to_user_profile(user_id)
        similar_items_filtered = list(filter(lambda x: x[0] not in items_to_ignore, similar_items))
        
        recommendations_df = pd.DataFrame(similar_items_filtered, columns=['ISBN', 'predicted_rating']) \
                                    .head(topn)

        recommendations_df['predicted_rating'] = recommendations_df['predicted_rating'].apply(lambda x: normalize(x, 1.0, 0.0))
        return recommendations_df
    


In [23]:
content_based_recommender_model1 = ContentBasedRecommender(expanded_books_df, train_rating, test_rating)
print('Evaluating Content-Based Filtering model...')
cb_global_metrics, cb_detailed_results_df = model_evaluator.evaluate_model(content_based_recommender_model1)
print('\nGlobal metrics:\n%s' % cb_global_metrics)
cb_detailed_results_df.head(10)

Evaluating Content-Based Filtering model...

Global metrics:
{'modelName': 'Content-Based', 'recall@5': 0.7142857142857143, 'recall@10': 1.0, 'precision@5': 0.0020475020475020475, 'precision@10': 0.0016181229773462784}


Unnamed: 0,hits@5_count,hits@10_count,recommended@5_count,recommended@10_count,relevents,recall@5,recall@10,precision@5,precision@10,User-ID
1366,1,1,5,10,1,1.0,1.0,0.2,0.1,194358
21,1,2,5,10,2,0.5,1.0,0.2,0.2,98391
1780,1,1,1,1,1,1.0,1.0,1.0,1.0,220688
1357,1,1,5,10,1,1.0,1.0,0.2,0.1,120588
491,1,1,1,1,1,1.0,1.0,1.0,1.0,10354
0,0,0,0,0,0,0.0,0.0,0.0,0.0,237568
1271,0,0,5,10,0,0.0,0.0,0.0,0.0,46443
1268,0,0,0,0,0,0.0,0.0,0.0,0.0,169310
1269,0,0,0,0,0,0.0,0.0,0.0,0.0,185694
1270,0,0,0,0,0,0.0,0.0,0.0,0.0,193898


## 2- Content Base Model by Authors values

In [24]:
expanded_books_df = books.copy()
for g in authors:
    expanded_books_df[g] = [0 if not g in authors.split('|') else 1 for authors in books['Book-Author'].values]

expanded_books_df.head()

Unnamed: 0,index,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Niamh O'Connor,Thomas Cahill,Dorothy L. Sayers,LINDA HOWARD,...,William Shatner,Jean Marzollo,Jack D. Schwager,Sue Harrison,Thomas Zigal,Odette Pollar,Victor Dove,Natasha Roe,William T. Craig,Jane Berentson
0,130873,0030181062,"Fly, You Stupid Kite, Fly",Charles M. Schulz,1990,"Henry Holt &amp; Company, Inc.",0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,25843,038533558X,Gone For Good,HARLAN COBEN,2002,Delacorte Press,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,62698,0449243214,Dread Companion,Adnre Norton,1984,Ballantine Books (Mm),0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,220280,0395735319,Fruitful: A Real Mother in the Modern World,Anne Richardson Roiphe,1996,Houghton Mifflin Company,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,190420,0727855956,Shadow Watch (Tom Clancy's Power Plays),Tom Clancy,2000,Severn House Publishers,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
content_based_recommender_model2 = ContentBasedRecommender(expanded_books_df, train_rating, test_rating)
print('Evaluating Content-Based Filtering model...')
cb_global_metrics, cb_detailed_results_df = model_evaluator.evaluate_model(content_based_recommender_model2)
print('\nGlobal metrics:\n%s' % cb_global_metrics)
cb_detailed_results_df.head(10)

Evaluating Content-Based Filtering model...

Global metrics:
{'modelName': 'Content-Based', 'recall@5': 1.0, 'recall@10': 1.0, 'precision@5': 0.0037546933667083854, 'precision@10': 0.0034482758620689655}


Unnamed: 0,hits@5_count,hits@10_count,recommended@5_count,recommended@10_count,relevents,recall@5,recall@10,precision@5,precision@10,User-ID
623,1,1,2,2,1,1.0,1.0,0.5,0.5,223953
1780,1,1,5,6,1,1.0,1.0,0.2,0.166667,220688
1357,1,1,3,3,1,1.0,1.0,0.333333,0.333333,120588
0,0,0,0,0,0,0.0,0.0,0.0,0.0,237568
1267,0,0,0,0,0,0.0,0.0,0.0,0.0,120151
1278,0,0,0,0,0,0.0,0.0,0.0,0.0,5543
1277,0,0,0,0,0,0.0,0.0,0.0,0.0,112026
1276,0,0,0,0,0,0.0,0.0,0.0,0.0,136584
1275,0,0,1,1,0,0.0,0.0,0.0,0.0,243077
1274,0,0,1,1,0,0.0,0.0,0.0,0.0,38273


## 3- Content Base Model using Count Vectorizer by Titles , Authors and Publishers values

here we use the Titles values because the count vetorizer devided the sentence into words
and this create a common values between the samples.

for example :
(Harry Potter and the Champer of Secrets)
            is not the same of      (Harry Potter and the Goblet of Fire)
but (Harry) and (Potter) are common words between them.



In [26]:
expended_books_df = books.copy()

books_ids = expended_books_df['ISBN'].values.tolist()
expended_books_df['soup'] = [" ".join((books[books['ISBN'] == id]['Book-Title']
                                      +" "+books[books['ISBN'] == id]['Book-Author']
                                      +" "+books[books['ISBN'] == id]['Publisher']).values.tolist(),
                                     ) for id in books_ids]
expended_books_df

Unnamed: 0,index,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,soup
0,130873,0030181062,"Fly, You Stupid Kite, Fly",Charles M. Schulz,1990,"Henry Holt &amp; Company, Inc.","Fly, You Stupid Kite, Fly Charles M. Schulz He..."
1,25843,038533558X,Gone For Good,HARLAN COBEN,2002,Delacorte Press,Gone For Good HARLAN COBEN Delacorte Press
2,62698,0449243214,Dread Companion,Adnre Norton,1984,Ballantine Books (Mm),Dread Companion Adnre Norton Ballantine Books ...
3,220280,0395735319,Fruitful: A Real Mother in the Modern World,Anne Richardson Roiphe,1996,Houghton Mifflin Company,Fruitful: A Real Mother in the Modern World An...
4,190420,0727855956,Shadow Watch (Tom Clancy's Power Plays),Tom Clancy,2000,Severn House Publishers,Shadow Watch (Tom Clancy's Power Plays) Tom Cl...
...,...,...,...,...,...,...,...
4995,64762,037302827X,"Time To Trust (Harlequin Romance, No. 2827)",Rosemary Badger,1987,Harlequin,"Time To Trust (Harlequin Romance, No. 2827) Ro..."
4996,256173,3442141516,Licht- Arbeit. Das Standardwerk der Heilung mi...,Barbara Ann Brennan,1998,Goldmann,Licht- Arbeit. Das Standardwerk der Heilung mi...
4997,211320,3518384759,"Hermann Hesse Lesebuch. ErzÃ?Â¤hlungen, Betrac...",Hermann Hesse,1992,Suhrkamp,"Hermann Hesse Lesebuch. ErzÃ?Â¤hlungen, Betrac..."
4998,120143,0099094800,The lair of the white worm,Bram Stoker,1974,Arrow Books,The lair of the white worm Bram Stoker Arrow B...


In [27]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(expended_books_df['soup'])
count_matrix


<5000x13614 sparse matrix of type '<class 'numpy.int64'>'
	with 43097 stored elements in Compressed Sparse Row format>

In [28]:
class ContentBasedRecommender:
    
    MODEL_NAME = 'Content-Based'
    
    def __init__(self, items_ids, items_matrix, training_data, testing_data):
        self.items_ids = items_ids
        self.items_matrix = items_matrix
        self.training_data = training_data
        self.testing_data = testing_data
        
    def get_model_name(self):
        return self.MODEL_NAME

    def get_item_profile(self, item_id):
        idx = self.items_ids.index(item_id)
        return self.items_matrix[idx].toarray().reshape(-1)

    def get_items_profiles(self, ids):
        items_profiles = np.array([self.get_item_profile(x) for x in ids])
        return items_profiles

    def build_users_profile(self, user_id):
        
        user_df = self.training_data[self.training_data['User-ID'] == user_id]
        user_items_profiles = self.get_items_profiles(user_df['ISBN'].values)
        
        
        user_items_ratings = np.array(user_df['Book-Rating'].values).reshape(-1,1)
        user_profile = np.sum(np.multiply(user_items_profiles, user_items_ratings), axis=0) / np.sum(user_items_ratings)
        return user_profile

    def get_similar_items_to_user_profile(self, user_id, topn=10):
        user_profile = self.build_users_profile(user_id).reshape(1, -1)
        # اذا كان اليوزر مالو تقييمات بالدتا 
        if np.isnan(user_profile).all() :
            user_profile =np.zeros((1,self.items_matrix.shape[1])) 
#         print(np.isnan(user_profile).all())
        #Computes the cosine similarity between the user profile and all item profiles
        
        cosine_similarities = cosine_similarity(user_profile, self.items_matrix.toarray())
        
        #Gets the top similar items
        
        similar_indices = cosine_similarities.argsort().flatten()[-topn:]
        #Sort the similar items by similarity
        similar_items = sorted([(self.items_ids[i], cosine_similarities[0,i]) for i in similar_indices], key=lambda x: -x[1])
        return similar_items
        
    def recommend_items(self, user_id, items_to_ignore=[], topn=1000):
        similar_items = self.get_similar_items_to_user_profile(user_id)
        similar_items_filtered = list(filter(lambda x: x[0] not in items_to_ignore, similar_items))
        
        recommendations_df = pd.DataFrame(similar_items_filtered, columns=['ISBN', 'predicted_rating']) \
                                    .head(topn)

        recommendations_df['predicted_rating'] = recommendations_df['predicted_rating'].apply(lambda x: normalize(x, 1.0, 0.0))
        
        return recommendations_df
    
content_based_recommender_model = ContentBasedRecommender(expended_books_df['ISBN'].values.tolist(), count_matrix, train_rating, test_rating)

In [29]:
content_based_recommender_model.recommend_items(2891)

Unnamed: 0,ISBN,predicted_rating
0,1886039224,10.0
1,9960340112,2.010076
2,965177610,1.818182
3,517635488,1.740777
4,737302291,1.556998
5,793828015,1.462545
6,28619811,1.462545
7,747517568,1.462545
8,816179530,1.421338
9,716732106,1.315903


In [30]:
print('Evaluating Content-Based Filtering model number 2...')
cb_global_metrics, cb_detailed_results_df = model_evaluator.evaluate_model(content_based_recommender_model)
print('\nGlobal metrics:\n%s' % cb_global_metrics)
cb_detailed_results_df.head(10)

Evaluating Content-Based Filtering model number 2...

Global metrics:
{'modelName': 'Content-Based', 'recall@5': 1.0, 'recall@10': 1.0, 'precision@5': 0.004819277108433735, 'precision@10': 0.00404040404040404}


Unnamed: 0,hits@5_count,hits@10_count,recommended@5_count,recommended@10_count,relevents,recall@5,recall@10,precision@5,precision@10,User-ID
1780,1,1,1,1,1,1.0,1.0,1.0,1.0,220688
1357,1,1,1,1,1,1.0,1.0,1.0,1.0,120588
0,0,0,0,0,0,0.0,0.0,0.0,0.0,237568
1265,0,0,0,0,0,0.0,0.0,0.0,0.0,234828
1277,0,0,0,0,0,0.0,0.0,0.0,0.0,112026
1276,0,0,0,0,0,0.0,0.0,0.0,0.0,136584
1275,0,0,0,0,0,0.0,0.0,0.0,0.0,243077
1274,0,0,0,0,0,0.0,0.0,0.0,0.0,38273
1273,0,0,0,0,0,0.0,0.0,0.0,0.0,46461
1272,0,0,0,0,0,0.0,0.0,0.0,0.0,152946


# Collaborative Filtering Model

In [31]:
#Creating a sparse pivot table with users in rows and items in columns
users_items_pivot_matrix_df = ratings.pivot(index='User-ID', columns='ISBN', values='Book-Rating').fillna(0)
users_items_pivot_matrix_df.head(10)

ISBN,0001714236,0001840517,0002111314,0002118580,0002326833,0003300277,0006368409,0006374964,0006476325,0006642128,...,968150271X,9681505905,9684443102,970651452X,972102919X,9721035882,9722016563,9722318179,9960340112,9968746037
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
87,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
392,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
users_items_pivot_matrix = users_items_pivot_matrix_df.values
users_items_pivot_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [33]:
from scipy.sparse.linalg import svds

#The number of factors to factor the user-item matrix.
NUMBER_OF_FACTORS_MF = round(users_items_pivot_matrix.shape[1]/3)
#Performs matrix factorization of the original user item matrix
#U, sigma, Vt = svds(users_items_pivot_matrix, k = NUMBER_OF_FACTORS_MF)
U, sigma, Vt = svds(users_items_pivot_matrix, k = NUMBER_OF_FACTORS_MF)

In [34]:
U.shape

(6266, 1660)

In [35]:
Vt.shape


(1660, 4979)

In [36]:
sigma = np.diag(sigma)
sigma.shape

(1660, 1660)

In [37]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 
all_user_predicted_ratings

array([[ 6.06610296e-17,  9.88409692e-20, -2.23198020e-16, ...,
        -3.63222859e-16,  5.54222618e-16,  2.27547328e-16],
       [ 1.73371368e-17,  7.43878171e-20, -1.94105796e-16, ...,
        -1.63336029e-17,  2.71643592e-17, -1.54296445e-16],
       [ 7.24621691e-18,  4.83184116e-20,  3.28054777e-03, ...,
        -2.35348899e-16,  2.69484872e-17, -5.42857715e-04],
       ...,
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-6.38015911e-17, -1.54074204e-19, -1.22674777e-16, ...,
        -1.66141451e-15, -4.71254694e-16,  1.51580718e-16],
       [-1.55782005e-17,  9.83537419e-20, -3.19871836e-03, ...,
         8.60434256e-18,  3.74127014e-17,  1.05026310e-03]])

In [38]:
#Converting the reconstructed matrix back to a Pandas dataframe
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=users_items_pivot_matrix_df.columns, index=users_items_pivot_matrix_df.index)

In [39]:
preds_df.head(10)

ISBN,0001714236,0001840517,0002111314,0002118580,0002326833,0003300277,0006368409,0006374964,0006476325,0006642128,...,968150271X,9681505905,9684443102,970651452X,972102919X,9721035882,9722016563,9722318179,9960340112,9968746037
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
87,6.066103000000001e-17,9.884097e-20,-2.23198e-16,-3.880403e-16,1.602396e-17,4.597885e-17,1.341751e-16,-6.426721e-17,4.3157180000000004e-17,-4.330397e-17,...,-1.475633e-16,4.6878320000000004e-17,-5.648711000000001e-17,1.073629e-16,-1.209586e-16,1.901154e-16,5.1272400000000005e-17,-3.632229e-16,5.542226e-16,2.275473e-16
242,1.7337140000000002e-17,7.438781999999999e-20,-1.941058e-16,-2.165154e-16,1.3012950000000001e-17,3.161573e-16,7.969401000000001e-17,-8.819247e-18,3.0059130000000004e-17,-1.701331e-17,...,7.685964000000001e-17,-9.463661e-17,2.6312420000000002e-18,6.108445e-16,-5.45592e-17,-2.685678e-15,-3.653692e-16,-1.63336e-17,2.716436e-17,-1.542964e-16
243,7.246217e-18,4.8318409999999996e-20,0.003280548,-0.0004695389,-3.032649e-18,4.967891e-07,3.42005e-17,-5.432448000000001e-17,1.618261e-17,1.135181e-18,...,-1.474017e-05,-7.914989e-16,-3.092603e-16,-1.236655e-18,-8.792535e-17,4.368097999999999e-19,-9.115191e-06,-2.353489e-16,2.6948490000000003e-17,-0.0005428577
254,-3.214693e-17,-8.175589e-20,0.03273793,0.001942542,-1.488666e-17,3.786815e-05,-1.6018240000000002e-17,1.522386e-16,3.923774e-18,2.9944940000000004e-17,...,-0.0002536513,2.706268e-16,-4.755234e-17,-1.599831e-16,-5.703721000000001e-17,-7.543541000000001e-17,-1.469526e-06,4.356754e-16,-9.725717e-17,-0.001633769
256,-1.598306e-17,2.6040569999999998e-20,0.0001704348,-0.0001120576,-6.045937e-18,5.346126e-07,5.3024170000000004e-17,2.797113e-17,7.959826e-18,-3.5398780000000004e-17,...,3.507395e-06,3.950308e-16,-2.639348e-16,6.263165e-17,-2.410961e-17,-4.4800840000000005e-17,-3.598382e-07,-2.588428e-16,-6.628823000000001e-17,-0.0001686205
273,-1.345165e-17,-6.135919e-20,2.393655e-16,-1.468383e-16,-1.0008760000000001e-17,2.157487e-16,-8.058531000000001e-17,1.525293e-18,-2.3501220000000002e-17,2.300943e-17,...,2.155298e-16,1.927288e-15,-1.666002e-16,1.839487e-16,7.414333000000001e-17,2.5065260000000003e-17,3.093919e-16,1.502348e-16,-3.2993510000000004e-17,-1.016284e-15
384,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
392,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
408,1.4416540000000002e-17,8.641029e-21,1.342556e-15,-1.902438e-16,3.428226e-18,1.475253e-15,4.303817e-18,7.523581e-18,-2.9530979999999995e-19,1.651016e-18,...,-4.274992e-16,2.460847e-16,-8.648622e-16,-8.103201e-17,-3.176375e-18,-1.22377e-16,7.134182e-16,-4.3415690000000004e-17,-1.174945e-16,3.007017e-15


In [40]:
preds_df = preds_df.apply(lambda x: normalize(x, all_user_predicted_ratings.max(), all_user_predicted_ratings.min()))
preds_df.head(10)

ISBN,0001714236,0001840517,0002111314,0002118580,0002326833,0003300277,0006368409,0006374964,0006476325,0006642128,...,968150271X,9681505905,9684443102,970651452X,972102919X,9721035882,9722016563,9722318179,9960340112,9968746037
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
87,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,...,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922
242,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,...,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922
243,1.435922,1.435922,1.438361,1.435573,1.435922,1.435923,1.435922,1.435922,1.435922,1.435922,...,1.435911,1.435922,1.435922,1.435922,1.435922,1.435922,1.435915,1.435922,1.435922,1.435519
254,1.435922,1.435922,1.460256,1.437366,1.435922,1.43595,1.435922,1.435922,1.435922,1.435922,...,1.435734,1.435922,1.435922,1.435922,1.435922,1.435922,1.435921,1.435922,1.435922,1.434708
256,1.435922,1.435922,1.436049,1.435839,1.435922,1.435923,1.435922,1.435922,1.435922,1.435922,...,1.435925,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435797
273,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,...,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922
384,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,...,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922
388,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,...,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922
392,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,...,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922
408,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,...,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922,1.435922


In [41]:
class CFRecommender:
    
    MODEL_NAME = 'Collaborative Filtering'
    
    def __init__(self, predictions_df):
        self.predictions_df = predictions_df
        
    def get_model_name(self):
        return self.MODEL_NAME

    def recommend_items(self, user_id, items_to_ignore=[], topn=10, verbose=False):
        
        # Get and sort the user's predictions
        sorted_user_predictions = self.predictions_df.loc[user_id].sort_values(ascending=False)
        recommendations = {'ISBN': sorted_user_predictions.index, 'predicted_rating': sorted_user_predictions.values}
        recommendations_df = pd.DataFrame(recommendations)
        # Recommend the highest predicted rating movies that the user hasn't seen yet.
        recommendations_df = recommendations_df[~recommendations_df['ISBN'].isin(items_to_ignore)] \
                               .sort_values('predicted_rating', ascending = False) \
                               .head(topn)

        return recommendations_df
    
cf_recommender_model = CFRecommender(preds_df)

In [42]:

#print('Evaluating Collaborative Filtering (SVD Matrix Factorization) model...')
cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(cf_recommender_model)
print('\nGlobal metrics:\n%s' % cf_global_metrics)
cf_detailed_results_df.head(10)


Global metrics:
{'modelName': 'Collaborative Filtering', 'recall@5': 0.9804432855280313, 'recall@10': 1.0, 'precision@5': 0.6975881261595547, 'precision@10': 0.686046511627907}


Unnamed: 0,hits@5_count,hits@10_count,recommended@5_count,recommended@10_count,relevents,recall@5,recall@10,precision@5,precision@10,User-ID
1421,5,9,5,10,9,0.555556,1.0,1.0,0.9,153662
666,5,5,5,6,5,1.0,1.0,1.0,0.833333,248718
784,5,10,5,10,10,0.5,1.0,1.0,1.0,11676
625,5,6,5,10,6,0.833333,1.0,1.0,0.6,76499
263,5,5,5,6,5,1.0,1.0,1.0,0.833333,197659
21,5,10,5,10,10,0.5,1.0,1.0,1.0,98391
700,4,4,4,4,4,1.0,1.0,1.0,1.0,35859
797,4,4,4,4,4,1.0,1.0,1.0,1.0,69078
1466,4,4,4,4,4,1.0,1.0,1.0,1.0,63714
1265,3,3,3,3,3,1.0,1.0,1.0,1.0,234828


In [43]:
cf_recommender_model.recommend_items(11676)

Unnamed: 0,ISBN,predicted_rating
0,3453199863,8.896539
1,821749668,8.885522
2,789205106,8.875119
3,60083263,8.87304
4,60609176,8.871307
5,374503486,8.871209
6,380005239,8.870402
7,590401807,8.862322
8,192835092,8.82569
9,962770124,8.82569


# Hybrid Model
I choose the result of the Best Content Base Model(the one with count vectorizer) 
and add it to the result of the Collaborative Filtering Model , then sort the Final result by predicted_rating.



The Collaborative did better than the Hybrid , I think that because The Content Base Model
give High predicted_rating to books that is not in the user's favorites

In [44]:
class HybridRecommender:
    
    MODEL_NAME = 'Hybrid Recommender'
    
    def __init__(self, CBRecommender,CFRecommender):
        self.CBRecommender = CBRecommender
        self.CFRecommender = CFRecommender
        
    def get_model_name(self):
        return self.MODEL_NAME

    def recommend_items(self, user_id, items_to_ignore=[], topn=10):
        
        cb_recommendations=self.CBRecommender.recommend_items(user_id,items_to_ignore,topn)
        cf_recommendations=self.CFRecommender.recommend_items(user_id,items_to_ignore,topn)
        
        # Get and sort the user's predictions
        # Recommend the highest predicted rating movies that the user hasn't seen yet.
        recommendations_df = cb_recommendations.append(cf_recommendations) \
                               .sort_values('predicted_rating', ascending = False) \
                               .head(topn)

        return recommendations_df
    
Hybrid_recommender_model = HybridRecommender(content_based_recommender_model,cf_recommender_model)

In [45]:
print('Evaluating Hybrid model...')
Hybrid_global_metrics, Hybrid_detailed_results_df = model_evaluator.evaluate_model(Hybrid_recommender_model)
print('\nGlobal metrics:\n%s' % Hybrid_global_metrics)
Hybrid_detailed_results_df.head(10)

Evaluating Hybrid model...

Global metrics:
{'modelName': 'Hybrid Recommender', 'recall@5': 0.9804941482444733, 'recall@10': 1.0, 'precision@5': 0.5221606648199446, 'precision@10': 0.48243412797992474}


Unnamed: 0,hits@5_count,hits@10_count,recommended@5_count,recommended@10_count,relevents,recall@5,recall@10,precision@5,precision@10,User-ID
263,5,5,5,6,5,1.0,1.0,1.0,0.833333,197659
21,5,10,5,10,10,0.5,1.0,1.0,1.0,98391
784,5,10,5,10,10,0.5,1.0,1.0,1.0,11676
1421,5,9,5,10,9,0.555556,1.0,1.0,0.9,153662
666,5,5,5,6,5,1.0,1.0,1.0,0.833333,248718
625,5,6,5,10,6,0.833333,1.0,1.0,0.6,76499
1466,4,4,4,4,4,1.0,1.0,1.0,1.0,63714
797,4,4,4,4,4,1.0,1.0,1.0,1.0,69078
700,4,4,4,4,4,1.0,1.0,1.0,1.0,35859
1659,3,3,3,3,3,1.0,1.0,1.0,1.0,56399


In [46]:
cf_recommender_model.recommend_items(104636)

Unnamed: 0,ISBN,predicted_rating
0,0440414806,8.868777
1,0064471047,8.866766
2,0679725245,8.124492
3,034532532X,6.640091
4,006108199X,5.892212
5,0130017604,1.521193
6,037571927X,1.507237
7,0312965257,1.471651
8,0451193202,1.466377
9,0471193615,1.459296


In [47]:
content_based_recommender_model.recommend_items(104636)

Unnamed: 0,ISBN,predicted_rating
0,679725245,7.863337
1,440414806,6.178021
2,440402832,4.696921
3,679753826,4.351559
4,679729267,4.351559
5,679736298,4.149049
6,1400030935,4.149049
7,679736549,4.012742
8,394702409,4.012742
9,375702202,4.012742


In [48]:
Hybrid_recommender_model.recommend_items(104636)

Unnamed: 0,ISBN,predicted_rating
0,0440414806,8.868777
1,0064471047,8.866766
2,0679725245,8.124492
0,0679725245,7.863337
3,034532532X,6.640091
1,0440414806,6.178021
4,006108199X,5.892212
2,0440402832,4.696921
4,0679729267,4.351559
3,0679753826,4.351559
