# Hybrid Recommendation using
a) Collaborative Filtering Model

b) Content-based Model

In [10]:
import pandas as pd
from random import randint

def generate_data(n_books = 3000, n_genres = 10, n_authors = 450, n_publishers = 50, n_readers = 30000, dataset_size = 100000):
    
    d = pd.DataFrame(
        {
            'book_id' : [randint(1, n_books) for _ in range(dataset_size)],
            'author_id' : [randint(1, n_authors) for _ in range(dataset_size)],
            'book_genre' : [randint(1, n_genres) for _ in range(dataset_size)],
            'reader_id' : [randint(1, n_readers) for _ in range(dataset_size)],
            'num_pages' : [randint(75, 700) for _ in range(dataset_size)],
            'book_rating' : [randint(1, 10) for _ in range(dataset_size)],
            'publisher_id' : [randint(1, n_publishers) for _ in range(dataset_size)],
            'publish_year' : [randint(2000, 2021) for _ in range(dataset_size)],
            'book_price' : [randint(1, 200) for _ in range(dataset_size)],
            'text_lang' : [randint(1,7) for _ in range(dataset_size)]
        }
    ).drop_duplicates()
    return d
  
df = generate_data(dataset_size = 100000)
df.to_csv('data.csv', index = False)
df.head()

Unnamed: 0,book_id,author_id,book_genre,reader_id,num_pages,book_rating,publisher_id,publish_year,book_price,text_lang
0,1991,326,5,12831,320,6,38,2005,103,4
1,2962,442,8,4591,614,2,48,2002,194,4
2,103,211,3,14533,672,4,26,2003,8,6
3,1537,32,8,11319,675,1,50,2000,17,6
4,2759,113,3,6034,183,5,20,2008,169,7


In [11]:
df = df.sort_values(by=['book_id'], ascending = True)

In [12]:
df.head()

Unnamed: 0,book_id,author_id,book_genre,reader_id,num_pages,book_rating,publisher_id,publish_year,book_price,text_lang
89641,1,330,6,15776,487,9,42,2004,160,2
51771,1,79,9,16083,485,8,44,2016,105,7
84252,1,51,1,25088,115,5,8,2014,102,3
58609,1,54,5,27594,379,7,21,2001,187,3
6386,1,64,2,13086,536,1,36,2019,49,7


In [13]:
# normalizing the data to 0s and 1s
import numpy as np
'''
FORMULA
X(NORMALIZED) = (X - Xminimum)/(Xmaximum - Xminimum)

 min_val = min(data)
    if min_val < 0:
        data = [x + abs(min_val) for x in data]
    max_val = max(data)
    
    return [x/max_val for x in data]

'''
def normalize(data):    
    return (data - np.min(data)/ (np.max(data)- np.min(data)))

In [14]:
#performing one hot encoding
def encoding(df, column):
    new = pd.get_dummies(df[column])
    new.reset_index(drop = True, inplace = True)
    return pd.concat([df, new], axis = 1)

In [15]:
''' 
Creating a Content based recommendation syatem as the base system

'''

class Recommender():
    def __init__(self,df):
        self.df = df
    
    def similarity(self, v1, v2):
        return np.sum(np.dot(v1,v2)/ np.cross(v1,v2)) 
    
    def recommend(self, book_id, rec):
        ip = self.df.loc[book_id].values
        self.df['sim'] = self.df.apply(lambda x: self.similarity(ip, x.values), axis =1)
        
        return self.df.nlargest(columns = 'sim', n = rec)

In [16]:
df['num_pages_norm'] = normalize(df['num_pages'].values)
df['book_rating_norm'] = normalize(df['book_rating'].values)
df['book_price_norm'] = normalize(df['book_price'].values)
df.head()

Unnamed: 0,book_id,author_id,book_genre,reader_id,num_pages,book_rating,publisher_id,publish_year,book_price,text_lang,num_pages_norm,book_rating_norm,book_price_norm
89641,1,330,6,15776,487,9,42,2004,160,2,486.88,8.888889,159.994975
51771,1,79,9,16083,485,8,44,2016,105,7,484.88,7.888889,104.994975
84252,1,51,1,25088,115,5,8,2014,102,3,114.88,4.888889,101.994975
58609,1,54,5,27594,379,7,21,2001,187,3,378.88,6.888889,186.994975
6386,1,64,2,13086,536,1,36,2019,49,7,535.88,0.888889,48.994975


In [17]:
df = encoding(df = df, column = 'publish_year')
df = encoding(df = df, column = 'book_genre')
df = encoding(df = df, column = 'text_lang')

#dropping redndant columns
cols = ['publish_year', 'book_genre', 'num_pages', 'book_rating', 'book_price', 'text_lang']
df.drop(columns = cols, inplace = True)

In [18]:
df.head()

Unnamed: 0,book_id,author_id,reader_id,publisher_id,num_pages_norm,book_rating_norm,book_price_norm,2000,2001,2002,...,8,9,10,1,2,3,4,5,6,7
89641,1,330,15776,42,486.88,8.888889,159.994975,0,0,0,...,1,0,0,0,0,0,0,0,0,1
51771,1,79,16083,44,484.88,7.888889,104.994975,0,0,0,...,0,0,0,0,0,0,0,1,0,0
84252,1,51,25088,8,114.88,4.888889,101.994975,0,0,0,...,0,0,0,0,0,0,1,0,0,0
58609,1,54,27594,21,378.88,6.888889,186.994975,0,0,0,...,0,0,0,0,0,0,0,1,0,0
6386,1,64,13086,36,535.88,0.888889,48.994975,0,0,1,...,0,0,0,0,0,0,1,0,0,0
