# BOOK RECOMMENDATION SYSTEM

## Import all libraries

In [1]:
import os
import sys
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
%matplotlib inline

In [2]:
cwd=os.getcwd()     #current working directory
print(cwd)

C:\Users\HP\Desktop


## Import Datasets

In [3]:
books = pd.read_csv('books.csv')
tags = pd.read_csv('tags.csv')
book_tags = pd.read_csv('book_tags.csv')

In [4]:
tags.head(5)

Unnamed: 0,tag_id,tag_name
0,0,-
1,1,--1-
2,2,--10-
3,3,--12-
4,4,--122-


In [5]:
book_tags.head(5)

Unnamed: 0,goodreads_book_id,tag_id,count
0,1,30574,167697
1,1,11305,37174
2,1,11557,34173
3,1,8717,12986
4,1,33114,12716


## Pre-processing

In [6]:
tags = pd.merge( book_tags,tags, left_on = 'tag_id', right_on = 'tag_id', how = 'inner')
print('shape:',tags.shape)

shape: (999912, 4)


In [7]:
tags.head(5)

Unnamed: 0,goodreads_book_id,tag_id,count,tag_name
0,1,30574,167697,to-read
1,2,30574,24549,to-read
2,3,30574,496107,to-read
3,5,30574,11909,to-read
4,6,30574,298,to-read


In [8]:
books.head(5)

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [9]:
books_merged = pd.merge(books, tags, left_on ='book_id', right_on = 'goodreads_book_id',how = 'inner')

## Remove null values

In [10]:
for i in books_merged.columns:
    books_merged[i] = books_merged[i].fillna(' ')
for i in books.columns:
    books[i] = books[i].fillna(' ')

In [11]:
joined_merged_books = books_merged.groupby('book_id')['tag_name'].apply(' '.join).reset_index()
joined_merged_books.head()

Unnamed: 0,book_id,tag_name
0,1,to-read fantasy favorites currently-reading yo...
1,2,to-read fantasy favorites currently-reading yo...
2,3,to-read fantasy favorites currently-reading yo...
3,5,to-read fantasy favorites currently-reading yo...
4,6,to-read fantasy young-adult fiction harry-pott...


In [12]:
books.head(10)

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439023480.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780439554930.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316015840.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061120080.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743273560.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...
5,6,11870085,11870085,16827462,226,525478817,9780525478810.0,John Green,2012.0,The Fault in Our Stars,...,2346404,2478609,140739,47994,92723,327550,698471,1311871,https://images.gr-assets.com/books/1360206420m...,https://images.gr-assets.com/books/1360206420s...
6,7,5907,5907,1540236,969,618260307,9780618260300.0,J.R.R. Tolkien,1937.0,The Hobbit or There and Back Again,...,2071616,2196809,37653,46023,76784,288649,665635,1119718,https://images.gr-assets.com/books/1372847500m...,https://images.gr-assets.com/books/1372847500s...
7,8,5107,5107,3036731,360,316769177,9780316769170.0,J.D. Salinger,1951.0,The Catcher in the Rye,...,2044241,2120637,44920,109383,185520,455042,661516,709176,https://images.gr-assets.com/books/1398034300m...,https://images.gr-assets.com/books/1398034300s...
8,9,960,960,3338963,311,1416524797,9781416524790.0,Dan Brown,2000.0,Angels & Demons,...,2001311,2078754,25112,77841,145740,458429,716569,680175,https://images.gr-assets.com/books/1303390735m...,https://images.gr-assets.com/books/1303390735s...
9,10,1885,1885,3060926,3455,679783261,9780679783270.0,Jane Austen,1813.0,Pride and Prejudice,...,2035490,2191465,49152,54700,86485,284852,609755,1155673,https://images.gr-assets.com/books/1320399351m...,https://images.gr-assets.com/books/1320399351s...


In [13]:
books= pd.merge(books,joined_merged_books,left_on = 'book_id', right_on = 'book_id', how= 'left')

In [14]:
books.head(5)

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,tag_name
0,1,2767052,2767052,2792775,272,439023483,9780439023480.0,Suzanne Collins,2008.0,The Hunger Games,...,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...,to-read fantasy favorites currently-reading yo...
1,2,3,3,4640799,491,439554934,9780439554930.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...,to-read fantasy favorites currently-reading yo...
2,3,41865,41865,3212258,226,316015849,9780316015840.0,Stephenie Meyer,2005.0,Twilight,...,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...,to-read fantasy favorites currently-reading yo...
3,4,2657,2657,3275794,487,61120081,9780061120080.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...,
4,5,4671,4671,245494,1356,743273567,9780743273560.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...,to-read fantasy favorites currently-reading yo...


In [15]:
columns = ['authors','title','language_code','tag_name']
for i in columns:
    books[i] = books[i].fillna(' ')

In [16]:
def combine_features(row):
    return row['authors']+' '+row['tag_name']+' '+row['title']
books['combined_features'] = books.apply(combine_features,axis = 1)
books['combined_features']

0       Suzanne Collins to-read fantasy favorites curr...
1       J.K. Rowling, Mary GrandPré to-read fantasy fa...
2       Stephenie Meyer to-read fantasy favorites curr...
3                      Harper Lee   To Kill a Mockingbird
4       F. Scott Fitzgerald to-read fantasy favorites ...
                              ...                        
9995            Ilona Andrews   Bayou Moon (The Edge, #2)
9996    Robert A. Caro   Means of Ascent (The Years of...
9997    Patrick O'Brian to-read fantasy favorites curr...
9998    Peggy Orenstein   Cinderella Ate My Daughter: ...
9999                    John Keegan   The First World War
Name: combined_features, Length: 10000, dtype: object

## Feature extraction using CountVectorizer

In [17]:
vectorizer = CountVectorizer(ngram_range=(1, 2),min_df=0, stop_words='english')
count_matrix = vectorizer.fit_transform(books['combined_features'])
vectorizer.get_feature_names_out()

array(['000', '000 books', '000 places', ..., 'ｆａｖｏｒｉｔｅｓ',
       'ｆａｖｏｒｉｔｅｓ archived', 'ｆａｖｏｒｉｔｅｓ george'], dtype=object)

## Pairwise similarity

In [18]:
cosine_similarity_score = cosine_similarity(count_matrix)
cosine_similarity_score

array([[1.        , 0.8186422 , 0.94291513, ..., 0.4073903 , 0.        ,
        0.        ],
       [0.8186422 , 1.        , 0.79644035, ..., 0.40033804, 0.        ,
        0.        ],
       [0.94291513, 0.79644035, 1.        , ..., 0.44531422, 0.        ,
        0.        ],
       ...,
       [0.4073903 , 0.40033804, 0.44531422, ..., 1.        , 0.        ,
        0.01488234],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.01488234, 0.        ,
        1.        ]])

In [19]:
liked_book = "The Case for Faith: A Journalist Investigates the Toughest Objections to Christianity"

In [20]:
def from_title_get_index(title):
    return books[books.title == title].index.values[0]
from_title_get_index(liked_book)

3590

In [21]:
index_of_books = from_title_get_index(liked_book)
books_similar = list(enumerate(cosine_similarity_score[index_of_books]))
sorted_books_similar =  sorted(books_similar, key = lambda x:x[1], reverse = True)
sorted_books_similar

[(3590, 1.0),
 (1276, 0.4583492485141056),
 (8706, 0.44513190725972585),
 (492, 0.10846522890932808),
 (9064, 0.10846522890932808),
 (3, 0.09166984970282112),
 (532, 0.09166984970282112),
 (771, 0.09166984970282112),
 (6545, 0.09166984970282112),
 (7462, 0.09166984970282112),
 (8445, 0.09166984970282112),
 (1357, 0.08084520834544431),
 (1599, 0.08084520834544431),
 (2190, 0.08084520834544431),
 (2244, 0.08084520834544431),
 (2428, 0.08084520834544431),
 (2640, 0.08084520834544431),
 (3347, 0.08084520834544431),
 (3958, 0.08084520834544431),
 (5214, 0.08084520834544431),
 (6291, 0.08084520834544431),
 (6573, 0.08084520834544431),
 (3760, 0.07397267214553091),
 (476, 0.07312724241271307),
 (1423, 0.07312724241271307),
 (1439, 0.07312724241271307),
 (1758, 0.07312724241271307),
 (1869, 0.07312724241271307),
 (1875, 0.07312724241271307),
 (1891, 0.07312724241271307),
 (2392, 0.07312724241271307),
 (2620, 0.07312724241271307),
 (2767, 0.07312724241271307),
 (2773, 0.07312724241271307),
 (31

In [22]:
def from_index_get_title(index):
    return books[books.index == index].title.values[0]

## Top 10 books recommended

In [23]:
i = 0
for book_i in sorted_books_similar:
    print(i,from_index_get_title(book_i[0]))
    i=i+1
    if i>10:
        break

0 The Case for Faith: A Journalist Investigates the Toughest Objections to Christianity
1 The Case for Christ
2 The Case for a Creator: A Journalist Investigates Scientific Evidence That Points Toward God
3 Mere Christianity
4 Faith
5 To Kill a Mockingbird
6 Go Set a Watchman
7 The Republic
8 The Piano Teacher
9 Saving Faith
10 Basket Case
