# Book recommendation notebook for Model 3

This notebook contains the recommendation algorithm built for Model 3 of the book exchange app.

In [30]:
from urllib.parse import urlparse

def get_credentials(url=None):
    """
    Split a database connection URL into its individual components.

    Parameters
    ----------
    url : str, optional
        Connection URL of the form ``scheme://user:password@host:port/database``.
        When omitted, the ``DATABASE_URL`` environment variable is used; if that
        is not set either, the placeholder that used to be hard-coded here is
        parsed, which yields no usable credentials.

    Return
    ------
    tuple (database, username, password, hostname, port)
        Parts that are absent from the URL come back as ``None``
        (``''`` for the database name).
    """
    import os  # function-scope import so the cell's existing import line stays untouched

    if url is None:
        # keep secrets out of the notebook: prefer the environment over a literal
        url = os.environ.get('DATABASE_URL', '#########')
    db_url = urlparse(url)

    return (
        db_url.path[1:],  # database name = URL path without the leading '/'
        db_url.username,
        db_url.password,
        db_url.hostname,
        db_url.port,
    )

In [31]:
import psycopg2

# Resolve the connection settings, then open the single module-level
# connection and cursor that fetch() / update() / close_connection() rely on.
database, username, password, hostname, port = get_credentials()

connection = psycopg2.connect(
    host = hostname, port = port,
    database = database,
    user = username, password = password
)
cursor = connection.cursor()


def fetch(query, params=None):
    """
    Run a read query on the shared cursor and return all of its rows.

    The shared cursor and connection are closed when the call finishes, also
    when the query fails, so this helper serves one query per connection.

    Parameters
    ----------
    query : SQL statement to execute
    params : optional values bound to the placeholders in query
             (handed to cursor.execute as its second argument)

    Return
    ------
    records : rows returned by cursor.fetchall()
    columns : list of column names, taken from cursor.description
    """
    try:
        cursor.execute(query, params)
        records = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        print("Query executed.....")
        return records, columns
    finally:
        # release cursor and connection even if execute/fetchall raised
        close_connection(cursor)

def update(query, params=None):
    """
    Run a write query on the shared cursor and commit it.

    The shared cursor and connection are closed when the call finishes, also
    when the statement or the commit fails.

    Parameters
    ----------
    query : SQL statement to execute
    params : optional values bound to the placeholders in query
             (handed to cursor.execute as its second argument)
    """
    try:
        cursor.execute(query, params)
        connection.commit()
    finally:
        # release cursor and connection even if execute/commit raised
        close_connection(cursor)
    # only reached when the statement was committed successfully
    print("Database updated.....")

def close_connection(cursor):
    """
    Close the given cursor and the module-level connection.

    Parameters
    ----------
    cursor : cursor to close before the connection is closed
    """
    try:
        cursor.close()
    finally:
        # the connection must be closed even when closing the cursor fails
        connection.close()

In [32]:
# Load every row of the "Book" table into a DataFrame.
# NOTE: fetch() closes the shared database connection before returning, so no
# further query can be run on that connection after this cell.
import pandas as pd
records, columns = fetch("SELECT * FROM \"Book\";")
df = pd.DataFrame(records, columns = columns)

Query executed.....


In [33]:
df.head()

Unnamed: 0,id,isbn,title,description,author,genre,language,pages,image,rating,addedAt,latitude,longitude,ownerId,borrowerId,geolocation
0,1636bff3-3bc8-460e-8cb8-c017a845acb5,1380000066012,Bosnia,Aut corporis in sint quidem et facilis dolorem...,Elsa43,HISTORICAL,GERMAN,287,https://cdn.fakercloud.com/avatars/iduuck_128.jpg,2.59,2021-05-16 14:17:15.431,-50.0,-184.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
1,2382c977-021b-47e4-9c92-2883a4c07548,8257800564170,generating,Ea repellendus dolore eligendi a numquam in el...,Alisha.Lakin,RELIGION,ENGLISH,157,https://cdn.fakercloud.com/avatars/ryankirkman...,4.46,2021-05-16 14:17:17.832,82.0,-198.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
2,252baee1-ff5e-42d1-a283-437724cefac1,6227386569604,Iceland,Magni blanditiis nihil alias recusandae cum vo...,Lacy.Larkin56,PHILOSOPHY,OTHERS,171,https://cdn.fakercloud.com/avatars/faisalabid_...,4.79,2021-05-16 14:17:13.622,24.0,-185.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
3,41d894a9-98b3-4a46-abf1-16930699547b,6171816077083,full-range,Aliquam aut non deleniti assumenda sint ea ips...,Vance57,SELF_DEVELOPMENT,SPANISH,121,https://cdn.fakercloud.com/avatars/mr_shiznit_...,1.34,2021-05-16 14:17:12.256,-16.0,-193.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
4,5a79992f-9c92-4f8b-b7c6-99ff16fd6425,2985794626176,firewall,Molestiae omnis temporibus sed error facere se...,Santina.Adams,ROMANCE,GERMAN,133,https://cdn.fakercloud.com/avatars/d_kobelyats...,4.68,2021-05-16 14:17:11.660,-20.0,-186.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,


In [34]:
df.shape

(2000, 16)

In [35]:
# unique values of the `id` column of the "Book" table
# (these identify book rows; owners are in the separate `ownerId` column)
df['id'].unique()

array(['1636bff3-3bc8-460e-8cb8-c017a845acb5',
       '2382c977-021b-47e4-9c92-2883a4c07548',
       '252baee1-ff5e-42d1-a283-437724cefac1', ...,
       'f6119a57-452f-452a-bb34-828df7697260',
       'fb1415e6-3c29-423a-9436-e6f06fc6981d',
       'ffcdd10d-7fe4-46a9-82f4-01aa0b180a0b'], dtype=object)

In [36]:
len(df['id'].unique())

2000

In [37]:
# Shuffle the rows. The fixed seed makes the row order, and everything derived
# from it further down (book_map, book_dict), reproducible on a fresh run.
df = df.sample(frac = 1, random_state = 42)
df.head()

Unnamed: 0,id,isbn,title,description,author,genre,language,pages,image,rating,addedAt,latitude,longitude,ownerId,borrowerId,geolocation
1545,d24c7177-78d0-4a5f-abd3-0b7c0ad73a0a,4717400414869,Ball,Occaecati eum nulla aut dicta aut architecto i...,Tito.Stokes,BUSINESS,SPANISH,197,https://cdn.fakercloud.com/avatars/lebinoclard...,3.15,2021-05-16 14:33:47.925,-16.0,-194.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
888,25427dac-1460-4803-b760-7efeac5fb0da,4300161482766,incentivize,Doloribus amet ipsam iure possimus eos vel sed...,Abel.Hilll,DRAMA,SPANISH,107,https://cdn.fakercloud.com/avatars/hasslunsfor...,2.46,2021-05-16 14:26:50.288,53.0,-187.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
416,8185e49e-7333-4e67-937d-c400e8983e52,4936802301556,communities,Earum at tempora molestiae maiores magni ex mo...,Easter_Rempel,MATH,FRENCH,147,https://cdn.fakercloud.com/avatars/cybind_128.jpg,4.55,2021-05-16 14:21:35.283,-83.0,-192.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
1818,1d691c61-2b18-45b9-8644-82f87bb69de5,1158270558342,SAS,Ipsum autem rerum facere magnam eius illo expl...,Jaydon66,SCIENCE_FICTION,ENGLISH,85,https://cdn.fakercloud.com/avatars/devankoshal...,3.75,2021-05-16 14:37:02.326,-24.0,-184.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,
1141,af41b05f-4871-4003-b8b1-1dcae4e71d4e,2641753027215,Forks,Consectetur eos unde fugit modi rem et iste co...,Moshe_Beatty54,JOURNAL,OTHERS,194,https://cdn.fakercloud.com/avatars/alxndrustin...,1.98,2021-05-16 14:29:23.110,72.0,-188.0,41e5b097-e09d-427b-a770-45aef3eedfd6,,


In [38]:
df.dtypes

id                     object
isbn                   object
title                  object
description            object
author                 object
genre                  object
language               object
pages                   int64
image                  object
rating                float64
addedAt        datetime64[ns]
latitude              float64
longitude             float64
ownerId                object
borrowerId             object
geolocation            object
dtype: object

In [39]:
df.isna().sum()

id                0
isbn              0
title             0
description       0
author            0
genre             0
language          0
pages             0
image             0
rating            0
addedAt           0
latitude          0
longitude         0
ownerId           0
borrowerId     2000
geolocation    2000
dtype: int64

In [40]:
df.isna().sum()

id                0
isbn              0
title             0
description       0
author            0
genre             0
language          0
pages             0
image             0
rating            0
addedAt           0
latitude          0
longitude         0
ownerId           0
borrowerId     2000
geolocation    2000
dtype: int64

In [41]:
from scipy.sparse import csr_matrix
# pivot the ratings into a matrix with one row per isbn and one column per `id`;
# cells without a rating are filled with 0
# NOTE(review): `id` comes from the "Book" table and is unique per row, so every
# isbn row ends up with a single rating in its own column — confirm whether a
# user id was meant to be the column axis.
mat = df.pivot(index='isbn',columns='id',values='rating').fillna(0)

In [42]:
mat.head()

id,0030d269-9968-4926-9e95-ff73d38c0a4e,003d05ea-84e2-4798-9203-b4d85d836dcd,00752c24-20a6-4423-8413-4800b9fca5fb,0078193a-4dc7-4bea-b9c1-564e99ad42d0,008569f4-7bff-4a66-9a63-03789cd5cbdc,008f3cc7-a559-444a-beda-39242746397a,0156b80a-be2e-4059-83c8-75c449bb1d59,016a5e64-4102-43dc-b422-b10d9244a08f,018fe31a-aad3-402e-9d6a-baaa6df33035,01c0c42b-54af-494e-8ee9-7fc89489ac09,01ccefdb-5a6a-41fa-bb31-344ca55fa7bf,01e2cf8f-1ef3-4af9-9b00-672fa24a0aa7,01fa500d-24c4-4387-9233-9ac9a73b9f0d,020ff772-c861-45fb-be16-94e24a6f079a,027f68cb-ab9b-4857-88ec-2fc7c9ba8bf8,02bbe0d0-2c4f-4660-a919-99ff29a723de,02c3fc22-b52a-4e30-9534-1aae05a5294f,02c850ab-a67e-488a-adfe-235d2272b8af,02e042f1-b747-4a45-a0d8-370266fbdd41,02f101f2-7c88-4aa0-959c-df8600ef41e2,033d09ae-4983-420b-ba52-4a851230c106,035cd7c0-72e8-4cf9-ba04-06c59ccf4a6c,036a405d-529e-4b05-97d1-1d3954159797,03ef9d68-3663-4dba-a1b9-f768dd47d088,04385c1b-f896-452d-9081-dbc8a66aae5a,043f2d4c-dec2-4c74-b911-00fb3cde5962,044834c0-7b3f-4741-b46a-a469c1032891,0458a5ad-19f6-4694-a6d1-7ce7f94c9321,04677861-0cc8-420f-ab34-c0b21ff32da6,04865f4d-9019-431a-985c-0144d13afe43,04b46fbb-bf2c-4af5-a897-a3c439046783,04dac830-cd72-4abb-ba8f-3c5080296c4e,04ddba3f-cf81-4f34-90b4-6a2e1b26b81b,04e55865-1ad3-4ff9-9fef-1a223c68f26c,04e96527-0edd-4908-bf56-db08ff6aa60f,050a3975-5d24-40b1-b62c-2c8dc5401c62,050bdbd2-da61-441d-accd-5f756a1b767f,053e0571-449d-4a9e-b9e6-ebfeacf4e9fe,055cfc14-9171-41c4-88a3-52d941170429,0569f220-11fd-44be-a6a4-6a440db90699,...,fb270376-a66b-4074-a837-cc1f8bc8e051,fb4083f7-c445-4e92-9665-18976ed0c9e0,fba4ce02-1e79-45c6-941b-e28407d0d255,fbe4da9d-1b88-4aaf-b634-82b203be3d15,fc502c15-37de-43fa-9e16-96a5155bc8bb,fc6418d3-ff78-47df-b10a-f20bcc515f3d,fc9b1c5d-5532-4a58-8266-704dc5926dd9,fcafa910-8a1e-4772-8f73-16bdfe8148de,fd001a39-b665-4030-aff7-a34a29816558,fd04fa97-5195-4e1b-93d3-4db9d11fb311,fd0d8ce5-d4c0-48f6-9243-cdaa927658de,fd125dfe-b52a-4978-af9e-28585b178905,fd349094-d416-4ed0-9fa1-a8d96ec05a89,fd3dd0cd-9453-44ea-9dec-6029babc
b170,fd409d63-f62c-43d7-9dcb-21dc16b5c747,fd4de9e9-4e9b-432e-9594-1eb8690fc5f0,fdb508d6-f994-4f12-bd67-04ca2f5f231f,fdbe6ba7-b4fb-4db7-8fa0-b2adb0d87528,fde000ec-294d-49fc-9fa1-4fcd4c6b8508,fde4fe22-604f-4255-a760-0b74a992ec02,fdea53d6-91c7-4657-a7e6-b6df2872bd60,fdebbb5c-d542-4d50-b12a-ca4e5d0ffbac,fe29d305-db7c-4e75-b9f8-8fe02b54c072,fe5199c0-5627-43ee-a347-80f67a390d99,fe64e5a8-bd0c-4085-97e0-98c05623cfae,fe7460b6-d819-4646-9741-e6b701f90cab,fe7b1fb9-1f06-45db-ae05-f3200ad76c47,fe8c1eb5-24bc-43c4-acc9-07f71c6bd21e,fe9f3704-d61b-4434-bfb2-0ee1d7847db8,feb33d41-cc4d-4dc3-ab22-90a76f3fb5bf,feccbef4-fb82-49ce-ba35-af3a5c1cd6df,fedf6898-f65a-463f-b4b5-c8c5314308ee,feebf4b4-6245-421d-85e7-3b1854e13d12,ff1f3e48-80f1-47f6-8e69-b4b0790a5022,ff32eae6-3376-41a3-8ea9-dfabd1be34e0,ff60ab27-ab37-4ff7-b069-0a8981782ef0,ffcdd10d-7fe4-46a9-82f4-01aa0b180a0b,ffd4aedf-d92c-4251-aef7-39eb4736609c,ffd4facd-c1ab-418a-8d14-19407e0b61d4,ffee7924-613e-414f-9ff2-df1185061dc5
isbn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1003247164189,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1004844905808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1006544403731,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1008103664964,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1017235839739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
mat.shape

(2000, 2000)

In [44]:
sparse_mat = csr_matrix(mat.values)

In [45]:
book_map = df[['isbn', 'title']]

In [46]:
book_map.head()

Unnamed: 0,isbn,title
1545,4717400414869,Ball
888,4300161482766,incentivize
416,4936802301556,communities
1818,1158270558342,SAS
1141,2641753027215,Forks


In [47]:
book_map

Unnamed: 0,isbn,title
1545,4717400414869,Ball
888,4300161482766,incentivize
416,4936802301556,communities
1818,1158270558342,SAS
1141,2641753027215,Forks
...,...,...
1485,7051269555465,Borders
504,2036152709275,Movies
439,5992713082582,Frozen
265,1332631649449,integrate


In [48]:
book_map.values

array([['4717400414869', 'Ball'],
       ['4300161482766', 'incentivize'],
       ['4936802301556', 'communities'],
       ...,
       ['5992713082582', 'Frozen'],
       ['1332631649449', 'integrate'],
       ['4444058092311', 'Ohio']], dtype=object)

In [49]:
# Map each title to its row position in `mat`: titles are looked up by isbn in
# the order of mat.index and numbered from 0.
# NOTE(review): titles repeat across books (len(hash_map) is 815 for 2000 matrix
# rows), so for a repeated title only its last row position is kept.
hash_map = {
    book_title: row_position
    for row_position, book_title in enumerate(
        book_map.set_index('isbn').loc[mat.index]['title'].tolist()
    )
}

In [50]:
hash_map

{'(E.M.U.-6)': 1733,
 '(Keeling)': 1364,
 '(customarily': 283,
 '1080p': 356,
 '24/365': 532,
 '5th': 1104,
 'ADP': 1655,
 'AGP': 1623,
 'AI': 1582,
 'Account': 1931,
 'Accountability': 1692,
 'Accounts': 1201,
 'Adaptive': 1208,
 'Afghani': 1919,
 'Afghanistan': 655,
 'Agent': 1395,
 'Alabama': 1771,
 'Alaska': 1804,
 'Albania': 1846,
 'Analyst': 483,
 'Angola': 1435,
 'Applications': 1387,
 'Architect': 1976,
 'Ariary': 1044,
 'Arizona': 1581,
 'Armenia': 952,
 'Armenian': 407,
 'Assimilated': 894,
 'Assistant': 1193,
 'Associate': 1946,
 'Assurance': 1466,
 'Australia': 866,
 'Auto': 1859,
 'Automotive': 1828,
 'Avon': 1983,
 'Awesome': 1577,
 'B2C': 1142,
 'Baby': 1818,
 'Bacon': 1891,
 'Balanced': 1714,
 'Ball': 1325,
 'Barbados': 1852,
 'Beauty': 1681,
 'Bedfordshire': 1941,
 'Belgium': 1628,
 'Belize': 631,
 'Benin': 105,
 'Berkshire': 1793,
 'Bermudian': 686,
 'Bike': 1549,
 'Bolivar': 1711,
 'Books': 1964,
 'Borders': 1934,
 'Bosnia': 88,
 'Branch': 447,
 'Brand': 1476,
 'Bran

In [51]:
!pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading https://files.pythonhosted.org/packages/43/ff/74f23998ad2f93b945c0309f825be92e04e0348e062026998b5eefef4c33/fuzzywuzzy-0.18.0-py2.py3-none-any.whl
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [52]:
# function for searching books in database using keywords
from fuzzywuzzy import fuzz 
def matching(book_map, fav_book, min_ratio=60):
  """
  Fuzzy-searches the book titles for the given keyword.

  Parameters
  --------
  book_map : dict whose items are (isbn, title) pairs, e.g. book_dict
  fav_book : name of the favourite book
  min_ratio : lowest fuzz.ratio score a title needs to count as a match
              (defaults to the previously hard-coded 60)

  Return
  -------
  list of (title, isbn, ratio) tuples ordered from highest to lowest ratio,
  or None when no title reaches min_ratio
  """
  keyword = fav_book.lower()  # loop-invariant, computed once
  match = []
  for isbn, title in book_map.items():
    ratio = fuzz.ratio(title.lower(), keyword)
    if ratio >= min_ratio:
      match.append((title, isbn, ratio))
  # ascending sort + reversal (rather than reverse=True) keeps the original
  # ordering among titles that share the same ratio
  match = sorted(match, key=lambda x: x[2])[::-1]
  if not match:
    print("No match found!!!")
    return None
  print("Found possible matches in our database: {0}".format([x[0] for x in match]))
  return match



In [53]:
book_dict = dict(zip(book_map['isbn'],book_map['title']))

In [55]:
matching(book_dict,"violet")[0][0]

Found possible matches in our database: ['violet', 'violet', 'violet', 'violet', 'violet', 'violet', 'Inlet', 'Village', 'Vietnam', 'Isle']


'violet'

In [56]:
# function for making recommendations
import time
from sklearn.neighbors import NearestNeighbors
def recommend(fav_book,n_recommendations,book_map,data):
  """
  Returns the top n recommendations based on the keyword entered

  Parameters
  ---------
  fav_book : Keyword for favourite book
  n_recommendations : Number of recommendations to be made
  book_map : dict mapping title -> row index of data (this is what
             make_recommendations passes in as hash_map)
  data : csr matrix of ratings, one row per book

  Return
  --------
  list of (row index, distance) tuples for the recommended rows, ordered from
  the largest to the smallest distance; empty list when no title matches
  """
  # instantiating model
  model = NearestNeighbors(algorithm ='brute',metric='cosine')
  # fitting the model
  model.fit(data)
  # matching() unpacks each item as (key, title), so hand it row -> title pairs;
  # the key of the best match is then the row index of the input book
  titles_by_row = {row: title for title, row in book_map.items()}
  matches = matching(titles_by_row, fav_book)
  if not matches:
    # matching() found nothing (it returns None in that case)
    return []
  book_row = matches[0][1]
  # inference
  print("Recommendation system starting to make inference......")
  t0 = time.time()
  # one extra neighbour because the closest hit is dropped below; never ask for
  # more neighbours than there are rows
  k = min(n_recommendations + 1, data.shape[0])
  distances, rows = model.kneighbors(data[book_row], n_neighbors=k)
  # [0] selects the single query row (works for k == 1 too, unlike squeeze());
  # [:0:-1] reverses the distance-sorted list and drops its closest entry,
  # which is normally the query book itself
  recommends = sorted(zip(rows[0].tolist(), distances[0].tolist()), key=lambda x : x[1])[:0:-1]
  print(f"Time Taken for recommendation : {time.time()-t0}")
  return recommends

In [57]:
# function to show book names after recommendations
def make_recommendations(fav_book, n_recommendations, mat, hash_map):
  """
  makes n recommendations based on fav books
  Parameters
  ----------
  fav_book : favourite book
  n_recommendations : number of recommendations to be made
  mat : rating matrix, passed to recommend() as its data argument
  hash_map : dict mapping title -> row index of mat
  Return
  ----------
  None, print recommendations
  """
  # forward the requested count instead of a fixed 10
  raw_recommendations = recommend(fav_book,n_recommendations=n_recommendations,book_map=hash_map, data=mat)
  # row index -> title (items() must be called to iterate the pairs)
  reverse_hashmap = {v : k for k,v in hash_map.items()}
  print('Recommendations for {}:'.format(fav_book))
  for i,(row,dist) in enumerate(raw_recommendations):
    # a row may have no title in hash_map (titles are keys, so duplicates collapse)
    title = reverse_hashmap.get(row, 'unknown title (row {})'.format(row))
    print('{0}: {1}, with distance of {2}'.format(i+1,title,dist))

In [58]:
book_map

Unnamed: 0,isbn,title
1545,4717400414869,Ball
888,4300161482766,incentivize
416,4936802301556,communities
1818,1158270558342,SAS
1141,2641753027215,Forks
...,...,...
1485,7051269555465,Borders
504,2036152709275,Movies
439,5992713082582,Frozen
265,1332631649449,integrate


In [60]:
book_map[book_map['isbn']=='7051269555465']

Unnamed: 0,isbn,title
1485,7051269555465,Borders


In [61]:
print(sparse_mat[1])

  (0, 242)	4.2


In [62]:
hash_map

{'(E.M.U.-6)': 1733,
 '(Keeling)': 1364,
 '(customarily': 283,
 '1080p': 356,
 '24/365': 532,
 '5th': 1104,
 'ADP': 1655,
 'AGP': 1623,
 'AI': 1582,
 'Account': 1931,
 'Accountability': 1692,
 'Accounts': 1201,
 'Adaptive': 1208,
 'Afghani': 1919,
 'Afghanistan': 655,
 'Agent': 1395,
 'Alabama': 1771,
 'Alaska': 1804,
 'Albania': 1846,
 'Analyst': 483,
 'Angola': 1435,
 'Applications': 1387,
 'Architect': 1976,
 'Ariary': 1044,
 'Arizona': 1581,
 'Armenia': 952,
 'Armenian': 407,
 'Assimilated': 894,
 'Assistant': 1193,
 'Associate': 1946,
 'Assurance': 1466,
 'Australia': 866,
 'Auto': 1859,
 'Automotive': 1828,
 'Avon': 1983,
 'Awesome': 1577,
 'B2C': 1142,
 'Baby': 1818,
 'Bacon': 1891,
 'Balanced': 1714,
 'Ball': 1325,
 'Barbados': 1852,
 'Beauty': 1681,
 'Bedfordshire': 1941,
 'Belgium': 1628,
 'Belize': 631,
 'Benin': 105,
 'Berkshire': 1793,
 'Bermudian': 686,
 'Bike': 1549,
 'Bolivar': 1711,
 'Books': 1964,
 'Borders': 1934,
 'Bosnia': 88,
 'Branch': 447,
 'Brand': 1476,
 'Bran

In [63]:
len(hash_map)

815

In [64]:
# function to get list of recommendations based on isbn
def recommendations_from_isbn(isbn,data,n_neighbors,book_dict,hash_map):
  """
  This function provides recommendations based on isbn

  Parameters
  ---------
  isbn: isbn of the book from which recommendations have to be made
  data: sparse matrix data
  n_neighbors: number of neighbours to recommend; one more is requested from
               the model because the closest hit is dropped
  book_dict: dictionary linking isbn to book name
  hash_map: dictionary linking title to sparse matrix index

  Return
  --------
  recoms: titles of the recommended rows, ordered from the largest to the
          smallest distance; rows that have no title in hash_map are skipped

  Raises
  --------
  KeyError: if isbn is not in book_dict or its title is not in hash_map
  """
  dummy_model = NearestNeighbors(algorithm='brute',metric='cosine')
  dummy_model.fit(data)
  title = book_dict[isbn]
  # honour n_neighbors (previously fixed at 11), capped at the number of rows
  k = min(n_neighbors + 1, data.shape[0])
  # kneighbors returns the distances first and the row indices second
  distances, rows = dummy_model.kneighbors(data[hash_map[title]],n_neighbors=k)
  # [:0:-1] reverses the distance-sorted list and drops its closest entry,
  # which is normally the query book itself
  ranked = sorted(zip(rows[0].tolist(), distances[0].tolist()),key=lambda x : x[1])[:0:-1]
  # invert hash_map once instead of scanning it for every neighbour
  title_by_row = {row: name for name, row in hash_map.items()}
  recoms = [title_by_row[row] for row, _ in ranked if row in title_by_row]
  return recoms

In [65]:
recommendations_from_isbn('7051269555465',data=sparse_mat,n_neighbors=10, book_dict=book_dict, hash_map=hash_map)

['infrastructures', 'Connecticut']

In [66]:
# function to get recommended books' isbn
def get_books_isbn(isbn,data,n_neighbors,book_dict,hash_map):
  """
  Returns the isbns of the books recommended for the given isbn

  Parameters
  ---------
  isbn: isbn of the book from which recommendations have to be made
  data: sparse matrix data
  n_neighbors: number of neighbors for knn models
  book_dict: dictionary linking isbn to book name
  hash_map: dictionary linking title to sparse matrix index

  Return
  --------
  isbns: every isbn in book_dict whose title equals a recommended title,
         grouped in the order of the recommended titles

  """
  recommended_titles = recommendations_from_isbn(
      isbn,
      data=data,
      n_neighbors=n_neighbors,
      book_dict=book_dict,
      hash_map=hash_map,
  )
  # a title shared by several books yields one isbn per book
  return [
      candidate_isbn
      for recommended in recommended_titles
      for candidate_isbn, candidate_title in book_dict.items()
      if candidate_title == recommended
  ]

In [67]:
get_books_isbn('7051269555465',data=sparse_mat,n_neighbors=10, book_dict=book_dict, hash_map=hash_map)

['5763925403357',
 '5215656947345',
 '6296119809151',
 '5209408262745',
 '6261602593586']

In [68]:
# Making recommendations using only one base model
# (fitted once on sparse_mat and reused, instead of fitting inside a function)
base_model = NearestNeighbors(algorithm='brute', metric='cosine')
base_model.fit(sparse_mat)
title = book_dict['7051269555465']
# kneighbors returns (distances, indices): `r` receives the distances and `d` the row indices
r,d = base_model.kneighbors(sparse_mat[hash_map[title]],n_neighbors=11)
recoms = []
# (row index, distance) pairs sorted by distance; [:0:-1] reverses the list and drops its closest entry
recommends = sorted(list(zip(d.squeeze().tolist(),r.squeeze().tolist())),key=lambda x : x[1])[:0:-1]
for (i,j) in recommends:
  # find the title stored for row index i; rows without an entry in hash_map add nothing
  for title, isbn in hash_map.items():
    if isbn == i:
      recoms.append(title)
recoms

['infrastructures', 'Connecticut']

In [69]:
# collect every isbn in book_dict whose title is one of the recommended titles
# (a title shared by several books contributes one isbn per book)
isbns = []
for book_name in recoms:
  for isb,title in book_dict.items():
    if title == book_name:
      isbns.append(isb)
isbns

['5763925403357',
 '5215656947345',
 '6296119809151',
 '5209408262745',
 '6261602593586']

In [70]:
# Making a base model and training it
model = NearestNeighbors(metric='cosine',algorithm='brute')
model.fit(sparse_mat)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [71]:
# Function for getting book title using base model
def get_title(model,isbn,n_neighbors=10):
  """
  Function to return title of recommended books using isbn

  Reads the module-level book_dict, hash_map and sparse_mat.

  Parameters
  -----------
  model : fitted nearest-neighbours model used for the lookup
  isbn : isbn of book from which recommendations are to be made
  n_neighbors : number of neighbours to recommend; one more is requested from
                the model because the closest hit is dropped (default 10)

  Return
  -----------
  recoms : list of titles of recommended books, ordered from the largest to
           the smallest distance; rows without a title in hash_map are skipped
  """
  title = book_dict[isbn]
  k = min(n_neighbors + 1, sparse_mat.shape[0])
  # query the model that was passed in (previously the global base_model was
  # used and the argument ignored); kneighbors returns distances, then indices
  distances, rows = model.kneighbors(sparse_mat[hash_map[title]],n_neighbors=k)
  # [:0:-1] reverses the distance-sorted list and drops its closest entry,
  # which is normally the query book itself
  ranked = sorted(zip(rows[0].tolist(), distances[0].tolist()),key=lambda x : x[1])[:0:-1]
  # invert hash_map once instead of scanning it for every neighbour
  title_by_row = {row: name for name, row in hash_map.items()}
  recoms = [title_by_row[row] for row, _ in ranked if row in title_by_row]
  return recoms

In [72]:
get_title(model,isbn='7051269555465')

['infrastructures', 'Connecticut']

In [73]:
# Function to get the isbns of the books recommended for an isbn
def get_recommendations(model,isbn):
  """
  Function to return the isbns of recommended books by using a globally declared model

  Parameters
  ----------
  model - globally trained model for recommendations
  isbn - isbn of book from which recommendations are to be made

  Return
  ----------
  isbns - list of recommended books' isbns: every isbn in the module-level
          book_dict whose title equals one of the titles from get_title
  """
  recommended_titles = get_title(model,isbn)
  # a title shared by several books yields one isbn per book
  return [
      candidate_isbn
      for recommended in recommended_titles
      for candidate_isbn, candidate_title in book_dict.items()
      if candidate_title == recommended
  ]

In [74]:
get_recommendations(model,isbn='7051269555465')

['5763925403357',
 '5215656947345',
 '6296119809151',
 '5209408262745',
 '6261602593586']

In [75]:
matches = matching(book_dict, "Infra")

Found possible matches in our database: ['Lira', 'intranet', 'Intranet', 'Intranet', 'Franc', 'Franc', 'Dinar', 'viral', 'frame', 'Franc', 'Franc']


In [76]:
matches

[('Lira', '6334332345054', 67),
 ('intranet', '3730365041643', 62),
 ('Intranet', '4759393444285', 62),
 ('Intranet', '6982255315408', 62),
 ('Franc', '4593888716772', 60),
 ('Franc', '2140181787312', 60),
 ('Dinar', '1946929195895', 60),
 ('viral', '5922507958487', 60),
 ('frame', '1087442301213', 60),
 ('Franc', '5283806301653', 60),
 ('Franc', '3292626669630', 60)]

In [77]:
for match in matches:
  print(match[0])
  print(match[1])

Lira
6334332345054
intranet
3730365041643
Intranet
4759393444285
Intranet
6982255315408
Franc
4593888716772
Franc
2140181787312
Dinar
1946929195895
viral
5922507958487
frame
1087442301213
Franc
5283806301653
Franc
3292626669630
