### **Import libraries**

In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

### **Download data**

In [2]:
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

--2023-04-03 13:33:53--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 104.26.2.33, 172.67.70.149, 104.26.3.33, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|104.26.2.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip’


2023-04-03 13:33:54 (30.6 MB/s) - ‘book-crossings.zip’ saved [26085508/26085508]

Archive:  book-crossings.zip
  inflating: BX-Book-Ratings.csv     
  inflating: BX-Books.csv            
  inflating: BX-Users.csv            


In [3]:
# Names of the extracted CSV files read by the loading cell.
books_filename, ratings_filename = 'BX-Books.csv', 'BX-Book-Ratings.csv'

### **Import datasets**

In [4]:
# Parser options shared by both files: ';' as the field separator, ISO-8859-1
# as the text encoding, and row 0 treated as a header row whose labels are
# replaced by the column names passed via `names`.
csv_read_options = dict(encoding="ISO-8859-1", sep=";", header=0)

# Books: keep only ISBN, title and author, all read as strings.
books_columns = ['isbn', 'title', 'author']
df_books = pd.read_csv(
    books_filename,
    names=list(books_columns),
    usecols=list(books_columns),
    dtype=dict.fromkeys(books_columns, 'str'),
    **csv_read_options)

# Ratings: one (user, isbn, rating) record per row, with compact numeric dtypes.
ratings_dtypes = {'user': 'int32', 'isbn': 'str', 'rating': 'float32'}
df_ratings = pd.read_csv(
    ratings_filename,
    names=list(ratings_dtypes),
    usecols=list(ratings_dtypes),
    dtype=ratings_dtypes,
    **csv_read_options)

In [5]:
# Preview the first five book records to check how the columns were parsed.
df_books.head(5)

Unnamed: 0,isbn,title,author
0,195153448,Classical Mythology,Mark P. O. Morford
1,2005018,Clara Callan,Richard Bruce Wright
2,60973129,Decision in Normandy,Carlo D'Este
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata
4,393045218,The Mummies of Urumchi,E. J. W. Barber


In [6]:
# Preview the first five rating records.
df_ratings.head(5)

Unnamed: 0,user,isbn,rating
0,276725,034545104X,0.0
1,276726,0155061224,5.0
2,276727,0446520802,0.0
3,276729,052165615X,3.0
4,276729,0521795028,6.0


### **Check missing data**

In [7]:
# Count the missing values in each column of the books table.
df_books.isna().sum()

isbn      0
title     0
author    1
dtype: int64

In [8]:
# Show the book rows whose author value is missing.
df_books.loc[df_books['author'].isna()]

Unnamed: 0,isbn,title,author
187700,9627982032,The Credit Suisse Guide to Managing Your Perso...,


A quick search on Amazon shows that no author is listed for this book, so the missing value is genuine.

In [9]:
# Count the missing values in each column of the ratings table.
df_ratings.isna().sum()

user      0
isbn      0
rating    0
dtype: int64

### **Merge the dataframes**

"Inner" join because we only want to have books with a rating. If there are some ratings with faulty references, they will be ignored as well with this approach.

In [10]:
# Attach title and author to each rating by matching on ISBN. The inner join
# keeps only ratings whose ISBN also appears in the books table.
df_ratings_books = df_ratings.merge(df_books, on='isbn', how='inner')

In [11]:
# Preview the first five rows of the merged ratings/books table.
df_ratings_books.head(5)

Unnamed: 0,user,isbn,rating,title,author
0,276725,034545104X,0.0,Flesh Tones: A Novel,M. J. Rose
1,2313,034545104X,5.0,Flesh Tones: A Novel,M. J. Rose
2,6543,034545104X,0.0,Flesh Tones: A Novel,M. J. Rose
3,8680,034545104X,5.0,Flesh Tones: A Novel,M. J. Rose
4,10314,034545104X,9.0,Flesh Tones: A Novel,M. J. Rose


In [12]:
# Pivoting the full merged table takes up too much space:
# df_ratings_books_pivot = df_ratings_books.pivot_table(index='title', columns='user', values='rating').fillna(0)
#
# That's why a smaller random sample (frac=.01, i.e. 1% of the rows) is used to continue.
# random_state=1 fixes the seed so that re-running the cell draws the same rows.
df_ratings_books_sample = df_ratings_books.sample(frac=.01, random_state=1) 
# One row per title and one column per user; title/user pairs without a rating in the sample are filled with 0.
df_ratings_books_pivot = df_ratings_books_sample.pivot_table(index='title', columns='user', values='rating').fillna(0)

### **Build the nearest-neighbors model**

In [13]:
# Build NearestNeighbors Object
model_nn = NearestNeighbors(metric='cosine', algorithm='auto', n_neighbors=7)

# Fit the NearestNeighbor
model_nn.fit(df_ratings_books_pivot)

### **Build recommendation endpoint**

In [32]:
def get_recommendations(title='', n_recommendations=10):
    """Return the titles closest to ``title`` according to ``model_nn``.

    The row for ``title`` is taken from ``df_ratings_books_pivot`` and used as
    the query for the fitted nearest-neighbour model. Because that row is part
    of the fitted data, the query title itself can appear in the result.

    Parameters
    ----------
    title : str
        Title to look up; it must be a label in the index of
        ``df_ratings_books_pivot``.
    n_recommendations : int, default 10
        Number of neighbours to request from the model.

    Returns
    -------
    numpy.ndarray
        One-dimensional object array with the neighbouring titles, in the
        order returned by ``model_nn.kneighbors``.

    Raises
    ------
    KeyError
        If ``title`` is not in the pivot table's index (this includes the
        default empty string).
    """
    # Double brackets keep the query as a one-row DataFrame, i.e. the 2-D
    # input that kneighbors expects.
    query = df_ratings_books_pivot.loc[[title]]
    indices = model_nn.kneighbors(query, n_recommendations, return_distance=False)
    # `indices` holds one row of positions per query row. Pick that single row
    # first and index a plain NumPy array: indexing a pandas Index with a 2-D
    # array was deprecated (it emitted a warning) and raises in pandas 2.x.
    return df_ratings_books_pivot.index.to_numpy()[indices[0]]

In [34]:
# Example query: list the neighbours of one title from the sampled data.
get_recommendations(title='Speaking In Tongues : A Novel')

  return df_ratings_books_pivot.index[indices][0]


array(['Speaking In Tongues : A Novel',
       'Speaker for the Dead (Ender Wiggins Saga (Paperback))',
       'Sparrowhawk Book One: Jack Frake',
       'Spawn of Dykes to Watch Out for (Dykes to Watch Out for)',
       'Speak',
       'Speak Without Fear : A Total System for Becoming a Natural, Confident Communicator',
       'Speak for the Dead (A Viking novel of mystery and suspense)',
       'Spanish Serenade', 'Special Delivery',
       'Special Forces: A Guided Tour of U.S. Army Special Forces'],
      dtype=object)