In [1]:
import pandas as pd
import numpy as np

In [2]:
# This pickled were engineered in last file (you can check it in BX-EDA)
books = pd.read_pickle('books_cleaned.pkl')
user_reviews = pd.read_pickle('user_reviews.pkl')

Below some reminder of files 

In [3]:
books.head()

Unnamed: 0,book_id,title,author,published,publisher,image_small,image_medium,image_large
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
5,399135782,The Kitchen God's Wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...


In [4]:
user_reviews.head()

Unnamed: 0,user_id,age,income_grp,economy,Longitude,Latitude,book_id,score
0,2,18.0,2,6,-64.803015,17.955006,195153448,0
1,20,19.0,2,6,-64.803015,17.955006,425163091,0
2,42,17.0,2,6,-64.803015,17.955006,553582747,7
3,51,34.0,2,6,-64.803015,17.955006,440225701,9
4,56,24.0,2,6,-64.803015,17.955006,671623249,7


In [5]:
print(user_reviews.shape)
print(books.shape)

(596550, 8)
(182759, 8)


In [6]:
# Combine all three (initial) datasets
combined = pd.merge(user_reviews, books, on = 'book_id')

In [7]:
combined.head()

Unnamed: 0,user_id,age,income_grp,economy,Longitude,Latitude,book_id,score,title,author,published,publisher,image_small,image_medium,image_large
0,2,18.0,2,6,-64.803015,17.955006,195153448,0,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,20,19.0,2,6,-64.803015,17.955006,425163091,0,Chocolate Jesus,Stephan Jaramillo,1998,Berkley Publishing Group,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...
2,95703,31.0,2,6,-64.803015,17.955006,425163091,9,Chocolate Jesus,Stephan Jaramillo,1998,Berkley Publishing Group,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...
3,198776,35.0,2,6,-64.803015,17.955006,425163091,0,Chocolate Jesus,Stephan Jaramillo,1998,Berkley Publishing Group,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...
4,275970,46.0,2,6,-64.803015,17.955006,425163091,0,Chocolate Jesus,Stephan Jaramillo,1998,Berkley Publishing Group,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...,http://images.amazon.com/images/P/0425163091.0...


For recommendation algorithm, we do not need some column (images, concrete title, author and publisher( Later on, I will try to extract some information that might be useful during recommendation))

In [8]:
combined.drop(['image_small', 'image_medium', 'image_large'], axis = 1, inplace = True)

In [9]:
# After many deliberation, I decided to remove all 0 rated cases, cause I think it's not rows that I can rely on
combined = combined.loc[combined['score'] != 0].copy()

# Only 200,000 rows from 1,200,000 were not removed during cleaning (may be I am too strict ;( )

In [10]:
# also remove books that were rated only with 0
books = books[books['book_id'].isin(combined.book_id.unique())].copy()

Instead of working with object type columns like author, publisher; I decided to get median values for them

In [11]:
books['title'] = books['title'].apply(lambda x: x.lower())

##### prefers

In [12]:
# Here I am creating dictionary (hash-map) of publishers as key and their median rating scores as values.
by_publisher = dict(combined.groupby('publisher')['score'].median())

# Then I will give these values to new column: avg_publisher_score for every match in combined table

# So here I take publisher value from every row, and get it's average rating score assigning that value to dictionary
combined['avg_publisher_score'] = combined['publisher'].apply(lambda x: by_publisher[x])

In [13]:
# Finally let's do the same for author
by_author = dict(combined.groupby('author')['score'].median())

combined['avg_author_score'] = combined['author'].apply(lambda x: by_author[x])

In [14]:
# I will need Author, publisher and title data after model building part of this project, So I will create new final dataframe

df = combined.drop(['title', 'author', 'publisher'], axis = 1).copy()

In [15]:
# so this how final df looks like
df.tail(6)

Unnamed: 0,user_id,age,income_grp,economy,Longitude,Latitude,book_id,score,published,avg_publisher_score,avg_author_score
531243,121030,49.0,3,6,11.788629,-0.5866,2264031476,6,2001,7.0,7.0
531244,121030,49.0,3,6,11.788629,-0.5866,2264032405,7,2001,7.0,7.0
531245,140933,30.0,2,2,7.406277,43.752746,2226109501,8,1999,8.0,7.5
531247,247624,26.0,5,7,32.36908,1.274693,974433926,7,2004,7.0,7.0
531251,209389,34.0,5,7,46.704737,-19.371896,2253053287,9,1990,8.0,7.0
531252,209389,34.0,5,7,46.704737,-19.371896,2253097888,7,1994,8.0,7.0


In [16]:
df['user_id'].nunique()

29331

In [17]:
df.shape

(193058, 11)

In [18]:
df['book_id'].nunique()

93700

For now, let's take one step back, and create sparse matrix with rows as unique user, column as unique book id and value as rating of this user to this book.

Here, what i am going to do is remove data that I can't trust. It means books that were rated with very few people, or users who rated only few books

In [19]:
by_book = pd.DataFrame(df.groupby('book_id')['user_id'].count().sort_values(ascending = False)).reset_index()
by_book = by_book.rename(columns={'user_id': 'count'})
by_book = by_book[by_book['count'] >= 5]


In [20]:
by_user = pd.DataFrame(df.groupby('user_id')['book_id'].count().sort_values(ascending = False)).reset_index()
by_user = by_user.rename(columns={'book_id': 'count'})
by_user = by_user[by_user['count'] >= 10]

In [21]:
df = df[(df['user_id'].isin(by_user['user_id'].unique())) & (df['book_id'].isin(by_book['book_id'].unique()))].copy()

In [22]:
df

Unnamed: 0,user_id,age,income_grp,economy,Longitude,Latitude,book_id,score,published,avg_publisher_score,avg_author_score
8,7158,30.0,2,6,-64.803015,17.955006,0553582747,8,2001,8.0,8.0
9,16795,47.0,2,6,-64.803015,17.955006,0553582747,8,2001,8.0,8.0
13,31826,54.0,2,6,-64.803015,17.955006,0553582747,10,2001,8.0,8.0
21,71234,30.0,2,6,-64.803015,17.955006,0553582747,10,2001,8.0,8.0
28,95903,27.0,2,6,-64.803015,17.955006,0553582747,10,2001,8.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...
521541,147649,25.0,4,6,-177.348348,-13.887370,2277260029,9,1998,8.0,8.0
521542,156972,26.0,4,6,-177.348348,-13.887370,2277260029,8,1998,8.0,8.0
521544,223386,27.0,4,6,-177.348348,-13.887370,2277260029,8,1998,8.0,8.0
522485,76158,22.0,4,6,-177.348348,-13.887370,2020551470,9,2002,8.0,9.0


In [23]:
user_book_pivot = df.pivot_table(columns='book_id',index='user_id', values='score').fillna(0)

In [24]:
user_book_pivot.head()

book_id,0006485200,0006551971,0006742939,0007110928,0007122039,0007154615,000716226X,0007170866,0020198817,0020198906,...,849550152X,8495618605,8806142100,880781210X,9074336329,950491036X,9580464162,9681501225,9726101794,9871138148
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
user_book_pivot.shape

(3411, 6438)

In [26]:
user_book_pivot.iloc[0]

book_id
0006485200    0.0
0006551971    0.0
0006742939    0.0
0007110928    0.0
0007122039    0.0
             ... 
950491036X    0.0
9580464162    0.0
9681501225    0.0
9726101794    0.0
9871138148    0.0
Name: 242, Length: 6438, dtype: float64

In [30]:
from scipy.sparse import csr_matrix

user_book_sparse=csr_matrix(user_book_pivot)

In [31]:
from scipy import spatial

spatial.distance.cosine(user_book_sparse[0].todense(), user_book_sparse[0].todense())

0.0

In [32]:
for i in range(1, 11):
    print(spatial.distance.euclidean(user_book_sparse[0].todense(), user_book_sparse[i].todense()))

46.20606020859169
24.020824298928627
25.15949125081825
27.110883423451916
22.538855339169288
27.65863337187866
23.515952032609693
39.01281840626232
41.32795663954365
19.026297590440446


In [33]:
user_book_sparse[0].todense()

matrix([[0., 0., 0., ..., 0., 0., 0.]])

In [34]:
from sklearn.neighbors import NearestNeighbors

In [35]:
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [36]:
cosine_similarity(user_book_sparse)

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.19585962, 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.19585962, ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [37]:
neighbors = NearestNeighbors(n_neighbors = 22, metric='euclidean')

In [38]:
neighbors.fit(user_book_sparse)

NearestNeighbors(metric='euclidean', n_neighbors=22)

In [39]:
user_book_pivot.iloc[666,:].values.reshape(1,-1)

array([[0., 0., 0., ..., 0., 0., 0.]])

In [40]:
user_book_pivot.iloc[1, :].values.reshape(1, -1)

array([[0., 0., 0., ..., 0., 0., 0.]])

In [41]:
distances,suggestions=neighbors.kneighbors(user_book_pivot.iloc[666,:].values.reshape(1,-1))

In [42]:
user_book_pivot.iloc[666,:].values.reshape(1,-1)

array([[0., 0., 0., ..., 0., 0., 0.]])

In [43]:
distances

array([[ 0.        , 51.30302135, 51.7107339 , 51.75905718, 51.90375709,
        51.94227565, 51.94227565, 52.01922721, 52.01922721, 52.05766034,
        52.06726419, 52.06726419, 52.08646657, 52.08646657, 52.08646657,
        52.09606511, 52.17278984, 52.17278984, 52.17278984, 52.17278984,
        52.17278984, 52.17278984]])

In [44]:
suggestions

array([[ 666, 2054, 1057,  234, 2094, 3087,  231,  909,  810,  577, 1834,
        2315,  305, 2314, 2516,  916, 1046, 3063, 2939, 3290,  536, 1033]],
      dtype=int64)

In [45]:
for i in range(len(suggestions)):
    print(user_book_pivot.index[suggestions[i]])

Int64Index([ 55892, 165759,  86680,  19969, 168464, 250163,  19611,  75771,
             67838,  47975, 146781, 185448,  26443, 185359, 204044,  76350,
             86136, 248289, 239106, 268134,  43925,  85280],
           dtype='int64', name='user_id')


In [46]:
ff = pd.DataFrame(user_book_pivot.iloc[54])

In [47]:
user_book_pivot.head()

book_id,0006485200,0006551971,0006742939,0007110928,0007122039,0007154615,000716226X,0007170866,0020198817,0020198906,...,849550152X,8495618605,8806142100,880781210X,9074336329,950491036X,9580464162,9681501225,9726101794,9871138148
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [48]:
recommended_books = {}

curr = 666

suggestions = suggestions[0]

suggestions = suggestions[suggestions != curr]

In [49]:
suggestions

array([2054, 1057,  234, 2094, 3087,  231,  909,  810,  577, 1834, 2315,
        305, 2314, 2516,  916, 1046, 3063, 2939, 3290,  536, 1033],
      dtype=int64)

In [50]:
for i in suggestions:
    
    sim_user = pd.DataFrame(user_book_pivot.iloc[i])
    to_rename = sim_user.columns[0]
    
    sim_user = sim_user.rename(columns={to_rename: 'score'})
    sim_user = sim_user.reset_index()
    
    only_scored = sim_user[sim_user['score'] > 0].copy()
    
    for book in only_scored['book_id'].unique():
        
        if book in recommended_books.keys():
            
            recommended_books[book][0] += 1
            
            recommended_books[book][1] += only_scored.loc[only_scored['book_id'] == book, 'score'].values[0] 
        else:
            recommended_books[book] = [1, only_scored.loc[only_scored['book_id'] == book, 'score'].values[0]]
    
    

In [51]:
list(recommended_books.keys())[0]

'044022165X'

In [52]:
books[books['book_id'] == '0446532231']

Unnamed: 0,book_id,title,author,published,publisher,image_small,image_medium,image_large
2816,446532231,"dude, where's my country?",Michael Moore,2003,Warner Books,http://images.amazon.com/images/P/0446532231.0...,http://images.amazon.com/images/P/0446532231.0...,http://images.amazon.com/images/P/0446532231.0...


In [53]:
recs = pd.DataFrame(recommended_books).T.reset_index().rename(columns = {'index': 'book_id', 0: 'num_of_occr', 1: 'rating'})

In [54]:
recs['rating'] = recs['rating'] / recs['num_of_occr']

In [55]:
recs.sort_values(by=['num_of_occr', 'rating'], ascending = [False, False])

Unnamed: 0,book_id,num_of_occr,rating
0,044022165X,2.0,6.0
25,0971880107,2.0,4.0
6,0064407667,1.0,10.0
9,0440407524,1.0,10.0
18,0385503857,1.0,10.0
5,0812548051,1.0,8.0
4,0446532231,1.0,7.0
7,0064407683,1.0,7.0
8,0380709562,1.0,7.0
17,0679429220,1.0,7.0


In [56]:
prefer = [
    "cocktail classics",
    "from one to one hundred",
    "one hundred years of solitude",
    "fahrenheit 451",
    "memoirs of a geisha",
    "life of pi",
    "the picture of dorian gray (modern library (paperback))",
    "the little prince",
    "alice's adventures in wonderland and through the looking glass",
    "the adventures of tom sawyer",
    "war and peace (wordsworth classics)",
    "old man and the sea",
    "julie and romeo",
    "fight club",
    "jurassic park"
]

prefers_dict = {}
for book in prefer:
    
    prefers_dict[book] = books.loc[books.title == book, 'book_id'].values.tolist()
    
prefers = pd.DataFrame(columns=['book', 'id', 'score'])

In [57]:
prefer = {}
c = 0

for book in prefers_dict.keys():
    
    score = int(input(f'How would you rate {book} book? '))
    
    for unique_id in prefers_dict[book]:
        
        prefers.loc[c] = [book, unique_id, score] 
        c+=1

How would you rate cocktail classics book? 6
How would you rate from one to one hundred book? 6
How would you rate one hundred years of solitude book? 9
How would you rate fahrenheit 451 book? 7
How would you rate memoirs of a geisha book? 7
How would you rate life of pi book? 9
How would you rate the picture of dorian gray (modern library (paperback)) book? 8
How would you rate the little prince book? 9
How would you rate alice's adventures in wonderland and through the looking glass book? 9
How would you rate the adventures of tom sawyer book? 7
How would you rate war and peace (wordsworth classics) book? 9
How would you rate old man and the sea book? 7
How would you rate julie and romeo book? 7
How would you rate fight club book? 9
How would you rate jurassic park book? 8


In [58]:
prefers.drop('book', axis=1, inplace = True)
prefers = prefers.set_index('id').T

In [59]:
prefers

id,1845170423,0525447644,0060929790,038001503X,0060919655,0072434236,3257208626,0345342968,0345410017,8401422825,...,1853260622,0684801221,0684163268,0609606727,0805062971,0393039765,2070422402,0345370775,0394588169,0679430628
score,6,6,9,9,9,9,7,7,7,7,...,9,7,7,7,9,9,9,8,8,8


In [60]:
new_vals = pd.DataFrame(columns = user_book_pivot.columns)

In [61]:
new_vals['0060929790']

Series([], Name: 0060929790, dtype: object)

In [62]:
new_vals.loc[0] = 0

In [64]:
for column in prefers.columns.values:
    
    if column in new_vals.columns.values:
        new_vals[column] = prefers[column].values[0]

In [65]:
new_vals = np.array(new_vals).reshape(1, -1)

In [66]:
prefers[column].values[0]

8

In [78]:
for i in suggestions[0]: 
    print(i)

1818
2960
773
723
3087
231
810
909
305
2516
2314
916
748
3047
536
1046
2939
3063
189
1018
2823
746


In [128]:
def create_recommendation(array):
    
    distances, suggestions = neighbors.kneighbors(array)

    recommended_books = {}

    

    for i in suggestions[0]:
      
        sim_user = pd.DataFrame(user_book_pivot.iloc[i])
     
        
        to_rename = sim_user.columns[0]
       

        sim_user = sim_user.rename(columns={to_rename: 'score'})
       
        sim_user = sim_user.reset_index()
       

        only_scored = sim_user[sim_user['score'] > 0].copy()
     
        print(only_scored.head())
        for book in only_scored['book_id'].unique():
            
            if book in recommended_books.keys():
                
                recommended_books[book][0] += 1
                
                recommended_books[book][1] += only_scored.loc[only_scored['book_id'] == book, 'score'].values[0] 
            else:
                recommended_books[book] = [1, only_scored.loc[only_scored['book_id'] == book, 'score'].values[0]]
             
    
    recs = pd.DataFrame(recommended_books).T.reset_index().rename(columns = {'index': 'book_id', 0: 'num_of_occr', 1: 'rating'})

    recs['rating'] = recs['rating'] / recs['num_of_occr']
    
    recs.sort_values(by=['rating', 'num_of_occr'], ascending = [False, False])

    
    return recs


In [121]:
df.apply(lambda x: x['age'] * x['economy'], axis=1)

8         180.0
9         282.0
13        324.0
21        180.0
28        162.0
          ...  
521541    150.0
521542    156.0
521544    162.0
522485    132.0
522486    126.0
Length: 51272, dtype: float64

In [127]:
books

Unnamed: 0,book_id,title,author,published,publisher,image_small,image_medium,image_large
1,0002005018,clara callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,decision in normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,flu: the story of the great influenza pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
5,0399135782,the kitchen god's wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...
6,0425176428,what if?: the world's foremost military histor...,Robert Cowley,2000,Berkley Publishing Group,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...
...,...,...,...,...,...,...,...,...
271354,381440176X,"ein fall f�?¼r tkkg, bd.50, sklaven f�?¼r wutawia",Stefan Wolf,1989,Pelikan,http://images.amazon.com/images/P/381440176X.0...,http://images.amazon.com/images/P/381440176X.0...,http://images.amazon.com/images/P/381440176X.0...
271371,1845170423,cocktail classics,David Biggs,2004,Connaught,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...
271373,0449906736,flashpoints: promise and peril in a new world,Robin Wright,1993,Ballantine Books,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...
271374,0440400988,there's a bat in bunk five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...


In [129]:
check = create_recommendation(new_vals)

         book_id  score
1149  0345342968    6.0
         book_id  score
1674  038001503X   10.0
2006  0385424728    6.0
         book_id  score
1202  0345370775    7.0
2662  0440222656    6.0
        book_id  score
751  0156027321    8.0
852  0312305060    9.0
         book_id  score
3761  0525455205    1.0
         book_id  score
6337  3492221521    1.0
         book_id  score
3104  0446673544    3.0
         book_id  score
4594  0671642561    3.0
         book_id  score
6213  2266102621    4.0
         book_id  score
5424  080213825X    4.0
         book_id  score
6262  3404144783    4.0
         book_id  score
644   0140440046    4.0
1053  0330332775    1.0
         book_id  score
2374  0425157164    5.0
         book_id  score
6289  3442433495    5.0
         book_id  score
6041  1573225789    5.0
         book_id  score
2493  0425190919    5.0
         book_id  score
5847  0971880107    5.0
         book_id  score
5728  0875421180    5.0
         book_id  score
4810  0679434046   

In [134]:
books

Unnamed: 0,book_id,title,author,published,publisher,image_small,image_medium,image_large
1,0002005018,clara callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,decision in normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,flu: the story of the great influenza pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
5,0399135782,the kitchen god's wife,Amy Tan,1991,Putnam Pub Group,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...,http://images.amazon.com/images/P/0399135782.0...
6,0425176428,what if?: the world's foremost military histor...,Robert Cowley,2000,Berkley Publishing Group,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...,http://images.amazon.com/images/P/0425176428.0...
...,...,...,...,...,...,...,...,...
271354,381440176X,"ein fall f�?¼r tkkg, bd.50, sklaven f�?¼r wutawia",Stefan Wolf,1989,Pelikan,http://images.amazon.com/images/P/381440176X.0...,http://images.amazon.com/images/P/381440176X.0...,http://images.amazon.com/images/P/381440176X.0...
271371,1845170423,cocktail classics,David Biggs,2004,Connaught,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...,http://images.amazon.com/images/P/1845170423.0...
271373,0449906736,flashpoints: promise and peril in a new world,Robin Wright,1993,Ballantine Books,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...,http://images.amazon.com/images/P/0449906736.0...
271374,0440400988,there's a bat in bunk five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...


In [141]:
for book_id in check.book_id.values:
    print(books.loc[books['book_id'] == book_id, 'title'].values[0])

fahrenheit 451
one hundred years of solitude
the chamber
jurassic park
the horse whisperer
life of pi
the hours: a novel
pooh and the philosophers : in which it is shown that all of western philosophy is merely a preamble to winnie-the-pooh
suche impotenten mann f�?¼rs leben.
sex & the city
fallen hearts
les heures / the hours
four blondes
dunkel.
candide (penguin classics)
bridget jones's diary
shadows of steel
der campus.
the color of water: a black man's tribute to his white mother
sea of fire (op-center series, volume 10)
wild animus
wicca: a guide for the solitary practitioner
in the kitchen with rosie: oprah's favorite recipes
metaphysique des tubes
dreamcatcher
the electric kool-aid acid test
