In [28]:
import pymongo
import json
import os
import pandas as pd
import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import pickle

In [29]:
# Import the connection credentials.
# The parent directory is put on sys.path only for the duration of the import
# of the project-local `secret` module and is removed again straight after.
sys.path.append('../')
from secret import credentials
sys.path.remove('../')

In [30]:
## mongo API connection
# The connection string is read from the project-local credentials object, so
# the URI itself is not written in this notebook.
sec = credentials()
mongo_uri = sec.connect_string

# Client used by every database/collection handle created below.
myclient = pymongo.MongoClient(mongo_uri)

In [31]:
print(myclient.list_database_names())

['bookdb', 'admin', 'local']


In [32]:
# Handles to the 'bookdb' database and the three collections read below.
mydb = myclient['bookdb']
col_books = mydb['books']
col_ratings = mydb['ratings']
col_users = mydb['users']

In [33]:
def load_collection(collection):
    """Return every document of a MongoDB collection as a pandas DataFrame.

    ``collection.find({})`` is called with an empty filter, so all documents
    are fetched and materialised in memory before the frame is built.
    """
    return pd.DataFrame(list(collection.find({})))


# One frame per collection. The three loads used to be copy-pasted and shared
# a single reused `data` temporary; the helper removes that duplication.
books = load_collection(col_books)
ratings = load_collection(col_ratings)
users = load_collection(col_users)

In [34]:
# Drop the `_id` column from each frame. Assigning the result with
# errors='ignore' keeps this cell re-runnable: the in-place version raised
# KeyError on a second execution because `_id` had already been removed.
books = books.drop(columns='_id', errors='ignore')
ratings = ratings.drop(columns='_id', errors='ignore')
users = users.drop(columns='_id', errors='ignore')

## Data Preprocessing 

In [35]:
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [36]:
# Select the columns that are needed. The explicit .copy() makes `books` an
# independent frame, so later in-place edits to it cannot trigger pandas'
# SettingWithCopyWarning.
books = books[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-L']].copy()

In [37]:
# Rename the columns to shorter names. The result is assigned rather than
# applied with inplace=True: an in-place rename on a frame produced by column
# selection can make pandas emit SettingWithCopyWarning. Columns that are
# absent from the frame are simply skipped, so the cell can be re-run safely.
books = books.rename(columns={
    'Book-Title': 'title',
    'Book-Author': 'author',
    'Year-Of-Publication': 'year',
    'Image-URL-L': 'Image_URL',
})

In [38]:
books.head()

Unnamed: 0,ISBN,title,author,year,Publisher,Image_URL
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...


In [39]:
# underscore-separated column names for the ratings table
ratings = ratings.rename(columns={'User-ID': 'user_id', 'Book-Rating': 'rating'})

In [40]:
ratings.head()

Unnamed: 0,user_id,ISBN,rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [41]:
# same key name as in the ratings table
users = users.rename(columns={'User-ID': 'user_id'})

In [42]:
users.head()

Unnamed: 0,user_id,Location,country,city,Age
0,1.0,"nyc, new york, usa",usa,nyc,
1,2.0,"stockton, california, usa",usa,stockton,18.0
2,3.0,"moscow, yukon territory, russia",russia,moscow,
3,4.0,"porto, v.n.gaia, portugal",portugal,porto,17.0
4,5.0,"farnborough, hants, united kingdom",united kingdom,farnborough,


## Data Cleaning 

In [43]:
ratings['user_id'].value_counts()

11676     13602
198711     7550
153662     6109
98391      5891
35859      5850
          ...  
116180        1
116166        1
116154        1
116137        1
276723        1
Name: user_id, Length: 105283, dtype: int64

In [44]:
# Keep only users who rated more than 200 books.
# `flit` is a boolean filter indexed by user_id (True = more than 200 ratings).

flit = ratings['user_id'].value_counts().gt(200)

In [45]:
# user_ids for which the filter is True
flit_index = flit.loc[flit].index

In [46]:
# keep the rating rows whose user passed the filter
ratings = ratings.loc[ratings['user_id'].isin(flit_index)]

In [47]:
# new ratings after filtering
ratings

Unnamed: 0,user_id,ISBN,rating
1456,277427,002542730X,10
1457,277427,0026217457,0
1458,277427,003008685X,8
1459,277427,0030615321,0
1460,277427,0060002050,0
...,...,...,...
1147612,275970,3829021860,0
1147613,275970,4770019572,0
1147614,275970,896086097,0
1147615,275970,9626340762,8


In [48]:
# ---------- filter: keep books (ISBNs) with more than 30 ratings ----------

In [49]:
ratings['ISBN'].value_counts()

0971880107    363
0316666343    270
0060928336    220
0440214041    218
0385504209    215
             ... 
0451211170      1
0451211073      1
0451210484      1
0451210476      1
9626344990      1
Name: ISBN, Length: 207291, dtype: int64

In [50]:
# Filter ratings: True for every ISBN with more than 30 ratings
flit = ratings['ISBN'].value_counts().gt(30)

In [51]:
# ISBNs for which the filter is True
flit_index = flit.loc[flit].index

In [52]:
# keep the rating rows whose ISBN passed the filter
ratings = ratings.loc[ratings['ISBN'].isin(flit_index)]

In [53]:
# new ratings after filtering
ratings

Unnamed: 0,user_id,ISBN,rating
1456,277427,002542730X,10
1468,277427,006092988X,0
1469,277427,0060930535,0
1471,277427,0060934417,0
1474,277427,0061009059,9
...,...,...,...
1147439,275970,1400031346,0
1147440,275970,1400031354,0
1147441,275970,1400031362,0
1147470,275970,1558744606,0


In [54]:
# attach the book metadata to every rating row (inner join on ISBN)
ratings_with_books = pd.merge(ratings, books, on='ISBN', how='inner')

In [55]:
ratings_with_books

Unnamed: 0,user_id,ISBN,rating,title,author,year,Publisher,Image_URL
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,3363,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
2,11676,002542730X,6,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
3,12538,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
4,13552,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
...,...,...,...,...,...,...,...,...
72218,235935,0451180216,0,Interest of Justice,Nancy Taylor Rosenberg,1994,Signet Book,http://images.amazon.com/images/P/0451180216.0...
72219,238545,0451180216,0,Interest of Justice,Nancy Taylor Rosenberg,1994,Signet Book,http://images.amazon.com/images/P/0451180216.0...
72220,241666,0451180216,0,Interest of Justice,Nancy Taylor Rosenberg,1994,Signet Book,http://images.amazon.com/images/P/0451180216.0...
72221,242106,0451180216,0,Interest of Justice,Nancy Taylor Rosenberg,1994,Signet Book,http://images.amazon.com/images/P/0451180216.0...


In [56]:
# number of rating rows per title, as a two-column frame (title, rating)
raing_countBy_books = (
    ratings_with_books
    .groupby('title')['rating']
    .count()
    .reset_index()
)
raing_countBy_books.head()

Unnamed: 0,title,rating
0,16 Lighthouse Road,38
1,1984,52
2,1st to Die: A Novel,162
3,2010: Odyssey Two,43
4,204 Rosewood Lane,40


In [57]:
# the count column holds a number of ratings, not a rating value
raing_countBy_books = raing_countBy_books.rename(columns={'rating': 'num_of_rating'})

In [58]:
raing_countBy_books.head()

Unnamed: 0,title,num_of_rating
0,16 Lighthouse Road,38
1,1984,52
2,1st to Die: A Novel,162
3,2010: Odyssey Two,43
4,204 Rosewood Lane,40


In [60]:
# put the per-title rating count next to every individual rating row
final_rating = pd.merge(raing_countBy_books, ratings_with_books, on='title')
final_rating.head()

Unnamed: 0,title,num_of_rating,user_id,ISBN,rating,author,year,Publisher,Image_URL
0,16 Lighthouse Road,38,11676,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
1,16 Lighthouse Road,38,30276,1551668300,6,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
2,16 Lighthouse Road,38,43842,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
3,16 Lighthouse Road,38,52614,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
4,16 Lighthouse Road,38,76352,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...


In [61]:
final_rating.shape

(72223, 9)

In [63]:
final_rating

Unnamed: 0,title,num_of_rating,user_id,ISBN,rating,author,year,Publisher,Image_URL
0,16 Lighthouse Road,38,11676,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
1,16 Lighthouse Road,38,30276,1551668300,6,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
2,16 Lighthouse Road,38,43842,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
3,16 Lighthouse Road,38,52614,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
4,16 Lighthouse Road,38,76352,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...
...,...,...,...,...,...,...,...,...,...
72218,"\O\"" Is for Outlaw""",105,234359,0805059555,0,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...
72219,"\O\"" Is for Outlaw""",105,234623,0805059555,0,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...
72220,"\O\"" Is for Outlaw""",105,236283,0805059555,10,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...
72221,"\O\"" Is for Outlaw""",105,238120,0805059555,0,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...


In [None]:
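# ---- Earlier version of the merge / count / filter steps, kept commented out for reference; the outputs shown under these cells come from that earlier run ----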

In [62]:
# # merge ratings and books
# ratings_with_books = ratings.merge(books, on='ISBN', how='inner')

In [23]:
# ratings_with_books

Unnamed: 0,user_id,ISBN,rating,title,author,year,Publisher,Image_URL
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,3363,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
2,11676,002542730X,6,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
3,12538,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
4,13552,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
...,...,...,...,...,...,...,...,...
487666,275970,1892145022,0,Here Is New York,E. B. White,1999,Little Bookroom,http://images.amazon.com/images/P/1892145022.0...
487667,275970,1931868123,0,There's a Porcupine in My Outhouse: Misadventu...,Mike Tougias,2002,Capital Books (VA),http://images.amazon.com/images/P/1931868123.0...
487668,275970,3411086211,10,Die Biene.,Sybil GrÃ?Â¤fin SchÃ?Â¶nfeldt,1993,"Bibliographisches Institut, Mannheim",http://images.amazon.com/images/P/3411086211.0...
487669,275970,3829021860,0,The Penis Book,Joseph Cohen,1999,Konemann,http://images.amazon.com/images/P/3829021860.0...


In [24]:
# raing_countBy_books = ratings_with_books.groupby('title')['rating'].count().reset_index()


In [25]:
# raing_countBy_books.rename(columns={
#     'rating':'num_of_rating'
# }, inplace=True)

In [26]:
# # this is the num of rating by books
# raing_countBy_books.head()

Unnamed: 0,title,num_of_rating
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [27]:
# final_rating = raing_countBy_books.merge(ratings_with_books, on='title')

In [28]:
# # this is the final rating dataframe contain num of rating
# final_rating.head()

Unnamed: 0,title,num_of_rating,user_id,ISBN,rating,author,year,Publisher,Image_URL
0,A Light in the Storm: The Civil War Diary of ...,2,35859,590567330,0,Karen Hesse,1999,Hyperion Books for Children,http://images.amazon.com/images/P/0590567330.0...
1,A Light in the Storm: The Civil War Diary of ...,2,96448,590567330,9,Karen Hesse,1999,Hyperion Books for Children,http://images.amazon.com/images/P/0590567330.0...
2,Always Have Popsicles,1,172742,964147726,0,Rebecca Harvin,1994,Rebecca L. Harvin,http://images.amazon.com/images/P/0964147726.0...
3,Apple Magic (The Collector's series),1,198711,942320093,0,Martina Boudreau,1984,Amer Cooking Guild,http://images.amazon.com/images/P/0942320093.0...
4,Beyond IBM: Leadership Marketing and Finance ...,1,11601,962295701,0,Lou Mobley,1989,"Teleonet, Incorporated",http://images.amazon.com/images/P/0962295701.0...


In [29]:
# # Flieter rating, select num of rating greater than 30
# final_rating= final_rating[final_rating['num_of_rating']>30]

In [30]:
# final_rating.shape

(96548, 9)

In [31]:
# final_rating.drop_duplicates(subset=['title','user_id'], inplace=True)

In [32]:
# # this is the final Rating after data Cleaning 
# final_rating

Unnamed: 0,title,num_of_rating,user_id,ISBN,rating,author,year,Publisher,Image_URL
290,10 Lb. Penalty,35,39281,0515123471,0,Dick Francis,1998,Jove Books,http://images.amazon.com/images/P/0515123471.0...
291,10 Lb. Penalty,35,49109,0515123471,5,Dick Francis,1998,Jove Books,http://images.amazon.com/images/P/0515123471.0...
292,10 Lb. Penalty,35,69405,0515123471,0,Dick Francis,1998,Jove Books,http://images.amazon.com/images/P/0515123471.0...
293,10 Lb. Penalty,35,76352,0515123471,0,Dick Francis,1998,Jove Books,http://images.amazon.com/images/P/0515123471.0...
294,10 Lb. Penalty,35,83287,0515123471,10,Dick Francis,1998,Jove Books,http://images.amazon.com/images/P/0515123471.0...
...,...,...,...,...,...,...,...,...,...
487432,"\O\"" Is for Outlaw""",105,234359,0805059555,0,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...
487433,"\O\"" Is for Outlaw""",105,234623,0805059555,0,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...
487434,"\O\"" Is for Outlaw""",105,236283,0805059555,10,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...
487435,"\O\"" Is for Outlaw""",105,238120,0805059555,0,Sue Grafton,1999,Henry Holt &amp; Company,http://images.amazon.com/images/P/0805059555.0...


## Feature engineering

In [64]:
# title x user matrix: one row per book title, one column per user_id, cells hold the rating
pivot = final_rating.pivot_table(index='title', columns='user_id', values='rating')

In [65]:
# (title, user) pairs without a rating become 0
pivot = pivot.fillna(0)

In [66]:
pivot

user_id,254,2276,2766,2977,3363,4017,4385,6242,6251,6323,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16 Lighthouse Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2010: Odyssey Two,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204 Rosewood Lane,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
# store the mostly-zero pivot table in SciPy's compressed sparse row (CSR) format
book_sparse = csr_matrix(pivot)

In [68]:
book_sparse

<1259x886 sparse matrix of type '<class 'numpy.float64'>'
	with 17141 stored elements in Compressed Sparse Row format>

## Data Modeling 

In [69]:
# nearest-neighbour model using brute-force search; every other setting is left at its scikit-learn default
model = NearestNeighbors(algorithm='brute')

In [70]:
model.fit(book_sparse)

NearestNeighbors(algorithm='brute')

In [71]:
# find the 6 closest neighbours (the query book itself included) of the title stored in pivot row 1
distance, suggestion = model.kneighbors(pivot.iloc[1].values.reshape(1,-1), n_neighbors=6)

In [72]:
distance

array([[ 0.        , 42.09513036, 43.        , 43.03486958, 43.12771731,
        43.16248371]])

In [73]:
suggestion

array([[  1, 610, 533, 872, 364,  52]], dtype=int64)

In [74]:
suggestion.shape

(1, 6)

In [75]:
# Titles of the 6 nearest neighbours of pivot row 1 ('1984' in the run shown).
# The first hit is the query book itself (distance 0), so the remaining 5 are
# the actual recommendations.
for neighbour_row in suggestion[0]:
    print(pivot.index[neighbour_row])

1984
No Safe Place
Malice
The Bookman's Wake (Cliff Janeway Novels (Paperback))
Gates of Paradise
AGE OF INNOCENCE (MOVIE TIE-IN)


In [76]:
books_name = pivot.index

In [77]:
# All book titles (the index of the pivot table)
books_name

Index(['16 Lighthouse Road', '1984', '1st to Die: A Novel',
       '2010: Odyssey Two', '204 Rosewood Lane', '24 Hours', '2nd Chance',
       '4 Blondes',
       'A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper))',
       'A Beautiful Mind: The Life of Mathematical Genius and Nobel Laureate John Nash',
       ...
       'Without Remorse',
       'Witness in Death (Eve Dallas Mysteries (Paperback))',
       'Women Who Run with the Wolves',
       'Word Freak: Heartbreak, Triumph, Genius, and Obsession in the World of Competitive Scrabble Players',
       'Wuthering Heights', 'Year of Wonders', 'You Belong To Me',
       'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',
       'Zoya', '\O\" Is for Outlaw"'],
      dtype='object', name='title', length=1259)

In [78]:
# create the recommendation system

def recommend_book(book_name, n_neighbors=6):
    """Print the titles whose rating vectors are closest to ``book_name``.

    Looks ``book_name`` up in the index of the notebook-level ``pivot`` table,
    queries the fitted notebook-level ``model`` with that row, and prints the
    neighbouring titles in the order returned by ``model.kneighbors``. In the
    runs shown in this notebook the queried book itself is the first title
    printed.

    Parameters
    ----------
    book_name : str
        Title exactly as it appears in ``pivot.index``.
    n_neighbors : int, default 6
        Number of titles to print (the default matches the previous
        hard-coded value). Capped at the number of rows in ``pivot``.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pivot.index``. The exception type
        is the one the original bare ``[0][0]`` lookup raised; it now carries
        a message naming the missing title.
    """
    matches = np.where(pivot.index == book_name)[0]
    if matches.size == 0:
        raise IndexError(f"book title not found in pivot index: {book_name!r}")
    book_id = matches[0]

    # kneighbors rejects a request for more neighbours than fitted samples.
    # NOTE(review): assumes `model` was fitted on the rows of `pivot`.
    k = min(n_neighbors, pivot.shape[0])

    # Only the neighbour row positions are needed; the distances are discarded.
    _, suggestion = model.kneighbors(pivot.iloc[book_id].values.reshape(1, -1), n_neighbors=k)
    for neighbour_row in suggestion[0]:
        print(pivot.index[neighbour_row])


In [101]:
# testing 
recommend_book("Harry Potter and the Sorcerer's Stone (Book 1)")

Harry Potter and the Sorcerer's Stone (Book 1)
The Mammoth Hunters (Auel, Jean M. , Earth's Children.)
The Charm School
Eaters of the Dead
Fortune's Hand
The List


In [48]:
# # testing 
# recommend_book('You Belong To Me')

You Belong To Me
The Cradle Will Fall
Exclusive
Loves Music, Loves to Dance
The Anastasia Syndrome
Gates of Paradise


In [None]:
##################

In [82]:
# add each user's location / age columns to the rating rows (join on user_id)
user_merge_ratings = pd.merge(final_rating, users, on='user_id')
user_merge_ratings.head()

Unnamed: 0,title,num_of_rating,user_id,ISBN,rating,author,year,Publisher,Image_URL,Location,country,city,Age
0,16 Lighthouse Road,38,11676,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...,"n/a, n/a, n/a",,,
1,1984,52,11676,451524934,0,George Orwell,1990,Signet Book,http://images.amazon.com/images/P/0451524934.0...,"n/a, n/a, n/a",,,
2,1st to Die: A Novel,162,11676,446610038,10,James Patterson,2002,Warner Vision,http://images.amazon.com/images/P/0446610038.0...,"n/a, n/a, n/a",,,
3,1st to Die: A Novel,162,11676,316666009,8,James Patterson,2001,Little Brown and Company,http://images.amazon.com/images/P/0316666009.0...,"n/a, n/a, n/a",,,
4,204 Rosewood Lane,40,11676,1551669293,0,Debbie Macomber,2002,Mira,http://images.amazon.com/images/P/1551669293.0...,"n/a, n/a, n/a",,,


In [83]:
def clean_us(country):
    """Map the country label ``'us'`` to ``'usa'``; return any other value unchanged."""
    return 'usa' if country == 'us' else country

In [84]:
# normalise 'us' -> 'usa' element by element
user_merge_ratings['country'] = user_merge_ratings['country'].map(clean_us)

In [85]:
user_merge_ratings.head()

Unnamed: 0,title,num_of_rating,user_id,ISBN,rating,author,year,Publisher,Image_URL,Location,country,city,Age
0,16 Lighthouse Road,38,11676,1551668300,0,Debbie Macomber,2001,Mira,http://images.amazon.com/images/P/1551668300.0...,"n/a, n/a, n/a",,,
1,1984,52,11676,451524934,0,George Orwell,1990,Signet Book,http://images.amazon.com/images/P/0451524934.0...,"n/a, n/a, n/a",,,
2,1st to Die: A Novel,162,11676,446610038,10,James Patterson,2002,Warner Vision,http://images.amazon.com/images/P/0446610038.0...,"n/a, n/a, n/a",,,
3,1st to Die: A Novel,162,11676,316666009,8,James Patterson,2001,Little Brown and Company,http://images.amazon.com/images/P/0316666009.0...,"n/a, n/a, n/a",,,
4,204 Rosewood Lane,40,11676,1551669293,0,Debbie Macomber,2002,Mira,http://images.amazon.com/images/P/1551669293.0...,"n/a, n/a, n/a",,,


In [86]:
user_merge_ratings.groupby(['country'])['num_of_rating'].sum().sort_values(ascending=False)

country
usa                   4693341
canada                 463380
united kingdom         110408
n/a                     68678
australia               60672
portugal                44315
                        36065
new zealand             24917
malaysia                24279
germany                 19892
spain                   15937
netherlands             14812
iran                     9918
france                   7830
china                    7206
italy                    6724
brazil                   6154
romania                  5728
qatar                    3688
belgium                  3304
switzerland              3273
dominican republic       2479
austria                  1785
japan                    1664
finland                  1398
sweden                   1223
denmark                   345
Name: num_of_rating, dtype: int64

In [87]:
region_unique = user_merge_ratings['country'].unique().tolist()

In [88]:
# Drop the placeholder country labels ('' and 'n/a'). Filtering, unlike
# list.remove, is safe to re-run and does not raise ValueError when a
# placeholder is absent from the list.
region_unique = [country for country in region_unique if country not in ('', 'n/a')]

In [89]:
region_unique

['usa',
 'canada',
 'portugal',
 'united kingdom',
 'denmark',
 'spain',
 'australia',
 'brazil',
 'malaysia',
 'germany',
 'netherlands',
 'iran',
 'china',
 'austria',
 'italy',
 'qatar',
 'sweden',
 'new zealand',
 'france',
 'belgium',
 'romania',
 'finland',
 'switzerland',
 'japan',
 'dominican republic']

In [90]:
# persist the cleaned list of country names
with open('../py_objects/region_unique.pkl', 'wb') as file:
    pickle.dump(region_unique, file)

In [103]:
# write the merged ratings + users table to CSV, without the row index
user_merge_ratings.to_csv('../data/final_merge_table.csv', index=False)

In [104]:
#######################################

In [105]:
# This is important
# index=True keeps the pivot's index (the book titles) as the first CSV column.
pivot.to_csv('../py_objects/pivot.csv', index=True)

In [106]:
# this is important
# NOTE(review): index=True also writes final_rating's row index as a leading
# column, unlike final_merge_table.csv above — confirm the reader expects it.
final_rating.to_csv('../py_objects/final_rating.csv', index=True)

In [107]:
# Save model and python objects
# (target path, object) pairs, written in the order listed.
for pkl_path, obj in (
    ('../py_objects/model.pkl', model),
    ('../py_objects/books_name.pkl', books_name),
    ('../py_objects/final_rating.pkl', final_rating),
    ('../py_objects/pivot.pkl', pivot),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(obj, file)