In [1]:
#Load & Preprocessed Data
import pandas as pd

books = pd.read_csv('datasets/books.csv')
books.head(5)

Unnamed: 0,id,book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [5]:
#Split data into features and target
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

df = books[['book_id','title']]
df = df.sample(frac=1, random_state=42)
df = df.head(10000)
df.isnull().sum()
tf = TfidfVectorizer()
tf.fit(df['title']) 
tfidf_matrix = tf.fit_transform(df['title']) 
tfidf_matrix.todense()
pd.DataFrame(
    tfidf_matrix.todense(), 
    columns=tf.get_feature_names(),
    index=df.title
).sample(22, axis=1).sample(10, axis=0)
cosine_sim = cosine_similarity(tfidf_matrix) 
cosine_sim_df = pd.DataFrame(cosine_sim, index=df['title'], columns=df['title'])

In [17]:
#Model
def books_recommendations(title, similarity_data=cosine_sim_df, items=df[['book_id', 'title']], k=15):
    index = similarity_data.loc[:,title].to_numpy().argpartition(
        range(-1, -k, -1))
    closest = similarity_data.columns[index[-1:-(k+2):-1]]
    return pd.DataFrame(closest).merge(items).head(k)

In [14]:
validasi = df[df.title.eq('The Tenth Circle')]
validasi

Unnamed: 0,book_id,title
907,10909,The Tenth Circle


In [15]:
predict = books_recommendations('The Tenth Circle')
predict

Unnamed: 0,title,book_id
0,The Tenth Circle,10909
1,The Circle,18302455
2,The Tenth Justice,85321
3,Tenth of December,13641208
4,Circle of Friends,41977
5,The Calvin and Hobbes Tenth Anniversary Book,24813
6,"The Black Circle (The 39 Clues, #5)",6255144
7,"White: The Great Pursuit (The Circle, #3)",125955
8,"The Ruby Circle (Bloodlines, #6)",8709528
9,"Dance of the Gods (Circle Trilogy, #2)",31313


In [18]:
import pickle
import joblib
saved_model = pickle.dumps(books_recommendations)

In [19]:
joblib.dump(books_recommendations, 'model.pkl')

['model.pkl']

In [28]:
import numpy as np 
test_data = "The Calvin and Hobbes Tenth Anniversary Book"
#print(test_data)

In [29]:
#declare path where you saved your model
outFileFolder = 'datasets/'

filePath = outFileFolder + 'model.pkl'
#Open File
file = open(filePath, "rb")

#Load model
trained_model = joblib.load(file)



In [30]:
prediction = trained_model(test_data)
print(prediction)

                                                title  book_id
0        The Calvin and Hobbes Tenth Anniversary Book    24813
1                                   Calvin and Hobbes    77727
2   The Essential Calvin and Hobbes: A Calvin and ...    43070
3                      The Complete Calvin and Hobbes    24812
4   The Authoritative Calvin and Hobbes: A Calvin ...    59715
5                 The Indispensable Calvin and Hobbes    24815
6              The Calvin and Hobbes Lazy Sunday Book    24494
7                                The Last Anniversary  1282954
8   It's a Magical World: A Calvin and Hobbes Coll...    24814
9   There's Treasure Everywhere: A Calvin and Hobb...    70489
10  The Days Are Just Packed: A Calvin and Hobbes ...    24818
11                                   The Tenth Circle    10909
12                                  The Tenth Justice    85321
13  Homicidal Psycho Jungle Cat: A Calvin and Hobb...    24816
14            The Fannie Farmer Cookbook: Anniversary  