In [1]:
import pandas as pd
import numpy as np

import nltk
import gensim
from gensim import corpora, models
from sklearn.decomposition import LatentDirichletAllocation, TruncatedSVD

import warnings
warnings.filterwarnings('ignore')

from IPython.display import clear_output
import timeit

import pickle

In [2]:
# Load, in order: the cleaned fiction-only book table, the cleaned reviews,
# and the pre-computed document-topic matrix saved alongside the LDA model.
books, reviews, df_document_topic = (
    pd.read_csv(csv_path)
    for csv_path in (
        'books_cleaned_v4.csv',
        'reviews_cleaned_short.csv',
        'LDA50kfictionnewclean/df_document_topic.csv',
    )
)

In [3]:
books.head()

Unnamed: 0,isbn10,isbn13,rating-avg,rating-count,title,description,genres,description_original,description_length,genres_length
0,552576484,9780553000000.0,4.07,287187.0,"Everything, Everything",major motion picture starring amanda stenberg ...,"'young-adult', 'romance', 'contemporary', 'fic...","<b><i>Everything, Everything</i> is now a majo...",61,17
1,241255775,9780241000000.0,3.63,1129.0,Anna of the Five Towns,deeply moving original dealing material encoun...,"'classics', 'fiction', 'historical-fiction', '...","<b>'Deeply moving, original, and dealing with ...",54,18
2,1536607398,9781537000000.0,4.11,893.0,Ghost Run,acclaimed eagerly anticipated fourth thriller ...,"'zombies', 'horror', 'post-apocalyptic', 'fict...",The acclaimed and eagerly anticipated fourth t...,82,12
3,719078385,9780719000000.0,3.23,440.0,"Epicene, or the Silent Woman : By Ben Jonson",epicene widely studied johnson play brilliantl...,"'plays', 'drama', 'classics', '17th-century', ...",<i>Epicene </i>is now one of the most widely-s...,93,15
4,60539526,9780061000000.0,4.01,274.0,Little Critter : The Lost Dinosaur Bone,little critter class going critterville museum...,"'picture-books', 'dinosaurs', 'science', 'fict...",Little Critter's class is going to the Critter...,35,9


In [4]:
books.shape

(95508, 10)

In [5]:
# Keep the first 50,000 rows only. The search functions attach one similarity
# value per row of df_document_topic to this frame positionally, so both must
# have the same number of rows.
books = books.iloc[:50000]

In [6]:
books.shape

(50000, 10)

In [7]:
#Document-topic matrix: drop the 'Unnamed: 0' column read from the CSV
#(presumably the index that was saved with the file -- confirm).
#errors='ignore' keeps this cell re-runnable after the column is already gone.
df_document_topic = df_document_topic.drop(columns = ['Unnamed: 0'], errors = 'ignore')

In [8]:
df_document_topic.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,...,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06,8e-06
1,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,...,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06,9e-06
2,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,...,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06,6e-06
3,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,...,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06
4,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05


In [9]:
df_document_topic.shape

(50000, 2000)

### LDA Model 

In [10]:
#load vectorizer
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
# The context manager closes the file handle once the object is loaded.
with open("LDA50kfictionnewclean/vectorizer.pk", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

In [11]:
#load trained LDA model
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
# The context manager closes the file handle once the object is loaded.
with open("LDA50kfictionnewclean/lda_model.pk", "rb") as lda_model_file:
    lda_model = pickle.load(lda_model_file)

### Search - Cosine Similarity

In [13]:
import re
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer 
# Extra boilerplate words to drop on top of gensim's stop-word list.
# 'putlizer' is a misspelling that never matches real text; it is kept so the
# list stays a superset of the original, and the intended 'pulitzer' is added.
# NOTE(review): confirm this matches the preprocessing used to train the model.
add_sw = ['new','york','times','bestseller','bestselling','author','prize','putlizer','pulitzer']
sw = STOPWORDS.union(set(add_sw))
# English vocabulary from the NLTK words corpus (not referenced in the cells visible here).
eng_words = set(nltk.corpus.words.words())

def clean_text(text):
    """Normalise raw text into a space-separated string of lemmatized tokens.

    Steps: lower-case, replace HTML-style tags and every non-letter character
    with a space, drop stop words (``sw``), lemmatize with WordNet, and drop
    tokens of length 1 or less.

    Parameters
    ----------
    text : str
        Raw input text. Any non-string value (e.g. ``None`` or a NaN from a
        missing review) is treated as empty text.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces; ``''`` when nothing is left.
    """
    # Guard against missing values: calling .lower() on a float NaN would raise.
    if not isinstance(text, str):
        return ''

    text = text.lower()
    text = re.sub('<.*?>', ' ', text) #remove tags
    text = re.sub(r'[^a-zA-Z]',' ',text) #remove anything that is not an alphabet

    #Remove stop words
    tokens = re.split(r'[^\w]+',text)
    tokens = [w for w in tokens if w not in sw]

    #Lemmatize
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(w) for w in tokens]

    #remove short words (this also discards the empty strings produced by the split)
    tokens = [w for w in tokens if len(w)>1]
    return ' '.join(tokens).strip()

In [14]:
from sklearn.metrics.pairwise import cosine_similarity
import heapq
# Not referenced by the functions visible in this notebook section.
num_topics = 2000
def search_books(text, number_books_recommend, popular = 1, min_ratings = 100000):
    """Print the books whose topic distribution is most similar to a query.

    The query is cleaned with ``clean_text``, vectorized with ``vectorizer``,
    mapped to a topic distribution with ``lda_model``, and compared by cosine
    similarity against every row of ``df_document_topic``. Similarities are
    attached positionally to a copy of ``books``.

    Parameters
    ----------
    text : str
        Free-text search query.
    number_books_recommend : int
        Maximum number of books to print.
    popular : int, default 1
        Any value other than 0 restricts results to books whose
        ``rating-count`` is strictly greater than ``min_ratings``.
    min_ratings : int, default 100000
        Rating-count threshold used when ``popular`` is enabled.

    Returns
    -------
    None
        Results and the elapsed run time (in minutes) are printed.
    """
    start = timeit.default_timer()

    #clean and vectorize the query, then project it into topic space
    cleaned_query = clean_text(text)
    query_vector = vectorizer.transform([cleaned_query])
    topic_probability = lda_model.transform(query_vector)

    #compute similarity
    similarity_array = cosine_similarity(df_document_topic, topic_probability, dense_output=True)
    book_copy = books.copy()
    # One query gives an (n_books, 1) matrix; flatten it to a plain 1-D column.
    book_copy['similarity'] = similarity_array.ravel()

    #sort
    book_copy = book_copy.sort_values(by = 'similarity', ascending = False)

    #popular books - keep only books with more than min_ratings ratings
    if popular != 0:
        book_copy = book_copy[book_copy['rating-count'] > min_ratings]

    #print recommended books
    for _, book in book_copy.head(number_books_recommend).iterrows():
        print(book['title'])
        print(book['description_original'])
        print('ISBN:', book['isbn13'])
        print('Rating:', book['rating-avg'])
        print('Number of ratings:', book['rating-count'])
        print('Cosine Similarity:', book['similarity'])
        print('Genre:', book['genres'])
        print('\n')

    stop = timeit.default_timer()
    print('Run time:', np.round((stop-start)/60, 2), "minutes")

In [15]:
#fantasy 
search_books('wizarding magic',10,popular = 1)

Harry Potter and the Prisoner of Azkaban - Slytherin Edition
Harry Potter's third year at Hogwarts is full of new dangers. A convicted murderer, Sirius Black, has broken out of Azkaban prison, and it seems he's after Harry. Now Hogwarts is being patrolled by the dementors, the Azkaban guards who are hunting Sirius. But Harry can't imagine that Sirius or, for that matter, the evil Lord Voldemort could be more frightening than the dementors themselves, who have the terrible power to fill anyone they come across with aching loneliness and despair. Meanwhile, life continues as usual at Hogwarts. A top-of-the-line broom takes Harry's success at Quidditch, the sport of the Wizarding world, to new heights. A cute fourth-year student catches his eye. And he becomes close with the new Defense of the Dark Arts teacher, who was a childhood friend of his father. Yet despite the relative safety of life at Hogwarts and the best efforts of the dementors, the threat of Sirius Black grows ever closer. 

In [18]:
# thriller 
search_books('serial killer murder detective',10, popular = 1)

Darkly Dreaming Dexter : Book One
See alternate cover editions: <a href="https://www.goodreads.com/book/show/12091479-darkly-dreaming-dexter" rel="nofollow">here</a>, <a href="https://www.goodreads.com/book/show/8440211-darkly-dreaming-dexter" rel="nofollow">here</a>, and <a href="https://www.goodreads.com/book/show/6634993-darkly-dreaming-dexter" rel="nofollow">here</a><br /><br />Dexter Morgan appears to be the perfect gentleman. He leads a normal, quiet life working as a forensic officer for the Miami police. He has a nice, shy girlfriend and is liked by her young children.<br /><br />But Dexter has a secret hobby. He's an accomplished serial killer. So far he's killed dozens of people and has never been caught, because he knows exactly how to dispose of the evidence. And there are those who would rather he wasn't caught at all, because Dexter is a serial killer with a difference. He only kills the city's bad guys. Then Dexter's well-organized life is thrown into chaos. Another seri

In [16]:
#science fiction - dystopian 
search_books('dystopian end of world',10, popular = 1)

Nineteen Eighty-Four
It is 1984. The world is in a state of perpetual war and Big Brother sees and controls all. Winston Smith, a member of the Outer Party and propaganda-writer at the Ministry of Truth, is keeping a journal he should not be keeping and falling in love with Julia, a woman he should not be seeing. Outwardly compliant, Winston dreams of rebellion against the oppressive Big Brother, risking everything to recover his lost sense of individuality and control of his own future. One of the bestselling books of the twentieth century, 1984 is the dystopian classic that introduced such Orwellian terms as 'Big Brother, ' 'doublethink, ' 'Newspeak, ' and 'thoughtcrime' to the collective consciousness, giving official terminology to state-sanctioned deception, surveillance, and historical revisioni
ISBN: 9780241341650.0
Rating: 4.17
Number of ratings: 2669674.0
Cosine Similarity: 0.9878170204134447
Genre: 'classics', 'fiction', 'science-fiction', 'dystopia', 'literature', 'novels', 

### Model Testing

In [20]:
# Subset of books whose ISBN-13 occurs at least once in the reviews table.
books_reviews = books.loc[books['isbn13'].isin(reviews['isbn13'])]
print('Number of books with reviews:', books_reviews.shape[0])

Number of books with reviews: 1435


In [39]:
def search_books_score(text, number_books_recommend):
    """Return the ISBN-13s of the books most similar to ``text``.

    The text is cleaned, vectorized and projected into topic space, then
    compared by cosine similarity with every row of ``df_document_topic``.
    No popularity filter is applied.

    Parameters
    ----------
    text : str
        Query text (the callers in this notebook pass a review).
    number_books_recommend : int
        How many top-ranked books to return.

    Returns
    -------
    list
        ``isbn13`` values of the top-ranked books, most similar first.
    """
    # Query -> cleaned text -> term vector -> topic distribution.
    query_topics = lda_model.transform(vectorizer.transform([clean_text(text)]))

    # Similarity of the query against every document, attached positionally to the books.
    scores = cosine_similarity(df_document_topic, query_topics, dense_output=True)
    ranked = books.copy()
    ranked['similarity'] = scores
    ranked = ranked.sort_values(by = 'similarity', ascending = False)

    # Collect the ISBNs of the leading rows.
    top_labels = ranked.index[:number_books_recommend]
    return [ranked.loc[label, 'isbn13'] for label in top_labels]

In [40]:
def test_df(df,number_books_recommend):
    start = timeit.default_timer()
    for x in range(0,len(df)):
        clear_output(wait=True)

        isbn = df.loc[x,'isbn13']
        text = df.loc[x,'review']
        predicted_isbn = search_books_score(text,number_books_recommend)
        df.at[x,'predicted_isbn'] = predicted_isbn

        stop = timeit.default_timer()

        print('Current progress: {} out of {} rows'.format(x+1,len(df)))
        print('Current run time:', np.round((stop-start)/60, 2), "minutes")
    
    print('Computing Score') 
    
    df['intersection'] = df.apply(lambda x: x['isbn13'] in x['predicted_isbn'] ,axis = 1)
    df['score'] = np.where(df['intersection'] == False,0,1)
    
    accuracy = df['score'].sum() / len(df) * 100
    print('Accuracy: {} %'.format(accuracy))
    
    return df

In [41]:
def show_books(df,correct = 1):
    """Print up to three scored reviews together with their target book.

    Parameters
    ----------
    df : pandas.DataFrame
        Scored frame with ``isbn13``, ``review`` and ``score`` columns.
    correct : int, default 1
        ``1`` shows rows with ``score == 1`` (correct predictions); any other
        value shows rows with ``score == 0`` (wrong predictions).

    Returns
    -------
    None
        Everything is printed. Title and description are looked up in the
        module-level ``books`` frame and printed as arrays.
    """
    # The two modes differ only in the score selected and the heading text.
    if correct == 1:
        wanted_score = 1
        target_heading = '\n-----Target book------'
    else:
        wanted_score = 0
        target_heading = '\n-----Target Book------'

    # Slicing tolerates fewer than three matching rows; indexing would raise.
    selected_labels = df[df['score'] == wanted_score].index[:3]

    for row_label in selected_labels:
        review_isbn = df.loc[row_label,'isbn13']
        review = df.loc[row_label,'review']

        # Look the book up once and reuse the match for both fields.
        matching_books = books[books['isbn13'] == review_isbn]
        description = matching_books['description_original'].values
        title = matching_books['title'].values

        print('\n-----Review-----')
        print('Book title for review:', title)
        print('Review:')
        print(review)

        print(target_heading)
        print('Title:',title)
        print(description)
        

### Model Testing - Fantasy Books

In [29]:
# Reviewed books whose genre string contains 'fantasy'.
fantasy = books_reviews.loc[books_reviews['genres'].str.contains('fantasy', na=False)]
print('Number of fantasy books with at least 1 review:', fantasy.shape[0])

# Reviews of those books, then a fixed-seed sample of 200 for evaluation.
fantasy_reviews = reviews.loc[reviews['isbn13'].isin(fantasy['isbn13'])]
print('Number of reviews for fantasy books:', fantasy_reviews.shape[0])
fantasy_reviews_sample = fantasy_reviews.sample(n=200, random_state=20).reset_index(drop=True)
fantasy_reviews_sample.shape

Number of fantasy books with at least 1 review: 557
Number of reviews for fantasy books: 2871


(200, 6)

In [42]:
# Keep the two columns the evaluation needs and add an object-typed
# placeholder column that will hold one list of predicted ISBNs per row.
fantasy_reviews_sample = (
    fantasy_reviews_sample[['isbn13', 'review']]
    .assign(predicted_isbn=None)
    .astype({'predicted_isbn': object})
)

In [44]:
test_fantasy = test_df(fantasy_reviews_sample,10)
test_fantasy

Current progress: 200 out of 200 rows
Current run time: 10.47 minutes
Computing Score
Accuracy: 5.0 %


Unnamed: 0,isbn13,review,predicted_isbn,intersection,score
0,9.780425e+12,"\nThis is a book.For a full review, please go ...","[9781524776251.0, 9781401206123.0, 97814012098...",False,0
1,9.780425e+12,Ehh.. this one was a tad confusing. I don't wa...,"[9781401690533.0, 9780571329748.0, 97815098807...",False,0
2,9.781847e+12,2.5? Phil's ex Lola (not the car) created a do...,"[9781543663020.0, 9781543663013.0, 97805254964...",False,0
3,9.781481e+12,"In this book, Dashiell Gibson is put into a st...","[9788086264196.0, 9781935738367.0, 97815491771...",False,0
4,9.780749e+12,"\nThis was yet another heavenly book, and a ve...","[9780679731801.0, 9781531823337.0, 97814267019...",False,0
...,...,...,...,...,...
195,9.781495e+12,\nThis is the 2nd book in Galen Foley's Ascens...,"[9781416971085.0, 9781489397560.0, 97817423351...",False,0
196,9.781469e+12,"Language - G, Sexual Content - G; Violence - G...","[9780099877103.0, 9781975889005.0, 97816094533...",False,0
197,9.781303e+12,hmmm is podcast->graphic novel the new trend? ...,"[9780062798114.0, 9781936747634.0, 97819786230...",False,0
198,9.781599e+12,"3.5 stars The poem, on its own, is an interest...","[9781787531932.0, 9781787531895.0, 97801401790...",False,0


In [45]:
show_books(test_fantasy,correct = 1)


-----Review-----
Book title for review: ['The Book of Beloved']
Review:

Carolyn Haines can write the socks off a Southern ghost story! Her details are lush, pulling one back in time to Mobile in post WWI. We often romanticize the past as a gentler time -- but not so for the characters -- living or dead -- in The Book of Beloved. Lies and secrets are revealed. A sense of 'What Could Have Been' haunts the reader but ultimately the the search for truth prevails in a thoroughly fun read. So glad this is the first in a series!


-----Target book------
Title: ['The Book of Beloved']
['As a young woman widowed by World War I, Raissa James is no stranger to ghosts. But when an invitation arrives from Caoin House, her uncle’s estate in Mobile, Alabama, she’s finally ready to cast off the shadows of her past. And what better way to do so than with a grand party in her honor? An aspiring authoress, Raissa’s eager to soak up more of life—and immerse herself in the dark history that haunts the es

In [46]:
show_books(test_fantasy,correct = 0)


-----Review-----
Book title for review: ["The Demon's Daughter"]
Review:

This is a book.For a full review, please go to: http://mlleelizabeth.booklikes.com/GoodReads's policies no longer comply with my review policy. Therefore my reviews will be posted at BookLikes going forward. Thank you for following my reviews here. I hope to continue discussing books with you on BookLikes.


-----Target Book------
Title: ["The Demon's Daughter"]
["Inspector Adrian Philips keeps the peace between demons and humans in Avvar, a city not unlike Victorian London. To do his job, he's allowed his strength to be enhanced by demon technology, a choice that's cost him his wife, his family, and—some would say—his humanity. Rejected by both races, he hungers for a woman's touch.<br /><br />Roxanne McAllister is an outcast, too: the illegitimate daughter of an infamous chanteuse. One fateful night brings Roxanne and Adrian together, and though the border between human and demon is treacherous, they might be 

### Model Testing - Thriller Books

In [34]:
# Reviewed books whose genre string contains 'thriller'.
thriller = books_reviews.loc[books_reviews['genres'].str.contains('thriller', na=False)]
print('Number of thriller books with at least 1 review:', thriller.shape[0])

# Reviews of those books, then a fixed-seed sample of 200 for evaluation.
thriller_reviews = reviews.loc[reviews['isbn13'].isin(thriller['isbn13'])]
print('Number of reviews for thriller:', thriller_reviews.shape[0])
thriller_reviews_sample = thriller_reviews.sample(n=200, random_state=20).reset_index(drop=True)
thriller_reviews_sample.shape

Number of thriller books with at least 1 review: 376
Number of reviews for thriller: 2106


(200, 6)

In [35]:
# Keep the two columns the evaluation needs and add an object-typed
# placeholder column that will hold one list of predicted ISBNs per row.
thriller_reviews_sample = (
    thriller_reviews_sample[['isbn13', 'review']]
    .assign(predicted_isbn=None)
    .astype({'predicted_isbn': object})
)

In [47]:
test_thriller = test_df(thriller_reviews_sample,10)
test_thriller 

Current progress: 200 out of 200 rows
Current run time: 11.18 minutes
Computing Score
Accuracy: 5.0 %


Unnamed: 0,isbn13,review,predicted_isbn,intersection,score
0,9.780552e+12,\nThe writing talent of Forsyth is evident her...,"[9780552155045.0, 9780099561583.0, 97814091963...",True,1
1,9.781501e+12,Gone by Michael Grant have been one of my favo...,"[9780684869155.0, 9781941060155.0, 97814692455...",False,0
2,9.780515e+12,\nOne of Roberts' better stand-alone novels. L...,"[9780684869155.0, 9781941060155.0, 97814692455...",False,0
3,9.780099e+12,\nTerrible. I looked forward to reading a sequ...,"[9780552171274.0, 9781442304819.0, 97819221472...",False,0
4,9.780720e+12,\nApart from providing an insight into the 19t...,"[9781501263576.0, 9781501270475.0, 97818469738...",False,0
...,...,...,...,...,...
195,9.781742e+12,\nReally enjoyed this tension filled thriller ...,"[9781520079196.0, 9781522690870.0, 97803162428...",False,0
196,9.780100e+12,"\nYou can debate the Lovecraft comparisons, wh...","[9780099541066.0, 9781906838430.0, 97814915021...",False,0
197,9.780857e+12,I'm not typically a fan of mysteries. Usually ...,"[9780751570656.0, 9781455820894.0, 97815113745...",False,0
198,9.781544e+12,Once again Abby has to solve a case involving ...,"[9781447280927.0, 9781501229411.0, 97801411989...",False,0


In [48]:
show_books(test_thriller ,correct = 1)


-----Review-----
Book title for review: ['The Afghan']
Review:

The writing talent of Forsyth is evident here but unfortunately he misses the mark with this novel. He is known for developing a storyline slowly but in this case the story moves just too slow. The first half of the book is spent on character development of the Afghan prisoner who plays no role in the actual terrorist plot. The reader gets a thorough history lesson on armed conflict in Afghanistan but again that is not relevant to what the novel is suppose to be about.


-----Target book------
Title: ['The Afghan']
["When British and American intelligence catch wind of a major Al Qaeda operation in the works, they are primed for action - but what can they do? They know nothing about the attack: the what, where or when. They have no sources in Al Qaeda, and it's impossible to plant someone. Impossible, unless . . .<br /><br />The Afghan is Izmat Khan, a five-year prisoner of Guantanamo Bay and a former senior commander of 

In [49]:
show_books(test_thriller ,correct = 0)


-----Review-----
Book title for review: ['Bzrk']
Review:
Gone by Michael Grant have been one of my favorite series for a long time, so going into BZRK I had high expections. I wasn't disappointed either! BZRK was typically Grant: action, believable and realistic teenagers, weird but intriguing plot. I'm almost convinced that this man could write a bread recipe and still make it interesting! Both the mc's could have been taken directly out of real life, and all of the side characters were funny and all had their different personalities and traits that made them special and unique! 

-----Target Book------
Title: ['Bzrk']
['These are no ordinary soldiers.<br />This is no ordinary war.<br />Welcome to the nano, where the only battle is for sanity. Losing is not an option when a world of madness is at stake.<br /><br />The most breathtaking and exhilarating ride you will take this year, this is the first in an incredible new action thriller series from Michael Grant, author of "Gone". The

### Model Testing - Science Fiction Books

In [50]:
# Reviewed books whose genre string contains 'science-fiction'.
science_fiction = books_reviews.loc[books_reviews['genres'].str.contains('science-fiction', na=False)]
print('Number of science fiction books with at least 1 review:', science_fiction.shape[0])

# Reviews of those books, then a fixed-seed sample of 200 for evaluation.
sf_reviews = reviews.loc[reviews['isbn13'].isin(science_fiction['isbn13'])]
print('Number of reviews for science fiction books:', sf_reviews.shape[0])
sf_reviews_sample = sf_reviews.sample(n=200, random_state=20).reset_index(drop=True)
sf_reviews_sample.shape

Number of science fiction books with at least 1 review: 359
Number of reviews for science fiction books: 1828


(200, 6)

In [51]:
# Keep the two columns the evaluation needs and add an object-typed
# placeholder column that will hold one list of predicted ISBNs per row.
sf_reviews_sample = (
    sf_reviews_sample[['isbn13', 'review']]
    .assign(predicted_isbn=None)
    .astype({'predicted_isbn': object})
)

In [52]:
test_sf = test_df(sf_reviews_sample,10)
test_sf

Current progress: 200 out of 200 rows
Current run time: 13.06 minutes
Computing Score
Accuracy: 5.5 %


Unnamed: 0,isbn13,review,predicted_isbn,intersection,score
0,9.781789e+12,Copy furnished by Net Galley for the price of ...,"[9781593077440.0, 9781978677043.0, 97819786770...",False,0
1,9.781303e+12,\nStreamlines the canon for old fans and bring...,"[9780312949815.0, 9780743583503.0, 97806718671...",False,0
2,9.780785e+12,"Parts of this were thrilling, especially for t...","[9781302910730.0, 9780785195542.0, 97813029118...",False,0
3,9.781515e+12,\nIf you're ready for action on every page of ...,"[9781626921009.0, 9781626923263.0, 97816269226...",False,0
4,9.780672e+12,"""MINIONS. MINIONS ARE MY FAVOURITE THING."" -Mi...","[9781536615609.0, 9781536615616.0, 97814526694...",False,0
...,...,...,...,...,...
195,9.781422e+12,\nI don't think this volume is significantly b...,"[9781506703961.0, 9781400073405.0, 97805713482...",False,0
196,9.780346e+12,\nI liked the first book but beginning with th...,"[9781338169867.0, 9781338298697.0, 97813381818...",False,0
197,9.781401e+12,The first volume of Ground Zero is probably th...,"[9781982543464.0, 9781421526652.0, 97817601136...",False,0
198,9.780425e+12,\n \nI'm on a Clive Cussler roll!So...,"[9781405937122.0, 9781405937139.0, 97814272620...",False,0


In [53]:
show_books(test_sf ,correct = 1)


-----Review-----
Book title for review: ['Avengers X-men: Utopia']
Review:

Things weren't given a fair go in San Francisco... because along came the Norman Osborn and his Dark Reign. The X-Men find themselves out maneuvered and out gunned by mutant bigotry, the Dark Avengers... and the Dark X-Men!!! But all is not loss, Cyclops has a plan,. After... over 35 years getting there the fruit of Cyclops leadership and experience is all that stands between the end of the mutants! Anything Dark Reign was golden for me... even this... a solid 8 out of 12. 


-----Target book------
Title: ['Avengers X-men: Utopia']
['WHO ARE THE DARK X-MEN? He has his own Avengers team and now Norman Osborn has his own X-Men team. The other shoe has finally dropped and Emma Frost has betrayed Cyclops and the rest of the X-Men. And that\'s just one of the huge surprises in "UTOPIA". Is that Namor? Cloak and Dagger? Professor X?! The thing that you aren\'t ready for is that Osborn is right. Collects Uncanny X-Me

In [54]:
show_books(test_sf ,correct = 0)


-----Review-----
Book title for review: ['Ghost Virus']
Review:
Copy furnished by Net Galley for the price of a review.Creepy crawly coats, scuttling sweaters, grasping girdles, slapping shirtsleeves, raincoats on the rampage.Â  A second skin, rivers of blood, gore galore, and a surfeit of guts.Â The writing is fine, but the plot segued into silliness and then boldly leapt into the realm of the absurd.Â  If this is meant to be horror, then it is not my brand.Â  If the purpose is to parody, then mission accomplished. 

-----Target Book------
Title: ['Ghost Virus']
["<p>Samira had been staring into her mirror all morning before she picked up the small bottle of sulphuric acid and poured it over her forehead. She was a young woman with her whole life ahead of her. What could have brought her to this? </p><p>DC Jerry Pardoe and DS Jamila Patel of Tooting Police suspect it's suicide. But then a random outbreak of horrific crimes in London points to something more sinister. A deadly virus i

### Model Testing - Romance Books

In [55]:
# Reviewed books whose genre string contains 'romance'.
romance = books_reviews.loc[books_reviews['genres'].str.contains('romance', na=False)]
print('Number of romance books with at least 1 review:', romance.shape[0])

# Reviews of those books, then a fixed-seed sample of 200 for evaluation.
romance_reviews = reviews.loc[reviews['isbn13'].isin(romance['isbn13'])]
print('Number of reviews for romance books:', romance_reviews.shape[0])
romance_reviews_sample = romance_reviews.sample(n=200, random_state=20).reset_index(drop=True)
romance_reviews_sample.shape

Number of romance books with at least 1 review: 582
Number of reviews for romance books: 2906


(200, 6)

In [56]:
# Keep the two columns the evaluation needs and add an object-typed
# placeholder column that will hold one list of predicted ISBNs per row.
romance_reviews_sample = (
    romance_reviews_sample[['isbn13', 'review']]
    .assign(predicted_isbn=None)
    .astype({'predicted_isbn': object})
)

In [57]:
test_romance = test_df(romance_reviews_sample,10)
test_romance

Current progress: 200 out of 200 rows
Current run time: 13.0 minutes
Computing Score
Accuracy: 4.5 %


Unnamed: 0,isbn13,review,predicted_isbn,intersection,score
0,9.781492e+12,\n \nIsn't it every woman's fantasy...,"[9780007459919.0, 9781509857050.0, 97803807194...",False,0
1,9.781594e+12,\nI am not usually very hard to please with no...,"[9781844882281.0, 9780143128755.0, 97817847014...",False,0
2,9.781785e+12,\n \nThe Custom of the Army- 4 star...,"[9781784751098.0, 9781592912728.0, 97819786707...",True,1
3,9.781303e+12,I was disappointed when I read that Spider-Wom...,"[9781606997710.0, 9781787630215.0, 97817841635...",False,0
4,9.780310e+12,"I unfortunately had to DNF this one, which is ...","[9781524776251.0, 9781401209889.0, 97814012061...",False,0
...,...,...,...,...,...
195,9.781721e+12,Pitched as a dark re-imagining and gender swap...,"[9780143108818.0, 9781942122456.0, 97819825179...",False,0
196,9.781557e+12,\n میدونید بعد از این فیلم اولین فکری که به ذه...,"[9781939810212.0, 9781906697358.0, 97803164425...",False,0
197,9.780062e+12,"It was just ok. I normally LOVE long novels, b...","[9781848691384.0, 9781721373512.0, 97812304105...",False,0
198,9.780554e+12,\nI read the 2nd book in the series years ago ...,"[9780062318701.0, 9781787531895.0, 97817875319...",False,0


In [58]:
show_books(test_romance ,correct = 1)


-----Review-----
Book title for review: ['Seven Stones to Stand or Fall : A Collection of Outlander Short Stories']
Review:

            
The Custom of the Army- 4 stars (read in 2012)The Space Between (originally in The Mad Scientist's Guide to World Domination) - 5 stars (read in 2013)A Plague of Zombies (originally in Down These Strange Streets) - 4 stars (read in 2011)A Leaf on the Wind of All Hallows - 4 stars (read in 2013)Virgins - 5 stars (read in 2016)A Fugitive Green - 4-1/2 starsBesieged - 5 stars

          

-----Target book------
Title: ['Seven Stones to Stand or Fall : A Collection of Outlander Short Stories']
['Previously published as <i>A Trail of Fire</i>. <b>Includes two never-published-before short stories</b> from the bestselling author of the Outlander series.<br /><br />Featuring all the characters you’ve come to love from the Outlander series, this brilliant collection of short stories throws you into the magical world of Outlander. Includes previously publishe

In [59]:
show_books(test_romance ,correct = 0)


-----Review-----
Book title for review: ["River's End"]
Review:

            
Isn't it every woman's fantasy to make love to a hot guy on a river bank in the mountains?The "villian" is someone you end up feeling sorry for. And at the end you love him and cry for him.Unexpected twist at the end.For a brief moment, I wanted to become a hiking guide. But then I came to my senses.

          

-----Target Book------
Title: ["River's End"]
["Olivia's parents had been one of Hollywood's glittering golden couples...until the night the monster came. The monster who destroyed their beautiful home and took her mother away from her forever. The monster with the face of her father...<br /><br />Now a young woman, Olivia finds her memory of that night has faded. Her mother's grieving family spared no effort to keep Olivia safe from the publicity, taking her to grow up in the beautiful natural splendor of the Pacific Northwest. But, despite the terror and the years that have passed, a part of her s