In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
from IPython.display import display_html, HTML
import urllib
import glob
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import numpy as np
from nltk.stem.porter import PorterStemmer
from nltk import word_tokenize          
from nltk.stem import WordNetLemmatizer
import re

# Matches a single ASCII letter; parse_book uses it to skip paragraphs that
# contain no alphabetic characters at all (e.g. rows of asterisks or digits).
letters = re.compile('[a-zA-Z]')

def df_to_html(df):
    """Display *df* in the notebook output as a rendered HTML table."""
    rendered_table = HTML(df.to_html())
    display_html(rendered_table)

def load_gutenberg_book(url, char_limit=10000, min_len_of_sections=40):
    """
    Returns a list of paragraphs in the book.

    url: A url from Project Gutenberg (any scheme ``urlopen`` accepts).
    char_limit: Amount of characters of the book to read. A falsy value
        (``None`` or ``0``) reads the whole book.
    min_len_of_sections: Each paragraph must be at least this many characters
        long (measured before line breaks inside it are collapsed).

    Paragraphs are separated by a blank line ("\\r\\n\\r\\n"). The first six
    qualifying sections are dropped (presumably front matter); if fewer than
    six sections qualify, the result is an empty list.
    """
    # Imported locally: on Python 3 ``urlopen`` lives in ``urllib.request``,
    # which a bare ``import urllib`` does not reliably make available.
    from urllib.request import urlopen

    # The response is a context manager, so the connection is always closed.
    with urlopen(url) as book:
        raw_bytes = book.read(char_limit) if char_limit else book.read()

    # The response yields bytes; decode before doing any text processing.
    # ``errors="replace"`` keeps a multi-byte character cut by the read limit
    # (or a non-UTF-8 book) from raising.
    book_text = raw_bytes.decode("utf-8", errors="replace")
    if char_limit:
        book_text = book_text[:char_limit]

    result = [
        section.replace("\r\n", " ").strip()
        for section in book_text.split("\r\n\r\n")
        if len(section) >= min_len_of_sections
    ]

    start_position = min(len(result), 6)
    return result[start_position:]

def get_text(path):
    """
    Handle all the weird ways books are encoded.

    Tries each candidate encoding in order and returns the file's text from
    the first one that decodes without error.

    path: Path of the text file to read.

    Raises ValueError (naming the file and the encodings tried) when none of
    the candidate encodings can decode the file.
    """
    encoding_options = ("ascii", "utf-8", "utf-16", "utf-32",
                        "utf-16-be", "utf-16-le", "utf-32-be", "utf-32-le")

    for encoding in encoding_options:
        try:
            with open(path, encoding=encoding) as book:
                return book.read()
        except UnicodeError:
            # UnicodeDecodeError is a subclass; move on to the next candidate.
            continue

    raise ValueError(
        "Could not decode {!r} with any of: {}".format(path, ", ".join(encoding_options))
    )

def extract_term(term_indicator, text, default=None, max_term_size=75):
    """
    Return the rest of the line that follows the first `term_indicator`.

    term_indicator: Marker string to look for, e.g. "Title:".
    text: Text to search.
    default: Returned when the marker is absent or the term is too long.
    max_term_size: Terms longer than this many characters are rejected.

    The returned term is stripped of surrounding whitespace and may be an
    empty string when nothing follows the marker on its line.
    """
    term_start = text.find(term_indicator)
    # If not found, return default.
    if term_start == -1:
        return default

    # Look for the end of the line *after* the indicator: indicators such as
    # "\n\nby " contain newlines themselves, and searching from the start of
    # the indicator would end the term before it begins.
    value_start = term_start + len(term_indicator)
    term_end = text.find("\n", value_start)
    if term_end == -1:
        # The term sits on the last line, with no trailing newline.
        term_end = len(text)

    term = text[value_start:term_end].strip()
    if len(term) > max_term_size:
        return default
    return term

def get_author_and_title(book_text, title_case=True):
    """
    Extract the (title, author) pair from a book's text.

    book_text: Full text of the book.
    title_case: When true, each value that was found is normalised with
        ``str.title()``. Note that ``str.title()`` also capitalises the letter
        after an apostrophe (e.g. "Alice'S").

    Returns a ``(title, author)`` tuple; either element is None when it could
    not be found.
    """
    title = extract_term("Title:", book_text, default=None)
    author = extract_term("Author:", book_text, default=None)

    # Solve for other strange author name formatting
    if author is None:
        front_matter = book_text[:15000]
        for term_indicator in ("\n\nby ", "\n\nOF ", "\nOF\n"):
            author = extract_term(term_indicator, front_matter, max_term_size=25)
            if author is not None:
                break

    if title_case:
        # Normalise each value independently so that a missing author does not
        # leave the title in its raw casing (and vice versa).
        if title:
            title = title.title()
        if author:
            author = author.title()
    return title, author

def locate_beginning_of_text(title, author, text):
    """
    Return the index in `text` at which the book's own content starts.

    title, author: Values extracted for this book (either may be None); used
        only as a fallback when no Gutenberg start marker is present.
    text: Full text of the book.

    Returns the position 20 characters past the start of the Gutenberg
    "START OF ..." marker when one is present. Otherwise it returns the first
    occurrence of the author, else of the title, else 0 (the whole text).
    """
    start_markers = ("START OF THIS PROJECT GUTENBERG",
                     "START OF THE PROJECT GUTENBERG")
    for marker in start_markers:
        marker_position = text.find(marker)
        # Test the raw find() result: adding the offset first would turn the
        # "not found" value -1 into 19 and hide the miss.
        if marker_position != -1:
            return marker_position + 20

    # The author takes precedence over the title.
    for term in (author, title):
        if term:
            term_position = text.find(term)
            if term_position != -1:
                return term_position

    return 0

def locate_end_of_text(text):
    """
    Return the index of the Gutenberg end-of-book marker in `text`.

    Several marker spellings are tried and the largest position found is
    returned. When none is present, "Fail" is printed and None is returned
    (so that slicing up to the result keeps the text to its end).
    """
    end_markers = ("End of Project Gutenberg",
                   "END OF THIS PROJECT GUTENBERG EBOOK",
                   "END OF THE PROJECT GUTENBERG EBOOK",
                   "End of the Project Gutenberg Etext")

    last_marker_position = -1
    for marker in end_markers:
        last_marker_position = max(last_marker_position, text.find(marker))

    if last_marker_position >= 0:
        return last_marker_position

    print("Fail")
    return None

def parse_book(book_text, min_paragraph_characters=100):
    """
    Split the text of a book into paragraph records.

    Returns a list of dictionaries with the keys
    {title, author, contents, part}, where `part` is the paragraph's position
    among the blank-line-separated chunks of the book body.

    Paragraphs are skipped when they are shorter than
    `min_paragraph_characters`, contain no ASCII letter, or mention
    "gutenberg" or "chapter" (case-insensitively).
    """
    title, author = get_author_and_title(book_text)
    body_start = locate_beginning_of_text(title, author, book_text)
    body_end = locate_end_of_text(book_text)
    raw_paragraphs = book_text[body_start:body_end].split("\n\n")

    records = []
    for part, raw in enumerate(raw_paragraphs):
        # Collapse hard line breaks inside a paragraph into spaces.
        paragraph = raw.replace("\n", " ").strip()

        long_enough = len(paragraph) >= min_paragraph_characters
        if not (long_enough and letters.search(paragraph)):
            continue

        lowered = paragraph.lower()
        if "gutenberg" in lowered or "chapter" in lowered:
            continue

        records.append({"title": title,
                        "author": author,
                        "contents": paragraph,
                        "part": part})
    return records

def get_list_of_book_paths(book_directory):
    """Return the paths of all ``.txt`` files directly inside `book_directory`."""
    pattern = book_directory + '/*.txt'
    return glob.glob(pattern)

def books_to_pandas(book_directory, min_paragraph_characters=100):
    """
    Load every book in `book_directory` into one DataFrame.

    Each row is one paragraph record as produced by parse_book;
    `min_paragraph_characters` is passed through to it.
    """
    rows = [
        record
        for book_path in get_list_of_book_paths(book_directory)
        for record in parse_book(get_text(book_path), min_paragraph_characters)
    ]
    return pd.DataFrame(rows)

class LemmaTokenizer(object):
    """Callable that tokenizes a document and lemmatizes every token."""

    def __init__(self):
        self.wnl = WordNetLemmatizer()

    def __call__(self, doc):
        lemmatize = self.wnl.lemmatize
        return [lemmatize(token) for token in word_tokenize(doc)]

def cosine_similarity(new_docs, old_docs):
    """
    Returns a similarity matrix where the first row is an array of
    similarities of the first new_doc compared with each of the old
    docs.

    new_docs, old_docs: 2-D document-term matrices (dense numpy arrays or
        scipy sparse matrices) with one document per row.

    The result is the matrix of row-by-row dot products, which equals the
    cosine similarity only when every row has unit L2 norm.
    """
    # ``.dot`` is a matrix product for both dense arrays and sparse matrices,
    # whereas ``*`` multiplies dense arrays element-wise.
    return new_docs.dot(old_docs.T)

def find_closest_matches(similarity_matrix, n_matches_to_return=1):
    """
    For every row, return the column indices of its highest similarities.

    Expects a dense array such as [[1., .5, .2],
                                   [.3, 1., .1],
                                   [.2, .4, 1.]]
    in which each row holds the similarities for one document. The result has
    one row per input row, listing the `n_matches_to_return` best column
    indices ordered from most to least similar.
    """
    def best_columns(row):
        ascending = row.argsort()
        # Keep the tail (largest values) and flip it to descending order.
        return ascending[-n_matches_to_return:][::-1]

    return np.apply_along_axis(best_columns, 1, similarity_matrix)

# Fitted vectorizer and vectorized paragraphs per set of book titles, so that
# repeated searches over the same book(s) skip the expensive TF-IDF fit.
# NOTE(review): the key is the title tuple only; entries are not invalidated
# if a different `books` frame is passed for the same titles.
simple_cache = {}

def search_book(paragraph, book_title, books, n_results=10, print_results=False, return_title=False):
    """
    Find the paragraphs of the selected book(s) closest to `paragraph`.

    paragraph: Query text.
    book_title: A single title, or a list/tuple of titles, to search in.
    books: DataFrame with at least `title` and `contents` columns.
    n_results: Number of nearest paragraphs to return.
    print_results: When true, also print each distance and paragraph.
    return_title: When true, each result also carries the matching title.

    Returns a list of (distance, text) tuples, or (distance, text, title)
    tuples when `return_title` is true, ordered from nearest to farthest
    using NearestNeighbors' default distance on the TF-IDF vectors.
    """
    book_title_list = book_title if isinstance(book_title, (list, tuple)) else (book_title,)
    select_books = books[books.title.isin(book_title_list)].reset_index()
    contents = select_books.contents

    # Lists are unhashable, so always key the cache on a tuple of titles.
    cache_key = tuple(book_title_list)
    if cache_key not in simple_cache:
        vectorizer = TfidfVectorizer(max_df=.7, min_df=.0001, tokenizer=LemmaTokenizer()).fit(contents)
        simple_cache[cache_key] = {"vectorizer": vectorizer,
                                   "vect_book": vectorizer.transform(contents)}

    vectorizer = simple_cache[cache_key]["vectorizer"]
    vect_book = simple_cache[cache_key]["vect_book"]
    vect_paragraph = vectorizer.transform([paragraph])

    nbrs = NearestNeighbors(n_neighbors=n_results, algorithm='brute').fit(vect_book)

    distances, indices = nbrs.kneighbors(vect_paragraph)

    # `indices` are row positions in vect_book, which line up with the
    # positions in select_books after reset_index(); `.iloc` replaces the
    # removed `.ix` indexer.
    matched_rows = select_books.iloc[indices[0]]
    search_results = list(zip(distances[0], matched_rows.contents, matched_rows.title))

    if print_results:
        for dist, text, title in search_results:
            print(dist)
            print(text)
            print("\n")

    if return_title:
        return search_results
    return [(dist, text) for dist, text, title in search_results]

def compare_book_paragraphs(book_title, books, n_close_matches=10, same_book_in_corpus=True):
    """
    Compare every paragraph of the selected book(s) with the same selection.

    book_title: A single title, or a list/tuple of titles.
    books: DataFrame with at least `title` and `contents` columns.
    n_close_matches: Number of non-identical matches to display.
    same_book_in_corpus: When true, the second-nearest hit is used for each
        paragraph (the nearest is expected to be the paragraph itself);
        otherwise the nearest hit is used.

    Prints and displays two tables: zero-distance ("perfect") matches and the
    closest non-zero matches. Returns nothing.
    """
    titles = book_title if isinstance(book_title, (list, tuple)) else (book_title,)
    select_books = books[books.title.isin(titles)].reset_index()

    if same_book_in_corpus:
        hits_needed, hit_position = 2, 1
    else:
        hits_needed, hit_position = 1, 0

    rows = []
    for paragraph in select_books.contents:
        hits = search_book(paragraph, book_title, books, n_results=hits_needed)
        rows.append([paragraph] + list(hits[hit_position]))

    df = pd.DataFrame(rows, columns=["Text 1", "Distance", "Text 2"])
    df = df.sort_values("Distance")

    print("Perfect matches")
    df_to_html(df[df.Distance == 0].drop_duplicates())
    print("\n")

    print("Close matches")
    non_identical = df[df.Distance != 0]
    df_to_html(non_identical.drop_duplicates("Distance").head(n_close_matches))
    
def compare_book_to_books(book_title, other_book_titles, books, n_close_matches=20):
    """
    Compare every paragraph of one book with the paragraphs of other books.

    book_title: Title of the book whose paragraphs are looked up.
    other_book_titles: Title, or iterable of titles, to search in;
        `book_title` itself is excluded if present.
    books: DataFrame with at least `title` and `contents` columns.
    n_close_matches: Number of non-identical matches to display.

    Prints and displays the zero-distance ("perfect") matches and the closest
    non-zero matches, and returns the full frame of non-zero matches sorted by
    distance (columns "Text 1", "Distance", "Text 2", "Title").
    """
    if isinstance(other_book_titles, str):
        other_book_titles = (other_book_titles,)
    # Always pass search_book a tuple: it is hashable (usable as a cache key)
    # and never contains the book being compared.
    other_book_titles = tuple(title for title in other_book_titles if title != book_title)

    book = books[books.title == book_title].reset_index()

    results = []
    for paragraph in book.contents:
        result = search_book(paragraph, other_book_titles, books, n_results=1, return_title=True)[0]
        results.append([paragraph] + list(result))

    df = pd.DataFrame(results, columns=["Text 1", "Distance", "Text 2", "Title"])
    df.sort_values("Distance", inplace=True)

    print("Perfect matches")
    perfect_matches = df[df.Distance == 0].drop_duplicates()
    df_to_html(perfect_matches)
    print("\n")

    print("Close matches")
    close_matches = df[df.Distance != 0]
    df_to_html(close_matches.drop_duplicates("Distance").head(n_close_matches))

    return close_matches

In [2]:
# Parse every .txt file under "popular_books" into one paragraph-per-row frame.
# min_paragraph_characters=1 keeps even very short paragraphs.
books = books_to_pandas("popular_books", min_paragraph_characters=1)

In [3]:
# Preview the first rows of the parsed corpus.
books.head()

Unnamed: 0,author,contents,part,title
0,"Alexandre Dumas, Pere",THE COUNT OF MONTE CRISTO,2,The Count Of Monte Cristo
1,"Alexandre Dumas, Pere","by Alexandre Dumas, Pere",3,The Count Of Monte Cristo
2,"Alexandre Dumas, Pere","On the 24th of February, 1815, the look-out at...",6,The Count Of Monte Cristo
3,"Alexandre Dumas, Pere","As usual, a pilot put off immediately, and rou...",7,The Count Of Monte Cristo
4,"Alexandre Dumas, Pere","Immediately, and according to custom, the ramp...",8,The Count Of Monte Cristo


In [4]:
# Number of paragraph rows extracted per book title.
books.title.value_counts()

War And Peace                                11373
The Count Of Monte Cristo                    11211
The Three Musketeers                          8206
The Complete Works Of William Shakespeare     6351
The Man In The Iron Mask                      5404
The Works Of Edgar Allan Poe                  4836
Notre-Dame De Paris                           4025
Great Expectations                            3834
Ben-Hur                                       3572
A Tale Of Two Cities                          3315
The Adventures Of Sherlock Holmes             2540
Moby Dick; Or The Whale                       2485
A Journey To The Centre Of The Earth          2446
Adventures Of Huckleberry Finn, Complete      2386
The Phantom Of The Opera                      2340
Emma                                          2320
The Iliad Of Homer                            2199
Leviathan                                     2104
Dracula                                       2070
Pride And Prejudice            

## Search for one paragraph in one book

Number of comparisons ~ 5000

In [5]:
%%time
# Free-text query against a single book. The title is spelled "Alice'S"
# because titles are normalised with str.title() when the books are parsed.
paragraph = "alice doesn't know which way to go"
book_title = "Alice'S Adventures In Wonderland"

search_book(paragraph, book_title, books, n_results=5, print_results=True)

0.872256370575
'Then it doesn't matter which way you go,' said the Cat.


1.14624113894
She ate a little bit, and said anxiously to herself, 'Which way? Which way?', holding her hand on the top of her head to feel which way it was growing, and she was quite surprised to find that she remained the same size: to be sure, this generally happens when one eats cake, but Alice had got so much into the way of expecting nothing but out-of-the-way things to happen, that it seemed quite dull and stupid for life to go on in the common way.


1.15233505399
'It goes on, you know,' the Hatter continued, 'in this way:--


1.22691887913
'Oh, I'm not particular as to size,' Alice hastily replied; 'only one doesn't like changing so often, you know.'


1.2269301777
'I DON'T know,' said the Caterpillar.


Wall time: 4.72 s


In [6]:
%%time
# Second query against the same book; search_book reuses the vectorizer it
# stored in simple_cache for this title instead of fitting a new one.
paragraph = "queen says off with his or her head"
book_title = "Alice'S Adventures In Wonderland"

search_book(paragraph, book_title, books, n_results=5, print_results=True)

1.12373165041
'Are their heads off?' shouted the Queen.


1.16790123046
All the time they were playing the Queen never left off quarrelling with the other players, and shouting 'Off with his head!' or 'Off with her head!' Those whom she sentenced were taken into custody by the soldiers, who of course had to leave off being arches to do this, so that by the end of half an hour or so there were no arches left, and all the players, except the King, the Queen, and Alice, were in custody and under sentence of execution.


1.16958287991
The Queen turned crimson with fury, and, after glaring at her for a moment like a wild beast, screamed 'Off with her head! Off--'


1.17078483868
The Queen had only one way of settling all difficulties, great or small. 'Off with his head!' she said, without even looking round.


1.17398804409
'Off with her head!' the Queen shouted at the top of her voice. Nobody moved.


Wall time: 16 ms


## Compare all paragraphs in one book

Number of comparisons ~ 25 million

In [7]:
# See the entire string when printing a data frame.
# None is the documented "no limit" value; pandas releases before 1.0 only
# accepted the older -1 sentinel, so fall back to it there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [8]:
%%time
# Compare every paragraph of the book selected above (book_title) with the
# other paragraphs of that same book.
compare_book_paragraphs(book_title, books)

Perfect matches


Unnamed: 0,Text 1,Distance,Text 2
260,'Wow! wow! wow!',0,'Wow! wow! wow!'
254,CHORUS.,0,CHORUS.




Close matches


Unnamed: 0,Text 1,Distance,Text 2
592,"Will you, won't you, will you, won't you, will you join the dance? Will you, won't you, will you, won't you, won't you join the dance?",0.085699,"Will you, won't you, will you, won't you, will you join the dance? Will you, won't you, will you, won't you, won't you join the dance?""'"
647,"'Beautiful Soup! Who cares for fish, Game, or any other dish? Who would not give all else for two Pennyworth only of beautiful Soup? Pennyworth only of beautiful Soup? Beau--ootiful Soo--oop! Beau--ootiful Soo--oop! Soo--oop of the e--e--evening, Beautiful, beauti--FUL SOUP!'",0.58181,"'Beautiful Soup, so rich and green, Waiting in a hot tureen! Who for such dainties would not stoop? Soup of the evening, beautiful Soup! Soup of the evening, beautiful Soup! Beau--ootiful Soo--oop! Beau--ootiful Soo--oop! Soo--oop of the e--e--evening, Beautiful, beautiful Soup!"
183,"'I DON'T know,' said the Caterpillar.",0.651287,"'I don't see,' said the Caterpillar."
154,'You!' said the Caterpillar contemptuously. 'Who are YOU?',0.707552,'Who are YOU?' said the Caterpillar.
734,"'Nothing whatever,' said Alice.",0.748197,"'Nothing,' said Alice."
733,'Nothing WHATEVER?' persisted the King.,0.754383,"'Nothing whatever,' said Alice."
651,"'Soo--oop of the e--e--evening, Beautiful, beautiful Soup!'",0.775282,"'Beautiful Soup, so rich and green, Waiting in a hot tureen! Who for such dainties would not stoop? Soup of the evening, beautiful Soup! Soup of the evening, beautiful Soup! Beau--ootiful Soo--oop! Beau--ootiful Soo--oop! Soo--oop of the e--e--evening, Beautiful, beautiful Soup!"
315,"'Not the same thing a bit!' said the Hatter. 'You might just as well say that ""I see what I eat"" is the same thing as ""I eat what I see""!'",0.815753,"'You might just as well say,' added the March Hare, 'that ""I like what I get"" is the same thing as ""I get what I like""!'"
554,'What was THAT like?' said Alice.,0.822642,'What for?' said Alice.
678,"'Give your evidence,' said the King; 'and don't be nervous, or I'll have you executed on the spot.'",0.846233,"'Give your evidence,' the King repeated angrily, 'or I'll have you executed, whether you're nervous or not.'"


Wall time: 12.5 s


## Compare all paragraphs in one book to all books

Number of comparisons ~ 500 million

In [9]:
# Distinct titles present in the corpus, as a plain list.
all_book_titles = books.title.unique().tolist()

In [10]:
%%time
# Match each paragraph of one book against all other books;
# compare_book_to_books removes book_title from the list it searches.
book_title = "Alice'S Adventures In Wonderland"
close_matches = compare_book_to_books(book_title, all_book_titles, books)

Perfect matches


Unnamed: 0,Text 1,Distance,Text 2,Title
797,THE END,0,THE END,Dracula
259,CHORUS.,0,Chorus.,The Complete Works Of William Shakespeare
260,'Wow! wow! wow!',0,'Tu whu! Tu whu! Tu whu!',Grimms' Fairy Tales
1,Lewis Carroll,0,LEWIS WALLACE,Ben-Hur
2,THE MILLENNIUM FULCRUM EDITION 3.0,0,The Millennium Fulcrum Edition 1.7,Through The Looking-Glass




Close matches


Unnamed: 0,Text 1,Distance,Text 2,Title
338,"'I don't know what you mean,' said Alice.",0.511965,"'I don't know what you mean by ""glory,""' Alice said.",Through The Looking-Glass
586,"'Very much indeed,' said Alice.",0.54244,"'Very much indeed,' Alice said politely.",Through The Looking-Glass
542,'Certainly not!' said Alice indignantly.,0.654421,"'Certainly,' said Alice.",Through The Looking-Glass
612,'And what are they made of?' Alice asked in a tone of great curiosity.,0.658161,'But what are they for?' Alice asked in a tone of great curiosity.,Through The Looking-Glass
391,"'Of course they were', said the Dormouse; '--well in.'",0.69369,"'Of course,' was my answer; 'of course we are.'",Wuthering Heights
786,'I won't!' said Alice.,0.707242,"'I see you don't,' said Alice.",Through The Looking-Glass
271,'I don't much care where--' said Alice.,0.721446,"'I see you don't,' said Alice.",Through The Looking-Glass
592,"Will you, won't you, will you, won't you, will you join the dance? Will you, won't you, will you, won't you, won't you join the dance?",0.728049,"""You won't, won't you? Well, I sh'd _reckon_ you won't!""","Adventures Of Huckleberry Finn, Complete"
596,"Will you, won't you, will you, won't you, will you join the dance? Will you, won't you, will you, won't you, won't you join the dance?""'",0.728398,"""You won't, won't you? Well, I sh'd _reckon_ you won't!""","Adventures Of Huckleberry Finn, Complete"
435,'Yes!' shouted Alice.,0.733234,"'Yes, if you like,' said Alice.",Through The Looking-Glass


Wall time: 8min 45s


In [11]:
# Which books supply the 100 nearest non-identical matches
# (close_matches is already sorted by distance).
close_matches[:100].Title.value_counts()

Through The Looking-Glass                   57
Wuthering Heights                           15
The Man In The Iron Mask                    4 
The Adventures Of Sherlock Holmes           4 
Adventures Of Huckleberry Finn, Complete    3 
The Count Of Monte Cristo                   2 
A Study In Scarlet                          2 
The Works Of Edgar Allan Poe                2 
Dracula                                     1 
Grimms' Fairy Tales                         1 
The Wonderful Wizard Of Oz                  1 
A Tale Of Two Cities                        1 
Pride And Prejudice                         1 
Robin Hood                                  1 
The Phantom Of The Opera                    1 
The Iliad Of Homer                          1 
Notre-Dame De Paris                         1 
Great Expectations                          1 
Name: Title, dtype: int64

In [12]:
%%time
# Same comparison as above, for a different book.
book_title = "The Adventures Of Sherlock Holmes"
close_matches = compare_book_to_books(book_title, all_book_titles, books)

Perfect matches


Unnamed: 0,Text 1,Distance,Text 2,Title
1180,"""Certainly.""",0,"""Certainly.""",The Count Of Monte Cristo
124,"""It was.""",0,"""It was.""",The Count Of Monte Cristo
1413,"""Yes, sir.""",0,"""Yes, sir.""",The Count Of Monte Cristo
389,"""Yes.""",0,"""Yes.""",The Count Of Monte Cristo
1465,"""What, then?""",0,"""What then?""",The Count Of Monte Cristo
936,"""Entirely.""",0,"""Entirely.""",The Count Of Monte Cristo
1501,"""Always.""",0,"""Always.""",The Count Of Monte Cristo
1502,"""And why?""",0,"""And why?""",The Count Of Monte Cristo
916,"""Ah!""",0,"""Ah!""",The Man In The Iron Mask
1524,"""Yes, all.""",0,"""Yes, all.""",The Three Musketeers




Close matches


Unnamed: 0,Text 1,Distance,Text 2,Title
704,"""I think that it is very probable.""",0.204008,"""I think that is very probable.""",A Journey To The Centre Of The Earth
415,"""And what did you see?""",0.220975,"""What did you see?""",The Phantom Of The Opera
1642,"""But what will you do?""",0.272276,"""But you, what will you do?""",The Man In The Iron Mask
202,"""You have the photograph?""",0.282313,"""Have you?""",Great Expectations
2492,"""Yes, the wine-cellar.""",0.284365,"""Yes.""",The Count Of Monte Cristo
163,"""And what then?""",0.285819,"""What then?""",The Count Of Monte Cristo
1924,"""And is that all?""",0.289264,"""Is that all?""",The Man In The Iron Mask
1254,"""What, then, did Peterson do?""",0.299098,"""What did he do, then?""",The Man In The Iron Mask
402,"""What are you going to do, then?"" I asked.",0.311689,"""What are you going to do?"" I asked.",Dracula
135,"""And what of Irene Adler?"" I asked.",0.339803,"""What?"" I asked.",


Wall time: 14min 21s


In [13]:
# Which books supply the 200 nearest non-identical matches.
close_matches[:200].Title.value_counts()

The Count Of Monte Cristo                   63
The Man In The Iron Mask                    19
A Study In Scarlet                          17
The Three Musketeers                        13
A Tale Of Two Cities                        12
The Works Of Edgar Allan Poe                11
Around The World In 80 Days                 8 
Great Expectations                          7 
Peter Pan                                   6 
Adventures Of Huckleberry Finn, Complete    6 
Pride And Prejudice                         5 
Dracula                                     4 
Moby Dick; Or The Whale                     3 
Through The Looking-Glass                   3 
The Phantom Of The Opera                    3 
Emma                                        3 
A Journey To The Centre Of The Earth        3 
Notre-Dame De Paris                         2 
The Adventures Of Tom Sawyer, Complete      2 
The Invisible Man                           2 
Wuthering Heights                           1 
The Picture O

## Compare all paragraphs in all books to all books

Number of comparisons > 13 billion

In [18]:
%%time
def compare_all_books(books, n_close_matches=20):
    """
    For every paragraph, find its nearest paragraph in any *other* book.

    books: DataFrame with at least `title` and `contents` columns.
    n_close_matches: Accepted but not used by this function.

    A single TF-IDF vocabulary is fitted on all paragraphs. For each titled
    book, its paragraphs are matched against the paragraphs of all remaining
    rows with a brute-force 1-nearest-neighbour search.

    Returns a DataFrame with the columns book_1_title, book_1_paragraph,
    book_2_title, book_2_paragraph and paragraph_distance, sorted by
    ascending paragraph_distance.
    """
    vectorizer = TfidfVectorizer(max_df=.7, min_df=.0001, tokenizer=LemmaTokenizer())
    vect_book = vectorizer.fit(books.contents).transform(books.contents)

    book_1_titles, book_1_paragraphs = [], []
    book_2_titles, book_2_paragraphs = [], []
    paragraph_distances = []

    for book_title in books.title.dropna().unique().tolist():
        book_mask = (books.title == book_title).values
        other_book_mask = ~book_mask
        other_books = books[other_book_mask]

        nbrs = NearestNeighbors(n_neighbors=1, algorithm='brute')
        nbrs.fit(vect_book[other_book_mask])
        distances, indices = nbrs.kneighbors(vect_book[book_mask])
        # Row positions of the nearest neighbours within `other_books`.
        nearest = indices.flatten()

        book_1_paragraphs.extend(books.loc[book_mask, "contents"].tolist())
        book_2_paragraphs.extend(other_books.contents.values[nearest])
        book_1_titles.extend([book_title] * sum(book_mask))
        book_2_titles.extend(other_books.title.values[nearest])
        paragraph_distances.extend(distances.flatten())

    results = {"book_1_title": book_1_titles,
               "book_1_paragraph": book_1_paragraphs,
               "book_2_title": book_2_titles,
               "book_2_paragraph": book_2_paragraphs,
               "paragraph_distance": paragraph_distances}

    return pd.DataFrame(results).sort_values("paragraph_distance")

# Nearest cross-book match for every paragraph in the corpus.
results = compare_all_books(books)

Wall time: 28min 44s


In [55]:
# First 100 distinct rows whose match is at distance 0.
# NOTE(review): distance 0 does not always mean identical text (see the
# "Co-ack!" / "Shelled!" row); presumably both vectors are empty - confirm.
results[results.paragraph_distance == 0].drop_duplicates()[:100]

Unnamed: 0,book_1_paragraph,book_1_title,book_2_paragraph,book_2_title,paragraph_distance
114317,THE END,The Complete Works Of William Shakespeare,THE END,Dracula,0
8445,"""Certainly.""",The Count Of Monte Cristo,"""Certainly.""",The Man In The Iron Mask,0
8430,"""Probably.""",The Count Of Monte Cristo,"""Probably.""",The Man In The Iron Mask,0
45108,"""Whom?""",The Phantom Of The Opera,"""Whom?""",The Three Musketeers,0
8425,"""Are you sure of it?""",The Count Of Monte Cristo,"""Are you sure of it?""",The Three Musketeers,0
19244,"""Exactly.""",The Three Musketeers,"""Exactly.""",The Count Of Monte Cristo,0
45146,"""Co-ack!""",The Phantom Of The Opera,"""Shelled!""",The Works Of Edgar Allan Poe,0
8411,"""How do you know?""",The Count Of Monte Cristo,"""How do you know?""",The Phantom Of The Opera,0
69129,"""Never.""",A Journey To The Centre Of The Earth,"""Never.""",The Count Of Monte Cristo,0
8406,"""Yes.""",The Count Of Monte Cristo,"""Yes.""",The Man In The Iron Mask,0


In [54]:
# First 100 non-identical matches, keeping one row per distinct distance value.
results[results.paragraph_distance > 0].drop_duplicates("paragraph_distance")[:100]

Unnamed: 0,book_1_paragraph,book_1_title,book_2_paragraph,book_2_title,paragraph_distance
13890,"""Raoul! Raoul!""",The Man In The Iron Mask,"""Raoul! Raoul! Raoul!""",The Phantom Of The Opera,0.053526
37674,Produced by David Widger and Carlo Traverso,The Works Of Edgar Allan Poe,Produced by David Widger,"Adventures Of Huckleberry Finn, Complete",0.109705
107978,"If you discover a Defect in this etext within 90 days of receiv- ing it, you can receive a refund of the money (if any) you paid for it by sending an explanatory note within that time to the person you received it from. If you received it on a physical medium, you must return it with your note, and such person may choose to alternatively give you a replacement copy. If you received it electronically, such person may choose to alternatively give you a second opportunity to receive it electronically.",The Complete Works Of William Shakespeare,"If you discover a Defect in this etext within 90 days of receiving it, you can receive a refund of the money (if any) you paid for it by sending an explanatory note within that time to the person you received it from. If you received it on a physical medium, you must return it with your note, and such person may choose to alternatively give you a replacement copy. If you received it electronically, such person may choose to alternatively give you a second opportunity to receive it electronically.",,0.127986
6588,"""Oh, heavens!""",The Count Of Monte Cristo,"""Oh, Heaven! Oh, Heaven!""",The Phantom Of The Opera,0.145526
44943,"""Raoul!""",The Phantom Of The Opera,"""Raoul! Raoul!""",The Man In The Iron Mask,0.153920
56625,"Soldiers began then to make on the barrow The largest of dead-fires: dark o'er the vapor The smoke-cloud ascended, the sad-roaring fire, 10 Mingled with weeping (the wind-roar subsided) Till the building of bone it had broken to pieces, Hot in the heart. Heavy in spirit They mood-sad lamented the men-leader's ruin; And mournful measures the much-grieving widow 15 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 20 * * * * * * *",Beowulf,* Brucoea ferruginea.,The Count Of Monte Cristo,0.154537
97495,THE PREFACE,The Picture Of Dorian Gray,AUTHOR’S PREFACE,The Three Musketeers,0.156077
56317,"65 Dragon, to govern, who guarded a treasure, A high-rising stone-cliff, on heath that was grayish: A path 'neath it lay, unknown unto mortals. Some one of earthmen entered the mountain, The heathenish hoard laid hold of with ardor; 70 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *",Beowulf,* Brucoea ferruginea.,The Count Of Monte Cristo,0.156910
104938,BOOK FOURTH.,Notre-Dame De Paris,The Fourth Book,Leviathan,0.158824
36765,"""Surely, surely.""",A Tale Of Two Cities,"""Surely.""",,0.159182
