In [3]:
import pandas as pd

In [7]:
import pandas as pd
import re
import os
from pathlib import Path

def clean_text_for_rnn(text):
    """Clean and prepare text for RNN training"""
    # Remove extra whitespace and normalize
    text = ' '.join(text.split())
    
    # Remove special characters but keep punctuation for style
    text = re.sub(r'[^\w\s\.\,\!\?\;\:\-\'\"]', ' ', text)
    
    # Remove multiple spaces
    text = re.sub(r'\s+', ' ', text)
    
    return text.strip()

def create_author_dataset_record(author, title, text, text_type="letters"):
    """Create a single record for RNN training dataset"""
    
    cleaned_text = clean_text_for_rnn(text)
    
    # Calculate text statistics for quality control
    word_count = len(cleaned_text.split())
    char_count = len(cleaned_text)
    
    return {
        'author': author,
        'title': title,
        'text': cleaned_text,
        'text_type': text_type,
        'word_count': word_count,
        'char_count': char_count,
        'suitable_for_training': word_count >= 100  # Minimum words for training
    }

def parse_filename(filename):
    """Extract author and title from filename"""
    # Remove .txt extension
    name_without_ext = filename.replace('.txt', '')
    
    # Split by '___' to get author and title
    if '___' in name_without_ext:
        parts = name_without_ext.split('___')
        author = parts[0].strip()
        title = parts[1].strip() if len(parts) > 1 else "Unknown Title"
    else:
        # Fallback if format is different
        author = name_without_ext
        title = "Unknown Title"
    
    return author, title

def process_gutenberg_files():
    """Process all files in the Gutenberg directory structure"""
    
    # Define the path to the text files
    txt_path = Path('Gutenberg_original/Gutenberg/txt')
    
    # Check if directory exists
    if not txt_path.exists():
        print(f"Directory {txt_path} does not exist!")
        return None
    
    all_records = []
    processed_files = 0
    skipped_files = 0
    
    # Process all .txt files in the directory
    for file_path in txt_path.glob('*.txt'):
        try:
            print(f"Processing: {file_path.name}")
            
            # Parse author and title from filename
            author, title = parse_filename(file_path.name)
            
            # Read the file content
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                text_content = file.read()
            
            # Skip if file is too small
            if len(text_content.strip()) < 100:
                print(f"  Skipped: File too small ({len(text_content)} chars)")
                skipped_files += 1
                continue
            
            # Create dataset record
            record = create_author_dataset_record(
                author=author,
                title=title,
                text=text_content,
                text_type="letters"  # You can modify this based on content analysis
            )
            
            # Only add if suitable for training
            if record['suitable_for_training']:
                # Remove the suitable_for_training field for final dataset
                final_record = {
                    'author': record['author'],
                    'title': record['title'],
                    'text': record['text'],
                    'text_type': record['text_type'],
                    'word_count': record['word_count']
                }
                all_records.append(final_record)
                processed_files += 1
                print(f"  ✓ Added: {author} - {title} ({record['word_count']} words)")
            else:
                print(f"  Skipped: Not suitable for training ({record['word_count']} words)")
                skipped_files += 1
                
        except Exception as e:
            print(f"  Error processing {file_path.name}: {str(e)}")
            skipped_files += 1
            continue
    
    print(f"\n=== PROCESSING SUMMARY ===")
    print(f"Files processed successfully: {processed_files}")
    print(f"Files skipped: {skipped_files}")
    print(f"Total records created: {len(all_records)}")
    
    if all_records:
        # Create DataFrame
        df = pd.DataFrame(all_records)
        
        # Display summary
        print(f"\n=== DATASET SUMMARY ===")
        print(f"Total authors: {df['author'].nunique()}")
        print(f"Total records: {len(df)}")
        print(f"Average word count: {df['word_count'].mean():.1f}")
        print(f"Min word count: {df['word_count'].min()}")
        print(f"Max word count: {df['word_count'].max()}")
        
        print(f"\n=== AUTHORS IN DATASET ===")
        author_counts = df['author'].value_counts()
        for author, count in author_counts.items():
            print(f"{author}: {count} text(s)")
        
        # Save to CSV
        output_filename = 'author_identification_dataset.csv'
        df.to_csv(output_filename, index=False)
        print(f"\n✓ Dataset saved as '{output_filename}'")
        
        # Display first few rows
        print(f"\n=== SAMPLE RECORDS ===")
        print(df.head())
        
        return df
    else:
        print("No records were created!")
        return None

# Run the processing
print("Starting to process Gutenberg files...")
dataset = process_gutenberg_files()

if dataset is not None:
    print(f"\n=== FINAL DATASET INFO ===")
    print(dataset.info())
else:
    print("Failed to create dataset!")

Starting to process Gutenberg files...
Processing: Abraham Lincoln___Lincoln Letters.txt
  ✓ Added: Abraham Lincoln - Lincoln Letters (1065 words)
Processing: Abraham Lincoln___Lincoln's First Inaugural Address.txt
  ✓ Added: Abraham Lincoln - Lincoln's First Inaugural Address (3626 words)
Processing: Abraham Lincoln___Lincoln's Gettysburg Address, given November 19, 1863.txt
  ✓ Added: Abraham Lincoln - Lincoln's Gettysburg Address, given November 19, 1863 (299 words)
Processing: Abraham Lincoln___Lincoln's Inaugurals, Addresses and Letters (Selections).txt
  ✓ Added: Abraham Lincoln - Lincoln's Inaugurals, Addresses and Letters (Selections) (43649 words)
Processing: Abraham Lincoln___Lincoln's Second Inaugural Address.txt
  ✓ Added: Abraham Lincoln - Lincoln's Second Inaugural Address (703 words)
Processing: Abraham Lincoln___Speeches and Letters of Abraham Lincoln, 1832-1865.txt
  ✓ Added: Abraham Lincoln - Speeches and Letters of Abraham Lincoln, 1832-1865 (90170 words)
Processing:

In [14]:
oldAI = pd.read_csv('author_identification_dataset.csv')

# === 1. Get Shape (rows, columns) ===
print("\n--- DATASET SHAPE ---")
print(f"Rows (records): {oldAI.shape[0]}")
print(f"Columns: {oldAI.shape[1]}")
print(f"Column names: {list(oldAI.columns)}")

# === 2. Info about columns & datatypes ===
print("\n--- DATASET INFO ---")
print(oldAI.info())

# === 3. Preview first few rows ===
print("\n--- DATASET PREVIEW (head) ---")
print(oldAI.head())

# === 4. Preview last few rows ===
print("\n--- LAST RECORDS (tail) ---")
print(oldAI.tail())

# === 5. Summary statistics for numeric columns ===
print("\n--- NUMERIC SUMMARY ---")
print(oldAI.describe())

# === 6. Check memory usage (approximate size in MB) ===
print("\n--- MEMORY USAGE ESTIMATE ---")
print(oldAI.memory_usage(deep=True).sum() / (1024**2), "MB")


--- DATASET SHAPE ---
Rows (records): 3036
Columns: 5
Column names: ['author', 'title', 'text', 'text_type', 'word_count']

--- DATASET INFO ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3036 entries, 0 to 3035
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   author      3036 non-null   object
 1   title       3036 non-null   object
 2   text        3036 non-null   object
 3   text_type   3036 non-null   object
 4   word_count  3036 non-null   int64 
dtypes: int64(1), object(4)
memory usage: 118.7+ KB
None

--- DATASET PREVIEW (head) ---
            author                                              title  \
0  Abraham Lincoln                                    Lincoln Letters   
1  Abraham Lincoln                  Lincoln's First Inaugural Address   
2  Abraham Lincoln  Lincoln's Gettysburg Address, given November 1...   
3  Abraham Lincoln  Lincoln's Inaugurals, Addresses and Letters (S...   
4  Abraham Lin

In [1]:
import pandas as pd

# Load your dataset
df = pd.read_csv("author_identification_dataset.csv")

# === OPTION 1: Pretty dictionary-like format ===
first_row = df.iloc[0]  # row at index 0
print("\n--- FIRST ROW (detailed) ---")
for col, val in first_row.items():
    print(f"{col}:")
    print(val)
    print("-" * 50)

# === OPTION 2: Print only the text if it's very long ===
print("\n--- FULL TEXT CONTENT ---")
print(first_row['text'])


--- FIRST ROW (detailed) ---
author:
Abraham Lincoln
--------------------------------------------------
title:
Lincoln Letters
--------------------------------------------------
text:
LINCOLN LETTERS By Abraham Lincoln Published by The Bibilophile Society NOTE The letters herein by Lincoln are so thoroughly characteristic of the man, and are in themselves so completely self-explanatory, that it requires no comment to enable the reader fully to understand and appreciate them. It will be observed that the philosophical admonitions in the letter to his brother, Johnston, were written on the same sheet with the letter to his father. The promptness and decision with which Lincoln despatched the multitudinous affairs of his office during the most turbulent scenes of the Civil War are exemplified in his unequivocal order to the Attorney-General, indorsed on the back of the letter of Hon. Austin A. King, requesting a pardon for John B. Corner. The indorsement bears even date with the letter i

In [3]:
import pandas as pd

# Load your dataset
df = pd.read_csv("author_identification_dataset.csv")

print("\n=== DATASET SUMMARY ===")
print(f"Total records: {len(df)}")
print(f"Unique authors: {df['author'].nunique()}")

print("\n=== AUTHORS LIST (with number of records) ===")
author_counts = df['author'].value_counts()

for author, count in author_counts.items():
    print(f"{author}: {count} records")

# Optional: Show top 10 authors only
print("\n=== TOP 10 AUTHORS (by record count) ===")
print(author_counts.head(10))


=== DATASET SUMMARY ===
Total records: 3036
Unique authors: 142

=== AUTHORS LIST (with number of records) ===
William Wymark Jacobs: 97 records
George Alfred Henty: 89 records
R M Ballantyne: 88 records
Nathaniel Hawthorne: 86 records
William Dean Howells: 84 records
Robert Louis Stevenson: 79 records
Henry James: 72 records
Anthony Trollope: 71 records
Charles Dickens: 61 records
Andrew Lang: 60 records
Charlotte Mary Yonge: 60 records
Edward Stratemeyer: 58 records
Bret Harte: 58 records
Sir Arthur Conan Doyle: 57 records
Edward Phillips Oppenheim: 53 records
Henry Rider Haggard: 52 records
Herbert George Wells: 51 records
Jack London: 48 records
Jacob Abbott: 47 records
Mark Twain: 47 records
Thomas Henry Huxley: 44 records
Charles Kingsley: 44 records
Harold Bindloss: 43 records
Rudyard Kipling: 43 records
Lyman Frank Baum: 42 records
George Bernard Shaw: 42 records
John Ruskin: 42 records
Daniel Defoe: 40 records
John Galsworthy: 40 records
G K Chesterton: 39 records
James Fenim

## API Retrievation

In [None]:
import requests

# Search for Abraham Lincoln works
url = "https://gutendex.com/books?search=lincoln"
response = requests.get(url)
data = response.json()

print(f"Total results: {data['count']}")
for book in data["results"][:5]:  # show first 5
    print(f"Title: {book['title']}")
    print(f"Authors: {[a['name'] for a in book['authors']]}")
    print(f"Text URL: {book['formats'].get('text/plain', 'N/A')}")
    print("-" * 50)

In [5]:
import requests

# Correct endpoint with trailing slash
url = "https://gutendex.com/books/?languages=en"

# Fetch first page of results (32 books per page)
resp = requests.get(url).json()

print(f"Total books in Gutendex: {resp['count']}")
print(f"Next page: {resp['next']}")
print(f"Number of results in this page: {len(resp['results'])}")
print("-" * 50)

# Show first 5 example books
for book in resp['results'][:5]:
    title = book['title']
    authors = [a['name'] for a in book['authors']]
    text_url = (book['formats'].get('text/plain; charset=utf-8') or
                book['formats'].get('text/plain; charset=us-ascii') or
                book['formats'].get('text/plain'))
    print(f"Title: {title}")
    print(f"Authors: {authors}")
    print(f"Text URL: {text_url}")
    print("-" * 50)

Total books in Gutendex: 61145
Next page: https://gutendex.com/books/?languages=en&page=2
Number of results in this page: 32
--------------------------------------------------
Title: Moby Dick; Or, The Whale
Authors: ['Melville, Herman']
Text URL: https://www.gutenberg.org/ebooks/2701.txt.utf-8
--------------------------------------------------
Title: Frankenstein; Or, The Modern Prometheus
Authors: ['Shelley, Mary Wollstonecraft']
Text URL: https://www.gutenberg.org/ebooks/84.txt.utf-8
--------------------------------------------------
Title: Romeo and Juliet
Authors: ['Shakespeare, William']
Text URL: https://www.gutenberg.org/ebooks/1513.txt.utf-8
--------------------------------------------------
Title: A Room with a View
Authors: ['Forster, E. M. (Edward Morgan)']
Text URL: https://www.gutenberg.org/ebooks/2641.txt.utf-8
--------------------------------------------------
Title: Pride and Prejudice
Authors: ['Austen, Jane']
Text URL: https://www.gutenberg.org/ebooks/1342.txt.utf-8


In [6]:
import requests
import pandas as pd
import re

# === Cleaning function (same as before) ===
def clean_text(text):
    text = ' '.join(text.split())
    text = re.sub(r'[^\w\s\.\,\!\?\;\:\-\'\"]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# === Fetch books from Gutendex ===
def fetch_books(language="en", max_results=500, search=None):
    """
    Fetches books metadata from Gutendex API (with fixed URL).
    :param language: Language filter (en = English)
    :param max_results: Limit how many books to fetch
    :param search: Optional author/title keyword
    """
    url = f"https://gutendex.com/books/?languages={language}"  # 👈 updated with slash
    if search:
        url += f"&search={search}"
    results = []
    
    while url and len(results) < max_results:
        resp = requests.get(url).json()
        results.extend(resp["results"])
        print(f"Fetched {len(results)} so far...")  # progress log
        url = resp["next"]  # follow pagination
    
    return results[:max_results]

# === Download the text content ===
def download_book_text(book):
    # Try different plain-text formats (some books only have ascii or utf-8)
    for key in ["text/plain; charset=utf-8",
                "text/plain; charset=us-ascii",
                "text/plain"]:
        if key in book["formats"]:
            link = book["formats"][key]
            try:
                text = requests.get(link, timeout=15).text
                return clean_text(text)
            except Exception as e:
                print(f"⚠️ Could not download {book['title']} - {e}")
                return None
    return None

# === Convert Gutendex book metadata to dataset record ===
def book_to_record(book):
    authors = [a["name"] for a in book["authors"]]
    author = authors[0] if authors else "Unknown"
    title = book["title"]
    
    text = download_book_text(book)
    if not text or len(text.split()) < 100:  # skip very short/empty texts
        return None
    
    word_count = len(text.split())
    
    return {
        "author": author,
        "title": title,
        "text": text,
        "text_type": "prose",
        "word_count": word_count
    }

# === MAIN PIPELINE ===
def expand_dataset(existing_csv="author_identification_dataset.csv",
                   output_csv="author_identification_dataset_expanded.csv",
                   n_new_books=300):
    
    # Load existing dataset
    df_existing = pd.read_csv(existing_csv)
    print(f"Existing dataset: {len(df_existing)} records, "
          f"{df_existing['author'].nunique()} unique authors")

    # Fetch books from Gutendex
    books = fetch_books(language="en", max_results=n_new_books)

    print(f"Fetched {len(books)} new book metadata from Gutendex API.")

    # Convert metadata → dataset records
    records = []
    for book in books:
        record = book_to_record(book)
        if record:
            records.append(record)

    print(f"Downloaded and cleaned {len(records)} new valid books.")

    df_new = pd.DataFrame(records)

    # Merge with existing
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)\
                    .drop_duplicates(subset=["author","title"])

    # Save updated dataset
    df_combined.to_csv(output_csv, index=False)

    # Print new summary
    print("\n=== UPDATED DATASET SUMMARY ===")
    print(f"Total records: {len(df_combined)}")
    print(f"Unique authors: {df_combined['author'].nunique()}")
    print("\nTop 10 authors:")
    print(df_combined['author'].value_counts().head(10))

    return df_combined

# === Run it ===
expanded_df = expand_dataset(n_new_books=300)  # fetch 300 new English books

Existing dataset: 3036 records, 142 unique authors
Fetched 32 so far...
Fetched 64 so far...
Fetched 96 so far...
Fetched 128 so far...
Fetched 160 so far...
Fetched 192 so far...
Fetched 224 so far...
Fetched 256 so far...
Fetched 288 so far...
Fetched 320 so far...
Fetched 300 new book metadata from Gutendex API.
Downloaded and cleaned 298 new valid books.

=== UPDATED DATASET SUMMARY ===
Total records: 3329
Unique authors: 350

Top 10 authors:
author
William Wymark Jacobs     97
George Alfred Henty       89
R M Ballantyne            88
Nathaniel Hawthorne       86
William Dean Howells      84
Robert Louis Stevenson    79
Henry James               72
Anthony Trollope          71
Charles Dickens           61
Andrew Lang               60
Name: count, dtype: int64


In [2]:

# df_2 = pd.read_csv('author_identification_dataset_expanded.csv')

# df_2.shape()

In [1]:
import requests
import pandas as pd
import re

# === Cleaning function ===
def clean_text(text):
    text = ' '.join(text.split())
    text = re.sub(r'[^\w\s\.\,\!\?\;\:\-\'\"]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# === Fetch books from Gutendex ===
def fetch_books(language="en", max_results=500, search=None):
    """
    Fetches books metadata from Gutendex API (with pagination).
    :param language: Language filter (en = English)
    :param max_results: Limit how many books to fetch
    :param search: Optional author/title keyword
    """
    url = f"https://gutendex.com/books/?languages={language}"  # ✅ correct URL with slash
    if search:
        url += f"&search={search}"
    results = []
    
    while url and len(results) < max_results:
        resp = requests.get(url).json()
        results.extend(resp["results"])
        print(f"Fetched {len(results)} so far...")  # progress log
        url = resp["next"]  # pagination
    
    return results[:max_results]

# === Download the text content ===
def download_book_text(book):
    for key in ["text/plain; charset=utf-8",
                "text/plain; charset=us-ascii",
                "text/plain"]:
        if key in book["formats"]:
            link = book["formats"][key]
            try:
                text = requests.get(link, timeout=15).text
                return clean_text(text)
            except Exception as e:
                print(f"⚠️ Could not download {book['title']} - {e}")
                return None
    return None

# === Convert book metadata → record ===
def book_to_record(book):
    authors = [a["name"] for a in book["authors"]]
    author = authors[0] if authors else "Unknown"
    title = book["title"]
    
    text = download_book_text(book)
    if not text or len(text.split()) < 100:
        return None
    
    word_count = len(text.split())
    return {
        "author": author,
        "title": title,
        "text": text,
        "text_type": "prose",
        "word_count": word_count
    }

# === MAIN PIPELINE ===
def expand_dataset(existing_csv="author_identification_dataset.csv",
                   output_csv="author_identification_dataset_expanded.csv",
                   fresh_books_csv="author_identification_dataset_2500.csv",
                   n_new_books=2500):
    
    # Load existing dataset
    df_existing = pd.read_csv(existing_csv)
    print(f"Existing dataset: {len(df_existing)} records, "
          f"{df_existing['author'].nunique()} unique authors")

    # Fetch new books
    books = fetch_books(language="en", max_results=n_new_books)
    print(f"Fetched {len(books)} new book metadata from Gutendex API.")

    # Convert metadata → records
    records = []
    for book in books:
        record = book_to_record(book)
        if record:
            records.append(record)

    df_new = pd.DataFrame(records)
    print(f"Downloaded and cleaned {len(df_new)} new valid books.")
    print(f"Unique authors (new df only): {df_new['author'].nunique()}")

    # Save the new books dataset separately
    df_new.to_csv(fresh_books_csv, index=False)
    print(f"✅ Saved new books only to {fresh_books_csv}")

    # Merge with existing
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)\
                    .drop_duplicates(subset=["author","title"])
    df_combined.to_csv(output_csv, index=False)

    # Print combined summary
    print("\n=== UPDATED DATASET SUMMARY ===")
    print(f"Total records: {len(df_combined)}")
    print(f"Unique authors: {df_combined['author'].nunique()}")
    print("\nTop 10 authors:")
    print(df_combined['author'].value_counts().head(10))

    return df_combined, df_new

# === Run it ===
expanded_df, new_books_df = expand_dataset(n_new_books=2500)



Existing dataset: 3036 records, 142 unique authors
Fetched 32 so far...
Fetched 64 so far...
Fetched 96 so far...
Fetched 128 so far...
Fetched 160 so far...
Fetched 192 so far...
Fetched 224 so far...
Fetched 256 so far...
Fetched 288 so far...
Fetched 320 so far...
Fetched 352 so far...
Fetched 384 so far...
Fetched 416 so far...
Fetched 448 so far...
Fetched 480 so far...
Fetched 512 so far...
Fetched 544 so far...
Fetched 576 so far...
Fetched 608 so far...
Fetched 640 so far...
Fetched 672 so far...
Fetched 704 so far...
Fetched 736 so far...
Fetched 768 so far...
Fetched 800 so far...
Fetched 832 so far...
Fetched 864 so far...
Fetched 896 so far...
Fetched 928 so far...
Fetched 960 so far...
Fetched 992 so far...
Fetched 1024 so far...
Fetched 1056 so far...
Fetched 1088 so far...
Fetched 1120 so far...
Fetched 1152 so far...
Fetched 1184 so far...
Fetched 1216 so far...
Fetched 1248 so far...
Fetched 1280 so far...
Fetched 1312 so far...
Fetched 1344 so far...
Fetched 1376 so f

In [1]:
import pandas as pd

# Load your expanded dataset
df = pd.read_csv("author_identification_dataset_expanded.csv")

# Calculate character lengths for each text
df["char_count"] = df["text"].apply(len)

# Summary of character lengths
print("\n=== CHARACTER COUNT SUMMARY ===")
print(f"Total records: {len(df)}")
print(f"Average characters per record: {df['char_count'].mean():,.0f}")
print(f"Median characters per record: {df['char_count'].median():,.0f}")
print(f"Minimum characters: {df['char_count'].min():,}")
print(f"Maximum characters: {df['char_count'].max():,}")

# See distribution of character lengths
print("\n=== CHARACTER COUNT DISTRIBUTION HEAD ===")
print(df['char_count'].describe(percentiles=[.25, .5, .75, .9, .95, .99]))


=== CHARACTER COUNT SUMMARY ===
Total records: 5483
Average characters per record: 556,717
Median characters per record: 415,557
Minimum characters: 641
Maximum characters: 27,260,077

=== CHARACTER COUNT DISTRIBUTION HEAD ===
count    5.483000e+03
mean     5.567172e+05
std      7.091880e+05
min      6.410000e+02
25%      1.885010e+05
50%      4.155570e+05
75%      7.235500e+05
90%      1.112660e+06
95%      1.467559e+06
99%      2.737460e+06
max      2.726008e+07
Name: char_count, dtype: float64


In [None]:
import pandas as pd

# Load dataset
df = pd.read_csv("author_identification_dataset_expanded.csv")

# Filter rows with author = "Unknown"
unknown_books = df[df['author'].str.strip().str.lower() == "unknown"]

print(f"Found {len(unknown_books)} records with 'Unknown' author.\n")

# Print just titles (and maybe text length for context)
for idx, row in unknown_books.iterrows():
    print(f"- Title: {row['title']}  (Length: {len(row['text'])} chars)")

Found 159 records with 'Unknown' author.

- Title: Beowulf: An Anglo-Saxon Epic Poem  (Length: 254004 chars)
- Title: The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art  (Length: 137037 chars)
- Title: Chambers's Twentieth Century Dictionary (part 1 of 4: A-D)  (Length: 2207689 chars)
- Title: Manual of Classical Erotology (De figuris Veneris)  (Length: 304845 chars)
- Title: The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3  (Length: 3683369 chars)
- Title: The Thousand and One Nights, Vol. I.: Commonly Called the Arabian Nights' Entertainments  (Length: 1475858 chars)
- Title: The King James Version of the Bible  (Length: 4325842 chars)
- Title: The Story of Beowulf, Translated from Anglo-Saxon into Modern English Prose  (Length: 208851 chars)
- Title: Doctrina Christiana: The first book printed in the Philippines, Manila, 1593.  

In [13]:
import pandas as pd

# Load dataset
df = pd.read_csv("author_identification_dataset_expanded.csv")

# Filter rows with author = "Various"
various_books = df[df['author'].str.strip().str.lower() == "various"]

print(f"Found {len(various_books)} records with 'Various' as author.\n")

# Print only titles
for title in various_books['title']:
    print(title)

Found 109 records with 'Various' as author.

Notes and Queries, Number 82, May 24, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc.
Golden Days for Boys and Girls, Vol. XII, Jan. 3, 1891
Encyclopaedia Britannica, 11th Edition, "Cincinnatus" to "Cleruchy": Volume 6, Slice 4
Encyclopaedia Britannica, 11th Edition, "Gichtel, Johann" to "Glory": Volume 12, Slice 1
Encyclopaedia Britannica, 11th Edition, "Coucy-le-Château" to "Crocodile": Volume 7, Slice 6
Webster's Unabridged Dictionary
Encyclopaedia Britannica, 11th Edition, "Bohemia" to "Borgia, Francis": Volume 4, Slice 2
Encyclopaedia Britannica, 11th Edition, "Basso-relievo" to "Bedfordshire": Volume 3, Slice 4
The New Gresham Encyclopedia. A to Amide: Vol. 1 Part 1
Encyclopaedia Britannica, 11th Edition, "Fenton, Edward" to "Finistere": Volume 10, Slice 3
Encyclopaedia Britannica, 11th Edition, "Echinoderma" to "Edward, prince of Wales": Volume 8, Slice 10
Notes and Queries, Vol. IV, Nu

In [10]:
import requests

def find_author_by_title_api(title, language="en"):
    """
    Search Gutendex API for a book title and extract author or translator if author missing.
    """
    url = f"https://gutendex.com/books/?search={title}&languages={language}"
    resp = requests.get(url).json()
    
    if not resp["results"]:
        return "Unknown"
    
    # Take the first match
    book = resp["results"][0]
    
    # Authors
    if book["authors"]:
        authors = [a["name"] for a in book["authors"]]
        return authors[0]
    
    # If no author, fallback to translators
    if book["translators"]:
        translators = [t["name"] for t in book["translators"]]
        return translators[0] + " (translator)"
    
    return "Unknown"

# === Test Example ===
title = "The lesser Key of Solomon"
author = find_author_by_title_api(title)
print(f"Title: {title}")
print(f"Extracted Author: {author}")

Title: The lesser Key of Solomon
Extracted Author: Unknown


In [None]:
# Analyzing request- The task is to find authors for 109 listed titles, all currently attributed to 'Various'.
# - The user seems to focus on the last two titles at the end.
# - Many titles are from the Encyclopaedia Britannica, 11th Edition, edited by Hugh Chisholm.
# - For compilations like magazines, authors are often listed as "Various" with specific editors.
# | Title | Author/Editor |
# |-------|--------------|
# | Notes and Queries, Number 82, May 24, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc. | Various |
# | Golden Days for Boys and Girls, Vol. XII, Jan. 3, 1891 | Various (Editor: James Elverson) |
# | Encyclopaedia Britannica, 11th Edition, "Cincinnatus" to "Cleruchy": Volume 6, Slice 4 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Gichtel, Johann" to "Glory": Volume 12, Slice 1 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Coucy-le-Château" to "Crocodile": Volume 7, Slice 6 | Various (Editor: Hugh Chisholm) |
# | Webster's Unabridged Dictionary | Noah Webster |
# | Encyclopaedia Britannica, 11th Edition, "Bohemia" to "Borgia, Francis": Volume 4, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Basso-relievo" to "Bedfordshire": Volume 3, Slice 4 | Various (Editor: Hugh Chisholm) |
# | The New Gresham Encyclopedia. A to Amide: Vol. 1 Part 1 | Various |
# | Encyclopaedia Britannica, 11th Edition, "Fenton, Edward" to "Finistere": Volume 10, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Echinoderma" to "Edward, prince of Wales": Volume 8, Slice 10 | Various (Editor: Hugh Chisholm) |
# | Notes and Queries, Vol. IV, Number 97, September 6, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc. | Various |
# | Encyclopaedia Britannica, 11th Edition, "Lord Chamberlain" to "Luqman": Volume 17, Slice 1 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Bréquigny, Louis Georges Oudard Feudrix de" to "Bulgaria": Volume 4, Part 3 | Various (Editor: Hugh Chisholm) |
# | The Antiquarian Magazine & Bibliographer; Vol. 4, July-Dec 1884 | Various (Editor: Edward Walford) |
# | Encyclopaedia Britannica, 11th Edition, "Bent, James" to "Bibirine": Volume 3, Slice 6 | Various (Editor: Hugh Chisholm) |
# | The Journal of Negro History, Volume 8, 1923 | Various (Editor: Carter G. Woodson) |
# | Encyclopaedia Britannica, 11th Edition, "Ethiopia" to "Evangelical Association": Volume 9, Slice 8 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Dübner, Johann Friedrich" to "Dyeing": Volume 8, Slice 8 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Lightfoot, Joseph" to "Liquidation": Volume 16, Slice 6 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Dodwell, Edward" to "Drama": Volume 8, Slice 6 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Bradford, William" to "Brequigny, Louis": Volume 4, Slice 4 | Various (Editor: Hugh Chisholm) |
# | The Cumulative Book Review Digest, Volume 1, 1905: Complete in a single alphabet | Various (Similar to Book Review Digest series, edited by various including Mary Katharine Reely) |
# | Encyclopaedia Britannica, 11th Edition, "Capefigue" to "Carneades": Volume 5, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Cowboy Songs, and Other Frontier Ballads | Various (Collected by John A. Lomax) |
# | Encyclopaedia Britannica, 11th Edition, "Baconthorpe" to "Bankruptcy": Volume 3, Part 1, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Cockaigne" to "Columbus, Christopher": Volume 6, Slice 6 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "G" to "Gaskell, Elizabeth": Volume 11, Slice 4 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "David, St" to "Demidov": Volume 7, Slice 10 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Cerargyrite" to "Charing Cross": Volume 5, Slice 7 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Anjar" to "Apollo": Volume 2, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Lucifer: A Theosophical Magazine. Volume I. September 1887-February 1888. | Various (Edited by H. P. Blavatsky and Mabel Collins) |
# | Encyclopaedia Britannica, 11th Edition, "Austria, Lower" to "Bacon": Volume 3, Part 1, Slice 1 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Edwardes, Sir Herbert Benjamin" to "Ehrenbreitstein": Volume 9, Slice 1 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Demijohn" to "Destructors": Volume 8, Slice 2 | Various (Editor: Hugh Chisholm) |
# | The London Mercury, Vol. I, Nos. 1-6, November 1919 to April 1920 | Various (Editor: Sir John Collings Squire) |
# | Encyclopaedia Britannica, 11th Edition, "Chitral" to "Cincinnati": Volume 6, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Geoponici" to "Germany": Volume 11, Slice 7 | Various (Editor: Hugh Chisholm) |
# | Astounding Stories of Super-Science February 1930 | Various (Editor: Harry Bates) |
# | Encyclopaedia Britannica, 11th Edition, "Bulgaria" to "Calgary": Volume 4, Part 4 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Luray Cavern" to "Mackinac Island": Volume 17, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Dinard" to "Dodsworth, Roger": Volume 8, Slice 5 | Various (Editor: Hugh Chisholm) |
# | Scientific American, Vol. XXXVII.—No. 2. [New Series.], July 14, 1877: A Weekly Journal of Practical Information, Art, Science, Mechanics, Chemistry, and Manufactures | Various |
# | Encyclopaedia Britannica, 11th Edition, "Kite-Flying" to "Kyshtym": Volume 15, Slice 8 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Borgia, Lucrezia" to "Bradford, John": Volume 4, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "England" to "English Finance": Volume 9, Slice 4 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "English Language" to "Epsom Salts": Volume 9, Slice 6 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Hudson River" to "Hurstmonceaux": Volume 13, Slice 8 | Various (Editor: Hugh Chisholm) |
# | The New Gresham Encyclopedia. Deposition to Eberswalde: Volume 4, Part 1 | Various |
# | Encyclopaedia Britannica, 11th Edition, "Map" to "Mars": Volume 17, Slice 6 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Coquelin, Benoît Constant" to "Costume": Volume 7, Slice 4 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Kelly, Edward" to "Kite": Volume 15, Slice 7 | Various (Editor: Hugh Chisholm) |
# | The New Gresham Encyclopedia. Estremoz to Felspar: Volume 4, Part 3 | Various |
# | The inter ocean curiosity shop for the year 1883 | Various (Compiled by various contributors; specific editor not identified in search results, but consistent with pattern) |
# | Encyclopaedia Britannica, 11th Edition, "Columbus" to "Condottiere": Volume 6, Slice 7 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Helmont, Jean" to "Hernosand": Volume 13, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "McKinley, William" to "Magnetism, Terrestrial": Volume 17, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Calhoun" to "Camoens": Volume 5, Slice 1 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "French Literature" to "Frost, William": Volume 11, Slice 2 | Various (Editor: Hugh Chisholm) |
# | The International Monthly, Volume 3, No. 1, April, 1851 | Various |
# | The Atlantic Monthly, Volume 17, No. 101, March, 1866: A Magazine of Literature, Science, Art, and Politics | Various |
# | Encyclopaedia Britannica, 11th Edition, "Carnegie Andrew" to "Casus Belli": Volume 5, Slice 4 | Various (Editor: Hugh Chisholm) |
# | The Book Review Digest, v. 16, 1920 : $b Sixteenth annual accumulation. Reviews of 1920 books | Various (Editor: Mary Katharine Reely) |
# | Scientific  American, Volume XXXVI., No. 8, February 24, 1877: A Weekly Journal of Practical Information, Art, Science,; Mechanics, Chemistry, and Manufactures. | Various |
# | Encyclopaedia Britannica, 11th Edition, "Gyantse" to "Hallel": Volume 12, Slice 7 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Haller, Albrecht" to "Harmonium": Volume 12, Slice 8 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Bedlam" to "Benson, George": Volume 3, Slice 5 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Greek Law" to "Ground-Squirrel": Volume 12, Slice 5 | Various (Editor: Hugh Chisholm) |
# | The journal of the American-Irish Historical Society, Vol. IX, 1910 | Various |
# | Encyclopaedia Britannica, 11th Edition, "Latin Language" to "Lefebvre, François-Joseph": Volume 16, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Franciscans" to "French Language": Volume 11, Slice 1 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Fox, George" to "France": Volume 10, Slice 7 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Arundel, Thomas" to "Athens": Volume 2, Slice 7 | Various (Editor: Hugh Chisholm) |
# | Scientific  American, Volume XXIV., No. 12,  March 18, 1871: A Weekly Journal of Practical Information, Art, Science,; Mechanics, Chemistry, and Manufactures. | Various |
# | Encyclopaedia Britannica, 11th Edition, "Camorra" to "Cape Colony": Volume 5, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Hinduism" to "Home, Earls of": Volume 13, Slice 5 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Gordon, Lord George" to "Grasses": Volume 12, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Evangelical Church Conference" to "Fairbairn, Sir William": Volume 10, Slice 1 | Various (Editor: Hugh Chisholm) |
# | The Washington Historical Quarterly, Volume V, 1914 | Various (Edited by Edmond S. Meany) |
# | Encyclopaedia Britannica, 11th Edition, "Fairbanks, Erastus" to "Fens": Volume 10, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Punch, or the London Charivari, Volume 152, January 24, 1917 | Various |
# | The New Gresham Encyclopedia. Atrebates to Bedlis: Vol. 1 Part 3 | Various |
# | Encyclopaedia Britannica, 11th Edition, "Celtes, Konrad" to "Ceramics": Volume 5, Slice 6 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "L" to "Lamellibranchia": Volume 16, Slice 1 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Electrostatics" to "Engis": Volume 9, Slice 3 | Various (Editor: Hugh Chisholm) |
# | The International Monthly, Volume 4, No. 3, October, 1851 | Various |
# | The American Missionary — Volume 49, No. 03, March, 1895 | Various |
# | The New Gresham Encyclopedia. Ebert to Estremadura: Volume 4, Part 2 | Various |
# | Encyclopaedia Britannica, 11th Edition, "Letter" to "Lightfoot, John": Volume 16, Slice 5 | Various (Editor: Hugh Chisholm) |
# | The Journal of Negro History, Volume 7, 1922 | Various (Editor: Carter G. Woodson) |
# | The Germ: Thoughts towards Nature in Poetry, Literature and Art | Various (Edited by Dante Gabriel Rossetti; Commentator: William Michael Rossetti) |
# | Encyclopaedia Britannica, 11th Edition, "Dagupan" to "David": Volume 7, Slice 9 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Lamennais, Robert de" to "Latini, Brunetto": Volume 16, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Destructors" to "Diameter": Volume 8, Slice 3 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Gloss" to "Gordon, Charles George": Volume 12, Slice 2 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Cat" to "Celt": Volume 5, Slice 5 | Various (Editor: Hugh Chisholm) |
# | Current History, Vol. VIII, No. 3, June 1918: A Monthly Magazine of the New York Times | Various |
# | Punch, or the London Charivari, Volume 1, Complete | Various |
# | Encyclopaedia Britannica, 11th Edition, "Cosway, Richard" to "Coucy, Le Châtelain de": Volume 7, Slice 5 | Various (Editor: Hugh Chisholm) |
# | The journal of the American-Irish Historical Society, Vol. VIII, 1909 | Various |
# | Astounding Stories, July, 1931 | Various (Editor: Harry Bates) |
# | Encyclopaedia Britannica, 11th Edition, "Crocoite" to "Cuba": Volume 7, Slice 7 | Various (Editor: Hugh Chisholm) |
# | The Illustrated London Reading Book | Various |
# | Encyclopaedia Britannica, 11th Edition, "Diameter" to "Dinarchus": Volume 8, Slice 4 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Chariot" to "Chatelaine": Volume 5, Slice 8 | Various (Editor: Hugh Chisholm) |
# | The Newcastle Song Book; or, Tyne-Side Songster: Being a Collection of Comic and Satirical Songs, Descriptive of Eccentric Characters, and the Manners and Customs of a Portion of the Labouring Population of Newcastle and the Neighbourhood | Various |
# | The Scrap Book, Volume 1, No. 4: June 1906 | Various |
# | Encyclopaedia Britannica, 11th Edition, "Mars" to "Matteawan": Volume 17, Slice 7 | Various (Editor: Hugh Chisholm) |
# | Encyclopaedia Britannica, 11th Edition, "Groups, Theory of" to "Gwyniad": Volume 12, Slice 6 | Various (Editor: Hugh Chisholm) 

In [None]:
# Analyzing request- The task is to find authors for a list of 100 titles and organize them in a table.
# - The list includes works like "Beowulf" and "Arabian Nights," many with unknown authors.
# - Many titles seem to be from Project Gutenberg, given the character lengths.
# - Let's search for authors using online resources, focusing on Project Gutenberg.
# - Given the volume, grouping titles for efficiency makes sense.
# Planning author search- Many titles, like Beowulf, are likely anonymous or traditional.
# - Mahabharata is attributed to Vyasa, while the Bible has various authors.
# - For "Beowulf: An Anglo-Saxon Epic Poem," J. Lesslie Hall is listed, likely as translator.
# | Title | Author |
# |-------|--------|
# | Beowulf: An Anglo-Saxon Epic Poem | J. Lesslie Hall (translator)- The lesser Key of Solomon is probably attributed to King Solomon, but often listed as Unknown in Project Gutenberg.
#  |
# | The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art | Anonymous |
# | Chambers's Twentieth Century Dictionary (part 1 of 4: A-D) | Thomas Davidson |
# | Manual of Classical Erotology (De figuris Veneris) | Friedrich Karl Forberg |
# | The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3 | Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli) |
# | The Thousand and One Nights, Vol. I.: Commonly Called the Arabian Nights' Entertainments | Anonymous (edited by Edward Stanley Poole) |
# | The King James Version of the Bible | Various (King James Version translators) |
# | The Story of Beowulf, Translated from Anglo-Saxon into Modern English Prose | Ernest J. B. Kirtlan (translator) |
# | Doctrina Christiana: The first book printed in the Philippines, Manila, 1593. | Anonymous |
# | Philippine Folk Tales | Clara Kern Bayliss, Berton L. Maxfield, Fletcher Gardner, Laura Watson Benedict, W. H. Millington (collectors) |
# | The Egyptian Book of the dead | P. Le Page Renouf and Edouard Naville (translators) |
# | The Doré Bible Gallery, Complete: Containing One Hundred Superb Illustrations, and a Page of Explanatory Letter-press Facing Each | Anonymous (illustrated by Gustave Doré) |
# | Forty-Eighth Annual Report of the Bureau of American Ethnology to the Secretary of the Smithsonian Institution, 1930-1931, Government Printing Office, Washington, 1933. | Bureau of American Ethnology (edited by various) |
# | The Philippine Islands, 1493-1898 — Volume 07 of 55: 1588-1591; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century | Emma Helen Blair, James Alexander Robertson, Edward Gaylord Bourne (editors) |
# | Reliques of Ancient English Poetry, Volume 2 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date | Thomas Percy (editor) |
# | A Polyglot of Foreign Proverbs: Comprising French, German, Dutch, Spanish, Portuguese and Danish, with English Translations and a General Index | Henry Ellis (compiler) |
# | The Book of the Thousand Nights and a Night — Volume 02 (of 10) | Richard Francis Burton (translator) |
# | The Song Celestial; Or, Bhagavad-Gîtâ (from the Mahâbhârata): Being a discourse between Arjuna, Prince of India, and the Supreme Being under the form of Krishna | Sir Edwin Arnold (translator) |
# | Narrative and Critical History of America, Vol. 2 (of 8): Spanish Explorations and Settlements in America from the Fifteenth to the Seventeenth Century | Justin Winsor (editor) |
# | Reliques of Ancient English Poetry, Volume 1 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date | Thomas Percy (editor) |
# | The Glories of Ireland | Joseph Dunn and P. J. Lennox (editors) |
# | The Nursery Rhymes of England | James Orchard Halliwell (collector) |
# | Pen-portraits of literary women : $b by themselves and others, Volume 2 (of 2) | Helen Gray Cone (contributor) |
# | Twenty-Five Ghost Stories | W. Bob Holland (compiler) |
# | The Wit and Humor of America, Volume X (of X) | Marshall P. Wilder (editor) |
# | The Best American Humorous Short Stories | Alexander Jessup (editor) |
# | Modern Spanish Lyrics | Elijah Clarence Hills and S. Griswold Morley (editors) |
# | The Bible, Douay-Rheims, Complete: The Challoner Revision | Bishop Richard Challoner (revisor) |
# | The Book of the Thousand Nights and a Night — Volume 01 (of 10) | Richard Francis Burton (translator) |
# | Best Russian Short Stories | Thomas Seltzer (editor) |
# | The History of Orange County New York | Russel Headley |
# | A dictionary of English proverbs and proverbial phrases : $b with a copious index of principal words | Thomas Preston |
# | Dhammapada, a Collection of Verses; Being One of the Canonical Books of the Buddhists | F. Max Müller (translator) |
# | A Journal of the First Voyage of  Vasco da Gama 1497-1499 | Alvaro Velho (et al.) |
# | Sir Gawayne and the Green Knight: An Alliterative Romance-Poem (c. 1360 A.D.) | Anonymous (edited by Sir Frederick Madden) |
# | Mother Goose's Nursery Rhymes: A Collection of Alphabets, Rhymes, Tales, and Jingles | Walter Crane (editor) |
# | The Philippine Islands, 1493-1898 - Volume 40 of 55, 1690-1691: Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century | Emma Helen Blair, James Alexander Robertson (editors) |
# | Familiar Quotations: A Collection of Passages, Phrases, and Proverbs Traced to; Their Sources in Ancient and Modern Literature | John Bartlett |
# | A Hundred and Seventy Chinese Poems | Arthur Waley (translator) |
# | The best short stories of 1920, and the yearbook of the American short story | Edward J. O'Brien (editor) |
# | Poems Every Child Should Know | Mary E. Burt (editor) |
# | Kalevala : the Epic Poem of Finland — Complete | John Martin Crawford (translator) |
# | The Nibelungenlied: Translated into Rhymed English Verse in the Metre of the Original | George Henry Needler (translator) |
# | Filipino Popular Tales | Dean Spruill Fansler (collector) |
# | Poems Every Child Should Know: The What-Every-Child-Should-Know-Library | Mary E. Burt (editor) |
# | Famous Modern Ghost Stories | Dorothy Scarborough (editor) |
# | The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 2: Books 4, 5, 6 and 7 | Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli) |
# | The Anglo-Saxon Chronicle | Anonymous (translated by James Ingram) |
# | The Boy Mechanic, Volume 1: 700 Things for Boys to Do | H. H. Windsor (editor) |
# | Reliques of Ancient English Poetry, Volume 3 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date | Thomas Percy (editor) |
# | The New Testament of our Lord and Savior Jesus Christ.: The common English version, corrected by the final committee of the American Bible Union. | American Bible Union (committee) |
# | Wine, Women, and Song: Mediaeval Latin Students' songs; Now first translated into English verse | John Addington Symonds (translator) |
# | The Institutes of Justinian | Caesar Flavius Justinian (translated by J.B. Moyle) |
# | Library of the World's Best Literature, Ancient and Modern — Volume 09 | Charles Dudley Warner (editor) |
# | Russian Fairy Tales: A Choice Collection of Muscovite Folk-lore | Anonymous (translated by Nora Kershaw) |
# | Poems of American History | Burton Egbert Stevenson (compiler) |
# | Little Masterpieces of American Wit and Humor, Volume I | Thomas L. Masson (editor) |
# | The Book of the Thousand Nights and a Night — Volume 10 (of 10) | Richard Francis Burton (translator) |
# | Prayers of the Early Church | J. Manning Potts (editor) |
# | Fairy and Folk Tales of the Irish Peasantry | William Butler Yeats (editor) |
# | The Jargon File, Version 4.2.2, 20 Aug 2000 | Eric S. Raymond (compiler) |
# | West African Folk-Tales | W. H. Barker and Cecilia Sinclair (collectors) |
# | Humour, Wit, & Satire of the Seventeenth Century | John Ashton (editor) |
# | The Story of the Volsungs (Volsunga Saga); with Excerpts from the Poetic Edda | Anonymous (translated by Eirikr Magnusson and William Morris) |
# | English Fairy Tales | Joseph Jacobs (collector) |
# | Jane's All the World's Aircraft. 1913 | Fred T. Jane (editor) |
# | The Philippine Islands, 1493-1898; Volume 46, 1721-1739: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
# | The Jesuit Relations and Allied Documents, Vol. 1: Acadia, 1610-1613 | Reuben Gold Thwaites (editor) |
# | The Arabian Nights: Their Best-known Tales | Kate Douglas Wiggin and Nora A. Smith (editors, illustrated by Maxfield Parrish) |
# | The Ancient Irish Epic Tale Táin Bó Cúalnge | Joseph Dunn (translator) |
# | The book of wonders : $b gives plain and simple answers to the thousands of everyday questions that are asked and which all should be able to, but cannot answer... | Anonymous |
# | The Philippine Islands, 1493-1803 — Volume 05 of 55: 1582-1583; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Beginning of the Nineteenth Century | Emma Helen Blair and James Alexander Robertson (editors) |
# | A Cyclopaedia of Canadian Biography: Being Chiefly Men of the Time: A Collection of Persons Distinguished in Professional and Political Life, Leaders in the Commerce and Industry of Canada, and Successful Pioneers | George Maclean Rose (editor) |
# | The Big Book of Nursery Rhymes | Walter Jerrold (editor, illustrated by Charles Robinson) |
# | The Philippine Islands, 1493-1898 — Volume 28 of 55: 1637-38; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century | Emma Helen Blair and James Alexander Robertson (editors) |
# | The Mabinogion | Lady Charlotte Guest (translator) |
# | A Cyclopædia of Canadian Biography: Brief biographies of persons distinguished in the professional, military and political life, and the commerce and industry of Canada, in the twentieth century | Various (edited by multiple contributors) |
# | Mr. Punch's Golf Stories | J. A. Hammerton (compiler) |
# | Cyclopedia of illustrations for public speakers : $b Containing facts, incidents, stories, experiences, anecdotes, selections, etc., for illustrative purposes, with cross-references | Cyrus Augustine Bartol (compiler) |
# | A guide book of art, architecture, and historic interests in Pennsylvania | Anna Margaretta Archambault |
# | Narrative and Critical History of America, Vol. 3 (of 8): English Explorations and Settlements in North America 1497-1689 | Justin Winsor (editor) |
# | The Oera Linda Book, from a Manuscript of the Thirteenth Century | J. G. Ottema (translator) |
# | The Little Red Hen: An Old English Folk Tale | Florence White Williams (adapter) |
# | The Book of American Negro Poetry | James Weldon Johnson (editor) |
# | The Bible, Douay-Rheims, Complete | Bishop Richard Challoner (revisor) |
# | I. Beówulf: an Anglo-Saxon poem. II. The fight at Finnsburh: a fragment. | James A. Harrison and Robert Sharp (editors) |
# | Northern Nut Growers Association Report of the Proceedings at the 44th Annual Meeting: Rochester, N.Y. August 31 and September 1, 1953 | Northern Nut Growers Association (various contributors) |
# | The Real Mother Goose | Blanche Fisher Wright (illustrator) |
# | The Philippine Islands, 1493-1898, Volume 35, 1640-1649: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
# | The American Joe Miller: A Collection of Yankee Wit and Humor | Robert Kempt (compiler) |
# | History of Woman Suffrage, Volume I | Elizabeth Cady Stanton, Susan B. Anthony, Matilda Joslyn Gage (editors) |
# | Privateering and Piracy in the Colonial Period: Illustrative Documents | John Franklin Jameson (editor) |
# | The Philippine Islands, 1493-1898, Volume 43, 1670-1700: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
# | Library of the World's Best Literature, Ancient and Modern — Volume 16 | Charles Dudley Warner (editor) |
# | The Middle English Poem, Erthe Upon Erthe | Anonymous (edited by William W. Skeat) |
# | A Manual of American Literature | Theodore Stanton (editor) |
# | Buddhist birth stories; or, Jataka tales, Volume 1 | T. W. Rhys Davids (translator) |
# | Eskimo Folk-Tales | Knud Rasmussen (collector, translated by W. J. Alexander Worster) |
# | The Book of the Thousand Nights and a Night — Volume 03 (of 10) | Richard Francis Burton (translator) |
# | The Mahabharata of Krishna-Dwaipayana Vyasa Translated into English Prose: Vana Parva, Part 1 | Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli) |
# | "Everyman," with other interludes, including eight miracle plays | Anonymous (edited by Ernest Rhys) |
# | The Philippine Islands, 1493-1898, Volume 52, 1841-1898: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
# | Hawaiian folk tales : $b a collection of native legends | Thomas G. Thrum (collector) |
# | The Chinese Fairy Book | R. Wilhelm (editor, translated by Frederick H. Martens) |
# | The Cavalier Songs and Ballads of England from 1642 to 1684 | Charles Mackay (editor) |
# | The World's Greatest Books — Volume 02 — Fiction | Arthur Mee and J. A. Hammerton (editors) |
# | The Oxford Book of Latin Verse: From the earliest fragments to the end of the Vth Century A.D. | H. W. Garrod (editor) |
# | The Ballads and Songs of Yorkshire: Transcribed from Private Manuscripts, Rare Broadsides, and Scarce Publications; with Notes and a Glossary | C. J. Davison Ingledew (editor) |
# | English Economic History: Select Documents | Alfred Edward Bland, et al. (editors) |
# | A Century of Parody and Imitation | Walter Jerrold and R. M. Leonard (editors) |
# | The English and Scottish popular ballads, volume 1 (of 5) | Francis James Child (editor) |
# | Ten Thousand Wonderful Things: Comprising whatever is marvellous and rare, curious, eccentric and extraordinary in all ages and nations | Anonymous (edited by I. P. Collins) |
# | Narrative and Critical History of America, Vol. 6 (of 8): The United States of North America, Part I | Justin Winsor (editor) |
# | The Circle of Knowledge: A Classified, Simplified, Visualized Book of Answers | Henry W. Ruoff |
# | A Prose English Translation of Harivamsha | Manmatha Nath Dutt (translator) |
# | Short stories from Life: The 81 prize stories in "Life's" Shortest Story Contest | Thomas L. Masson (editor) |
# | A Catalogue of Books in English Later than 1700, Vol. 2: Forming a portion of the library of Robert Hoe | Robert Hoe (collector, catalogued by various) |
# | The best British short stories of 1922 | Edward J. O'Brien and John Cournos (editors) |
# | The best short stories of 1922, and the yearbook of the American short story | Edward J. O'Brien (editor) |
# | Tennyson and his friends | Hallam Tennyson (editor) |
# | Khaki knitting book | Olive Whiting |
# | A Catalogue of Books in English Later than 1700, Vol. 1: Forming a portion of the library of Robert Hoe | Robert Hoe (collector, catalogued by various) |
# | Childhood's Favorites and Fairy Stories: The Young Folks Treasury, Volume 1 | Hamilton Wright Mabie, Edward Everett Hale, William Byron Forbush (editors) |
# | The Little Mother Goose | Anonymous (illustrated by Jessie Willcox Smith) |
# | Library of the World's Best Mystery and Detective Stories | Julian Hawthorne (editor) |
# | The best short stories of 1917, and the yearbook of the American short story | Edward J. O'Brien (editor) |
# | A collection of short-stories | Lemuel Arthur Pittenger (editor) |
# | Library of the World's Best Literature, Ancient and Modern — Volume 06 | Charles Dudley Warner (editor) |
# | Henley's Twentieth Century Formulas, Recipes and Processes | Gardner Dexter Hiscox (editor) |
# | Armenia and the Armenians: A List of References in the New York Public Library | Richard Gottheil |
# | Narrative and Critical History of America, Vol. 5 (of 8): The English and French in North America 1689-1763 | Justin Winsor (editor) |
# | Tales of King Arthur and the Round Table, Adapted from the Book of Romance | Andrew Lang, et al. (adapters) |
# | The Book of the Thousand Nights and a Night — Volume 04 (of 10) | Richard Francis Burton (translator) |
# | The Bible, King James Version, Complete | Various (King James Version translators) |
# | Seventh Annual Report of the Bureau of Ethnology: to the Secretary of the Smithsonian Institution, 1885-1886, Government Printing Office, Washington, 1891 | Bureau of Ethnology (various contributors) |
# | Pennsylvania Dutch Cooking | Anonymous |
# | The Boy Mechanic, Book 2: 1000 Things for Boys to Do | H. H. Windsor (editor) |
# | The Wit and Humor of America, Volume II. (of X.) | Marshall P. Wilder (editor) |
# | Japan: From the Japanese Government History | Kan'ichi Asakawa (translator) |
# | On the magnet, magnetick bodies also, and on the great magnet the earth: a new physiology, demonstrated by many arguments & experiments | William Gilbert (author) |
# | The Diamond Sutra (Chin-Kang-Ching) or Prajna-Paramita | William Gemmell (translator) |
# | The Philippine Islands, 1493-1898, Volume 38, 1674-1683: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
# | The Wit and Humor of America, Volume IX (of X) | Marshall P. Wilder (editor) |
# | Christopher Columbus and His Monument Columbia: being a concordance of choice tributes to the great Genoese, his grand discovery, and his greatness of mind and purpose | Anonymous (compiled by various) |
# | Library of the World's Best Literature, Ancient and Modern — Volume 13 | Charles Dudley Warner (editor) |
# | The History of Woman Suffrage, Volume VI | Ida Husted Harper (editor) |
# | The Colleges of Oxford: Their History and Traditions: XXI Chapters Contributed by Members of the Colleges | Various (contributed by members) |
# | Celtic Folk and Fairy Tales | Joseph Jacobs (editor) |
# | Indian Fairy Tales | Joseph Jacobs (collector) |
# | Anthologica Rarissima: The Way of a Virgin: Being excerpts from rare, curious and diverting books | Various (edited anonymously) |
# | The Dean of Lismore's Book: A Selection of Ancient Gaelic Poetry | Thomas Maclauchlan (editor) |
# | Lancelot of the Laik: A Scottish Metrical Romance (About 1490-1500 A. D.) | Walter W. Skeat (editor) |
# | More toasts: Jokes, stories and quotations | Marion Dix Mosher (compiler) |
# | Devil Stories: An Anthology | Maximilian J. Rudwin (editor) |
# | Sagas from the Far East; or, Kalmouk and Mongolian Traditionary Tales | Rachel Harriette Busk (collector) |
# | The Burlington magazine : $b for connoisseurs. vol. II—June to August | Various |
# | Tea-Cup Reading and Fortune-Telling by Tea Leaves, by a Highland Seer | Anonymous (A Highland Seer) |
# | The Veil Lifted: Modern Developments of Spirit Photography | J. Traill Taylor (author) |
# | Ancient Poems, Ballads, and Songs of the Peasantry of England | James Maidment and Robert Bell (editors) 

In [1]:
import pandas as pd
df_2 = pd.read_csv("author_identification_dataset_expanded.csv")

df_2.head()

Unnamed: 0,author,title,text,text_type,word_count
0,Abraham Lincoln,Lincoln Letters,LINCOLN LETTERS By Abraham Lincoln Published b...,letters,1065
1,Abraham Lincoln,Lincoln's First Inaugural Address,"Lincoln's First Inaugural Address March 4, 186...",letters,3626
2,Abraham Lincoln,"Lincoln's Gettysburg Address, given November 1...","Lincoln's Gettysburg Address, given November 1...",letters,299
3,Abraham Lincoln,"Lincoln's Inaugurals, Addresses and Letters (S...",Longman's English Classics LINCOLN'S INAUGURAL...,letters,43649
4,Abraham Lincoln,Lincoln's Second Inaugural Address,"Lincoln's Second Inaugural Address March 4, 18...",letters,703


In [5]:
table_text = """
| Title | Author |
|-------|--------|
| Beowulf: An Anglo-Saxon Epic Poem | J. Lesslie Hall (translator)- The lesser Key of Solomon is probably attributed to King Solomon, but often listed as Unknown in Project Gutenberg.
 |
| The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art | Anonymous |
| Chambers's Twentieth Century Dictionary (part 1 of 4: A-D) | Thomas Davidson |
| Manual of Classical Erotology (De figuris Veneris) | Friedrich Karl Forberg |
| The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3 | Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli) |
| The Thousand and One Nights, Vol. I.: Commonly Called the Arabian Nights' Entertainments | Anonymous (edited by Edward Stanley Poole) |
| The King James Version of the Bible | Various (King James Version translators) |
| The Story of Beowulf, Translated from Anglo-Saxon into Modern English Prose | Ernest J. B. Kirtlan (translator) |
| Doctrina Christiana: The first book printed in the Philippines, Manila, 1593. | Anonymous |
| Philippine Folk Tales | Clara Kern Bayliss, Berton L. Maxfield, Fletcher Gardner, Laura Watson Benedict, W. H. Millington (collectors) |
| The Egyptian Book of the dead | P. Le Page Renouf and Edouard Naville (translators) |
| The Doré Bible Gallery, Complete: Containing One Hundred Superb Illustrations, and a Page of Explanatory Letter-press Facing Each | Anonymous (illustrated by Gustave Doré) |
| Forty-Eighth Annual Report of the Bureau of American Ethnology to the Secretary of the Smithsonian Institution, 1930-1931, Government Printing Office, Washington, 1933. | Bureau of American Ethnology (edited by various) |
| The Philippine Islands, 1493-1898 — Volume 07 of 55: 1588-1591; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century | Emma Helen Blair, James Alexander Robertson, Edward Gaylord Bourne (editors) |
| Reliques of Ancient English Poetry, Volume 2 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date | Thomas Percy (editor) |
| A Polyglot of Foreign Proverbs: Comprising French, German, Dutch, Spanish, Portuguese and Danish, with English Translations and a General Index | Henry Ellis (compiler) |
| The Book of the Thousand Nights and a Night — Volume 02 (of 10) | Richard Francis Burton (translator) |
| The Song Celestial; Or, Bhagavad-Gîtâ (from the Mahâbhârata): Being a discourse between Arjuna, Prince of India, and the Supreme Being under the form of Krishna | Sir Edwin Arnold (translator) |
| Narrative and Critical History of America, Vol. 2 (of 8): Spanish Explorations and Settlements in America from the Fifteenth to the Seventeenth Century | Justin Winsor (editor) |
| Reliques of Ancient English Poetry, Volume 1 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date | Thomas Percy (editor) |
| The Glories of Ireland | Joseph Dunn and P. J. Lennox (editors) |
| The Nursery Rhymes of England | James Orchard Halliwell (collector) |
| Pen-portraits of literary women : $b by themselves and others, Volume 2 (of 2) | Helen Gray Cone (contributor) |
| Twenty-Five Ghost Stories | W. Bob Holland (compiler) |
| The Wit and Humor of America, Volume X (of X) | Marshall P. Wilder (editor) |
| The Best American Humorous Short Stories | Alexander Jessup (editor) |
| Modern Spanish Lyrics | Elijah Clarence Hills and S. Griswold Morley (editors) |
| The Bible, Douay-Rheims, Complete: The Challoner Revision | Bishop Richard Challoner (revisor) |
| The Book of the Thousand Nights and a Night — Volume 01 (of 10) | Richard Francis Burton (translator) |
| Best Russian Short Stories | Thomas Seltzer (editor) |
| The History of Orange County New York | Russel Headley |
| A dictionary of English proverbs and proverbial phrases : $b with a copious index of principal words | Thomas Preston |
| Dhammapada, a Collection of Verses; Being One of the Canonical Books of the Buddhists | F. Max Müller (translator) |
| A Journal of the First Voyage of  Vasco da Gama 1497-1499 | Alvaro Velho (et al.) |
| Sir Gawayne and the Green Knight: An Alliterative Romance-Poem (c. 1360 A.D.) | Anonymous (edited by Sir Frederick Madden) |
| Mother Goose's Nursery Rhymes: A Collection of Alphabets, Rhymes, Tales, and Jingles | Walter Crane (editor) |
| The Philippine Islands, 1493-1898 - Volume 40 of 55, 1690-1691: Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century | Emma Helen Blair, James Alexander Robertson (editors) |
| Familiar Quotations: A Collection of Passages, Phrases, and Proverbs Traced to; Their Sources in Ancient and Modern Literature | John Bartlett |
| A Hundred and Seventy Chinese Poems | Arthur Waley (translator) |
| The best short stories of 1920, and the yearbook of the American short story | Edward J. O'Brien (editor) |
| Poems Every Child Should Know | Mary E. Burt (editor) |
| Kalevala : the Epic Poem of Finland — Complete | John Martin Crawford (translator) |
| The Nibelungenlied: Translated into Rhymed English Verse in the Metre of the Original | George Henry Needler (translator) |
| Filipino Popular Tales | Dean Spruill Fansler (collector) |
| Poems Every Child Should Know: The What-Every-Child-Should-Know-Library | Mary E. Burt (editor) |
| Famous Modern Ghost Stories | Dorothy Scarborough (editor) |
| The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 2: Books 4, 5, 6 and 7 | Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli) |
| The Anglo-Saxon Chronicle | Anonymous (translated by James Ingram) |
| The Boy Mechanic, Volume 1: 700 Things for Boys to Do | H. H. Windsor (editor) |
| Reliques of Ancient English Poetry, Volume 3 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date | Thomas Percy (editor) |
| The New Testament of our Lord and Savior Jesus Christ.: The common English version, corrected by the final committee of the American Bible Union. | American Bible Union (committee) |
| Wine, Women, and Song: Mediaeval Latin Students' songs; Now first translated into English verse | John Addington Symonds (translator) |
| The Institutes of Justinian | Caesar Flavius Justinian (translated by J.B. Moyle) |
| Library of the World's Best Literature, Ancient and Modern — Volume 09 | Charles Dudley Warner (editor) |
| Russian Fairy Tales: A Choice Collection of Muscovite Folk-lore | Anonymous (translated by Nora Kershaw) |
| Poems of American History | Burton Egbert Stevenson (compiler) |
| Little Masterpieces of American Wit and Humor, Volume I | Thomas L. Masson (editor) |
| The Book of the Thousand Nights and a Night — Volume 10 (of 10) | Richard Francis Burton (translator) |
| Prayers of the Early Church | J. Manning Potts (editor) |
| Fairy and Folk Tales of the Irish Peasantry | William Butler Yeats (editor) |
| The Jargon File, Version 4.2.2, 20 Aug 2000 | Eric S. Raymond (compiler) |
| West African Folk-Tales | W. H. Barker and Cecilia Sinclair (collectors) |
| Humour, Wit, & Satire of the Seventeenth Century | John Ashton (editor) |
| The Story of the Volsungs (Volsunga Saga); with Excerpts from the Poetic Edda | Anonymous (translated by Eirikr Magnusson and William Morris) |
| English Fairy Tales | Joseph Jacobs (collector) |
| Jane's All the World's Aircraft. 1913 | Fred T. Jane (editor) |
| The Philippine Islands, 1493-1898; Volume 46, 1721-1739: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
| The Jesuit Relations and Allied Documents, Vol. 1: Acadia, 1610-1613 | Reuben Gold Thwaites (editor) |
| The Arabian Nights: Their Best-known Tales | Kate Douglas Wiggin and Nora A. Smith (editors, illustrated by Maxfield Parrish) |
| The Ancient Irish Epic Tale Táin Bó Cúalnge | Joseph Dunn (translator) |
| The book of wonders : $b gives plain and simple answers to the thousands of everyday questions that are asked and which all should be able to, but cannot answer... | Anonymous |
| The Philippine Islands, 1493-1803 — Volume 05 of 55: 1582-1583; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Beginning of the Nineteenth Century | Emma Helen Blair and James Alexander Robertson (editors) |
| A Cyclopaedia of Canadian Biography: Being Chiefly Men of the Time: A Collection of Persons Distinguished in Professional and Political Life, Leaders in the Commerce and Industry of Canada, and Successful Pioneers | George Maclean Rose (editor) |
| The Big Book of Nursery Rhymes | Walter Jerrold (editor, illustrated by Charles Robinson) |
| The Philippine Islands, 1493-1898 — Volume 28 of 55: 1637-38; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century | Emma Helen Blair and James Alexander Robertson (editors) |
| The Mabinogion | Lady Charlotte Guest (translator) |
| A Cyclopædia of Canadian Biography: Brief biographies of persons distinguished in the professional, military and political life, and the commerce and industry of Canada, in the twentieth century | Various (edited by multiple contributors) |
| Mr. Punch's Golf Stories | J. A. Hammerton (compiler) |
| Cyclopedia of illustrations for public speakers : $b Containing facts, incidents, stories, experiences, anecdotes, selections, etc., for illustrative purposes, with cross-references | Cyrus Augustine Bartol (compiler) |
| A guide book of art, architecture, and historic interests in Pennsylvania | Anna Margaretta Archambault |
| Narrative and Critical History of America, Vol. 3 (of 8): English Explorations and Settlements in North America 1497-1689 | Justin Winsor (editor) |
| The Oera Linda Book, from a Manuscript of the Thirteenth Century | J. G. Ottema (translator) |
| The Little Red Hen: An Old English Folk Tale | Florence White Williams (adapter) |
| The Book of American Negro Poetry | James Weldon Johnson (editor) |
| The Bible, Douay-Rheims, Complete | Bishop Richard Challoner (revisor) |
| I. Beówulf: an Anglo-Saxon poem. II. The fight at Finnsburh: a fragment. | James A. Harrison and Robert Sharp (editors) |
| Northern Nut Growers Association Report of the Proceedings at the 44th Annual Meeting: Rochester, N.Y. August 31 and September 1, 1953 | Northern Nut Growers Association (various contributors) |
| The Real Mother Goose | Blanche Fisher Wright (illustrator) |
| The Philippine Islands, 1493-1898, Volume 35, 1640-1649: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
| The American Joe Miller: A Collection of Yankee Wit and Humor | Robert Kempt (compiler) |
| History of Woman Suffrage, Volume I | Elizabeth Cady Stanton, Susan B. Anthony, Matilda Joslyn Gage (editors) |
| Privateering and Piracy in the Colonial Period: Illustrative Documents | John Franklin Jameson (editor) |
| The Philippine Islands, 1493-1898, Volume 43, 1670-1700: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
| Library of the World's Best Literature, Ancient and Modern — Volume 16 | Charles Dudley Warner (editor) |
| The Middle English Poem, Erthe Upon Erthe | Anonymous (edited by William W. Skeat) |
| A Manual of American Literature | Theodore Stanton (editor) |
| Buddhist birth stories; or, Jataka tales, Volume 1 | T. W. Rhys Davids (translator) |
| Eskimo Folk-Tales | Knud Rasmussen (collector, translated by W. J. Alexander Worster) |
| The Book of the Thousand Nights and a Night — Volume 03 (of 10) | Richard Francis Burton (translator) |
| The Mahabharata of Krishna-Dwaipayana Vyasa Translated into English Prose: Vana Parva, Part 1 | Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli) |
| "Everyman," with other interludes, including eight miracle plays | Anonymous (edited by Ernest Rhys) |
| The Philippine Islands, 1493-1898, Volume 52, 1841-1898: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
| Hawaiian folk tales : $b a collection of native legends | Thomas G. Thrum (collector) |
| The Chinese Fairy Book | R. Wilhelm (editor, translated by Frederick H. Martens) |
| The Cavalier Songs and Ballads of England from 1642 to 1684 | Charles Mackay (editor) |
| The World's Greatest Books — Volume 02 — Fiction | Arthur Mee and J. A. Hammerton (editors) |
| The Oxford Book of Latin Verse: From the earliest fragments to the end of the Vth Century A.D. | H. W. Garrod (editor) |
| The Ballads and Songs of Yorkshire: Transcribed from Private Manuscripts, Rare Broadsides, and Scarce Publications; with Notes and a Glossary | C. J. Davison Ingledew (editor) |
| English Economic History: Select Documents | Alfred Edward Bland, et al. (editors) |
| A Century of Parody and Imitation | Walter Jerrold and R. M. Leonard (editors) |
| The English and Scottish popular ballads, volume 1 (of 5) | Francis James Child (editor) |
| Ten Thousand Wonderful Things: Comprising whatever is marvellous and rare, curious, eccentric and extraordinary in all ages and nations | Anonymous (edited by I. P. Collins) |
| Narrative and Critical History of America, Vol. 6 (of 8): The United States of North America, Part I | Justin Winsor (editor) |
| The Circle of Knowledge: A Classified, Simplified, Visualized Book of Answers | Henry W. Ruoff |
| A Prose English Translation of Harivamsha | Manmatha Nath Dutt (translator) |
| Short stories from Life: The 81 prize stories in "Life's" Shortest Story Contest | Thomas L. Masson (editor) |
| A Catalogue of Books in English Later than 1700, Vol. 2: Forming a portion of the library of Robert Hoe | Robert Hoe (collector, catalogued by various) |
| The best British short stories of 1922 | Edward J. O'Brien and John Cournos (editors) |
| The best short stories of 1922, and the yearbook of the American short story | Edward J. O'Brien (editor) |
| Tennyson and his friends | Hallam Tennyson (editor) |
| Khaki knitting book | Olive Whiting |
| A Catalogue of Books in English Later than 1700, Vol. 1: Forming a portion of the library of Robert Hoe | Robert Hoe (collector, catalogued by various) |
| Childhood's Favorites and Fairy Stories: The Young Folks Treasury, Volume 1 | Hamilton Wright Mabie, Edward Everett Hale, William Byron Forbush (editors) |
| The Little Mother Goose | Anonymous (illustrated by Jessie Willcox Smith) |
| Library of the World's Best Mystery and Detective Stories | Julian Hawthorne (editor) |
| The best short stories of 1917, and the yearbook of the American short story | Edward J. O'Brien (editor) |
| A collection of short-stories | Lemuel Arthur Pittenger (editor) |
| Library of the World's Best Literature, Ancient and Modern — Volume 06 | Charles Dudley Warner (editor) |
| Henley's Twentieth Century Formulas, Recipes and Processes | Gardner Dexter Hiscox (editor) |
| Armenia and the Armenians: A List of References in the New York Public Library | Richard Gottheil |
| Narrative and Critical History of America, Vol. 5 (of 8): The English and French in North America 1689-1763 | Justin Winsor (editor) |
| Tales of King Arthur and the Round Table, Adapted from the Book of Romance | Andrew Lang, et al. (adapters) |
| The Book of the Thousand Nights and a Night — Volume 04 (of 10) | Richard Francis Burton (translator) |
| The Bible, King James Version, Complete | Various (King James Version translators) |
| Seventh Annual Report of the Bureau of Ethnology: to the Secretary of the Smithsonian Institution, 1885-1886, Government Printing Office, Washington, 1891 | Bureau of Ethnology (various contributors) |
| Pennsylvania Dutch Cooking | Anonymous |
| The Boy Mechanic, Book 2: 1000 Things for Boys to Do | H. H. Windsor (editor) |
| The Wit and Humor of America, Volume II. (of X.) | Marshall P. Wilder (editor) |
| Japan: From the Japanese Government History | Kan'ichi Asakawa (translator) |
| On the magnet, magnetick bodies also, and on the great magnet the earth: a new physiology, demonstrated by many arguments & experiments | William Gilbert (author) |
| The Diamond Sutra (Chin-Kang-Ching) or Prajna-Paramita | William Gemmell (translator) |
| The Philippine Islands, 1493-1898, Volume 38, 1674-1683: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century | Emma Helen Blair and James Alexander Robertson (editors) |
| The Wit and Humor of America, Volume IX (of X) | Marshall P. Wilder (editor) |
| Christopher Columbus and His Monument Columbia: being a concordance of choice tributes to the great Genoese, his grand discovery, and his greatness of mind and purpose | Anonymous (compiled by various) |
| Library of the World's Best Literature, Ancient and Modern — Volume 13 | Charles Dudley Warner (editor) |
| The History of Woman Suffrage, Volume VI | Ida Husted Harper (editor) |
| The Colleges of Oxford: Their History and Traditions: XXI Chapters Contributed by Members of the Colleges | Various (contributed by members) |
| Celtic Folk and Fairy Tales | Joseph Jacobs (editor) |
| Indian Fairy Tales | Joseph Jacobs (collector) |
| Anthologica Rarissima: The Way of a Virgin: Being excerpts from rare, curious and diverting books | Various (edited anonymously) |
| The Dean of Lismore's Book: A Selection of Ancient Gaelic Poetry | Thomas Maclauchlan (editor) |
| Lancelot of the Laik: A Scottish Metrical Romance (About 1490-1500 A. D.) | Walter W. Skeat (editor) |
| More toasts: Jokes, stories and quotations | Marion Dix Mosher (compiler) |
| Devil Stories: An Anthology | Maximilian J. Rudwin (editor) |
| Sagas from the Far East; or, Kalmouk and Mongolian Traditionary Tales | Rachel Harriette Busk (collector) |
| The Burlington magazine : $b for connoisseurs. vol. II—June to August | Various |
| Tea-Cup Reading and Fortune-Telling by Tea Leaves, by a Highland Seer | Anonymous (A Highland Seer) |
| The Veil Lifted: Modern Developments of Spirit Photography | J. Traill Taylor (author) |
| Ancient Poems, Ballads, and Songs of the Peasantry of England | James Maidment and Robert Bell (editors) 

"""

resolved_authors = {}

for line in table_text.splitlines():
    line = line.strip()
    if not line.startswith("|") or line.startswith("|---") or line.lower().startswith("| title"):
        continue
    parts = [p.strip() for p in line.strip("|").split("|")]
    if len(parts) == 2:
        title, author = parts
        # Escape problematic characters in keys for Python safety
        safe_title = title.replace('"', '\\"')  # escape internal double quotes
        safe_author = author.replace('"', '\\"')
        resolved_authors[title] = author

# Print in nice Python syntax
print("resolved_authors = {")
for title, author in resolved_authors.items():
    # Wrap titles in double quotes, escape any embedded double quotes
    safe_title = title.replace('"', '\\"')
    safe_author = author.replace('"', '\\"')
    print(f'    "{safe_title}": "{safe_author}",')
print("}")


resolved_authors = {
    "Beowulf: An Anglo-Saxon Epic Poem": "J. Lesslie Hall (translator)- The lesser Key of Solomon is probably attributed to King Solomon, but often listed as Unknown in Project Gutenberg.",
    "The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art": "Anonymous",
    "Chambers's Twentieth Century Dictionary (part 1 of 4: A-D)": "Thomas Davidson",
    "Manual of Classical Erotology (De figuris Veneris)": "Friedrich Karl Forberg",
    "The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3": "Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)",
    "The Thousand and One Nights, Vol. I.: Commonly Called the Arabian Nights' Entertainments": "Anonymous (edited by Edward Stanley Poole)",
    "The King James Version of the Bible": "Various (King James Version translators)",
    "The Story of Beowulf, Translated from

In [8]:
import pandas as pd

resolved_authors = {
    "Beowulf: An Anglo-Saxon Epic Poem": "J. Lesslie Hall (translator)- The lesser Key of Solomon is probably attributed to King Solomon, but often listed as Unknown in Project Gutenberg.",
    "The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art": "L. W. de Laurence",
    "Chambers's Twentieth Century Dictionary (part 1 of 4: A-D)": "Thomas Davidson",
    "Manual of Classical Erotology (De figuris Veneris)": "Friedrich Karl Forberg",
    "The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3": "Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)",
    "The Thousand and One Nights, Vol. I.: Commonly Called the Arabian Nights' Entertainments": "Anonymous (edited by Edward Stanley Poole)",
    "The King James Version of the Bible": "Various (King James Version translators)",
    "The Story of Beowulf, Translated from Anglo-Saxon into Modern English Prose": "Ernest J. B. Kirtlan (translator)",
    "Doctrina Christiana: The first book printed in the Philippines, Manila, 1593.": "Anonymous",
    "Philippine Folk Tales": "Clara Kern Bayliss, Berton L. Maxfield, Fletcher Gardner, Laura Watson Benedict, W. H. Millington (collectors)",
    "The Egyptian Book of the dead": "P. Le Page Renouf and Edouard Naville (translators)",
    "The Doré Bible Gallery, Complete: Containing One Hundred Superb Illustrations, and a Page of Explanatory Letter-press Facing Each": "Anonymous (illustrated by Gustave Doré)",
    "Forty-Eighth Annual Report of the Bureau of American Ethnology to the Secretary of the Smithsonian Institution, 1930-1931, Government Printing Office, Washington, 1933.": "Bureau of American Ethnology (edited by various)",
    "The Philippine Islands, 1493-1898 — Volume 07 of 55: 1588-1591; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century": "Emma Helen Blair, James Alexander Robertson, Edward Gaylord Bourne (editors)",
    "Reliques of Ancient English Poetry, Volume 2 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date": "Thomas Percy (editor)",
    "A Polyglot of Foreign Proverbs: Comprising French, German, Dutch, Spanish, Portuguese and Danish, with English Translations and a General Index": "Henry Ellis (compiler)",
    "The Book of the Thousand Nights and a Night — Volume 02 (of 10)": "Richard Francis Burton (translator)",
    "The Song Celestial; Or, Bhagavad-Gîtâ (from the Mahâbhârata): Being a discourse between Arjuna, Prince of India, and the Supreme Being under the form of Krishna": "Sir Edwin Arnold (translator)",
    "Narrative and Critical History of America, Vol. 2 (of 8): Spanish Explorations and Settlements in America from the Fifteenth to the Seventeenth Century": "Justin Winsor (editor)",
    "Reliques of Ancient English Poetry, Volume 1 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date": "Thomas Percy (editor)",
    "The Glories of Ireland": "Joseph Dunn and P. J. Lennox (editors)",
    "The Nursery Rhymes of England": "James Orchard Halliwell (collector)",
    "Pen-portraits of literary women : $b by themselves and others, Volume 2 (of 2)": "Helen Gray Cone (contributor)",
    "Twenty-Five Ghost Stories": "W. Bob Holland (compiler)",
    "The Wit and Humor of America, Volume X (of X)": "Marshall P. Wilder (editor)",
    "The Best American Humorous Short Stories": "Alexander Jessup (editor)",
    "Modern Spanish Lyrics": "Elijah Clarence Hills and S. Griswold Morley (editors)",
    "The Bible, Douay-Rheims, Complete: The Challoner Revision": "Bishop Richard Challoner (revisor)",
    "The Book of the Thousand Nights and a Night — Volume 01 (of 10)": "Richard Francis Burton (translator)",
    "Best Russian Short Stories": "Thomas Seltzer (editor)",
    "The History of Orange County New York": "Russel Headley",
    "A dictionary of English proverbs and proverbial phrases : $b with a copious index of principal words": "Thomas Preston",
    "Dhammapada, a Collection of Verses; Being One of the Canonical Books of the Buddhists": "F. Max Müller (translator)",
    "A Journal of the First Voyage of  Vasco da Gama 1497-1499": "Alvaro Velho (et al.)",
    "Sir Gawayne and the Green Knight: An Alliterative Romance-Poem (c. 1360 A.D.)": "Anonymous (edited by Sir Frederick Madden)",
    "Mother Goose's Nursery Rhymes: A Collection of Alphabets, Rhymes, Tales, and Jingles": "Walter Crane (editor)",
    "The Philippine Islands, 1493-1898 - Volume 40 of 55, 1690-1691: Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century": "Emma Helen Blair, James Alexander Robertson (editors)",
    "Familiar Quotations: A Collection of Passages, Phrases, and Proverbs Traced to; Their Sources in Ancient and Modern Literature": "John Bartlett",
    "A Hundred and Seventy Chinese Poems": "Arthur Waley (translator)",
    "The best short stories of 1920, and the yearbook of the American short story": "Edward J. O'Brien (editor)",
    "Poems Every Child Should Know": "Mary E. Burt (editor)",
    "Kalevala : the Epic Poem of Finland — Complete": "John Martin Crawford (translator)",
    "The Nibelungenlied: Translated into Rhymed English Verse in the Metre of the Original": "George Henry Needler (translator)",
    "Filipino Popular Tales": "Dean Spruill Fansler (collector)",
    "Poems Every Child Should Know: The What-Every-Child-Should-Know-Library": "Mary E. Burt (editor)",
    "Famous Modern Ghost Stories": "Dorothy Scarborough (editor)",
    "The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 2: Books 4, 5, 6 and 7": "Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)",
    "The Anglo-Saxon Chronicle": "Anonymous (translated by James Ingram)",
    "The Boy Mechanic, Volume 1: 700 Things for Boys to Do": "H. H. Windsor (editor)",
    "Reliques of Ancient English Poetry, Volume 3 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date": "Thomas Percy (editor)",
    "The New Testament of our Lord and Savior Jesus Christ.: The common English version, corrected by the final committee of the American Bible Union.": "American Bible Union (committee)",
    "Wine, Women, and Song: Mediaeval Latin Students' songs; Now first translated into English verse": "John Addington Symonds (translator)",
    "The Institutes of Justinian": "Caesar Flavius Justinian (translated by J.B. Moyle)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 09": "Charles Dudley Warner (editor)",
    "Russian Fairy Tales: A Choice Collection of Muscovite Folk-lore": "Anonymous (translated by Nora Kershaw)",
    "Poems of American History": "Burton Egbert Stevenson (compiler)",
    "Little Masterpieces of American Wit and Humor, Volume I": "Thomas L. Masson (editor)",
    "The Book of the Thousand Nights and a Night — Volume 10 (of 10)": "Richard Francis Burton (translator)",
    "Prayers of the Early Church": "J. Manning Potts (editor)",
    "Fairy and Folk Tales of the Irish Peasantry": "William Butler Yeats (editor)",
    "The Jargon File, Version 4.2.2, 20 Aug 2000": "Eric S. Raymond (compiler)",
    "West African Folk-Tales": "W. H. Barker and Cecilia Sinclair (collectors)",
    "Humour, Wit, & Satire of the Seventeenth Century": "John Ashton (editor)",
    "The Story of the Volsungs (Volsunga Saga); with Excerpts from the Poetic Edda": "Anonymous (translated by Eirikr Magnusson and William Morris)",
    "English Fairy Tales": "Joseph Jacobs (collector)",
    "Jane's All the World's Aircraft. 1913": "Fred T. Jane (editor)",
    "The Philippine Islands, 1493-1898; Volume 46, 1721-1739: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The Jesuit Relations and Allied Documents, Vol. 1: Acadia, 1610-1613": "Reuben Gold Thwaites (editor)",
    "The Arabian Nights: Their Best-known Tales": "Kate Douglas Wiggin and Nora A. Smith (editors, illustrated by Maxfield Parrish)",
    "The Ancient Irish Epic Tale Táin Bó Cúalnge": "Joseph Dunn (translator)",
    "The book of wonders : $b gives plain and simple answers to the thousands of everyday questions that are asked and which all should be able to, but cannot answer...": "Anonymous",
    "The Philippine Islands, 1493-1803 — Volume 05 of 55: 1582-1583; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Beginning of the Nineteenth Century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "A Cyclopaedia of Canadian Biography: Being Chiefly Men of the Time: A Collection of Persons Distinguished in Professional and Political Life, Leaders in the Commerce and Industry of Canada, and Successful Pioneers": "George Maclean Rose (editor)",
    "The Big Book of Nursery Rhymes": "Walter Jerrold (editor, illustrated by Charles Robinson)",
    "The Philippine Islands, 1493-1898 — Volume 28 of 55: 1637-38; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The Mabinogion": "Lady Charlotte Guest (translator)",
    "A Cyclopædia of Canadian Biography: Brief biographies of persons distinguished in the professional, military and political life, and the commerce and industry of Canada, in the twentieth century": "Various (edited by multiple contributors)",
    "Mr. Punch's Golf Stories": "J. A. Hammerton (compiler)",
    "Cyclopedia of illustrations for public speakers : $b Containing facts, incidents, stories, experiences, anecdotes, selections, etc., for illustrative purposes, with cross-references": "Cyrus Augustine Bartol (compiler)",
    "A guide book of art, architecture, and historic interests in Pennsylvania": "Anna Margaretta Archambault",
    "Narrative and Critical History of America, Vol. 3 (of 8): English Explorations and Settlements in North America 1497-1689": "Justin Winsor (editor)",
    "The Oera Linda Book, from a Manuscript of the Thirteenth Century": "J. G. Ottema (translator)",
    "The Little Red Hen: An Old English Folk Tale": "Florence White Williams (adapter)",
    "The Book of American Negro Poetry": "James Weldon Johnson (editor)",
    "The Bible, Douay-Rheims, Complete": "Bishop Richard Challoner (revisor)",
    "I. Beówulf: an Anglo-Saxon poem. II. The fight at Finnsburh: a fragment.": "James A. Harrison and Robert Sharp (editors)",
    "Northern Nut Growers Association Report of the Proceedings at the 44th Annual Meeting: Rochester, N.Y. August 31 and September 1, 1953": "Northern Nut Growers Association (various contributors)",
    "The Real Mother Goose": "Blanche Fisher Wright (illustrator)",
    "The Philippine Islands, 1493-1898, Volume 35, 1640-1649: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The American Joe Miller: A Collection of Yankee Wit and Humor": "Robert Kempt (compiler)",
    "History of Woman Suffrage, Volume I": "Elizabeth Cady Stanton, Susan B. Anthony, Matilda Joslyn Gage (editors)",
    "Privateering and Piracy in the Colonial Period: Illustrative Documents": "John Franklin Jameson (editor)",
    "The Philippine Islands, 1493-1898, Volume 43, 1670-1700: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 16": "Charles Dudley Warner (editor)",
    "The Middle English Poem, Erthe Upon Erthe": "Anonymous (edited by William W. Skeat)",
    "A Manual of American Literature": "Theodore Stanton (editor)",
    "Buddhist birth stories; or, Jataka tales, Volume 1": "T. W. Rhys Davids (translator)",
    "Eskimo Folk-Tales": "Knud Rasmussen (collector, translated by W. J. Alexander Worster)",
    "The Book of the Thousand Nights and a Night — Volume 03 (of 10)": "Richard Francis Burton (translator)",
    "The Mahabharata of Krishna-Dwaipayana Vyasa Translated into English Prose: Vana Parva, Part 1": "Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)",
    "\"Everyman,\" with other interludes, including eight miracle plays": "Anonymous (edited by Ernest Rhys)",
    "The Philippine Islands, 1493-1898, Volume 52, 1841-1898: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "Hawaiian folk tales : $b a collection of native legends": "Thomas G. Thrum (collector)",
    "The Chinese Fairy Book": "R. Wilhelm (editor, translated by Frederick H. Martens)",
    "The Cavalier Songs and Ballads of England from 1642 to 1684": "Charles Mackay (editor)",
    "The World's Greatest Books — Volume 02 — Fiction": "Arthur Mee and J. A. Hammerton (editors)",
    "The Oxford Book of Latin Verse: From the earliest fragments to the end of the Vth Century A.D.": "H. W. Garrod (editor)",
    "The Ballads and Songs of Yorkshire: Transcribed from Private Manuscripts, Rare Broadsides, and Scarce Publications; with Notes and a Glossary": "C. J. Davison Ingledew (editor)",
    "English Economic History: Select Documents": "Alfred Edward Bland, et al. (editors)",
    "A Century of Parody and Imitation": "Walter Jerrold and R. M. Leonard (editors)",
    "The English and Scottish popular ballads, volume 1 (of 5)": "Francis James Child (editor)",
    "Ten Thousand Wonderful Things: Comprising whatever is marvellous and rare, curious, eccentric and extraordinary in all ages and nations": "Anonymous (edited by I. P. Collins)",
    "Narrative and Critical History of America, Vol. 6 (of 8): The United States of North America, Part I": "Justin Winsor (editor)",
    "The Circle of Knowledge: A Classified, Simplified, Visualized Book of Answers": "Henry W. Ruoff",
    "A Prose English Translation of Harivamsha": "Manmatha Nath Dutt (translator)",
    "Short stories from Life: The 81 prize stories in \"Life's\" Shortest Story Contest": "Thomas L. Masson (editor)",
    "A Catalogue of Books in English Later than 1700, Vol. 2: Forming a portion of the library of Robert Hoe": "Robert Hoe (collector, catalogued by various)",
    "The best British short stories of 1922": "Edward J. O'Brien and John Cournos (editors)",
    "The best short stories of 1922, and the yearbook of the American short story": "Edward J. O'Brien (editor)",
    "Tennyson and his friends": "Hallam Tennyson (editor)",
    "Khaki knitting book": "Olive Whiting",
    "A Catalogue of Books in English Later than 1700, Vol. 1: Forming a portion of the library of Robert Hoe": "Robert Hoe (collector, catalogued by various)",
    "Childhood's Favorites and Fairy Stories: The Young Folks Treasury, Volume 1": "Hamilton Wright Mabie, Edward Everett Hale, William Byron Forbush (editors)",
    "The Little Mother Goose": "Anonymous (illustrated by Jessie Willcox Smith)",
    "Library of the World's Best Mystery and Detective Stories": "Julian Hawthorne (editor)",
    "The best short stories of 1917, and the yearbook of the American short story": "Edward J. O'Brien (editor)",
    "A collection of short-stories": "Lemuel Arthur Pittenger (editor)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 06": "Charles Dudley Warner (editor)",
    "Henley's Twentieth Century Formulas, Recipes and Processes": "Gardner Dexter Hiscox (editor)",
    "Armenia and the Armenians: A List of References in the New York Public Library": "Richard Gottheil",
    "Narrative and Critical History of America, Vol. 5 (of 8): The English and French in North America 1689-1763": "Justin Winsor (editor)",
    "Tales of King Arthur and the Round Table, Adapted from the Book of Romance": "Andrew Lang, et al. (adapters)",
    "The Book of the Thousand Nights and a Night — Volume 04 (of 10)": "Richard Francis Burton (translator)",
    "The Bible, King James Version, Complete": "Various (King James Version translators)",
    "Seventh Annual Report of the Bureau of Ethnology: to the Secretary of the Smithsonian Institution, 1885-1886, Government Printing Office, Washington, 1891": "Bureau of Ethnology (various contributors)",
    "Pennsylvania Dutch Cooking": "Anonymous",
    "The Boy Mechanic, Book 2: 1000 Things for Boys to Do": "H. H. Windsor (editor)",
    "The Wit and Humor of America, Volume II. (of X.)": "Marshall P. Wilder (editor)",
    "Japan: From the Japanese Government History": "Kan'ichi Asakawa (translator)",
    "On the magnet, magnetick bodies also, and on the great magnet the earth: a new physiology, demonstrated by many arguments & experiments": "William Gilbert (author)",
    "The Diamond Sutra (Chin-Kang-Ching) or Prajna-Paramita": "William Gemmell (translator)",
    "The Philippine Islands, 1493-1898, Volume 38, 1674-1683: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The Wit and Humor of America, Volume IX (of X)": "Marshall P. Wilder (editor)",
    "Christopher Columbus and His Monument Columbia: being a concordance of choice tributes to the great Genoese, his grand discovery, and his greatness of mind and purpose": "Anonymous (compiled by various)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 13": "Charles Dudley Warner (editor)",
    "The History of Woman Suffrage, Volume VI": "Ida Husted Harper (editor)",
    "The Colleges of Oxford: Their History and Traditions: XXI Chapters Contributed by Members of the Colleges": "Various (contributed by members)",
    "Celtic Folk and Fairy Tales": "Joseph Jacobs (editor)",
    "Indian Fairy Tales": "Joseph Jacobs (collector)",
    "Anthologica Rarissima: The Way of a Virgin: Being excerpts from rare, curious and diverting books": "Various (edited anonymously)",
    "The Dean of Lismore's Book: A Selection of Ancient Gaelic Poetry": "Thomas Maclauchlan (editor)",
    "Lancelot of the Laik: A Scottish Metrical Romance (About 1490-1500 A. D.)": "Walter W. Skeat (editor)",
    "More toasts: Jokes, stories and quotations": "Marion Dix Mosher (compiler)",
    "Devil Stories: An Anthology": "Maximilian J. Rudwin (editor)",
    "Sagas from the Far East; or, Kalmouk and Mongolian Traditionary Tales": "Rachel Harriette Busk (collector)",
    "The Burlington magazine : $b for connoisseurs. vol. II—June to August": "Various",
    "Tea-Cup Reading and Fortune-Telling by Tea Leaves, by a Highland Seer": "Anonymous (A Highland Seer)",
    "The Veil Lifted: Modern Developments of Spirit Photography": "J. Traill Taylor (author)",
    "Ancient Poems, Ballads, and Songs of the Peasantry of England": "James Maidment and Robert Bell (editors)",
}

# Load dataset
df = pd.read_csv("author_identification_dataset_expanded.csv")

# Filter rows with 'Unknown'
mask_unknown = df['author'].str.strip().str.lower() == "unknown"
unknown_books = df[mask_unknown]

print(f"Found {len(unknown_books)} records with 'Unknown' author.\n")

# Preview replacements
for idx, row in unknown_books.iterrows():
    title = row['title'].strip()
    replacement = resolved_authors.get(title, None)
    if replacement:
        print(f"- Title: {title}")
        print(f"  Current author: Unknown")
        print(f"  → Replacement: {replacement}\n")
    else:
        print(f"- Title: {title}")
        print(f"  Current author: Unknown")
        print(f"  → ⚠️ No mapping found in resolved_authors\n")


for title, true_author in resolved_authors.items():
    mask = (df['author'].str.strip().str.lower() == "unknown") & (df['title'].str.strip() == title)
    df.loc[mask, 'author'] = true_author

remaining = (df['author'].str.lower() == "unknown").sum()
print("\nAfter applying replacements:")
print("Remaining 'Unknown' authors:", remaining)

# === Save cleaned dataset ===
df.to_csv("author_identification_dataset_expanded_fixed.csv", index=False, encoding="utf-8")
print("✅ Saved cleaned dataset as 'author_identification_dataset_expanded_fixed.csv'")


Found 159 records with 'Unknown' author.

- Title: Beowulf: An Anglo-Saxon Epic Poem
  Current author: Unknown
  → Replacement: J. Lesslie Hall (translator)- The lesser Key of Solomon is probably attributed to King Solomon, but often listed as Unknown in Project Gutenberg.

- Title: The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art
  Current author: Unknown
  → Replacement: L. W. de Laurence

- Title: Chambers's Twentieth Century Dictionary (part 1 of 4: A-D)
  Current author: Unknown
  → Replacement: Thomas Davidson

- Title: Manual of Classical Erotology (De figuris Veneris)
  Current author: Unknown
  → Replacement: Friedrich Karl Forberg

- Title: The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3
  Current author: Unknown
  → Replacement: Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)

- Title: The Thousand and O

In [10]:
table_text = """
| Title | Author/Editor |
|-------|--------------|
| Notes and Queries, Number 82, May 24, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc. | Various |
| Golden Days for Boys and Girls, Vol. XII, Jan. 3, 1891 | Various (Editor: James Elverson) |
| Encyclopaedia Britannica, 11th Edition, "Cincinnatus" to "Cleruchy": Volume 6, Slice 4 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Gichtel, Johann" to "Glory": Volume 12, Slice 1 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Coucy-le-Château" to "Crocodile": Volume 7, Slice 6 | Various (Editor: Hugh Chisholm) |
| Webster's Unabridged Dictionary | Noah Webster |
| Encyclopaedia Britannica, 11th Edition, "Bohemia" to "Borgia, Francis": Volume 4, Slice 2 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Basso-relievo" to "Bedfordshire": Volume 3, Slice 4 | Various (Editor: Hugh Chisholm) |
| The New Gresham Encyclopedia. A to Amide: Vol. 1 Part 1 | Various |
| Encyclopaedia Britannica, 11th Edition, "Fenton, Edward" to "Finistere": Volume 10, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Echinoderma" to "Edward, prince of Wales": Volume 8, Slice 10 | Various (Editor: Hugh Chisholm) |
| Notes and Queries, Vol. IV, Number 97, September 6, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc. | Various |
| Encyclopaedia Britannica, 11th Edition, "Lord Chamberlain" to "Luqman": Volume 17, Slice 1 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Bréquigny, Louis Georges Oudard Feudrix de" to "Bulgaria": Volume 4, Part 3 | Various (Editor: Hugh Chisholm) |
| The Antiquarian Magazine & Bibliographer; Vol. 4, July-Dec 1884 | Various (Editor: Edward Walford) |
| Encyclopaedia Britannica, 11th Edition, "Bent, James" to "Bibirine": Volume 3, Slice 6 | Various (Editor: Hugh Chisholm) |
| The Journal of Negro History, Volume 8, 1923 | Various (Editor: Carter G. Woodson) |
| Encyclopaedia Britannica, 11th Edition, "Ethiopia" to "Evangelical Association": Volume 9, Slice 8 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Dübner, Johann Friedrich" to "Dyeing": Volume 8, Slice 8 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Lightfoot, Joseph" to "Liquidation": Volume 16, Slice 6 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Dodwell, Edward" to "Drama": Volume 8, Slice 6 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Bradford, William" to "Brequigny, Louis": Volume 4, Slice 4 | Various (Editor: Hugh Chisholm) |
| The Cumulative Book Review Digest, Volume 1, 1905: Complete in a single alphabet | Various (Similar to Book Review Digest series, edited by various including Mary Katharine Reely) |
| Encyclopaedia Britannica, 11th Edition, "Capefigue" to "Carneades": Volume 5, Slice 3 | Various (Editor: Hugh Chisholm) |
| Cowboy Songs, and Other Frontier Ballads | Various (Collected by John A. Lomax) |
| Encyclopaedia Britannica, 11th Edition, "Baconthorpe" to "Bankruptcy": Volume 3, Part 1, Slice 2 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Cockaigne" to "Columbus, Christopher": Volume 6, Slice 6 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "G" to "Gaskell, Elizabeth": Volume 11, Slice 4 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "David, St" to "Demidov": Volume 7, Slice 10 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Cerargyrite" to "Charing Cross": Volume 5, Slice 7 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Anjar" to "Apollo": Volume 2, Slice 2 | Various (Editor: Hugh Chisholm) |
| Lucifer: A Theosophical Magazine. Volume I. September 1887-February 1888. | Various (Edited by H. P. Blavatsky and Mabel Collins) |
| Encyclopaedia Britannica, 11th Edition, "Austria, Lower" to "Bacon": Volume 3, Part 1, Slice 1 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Edwardes, Sir Herbert Benjamin" to "Ehrenbreitstein": Volume 9, Slice 1 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Demijohn" to "Destructors": Volume 8, Slice 2 | Various (Editor: Hugh Chisholm) |
| The London Mercury, Vol. I, Nos. 1-6, November 1919 to April 1920 | Various (Editor: Sir John Collings Squire) |
| Encyclopaedia Britannica, 11th Edition, "Chitral" to "Cincinnati": Volume 6, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Geoponici" to "Germany": Volume 11, Slice 7 | Various (Editor: Hugh Chisholm) |
| Astounding Stories of Super-Science February 1930 | Various (Editor: Harry Bates) |
| Encyclopaedia Britannica, 11th Edition, "Bulgaria" to "Calgary": Volume 4, Part 4 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Luray Cavern" to "Mackinac Island": Volume 17, Slice 2 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Dinard" to "Dodsworth, Roger": Volume 8, Slice 5 | Various (Editor: Hugh Chisholm) |
| Scientific American, Vol. XXXVII.—No. 2. [New Series.], July 14, 1877: A Weekly Journal of Practical Information, Art, Science, Mechanics, Chemistry, and Manufactures | Various |
| Encyclopaedia Britannica, 11th Edition, "Kite-Flying" to "Kyshtym": Volume 15, Slice 8 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Borgia, Lucrezia" to "Bradford, John": Volume 4, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "England" to "English Finance": Volume 9, Slice 4 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "English Language" to "Epsom Salts": Volume 9, Slice 6 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Hudson River" to "Hurstmonceaux": Volume 13, Slice 8 | Various (Editor: Hugh Chisholm) |
| The New Gresham Encyclopedia. Deposition to Eberswalde: Volume 4, Part 1 | Various |
| Encyclopaedia Britannica, 11th Edition, "Map" to "Mars": Volume 17, Slice 6 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Coquelin, Benoît Constant" to "Costume": Volume 7, Slice 4 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Kelly, Edward" to "Kite": Volume 15, Slice 7 | Various (Editor: Hugh Chisholm) |
| The New Gresham Encyclopedia. Estremoz to Felspar: Volume 4, Part 3 | Various |
| The inter ocean curiosity shop for the year 1883 | Various (Compiled by various contributors; specific editor not identified in search results, but consistent with pattern) |
| Encyclopaedia Britannica, 11th Edition, "Columbus" to "Condottiere": Volume 6, Slice 7 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Helmont, Jean" to "Hernosand": Volume 13, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "McKinley, William" to "Magnetism, Terrestrial": Volume 17, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Calhoun" to "Camoens": Volume 5, Slice 1 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "French Literature" to "Frost, William": Volume 11, Slice 2 | Various (Editor: Hugh Chisholm) |
| The International Monthly, Volume 3, No. 1, April, 1851 | Various |
| The Atlantic Monthly, Volume 17, No. 101, March, 1866: A Magazine of Literature, Science, Art, and Politics | Various |
| Encyclopaedia Britannica, 11th Edition, "Carnegie Andrew" to "Casus Belli": Volume 5, Slice 4 | Various (Editor: Hugh Chisholm) |
| The Book Review Digest, v. 16, 1920 : $b Sixteenth annual accumulation. Reviews of 1920 books | Various (Editor: Mary Katharine Reely) |
| Scientific  American, Volume XXXVI., No. 8, February 24, 1877: A Weekly Journal of Practical Information, Art, Science,; Mechanics, Chemistry, and Manufactures. | Various |
| Encyclopaedia Britannica, 11th Edition, "Gyantse" to "Hallel": Volume 12, Slice 7 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Haller, Albrecht" to "Harmonium": Volume 12, Slice 8 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Bedlam" to "Benson, George": Volume 3, Slice 5 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Greek Law" to "Ground-Squirrel": Volume 12, Slice 5 | Various (Editor: Hugh Chisholm) |
| The journal of the American-Irish Historical Society, Vol. IX, 1910 | Various |
| Encyclopaedia Britannica, 11th Edition, "Latin Language" to "Lefebvre, François-Joseph": Volume 16, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Franciscans" to "French Language": Volume 11, Slice 1 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Fox, George" to "France": Volume 10, Slice 7 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Arundel, Thomas" to "Athens": Volume 2, Slice 7 | Various (Editor: Hugh Chisholm) |
| Scientific  American, Volume XXIV., No. 12,  March 18, 1871: A Weekly Journal of Practical Information, Art, Science,; Mechanics, Chemistry, and Manufactures. | Various |
| Encyclopaedia Britannica, 11th Edition, "Camorra" to "Cape Colony": Volume 5, Slice 2 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Hinduism" to "Home, Earls of": Volume 13, Slice 5 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Gordon, Lord George" to "Grasses": Volume 12, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Evangelical Church Conference" to "Fairbairn, Sir William": Volume 10, Slice 1 | Various (Editor: Hugh Chisholm) |
| The Washington Historical Quarterly, Volume V, 1914 | Various (Edited by Edmond S. Meany) |
| Encyclopaedia Britannica, 11th Edition, "Fairbanks, Erastus" to "Fens": Volume 10, Slice 2 | Various (Editor: Hugh Chisholm) |
| Punch, or the London Charivari, Volume 152, January 24, 1917 | Various |
| The New Gresham Encyclopedia. Atrebates to Bedlis: Vol. 1 Part 3 | Various |
| Encyclopaedia Britannica, 11th Edition, "Celtes, Konrad" to "Ceramics": Volume 5, Slice 6 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "L" to "Lamellibranchia": Volume 16, Slice 1 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Electrostatics" to "Engis": Volume 9, Slice 3 | Various (Editor: Hugh Chisholm) |
| The International Monthly, Volume 4, No. 3, October, 1851 | Various |
| The American Missionary — Volume 49, No. 03, March, 1895 | Various |
| The New Gresham Encyclopedia. Ebert to Estremadura: Volume 4, Part 2 | Various |
| Encyclopaedia Britannica, 11th Edition, "Letter" to "Lightfoot, John": Volume 16, Slice 5 | Various (Editor: Hugh Chisholm) |
| The Journal of Negro History, Volume 7, 1922 | Various (Editor: Carter G. Woodson) |
| The Germ: Thoughts towards Nature in Poetry, Literature and Art | Various (Edited by Dante Gabriel Rossetti; Commentator: William Michael Rossetti) |
| Encyclopaedia Britannica, 11th Edition, "Dagupan" to "David": Volume 7, Slice 9 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Lamennais, Robert de" to "Latini, Brunetto": Volume 16, Slice 2 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Destructors" to "Diameter": Volume 8, Slice 3 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Gloss" to "Gordon, Charles George": Volume 12, Slice 2 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Cat" to "Celt": Volume 5, Slice 5 | Various (Editor: Hugh Chisholm) |
| Current History, Vol. VIII, No. 3, June 1918: A Monthly Magazine of the New York Times | Various |
| Punch, or the London Charivari, Volume 1, Complete | Various |
| Encyclopaedia Britannica, 11th Edition, "Cosway, Richard" to "Coucy, Le Châtelain de": Volume 7, Slice 5 | Various (Editor: Hugh Chisholm) |
| The journal of the American-Irish Historical Society, Vol. VIII, 1909 | Various |
| Astounding Stories, July, 1931 | Various (Editor: Harry Bates) |
| Encyclopaedia Britannica, 11th Edition, "Crocoite" to "Cuba": Volume 7, Slice 7 | Various (Editor: Hugh Chisholm) |
| The Illustrated London Reading Book | Various |
| Encyclopaedia Britannica, 11th Edition, "Diameter" to "Dinarchus": Volume 8, Slice 4 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Chariot" to "Chatelaine": Volume 5, Slice 8 | Various (Editor: Hugh Chisholm) |
| The Newcastle Song Book; or, Tyne-Side Songster: Being a Collection of Comic and Satirical Songs, Descriptive of Eccentric Characters, and the Manners and Customs of a Portion of the Labouring Population of Newcastle and the Neighbourhood | Various |
| The Scrap Book, Volume 1, No. 4: June 1906 | Various |
| Encyclopaedia Britannica, 11th Edition, "Mars" to "Matteawan": Volume 17, Slice 7 | Various (Editor: Hugh Chisholm) |
| Encyclopaedia Britannica, 11th Edition, "Groups, Theory of" to "Gwyniad": Volume 12, Slice 6 | Various (Editor: Hugh Chisholm) 
"""

resolved_authors = {}

for line in table_text.splitlines():
    line = line.strip()
    # Skip header lines
    if not line.startswith("|") or line.startswith("|---") or line.lower().startswith("| title"):
        continue
    parts = [p.strip() for p in line.strip("|").split("|")]
    if len(parts) == 2:
        title, author = parts
        resolved_authors[title] = author

# Print in nice Python dict syntax
print("resolved_authors = {")
for title, author in resolved_authors.items():
    # No escaping double quotes needed if we wrap keys/values in single quotes
    print(f"    '{title}': '{author}',")
print("}")

resolved_authors = {
    'Notes and Queries, Number 82, May 24, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc.': 'Various',
    'Golden Days for Boys and Girls, Vol. XII, Jan. 3, 1891': 'Various (Editor: James Elverson)',
    'Encyclopaedia Britannica, 11th Edition, "Cincinnatus" to "Cleruchy": Volume 6, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Gichtel, Johann" to "Glory": Volume 12, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Coucy-le-Château" to "Crocodile": Volume 7, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Webster's Unabridged Dictionary': 'Noah Webster',
    'Encyclopaedia Britannica, 11th Edition, "Bohemia" to "Borgia, Francis": Volume 4, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Basso-relievo" to "Bedfordshire": Volume 3, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'The Ne

In [14]:
resolved_authors = {
    'Notes and Queries, Number 82, May 24, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc.': 'Various',
    'Golden Days for Boys and Girls, Vol. XII, Jan. 3, 1891': 'Various (Editor: James Elverson)',
    'Encyclopaedia Britannica, 11th Edition, "Cincinnatus" to "Cleruchy": Volume 6, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Gichtel, Johann" to "Glory": Volume 12, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Coucy-le-Château" to "Crocodile": Volume 7, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Websters Unabridged Dictionary': 'Noah Webster',
    'Encyclopaedia Britannica, 11th Edition, "Bohemia" to "Borgia, Francis": Volume 4, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Basso-relievo" to "Bedfordshire": Volume 3, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'The New Gresham Encyclopedia. A to Amide: Vol. 1 Part 1': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Fenton, Edward" to "Finistere": Volume 10, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Echinoderma" to "Edward, prince of Wales": Volume 8, Slice 10': 'Various (Editor: Hugh Chisholm)',
    'Notes and Queries, Vol. IV, Number 97, September 6, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc.': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Lord Chamberlain" to "Luqman": Volume 17, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Bréquigny, Louis Georges Oudard Feudrix de" to "Bulgaria": Volume 4, Part 3': 'Various (Editor: Hugh Chisholm)',
    'The Antiquarian Magazine & Bibliographer; Vol. 4, July-Dec 1884': 'Various (Editor: Edward Walford)',
    'Encyclopaedia Britannica, 11th Edition, "Bent, James" to "Bibirine": Volume 3, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'The Journal of Negro History, Volume 8, 1923': 'Various (Editor: Carter G. Woodson)',
    'Encyclopaedia Britannica, 11th Edition, "Ethiopia" to "Evangelical Association": Volume 9, Slice 8': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Dübner, Johann Friedrich" to "Dyeing": Volume 8, Slice 8': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Lightfoot, Joseph" to "Liquidation": Volume 16, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Dodwell, Edward" to "Drama": Volume 8, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Bradford, William" to "Brequigny, Louis": Volume 4, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'The Cumulative Book Review Digest, Volume 1, 1905: Complete in a single alphabet': 'Various (Similar to Book Review Digest series, edited by various including Mary Katharine Reely)',
    'Encyclopaedia Britannica, 11th Edition, "Capefigue" to "Carneades": Volume 5, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Cowboy Songs, and Other Frontier Ballads': 'Various (Collected by John A. Lomax)',
    'Encyclopaedia Britannica, 11th Edition, "Baconthorpe" to "Bankruptcy": Volume 3, Part 1, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Cockaigne" to "Columbus, Christopher": Volume 6, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "G" to "Gaskell, Elizabeth": Volume 11, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "David, St" to "Demidov": Volume 7, Slice 10': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Cerargyrite" to "Charing Cross": Volume 5, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Anjar" to "Apollo": Volume 2, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Lucifer: A Theosophical Magazine. Volume I. September 1887-February 1888.': 'Various (Edited by H. P. Blavatsky and Mabel Collins)',
    'Encyclopaedia Britannica, 11th Edition, "Austria, Lower" to "Bacon": Volume 3, Part 1, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Edwardes, Sir Herbert Benjamin" to "Ehrenbreitstein": Volume 9, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Demijohn" to "Destructors": Volume 8, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'The London Mercury, Vol. I, Nos. 1-6, November 1919 to April 1920': 'Various (Editor: Sir John Collings Squire)',
    'Encyclopaedia Britannica, 11th Edition, "Chitral" to "Cincinnati": Volume 6, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Geoponici" to "Germany": Volume 11, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'Astounding Stories of Super-Science February 1930': 'Various (Editor: Harry Bates)',
    'Encyclopaedia Britannica, 11th Edition, "Bulgaria" to "Calgary": Volume 4, Part 4': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Luray Cavern" to "Mackinac Island": Volume 17, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Dinard" to "Dodsworth, Roger": Volume 8, Slice 5': 'Various (Editor: Hugh Chisholm)',
    'Scientific American, Vol. XXXVII.—No. 2. [New Series.], July 14, 1877: A Weekly Journal of Practical Information, Art, Science, Mechanics, Chemistry, and Manufactures': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Kite-Flying" to "Kyshtym": Volume 15, Slice 8': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Borgia, Lucrezia" to "Bradford, John": Volume 4, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "England" to "English Finance": Volume 9, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "English Language" to "Epsom Salts": Volume 9, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Hudson River" to "Hurstmonceaux": Volume 13, Slice 8': 'Various (Editor: Hugh Chisholm)',
    'The New Gresham Encyclopedia. Deposition to Eberswalde: Volume 4, Part 1': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Map" to "Mars": Volume 17, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Coquelin, Benoît Constant" to "Costume": Volume 7, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Kelly, Edward" to "Kite": Volume 15, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'The New Gresham Encyclopedia. Estremoz to Felspar: Volume 4, Part 3': 'Various',
    'The inter ocean curiosity shop for the year 1883': 'Various (Compiled by various contributors; specific editor not identified in search results, but consistent with pattern)',
    'Encyclopaedia Britannica, 11th Edition, "Columbus" to "Condottiere": Volume 6, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Helmont, Jean" to "Hernosand": Volume 13, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "McKinley, William" to "Magnetism, Terrestrial": Volume 17, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Calhoun" to "Camoens": Volume 5, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "French Literature" to "Frost, William": Volume 11, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'The International Monthly, Volume 3, No. 1, April, 1851': 'Various',
    'The Atlantic Monthly, Volume 17, No. 101, March, 1866: A Magazine of Literature, Science, Art, and Politics': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Carnegie Andrew" to "Casus Belli": Volume 5, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'The Book Review Digest, v. 16, 1920 : $b Sixteenth annual accumulation. Reviews of 1920 books': 'Various (Editor: Mary Katharine Reely)',
    'Scientific  American, Volume XXXVI., No. 8, February 24, 1877: A Weekly Journal of Practical Information, Art, Science,; Mechanics, Chemistry, and Manufactures.': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Gyantse" to "Hallel": Volume 12, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Haller, Albrecht" to "Harmonium": Volume 12, Slice 8': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Bedlam" to "Benson, George": Volume 3, Slice 5': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Greek Law" to "Ground-Squirrel": Volume 12, Slice 5': 'Various (Editor: Hugh Chisholm)',
    'The journal of the American-Irish Historical Society, Vol. IX, 1910': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Latin Language" to "Lefebvre, François-Joseph": Volume 16, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Franciscans" to "French Language": Volume 11, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Fox, George" to "France": Volume 10, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Arundel, Thomas" to "Athens": Volume 2, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'Scientific  American, Volume XXIV., No. 12,  March 18, 1871: A Weekly Journal of Practical Information, Art, Science,; Mechanics, Chemistry, and Manufactures.': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Camorra" to "Cape Colony": Volume 5, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Hinduism" to "Home, Earls of": Volume 13, Slice 5': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Gordon, Lord George" to "Grasses": Volume 12, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Evangelical Church Conference" to "Fairbairn, Sir William": Volume 10, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'The Washington Historical Quarterly, Volume V, 1914': 'Various (Edited by Edmond S. Meany)',
    'Encyclopaedia Britannica, 11th Edition, "Fairbanks, Erastus" to "Fens": Volume 10, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Punch, or the London Charivari, Volume 152, January 24, 1917': 'Various',
    'The New Gresham Encyclopedia. Atrebates to Bedlis: Vol. 1 Part 3': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Celtes, Konrad" to "Ceramics": Volume 5, Slice 6': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "L" to "Lamellibranchia": Volume 16, Slice 1': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Electrostatics" to "Engis": Volume 9, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'The International Monthly, Volume 4, No. 3, October, 1851': 'Various',
    'The American Missionary — Volume 49, No. 03, March, 1895': 'Various',
    'The New Gresham Encyclopedia. Ebert to Estremadura: Volume 4, Part 2': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Letter" to "Lightfoot, John": Volume 16, Slice 5': 'Various (Editor: Hugh Chisholm)',
    'The Journal of Negro History, Volume 7, 1922': 'Various (Editor: Carter G. Woodson)',
    'The Germ: Thoughts towards Nature in Poetry, Literature and Art': 'Various (Edited by Dante Gabriel Rossetti; Commentator: William Michael Rossetti)',
    'Encyclopaedia Britannica, 11th Edition, "Dagupan" to "David": Volume 7, Slice 9': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Lamennais, Robert de" to "Latini, Brunetto": Volume 16, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Destructors" to "Diameter": Volume 8, Slice 3': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Gloss" to "Gordon, Charles George": Volume 12, Slice 2': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Cat" to "Celt": Volume 5, Slice 5': 'Various (Editor: Hugh Chisholm)',
    'Current History, Vol. VIII, No. 3, June 1918: A Monthly Magazine of the New York Times': 'Various',
    'Punch, or the London Charivari, Volume 1, Complete': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Cosway, Richard" to "Coucy, Le Châtelain de": Volume 7, Slice 5': 'Various (Editor: Hugh Chisholm)',
    'The journal of the American-Irish Historical Society, Vol. VIII, 1909': 'Various',
    'Astounding Stories, July, 1931': 'Various (Editor: Harry Bates)',
    'Encyclopaedia Britannica, 11th Edition, "Crocoite" to "Cuba": Volume 7, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'The Illustrated London Reading Book': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Diameter" to "Dinarchus": Volume 8, Slice 4': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Chariot" to "Chatelaine": Volume 5, Slice 8': 'Various (Editor: Hugh Chisholm)',
    'The Newcastle Song Book; or, Tyne-Side Songster: Being a Collection of Comic and Satirical Songs, Descriptive of Eccentric Characters, and the Manners and Customs of a Portion of the Labouring Population of Newcastle and the Neighbourhood': 'Various',
    'The Scrap Book, Volume 1, No. 4: June 1906': 'Various',
    'Encyclopaedia Britannica, 11th Edition, "Mars" to "Matteawan": Volume 17, Slice 7': 'Various (Editor: Hugh Chisholm)',
    'Encyclopaedia Britannica, 11th Edition, "Groups, Theory of" to "Gwyniad": Volume 12, Slice 6': 'Various (Editor: Hugh Chisholm)',
}

# Load dataset
df = pd.read_csv("author_identification_dataset_expanded.csv")

# Filter rows with 'Unknown'
mask_unknown = df['author'].str.strip().str.lower() == "various"
unknown_books = df[mask_unknown]

print(f"Found {len(unknown_books)} records with 'Unknown' author.\n")

# Preview replacements
for idx, row in unknown_books.iterrows():
    title = row['title'].strip()
    replacement = resolved_authors.get(title, None)
    if replacement:
        print(f"- Title: {title}")
        print(f"  Current author: various")
        print(f"  → Replacement: {replacement}\n")
    else:
        print(f"- Title: {title}")
        print(f"  Current author: various")
        print(f"  → ⚠️ No mapping found in resolved_authors\n")

# === Apply replacements for author='Various' ===
for title, true_author in resolved_authors.items():
    mask = (df['author'].str.strip().str.lower() == "various") & (df['title'].str.strip() == title)
    df.loc[mask, 'author'] = true_author

# === Check what's left with plain 'Various' ===
remaining = (df['author'].str.strip().str.lower() == "various").sum()
print("\nAfter applying replacements:")
print("Remaining 'Various' authors:", remaining)

# === Save cleaned dataset ===
df.to_csv("author_identification_dataset_expanded_fixed.csv", index=False, encoding="utf-8")
print("✅ Saved cleaned dataset as 'author_identification_dataset_expanded_fixed.csv'")

Found 109 records with 'Unknown' author.

- Title: Notes and Queries, Number 82, May 24, 1851: A Medium of Inter-communication for Literary Men, Artists, Antiquaries, Genealogists, etc.
  Current author: various
  → Replacement: Various

- Title: Golden Days for Boys and Girls, Vol. XII, Jan. 3, 1891
  Current author: various
  → Replacement: Various (Editor: James Elverson)

- Title: Encyclopaedia Britannica, 11th Edition, "Cincinnatus" to "Cleruchy": Volume 6, Slice 4
  Current author: various
  → Replacement: Various (Editor: Hugh Chisholm)

- Title: Encyclopaedia Britannica, 11th Edition, "Gichtel, Johann" to "Glory": Volume 12, Slice 1
  Current author: various
  → Replacement: Various (Editor: Hugh Chisholm)

- Title: Encyclopaedia Britannica, 11th Edition, "Coucy-le-Château" to "Crocodile": Volume 7, Slice 6
  Current author: various
  → Replacement: Various (Editor: Hugh Chisholm)

- Title: Webster's Unabridged Dictionary
  Current author: various
  → ⚠️ No mapping found in res

In [3]:
import pandas as pd

# Load your cleaned dataset
df = pd.read_csv("author_identification_dataset_expanded_fixed.csv")

print("=== Dataset Overview ===")
print(f"Total records: {len(df)}")
print(f"Total unique authors: {df['author'].nunique()}")
print(f"Total unique titles: {df['title'].nunique()}")

print("\n=== Top 20 Authors by Record Count ===")
author_counts = df['author'].value_counts().head(20)
print(author_counts)

print("\n=== Dataset Info ===")
print(df.info())

print("\n=== Sample of Dataset ===")
print(df.head(10))

=== Dataset Overview ===
Total records: 5483
Total unique authors: 1502
Total unique titles: 5248

=== Top 20 Authors by Record Count ===
author
Unknown                            159
William Wymark Jacobs               97
George Alfred Henty                 89
R M Ballantyne                      88
Nathaniel Hawthorne                 86
William Dean Howells                84
Robert Louis Stevenson              79
Henry James                         72
Various (Editor: Hugh Chisholm)     72
Anthony Trollope                    71
Charles Dickens                     61
Andrew Lang                         60
Charlotte Mary Yonge                60
Bret Harte                          58
Edward Stratemeyer                  58
Sir Arthur Conan Doyle              57
Edward Phillips Oppenheim           53
Henry Rider Haggard                 52
Herbert George Wells                51
Jack London                         48
Name: count, dtype: int64

=== Dataset Info ===
<class 'pandas.core.frame.D

In [4]:
import pandas as pd

# Load your cleaned dataset
df = pd.read_csv("author_identification_dataset_expanded_fixed.csv")

# Filter rows where author is still 'Unknown'
unknown_rows = df[df['author'].str.strip().str.lower() == "unknown"]

print(f"Total records with 'Unknown' author: {len(unknown_rows)}\n")

# Show unique titles (so you know exactly which works are left unmapped)
print("=== Unique Titles with 'Unknown' Author ===")
for title in unknown_rows['title'].unique():
    print("-", title)

# If you want both the count of how many times each 'Unknown' title shows:
print("\n=== Count per Title (Unknown Author) ===")
print(unknown_rows['title'].value_counts())

Total records with 'Unknown' author: 159

=== Unique Titles with 'Unknown' Author ===
- Beowulf: An Anglo-Saxon Epic Poem
- The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art
- Chambers's Twentieth Century Dictionary (part 1 of 4: A-D)
- Manual of Classical Erotology (De figuris Veneris)
- The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3
- The Thousand and One Nights, Vol. I.: Commonly Called the Arabian Nights' Entertainments
- The King James Version of the Bible
- The Story of Beowulf, Translated from Anglo-Saxon into Modern English Prose
- Doctrina Christiana: The first book printed in the Philippines, Manila, 1593.
- Philippine Folk Tales
- The Egyptian Book of the dead
- The Doré Bible Gallery, Complete: Containing One Hundred Superb Illustrations, and a Page of Explanatory Letter-press Facing Each
- Forty-Eighth Annual Report

In [7]:
# === Load the partially cleaned dataset ===
df = pd.read_csv("author_identification_dataset_expanded_fixed.csv")

resolved_authors = {
    "Beowulf: An Anglo-Saxon Epic Poem": "J. Lesslie Hall (translator)- The lesser Key of Solomon is probably attributed to King Solomon, but often listed as Unknown in Project Gutenberg.",
    "The lesser Key of Solomon, Goetia, the book of evil spirits : $b contains two hundred diagrams and seals for invocation and convocation of spirits, necromancy, witchcraft and black art": "L. W. de Laurence",
    "Chambers's Twentieth Century Dictionary (part 1 of 4: A-D)": "Thomas Davidson",
    "Manual of Classical Erotology (De figuris Veneris)": "Friedrich Karl Forberg",
    "The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 1: Books 1, 2 and 3": "Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)",
    "The Thousand and One Nights, Vol. I.: Commonly Called the Arabian Nights' Entertainments": "Anonymous (edited by Edward Stanley Poole)",
    "The King James Version of the Bible": "Various (King James Version translators)",
    "The Story of Beowulf, Translated from Anglo-Saxon into Modern English Prose": "Ernest J. B. Kirtlan (translator)",
    "Doctrina Christiana: The first book printed in the Philippines, Manila, 1593.": "Anonymous",
    "Philippine Folk Tales": "Clara Kern Bayliss, Berton L. Maxfield, Fletcher Gardner, Laura Watson Benedict, W. H. Millington (collectors)",
    "The Egyptian Book of the dead": "P. Le Page Renouf and Edouard Naville (translators)",
    "The Doré Bible Gallery, Complete: Containing One Hundred Superb Illustrations, and a Page of Explanatory Letter-press Facing Each": "Anonymous (illustrated by Gustave Doré)",
    "Forty-Eighth Annual Report of the Bureau of American Ethnology to the Secretary of the Smithsonian Institution, 1930-1931, Government Printing Office, Washington, 1933.": "Bureau of American Ethnology (edited by various)",
    "The Philippine Islands, 1493-1898 — Volume 07 of 55: 1588-1591; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century": "Emma Helen Blair, James Alexander Robertson, Edward Gaylord Bourne (editors)",
    "Reliques of Ancient English Poetry, Volume 2 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date": "Thomas Percy (editor)",
    "A Polyglot of Foreign Proverbs: Comprising French, German, Dutch, Spanish, Portuguese and Danish, with English Translations and a General Index": "Henry Ellis (compiler)",
    "The Book of the Thousand Nights and a Night — Volume 02 (of 10)": "Richard Francis Burton (translator)",
    "The Song Celestial; Or, Bhagavad-Gîtâ (from the Mahâbhârata): Being a discourse between Arjuna, Prince of India, and the Supreme Being under the form of Krishna": "Sir Edwin Arnold (translator)",
    "Narrative and Critical History of America, Vol. 2 (of 8): Spanish Explorations and Settlements in America from the Fifteenth to the Seventeenth Century": "Justin Winsor (editor)",
    "Reliques of Ancient English Poetry, Volume 1 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date": "Thomas Percy (editor)",
    "The Glories of Ireland": "Joseph Dunn and P. J. Lennox (editors)",
    "The Nursery Rhymes of England": "James Orchard Halliwell (collector)",
    "Pen-portraits of literary women : $b by themselves and others, Volume 2 (of 2)": "Helen Gray Cone (contributor)",
    "Twenty-Five Ghost Stories": "W. Bob Holland (compiler)",
    "The Wit and Humor of America, Volume X (of X)": "Marshall P. Wilder (editor)",
    "The Best American Humorous Short Stories": "Alexander Jessup (editor)",
    "Modern Spanish Lyrics": "Elijah Clarence Hills and S. Griswold Morley (editors)",
    "The Bible, Douay-Rheims, Complete: The Challoner Revision": "Bishop Richard Challoner (revisor)",
    "The Book of the Thousand Nights and a Night — Volume 01 (of 10)": "Richard Francis Burton (translator)",
    "Best Russian Short Stories": "Thomas Seltzer (editor)",
    "The History of Orange County New York": "Russel Headley",
    "A dictionary of English proverbs and proverbial phrases : $b with a copious index of principal words": "Thomas Preston",
    "Dhammapada, a Collection of Verses; Being One of the Canonical Books of the Buddhists": "F. Max Müller (translator)",
    "A Journal of the First Voyage of  Vasco da Gama 1497-1499": "Alvaro Velho (et al.)",
    "Sir Gawayne and the Green Knight: An Alliterative Romance-Poem (c. 1360 A.D.)": "Anonymous (edited by Sir Frederick Madden)",
    "Mother Goose's Nursery Rhymes: A Collection of Alphabets, Rhymes, Tales, and Jingles": "Walter Crane (editor)",
    "The Philippine Islands, 1493-1898 - Volume 40 of 55, 1690-1691: Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century": "Emma Helen Blair, James Alexander Robertson (editors)",
    "Familiar Quotations: A Collection of Passages, Phrases, and Proverbs Traced to; Their Sources in Ancient and Modern Literature": "John Bartlett",
    "A Hundred and Seventy Chinese Poems": "Arthur Waley (translator)",
    "The best short stories of 1920, and the yearbook of the American short story": "Edward J. O'Brien (editor)",
    "Poems Every Child Should Know": "Mary E. Burt (editor)",
    "Kalevala : the Epic Poem of Finland — Complete": "John Martin Crawford (translator)",
    "The Nibelungenlied: Translated into Rhymed English Verse in the Metre of the Original": "George Henry Needler (translator)",
    "Filipino Popular Tales": "Dean Spruill Fansler (collector)",
    "Poems Every Child Should Know: The What-Every-Child-Should-Know-Library": "Mary E. Burt (editor)",
    "Famous Modern Ghost Stories": "Dorothy Scarborough (editor)",
    "The Mahabharata of Krishna-Dwaipayana Vyasa, Volume 2: Books 4, 5, 6 and 7": "Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)",
    "The Anglo-Saxon Chronicle": "Anonymous (translated by James Ingram)",
    "The Boy Mechanic, Volume 1: 700 Things for Boys to Do": "H. H. Windsor (editor)",
    "Reliques of Ancient English Poetry, Volume 3 (of 3): Consisting of Old Heroic Ballads, Songs and Other Pieces of Our Earlier Poets Together With Some Few of Later Date": "Thomas Percy (editor)",
    "The New Testament of our Lord and Savior Jesus Christ.: The common English version, corrected by the final committee of the American Bible Union.": "American Bible Union (committee)",
    "Wine, Women, and Song: Mediaeval Latin Students' songs; Now first translated into English verse": "John Addington Symonds (translator)",
    "The Institutes of Justinian": "Caesar Flavius Justinian (translated by J.B. Moyle)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 09": "Charles Dudley Warner (editor)",
    "Russian Fairy Tales: A Choice Collection of Muscovite Folk-lore": "Anonymous (translated by Nora Kershaw)",
    "Poems of American History": "Burton Egbert Stevenson (compiler)",
    "Little Masterpieces of American Wit and Humor, Volume I": "Thomas L. Masson (editor)",
    "The Book of the Thousand Nights and a Night — Volume 10 (of 10)": "Richard Francis Burton (translator)",
    "Prayers of the Early Church": "J. Manning Potts (editor)",
    "Fairy and Folk Tales of the Irish Peasantry": "William Butler Yeats (editor)",
    "The Jargon File, Version 4.2.2, 20 Aug 2000": "Eric S. Raymond (compiler)",
    "West African Folk-Tales": "W. H. Barker and Cecilia Sinclair (collectors)",
    "Humour, Wit, & Satire of the Seventeenth Century": "John Ashton (editor)",
    "The Story of the Volsungs (Volsunga Saga); with Excerpts from the Poetic Edda": "Anonymous (translated by Eirikr Magnusson and William Morris)",
    "English Fairy Tales": "Joseph Jacobs (collector)",
    "Jane's All the World's Aircraft. 1913": "Fred T. Jane (editor)",
    "The Philippine Islands, 1493-1898; Volume 46, 1721-1739: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The Jesuit Relations and Allied Documents, Vol. 1: Acadia, 1610-1613": "Reuben Gold Thwaites (editor)",
    "The Arabian Nights: Their Best-known Tales": "Kate Douglas Wiggin and Nora A. Smith (editors, illustrated by Maxfield Parrish)",
    "The Ancient Irish Epic Tale Táin Bó Cúalnge": "Joseph Dunn (translator)",
    "The book of wonders : $b gives plain and simple answers to the thousands of everyday questions that are asked and which all should be able to, but cannot answer...": "Anonymous",
    "The Philippine Islands, 1493-1803 — Volume 05 of 55: 1582-1583; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Beginning of the Nineteenth Century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "A Cyclopaedia of Canadian Biography: Being Chiefly Men of the Time: A Collection of Persons Distinguished in Professional and Political Life, Leaders in the Commerce and Industry of Canada, and Successful Pioneers": "George Maclean Rose (editor)",
    "The Big Book of Nursery Rhymes": "Walter Jerrold (editor, illustrated by Charles Robinson)",
    "The Philippine Islands, 1493-1898 — Volume 28 of 55: 1637-38; Explorations by Early Navigators, Descriptions of the Islands and Their Peoples, Their History and Records of the Catholic Missions, as Related in Contemporaneous Books and Manuscripts, Showing the Political, Economic, Commercial and Religious Conditions of Those Islands from Their Earliest Relations with European Nations to the Close of the Nineteenth Century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The Mabinogion": "Lady Charlotte Guest (translator)",
    "A Cyclopædia of Canadian Biography: Brief biographies of persons distinguished in the professional, military and political life, and the commerce and industry of Canada, in the twentieth century": "Various (edited by multiple contributors)",
    "Mr. Punch's Golf Stories": "J. A. Hammerton (compiler)",
    "Cyclopedia of illustrations for public speakers : $b Containing facts, incidents, stories, experiences, anecdotes, selections, etc., for illustrative purposes, with cross-references": "Cyrus Augustine Bartol (compiler)",
    "A guide book of art, architecture, and historic interests in Pennsylvania": "Anna Margaretta Archambault",
    "Narrative and Critical History of America, Vol. 3 (of 8): English Explorations and Settlements in North America 1497-1689": "Justin Winsor (editor)",
    "The Oera Linda Book, from a Manuscript of the Thirteenth Century": "J. G. Ottema (translator)",
    "The Little Red Hen: An Old English Folk Tale": "Florence White Williams (adapter)",
    "The Book of American Negro Poetry": "James Weldon Johnson (editor)",
    "The Bible, Douay-Rheims, Complete": "Bishop Richard Challoner (revisor)",
    "I. Beówulf: an Anglo-Saxon poem. II. The fight at Finnsburh: a fragment.": "James A. Harrison and Robert Sharp (editors)",
    "Northern Nut Growers Association Report of the Proceedings at the 44th Annual Meeting: Rochester, N.Y. August 31 and September 1, 1953": "Northern Nut Growers Association (various contributors)",
    "The Real Mother Goose": "Blanche Fisher Wright (illustrator)",
    "The Philippine Islands, 1493-1898, Volume 35, 1640-1649: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The American Joe Miller: A Collection of Yankee Wit and Humor": "Robert Kempt (compiler)",
    "History of Woman Suffrage, Volume I": "Elizabeth Cady Stanton, Susan B. Anthony, Matilda Joslyn Gage (editors)",
    "Privateering and Piracy in the Colonial Period: Illustrative Documents": "John Franklin Jameson (editor)",
    "The Philippine Islands, 1493-1898, Volume 43, 1670-1700: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 16": "Charles Dudley Warner (editor)",
    "The Middle English Poem, Erthe Upon Erthe": "Anonymous (edited by William W. Skeat)",
    "A Manual of American Literature": "Theodore Stanton (editor)",
    "Buddhist birth stories; or, Jataka tales, Volume 1": "T. W. Rhys Davids (translator)",
    "Eskimo Folk-Tales": "Knud Rasmussen (collector, translated by W. J. Alexander Worster)",
    "The Book of the Thousand Nights and a Night — Volume 03 (of 10)": "Richard Francis Burton (translator)",
    "The Mahabharata of Krishna-Dwaipayana Vyasa Translated into English Prose: Vana Parva, Part 1": "Krishna-Dwaipayana Vyasa (translated by Kisari Mohan Ganguli)",
    "\"Everyman,\" with other interludes, including eight miracle plays": "Anonymous (edited by Ernest Rhys)",
    "The Philippine Islands, 1493-1898, Volume 52, 1841-1898: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "Hawaiian folk tales : $b a collection of native legends": "Thomas G. Thrum (collector)",
    "The Chinese Fairy Book": "R. Wilhelm (editor, translated by Frederick H. Martens)",
    "The Cavalier Songs and Ballads of England from 1642 to 1684": "Charles Mackay (editor)",
    "The World's Greatest Books — Volume 02 — Fiction": "Arthur Mee and J. A. Hammerton (editors)",
    "The Oxford Book of Latin Verse: From the earliest fragments to the end of the Vth Century A.D.": "H. W. Garrod (editor)",
    "The Ballads and Songs of Yorkshire: Transcribed from Private Manuscripts, Rare Broadsides, and Scarce Publications; with Notes and a Glossary": "C. J. Davison Ingledew (editor)",
    "English Economic History: Select Documents": "Alfred Edward Bland, et al. (editors)",
    "A Century of Parody and Imitation": "Walter Jerrold and R. M. Leonard (editors)",
    "The English and Scottish popular ballads, volume 1 (of 5)": "Francis James Child (editor)",
    "Ten Thousand Wonderful Things: Comprising whatever is marvellous and rare, curious, eccentric and extraordinary in all ages and nations": "Anonymous (edited by I. P. Collins)",
    "Narrative and Critical History of America, Vol. 6 (of 8): The United States of North America, Part I": "Justin Winsor (editor)",
    "The Circle of Knowledge: A Classified, Simplified, Visualized Book of Answers": "Henry W. Ruoff",
    "A Prose English Translation of Harivamsha": "Manmatha Nath Dutt (translator)",
    "Short stories from Life: The 81 prize stories in \"Life's\" Shortest Story Contest": "Thomas L. Masson (editor)",
    "A Catalogue of Books in English Later than 1700, Vol. 2: Forming a portion of the library of Robert Hoe": "Robert Hoe (collector, catalogued by various)",
    "The best British short stories of 1922": "Edward J. O'Brien and John Cournos (editors)",
    "The best short stories of 1922, and the yearbook of the American short story": "Edward J. O'Brien (editor)",
    "Tennyson and his friends": "Hallam Tennyson (editor)",
    "Khaki knitting book": "Olive Whiting",
    "A Catalogue of Books in English Later than 1700, Vol. 1: Forming a portion of the library of Robert Hoe": "Robert Hoe (collector, catalogued by various)",
    "Childhood's Favorites and Fairy Stories: The Young Folks Treasury, Volume 1": "Hamilton Wright Mabie, Edward Everett Hale, William Byron Forbush (editors)",
    "The Little Mother Goose": "Anonymous (illustrated by Jessie Willcox Smith)",
    "Library of the World's Best Mystery and Detective Stories": "Julian Hawthorne (editor)",
    "The best short stories of 1917, and the yearbook of the American short story": "Edward J. O'Brien (editor)",
    "A collection of short-stories": "Lemuel Arthur Pittenger (editor)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 06": "Charles Dudley Warner (editor)",
    "Henley's Twentieth Century Formulas, Recipes and Processes": "Gardner Dexter Hiscox (editor)",
    "Armenia and the Armenians: A List of References in the New York Public Library": "Richard Gottheil",
    "Narrative and Critical History of America, Vol. 5 (of 8): The English and French in North America 1689-1763": "Justin Winsor (editor)",
    "Tales of King Arthur and the Round Table, Adapted from the Book of Romance": "Andrew Lang, et al. (adapters)",
    "The Book of the Thousand Nights and a Night — Volume 04 (of 10)": "Richard Francis Burton (translator)",
    "The Bible, King James Version, Complete": "Various (King James Version translators)",
    "Seventh Annual Report of the Bureau of Ethnology: to the Secretary of the Smithsonian Institution, 1885-1886, Government Printing Office, Washington, 1891": "Bureau of Ethnology (various contributors)",
    "Pennsylvania Dutch Cooking": "Anonymous",
    "The Boy Mechanic, Book 2: 1000 Things for Boys to Do": "H. H. Windsor (editor)",
    "The Wit and Humor of America, Volume II. (of X.)": "Marshall P. Wilder (editor)",
    "Japan: From the Japanese Government History": "Kan'ichi Asakawa (translator)",
    "On the magnet, magnetick bodies also, and on the great magnet the earth: a new physiology, demonstrated by many arguments & experiments": "William Gilbert (author)",
    "The Diamond Sutra (Chin-Kang-Ching) or Prajna-Paramita": "William Gemmell (translator)",
    "The Philippine Islands, 1493-1898, Volume 38, 1674-1683: Explorations by early navigators, descriptions of the islands and their peoples, their history and records of the Catholic missions, as related in contemporaneous books and manuscripts, showing the political, economic, commercial and religious conditions of those islands from their earliest relations with European nations to the close of the nineteenth century": "Emma Helen Blair and James Alexander Robertson (editors)",
    "The Wit and Humor of America, Volume IX (of X)": "Marshall P. Wilder (editor)",
    "Christopher Columbus and His Monument Columbia: being a concordance of choice tributes to the great Genoese, his grand discovery, and his greatness of mind and purpose": "Anonymous (compiled by various)",
    "Library of the World's Best Literature, Ancient and Modern — Volume 13": "Charles Dudley Warner (editor)",
    "The History of Woman Suffrage, Volume VI": "Ida Husted Harper (editor)",
    "The Colleges of Oxford: Their History and Traditions: XXI Chapters Contributed by Members of the Colleges": "Various (contributed by members)",
    "Celtic Folk and Fairy Tales": "Joseph Jacobs (editor)",
    "Indian Fairy Tales": "Joseph Jacobs (collector)",
    "Anthologica Rarissima: The Way of a Virgin: Being excerpts from rare, curious and diverting books": "Various (edited anonymously)",
    "The Dean of Lismore's Book: A Selection of Ancient Gaelic Poetry": "Thomas Maclauchlan (editor)",
    "Lancelot of the Laik: A Scottish Metrical Romance (About 1490-1500 A. D.)": "Walter W. Skeat (editor)",
    "More toasts: Jokes, stories and quotations": "Marion Dix Mosher (compiler)",
    "Devil Stories: An Anthology": "Maximilian J. Rudwin (editor)",
    "Sagas from the Far East; or, Kalmouk and Mongolian Traditionary Tales": "Rachel Harriette Busk (collector)",
    "The Burlington magazine : $b for connoisseurs. vol. II—June to August": "Various",
    "Tea-Cup Reading and Fortune-Telling by Tea Leaves, by a Highland Seer": "Anonymous (A Highland Seer)",
    "The Veil Lifted: Modern Developments of Spirit Photography": "J. Traill Taylor (author)",
    "Ancient Poems, Ballads, and Songs of the Peasantry of England": "James Maidment and Robert Bell (editors)",
}

# === Normalize function (to unify punctuation, spaces, etc.) ===
def normalize_title(text: str) -> str:
    return (
        text.strip()
            .replace("’", "'")   # fancy apostrophe → straight
            .replace("‘", "'")
            .replace("—", "-")   # em-dash → hyphen
            .replace("–", "-")   # en-dash → hyphen
            .replace("‐", "-")   # weird hyphen variant
    )

# Build normalized dictionary
resolved_normalized = {normalize_title(k): v for k, v in resolved_authors.items()}

# === Apply replacements for rows with 'Unknown' authors ===
for idx, row in df[df['author'].str.strip().str.lower() == "unknown"].iterrows():
    norm_title = normalize_title(row['title'])
    if norm_title in resolved_normalized:
        df.at[idx, 'author'] = resolved_normalized[norm_title]

# === Check how many "Unknown" remain ===
remaining = (df['author'].str.strip().str.lower() == "unknown").sum()
print("\nAfter applying replacements (normalized):")
print("Remaining 'Unknown' authors:", remaining)

# === Save the re‑fixed dataset ===
df.to_csv("author_identification_dataset_expanded_fixed.csv", index=False, encoding="utf-8")
print("✅ Updated dataset saved again as 'author_identification_dataset_expanded_fixed.csv'")


After applying replacements (normalized):
Remaining 'Unknown' authors: 0
✅ Updated dataset saved again as 'author_identification_dataset_expanded_fixed.csv'


In [8]:
import pandas as pd

# Load your cleaned dataset
df = pd.read_csv("author_identification_dataset_expanded_fixed.csv")

print("=== Dataset Overview ===")
print(f"Total records: {len(df)}")
print(f"Total unique authors: {df['author'].nunique()}")
print(f"Total unique titles: {df['title'].nunique()}")

print("\n=== Top 20 Authors by Record Count ===")
author_counts = df['author'].value_counts().head(20)
print(author_counts)

print("\n=== Dataset Info ===")
print(df.info())

print("\n=== Sample of Dataset ===")
print(df.head(10))

=== Dataset Overview ===
Total records: 5483
Total unique authors: 1625
Total unique titles: 5248

=== Top 20 Authors by Record Count ===
author
William Wymark Jacobs              97
George Alfred Henty                89
R M Ballantyne                     88
Nathaniel Hawthorne                86
William Dean Howells               84
Robert Louis Stevenson             79
Various (Editor: Hugh Chisholm)    72
Henry James                        72
Anthony Trollope                   71
Charles Dickens                    61
Charlotte Mary Yonge               60
Andrew Lang                        60
Bret Harte                         58
Edward Stratemeyer                 58
Sir Arthur Conan Doyle             57
Edward Phillips Oppenheim          53
Henry Rider Haggard                52
Herbert George Wells               51
Jack London                        48
Jacob Abbott                       47
Name: count, dtype: int64

=== Dataset Info ===
<class 'pandas.core.frame.DataFrame'>
RangeInde

In [9]:
# Calculate character lengths for each text
df["char_count"] = df["text"].apply(len)

# Summary of character lengths
print("\n=== CHARACTER COUNT SUMMARY ===")
print(f"Total records: {len(df)}")
print(f"Average characters per record: {df['char_count'].mean():,.0f}")
print(f"Median characters per record: {df['char_count'].median():,.0f}")
print(f"Minimum characters: {df['char_count'].min():,}")
print(f"Maximum characters: {df['char_count'].max():,}")

# See distribution of character lengths
print("\n=== CHARACTER COUNT DISTRIBUTION HEAD ===")
print(df['char_count'].describe(percentiles=[.25, .5, .75, .9, .95, .99]))


=== CHARACTER COUNT SUMMARY ===
Total records: 5483
Average characters per record: 556,717
Median characters per record: 415,557
Minimum characters: 641
Maximum characters: 27,260,077

=== CHARACTER COUNT DISTRIBUTION HEAD ===
count    5.483000e+03
mean     5.567172e+05
std      7.091880e+05
min      6.410000e+02
25%      1.885010e+05
50%      4.155570e+05
75%      7.235500e+05
90%      1.112660e+06
95%      1.467559e+06
99%      2.737460e+06
max      2.726008e+07
Name: char_count, dtype: float64
