In [18]:
import gutenbergpy.textget
from gutenbergpy.gutenbergcache import GutenbergCache
import numpy as np
import pandas as pd

In [2]:
cache = None
try:
    cache = GutenbergCache.get_cache()
except:
    GutenbergCache.create()
    cache = GutenbergCache.get_cache()

In [3]:
authors_list = cache.native_query("""
SELECT author.name
FROM books book
JOIN book_authors book_author ON book_author.bookid = book.id
JOIN authors author ON author.id = book_author.authorid
""").fetchall()
for author in set(np.array(authors_list).flatten()):
    if "Tolstoy" in author:
        print(author)

Tolstoy, Aleksey Konstantinovich, graf
Tolstoy van Aylde-Jonghe, Elzélina
Tolstoy, Leo, graf
Tolstoy, Sophie Andreevna
Tolstoy, Alexis Konstantinovich, graf
Tolstoy, A. K. (Aleksey Konstantinovich), graf
Tolstoy, Graf Leo
Tolstoy, Lev N.
Tolstoy, Alexandra, Countess
Tolstoy, Alexei Konstantinovich, graf
Tolstoy, Ilya, Count
Tolstoy, Sonya
Tolstoy, Aleksei Konstantinovich, Count


In [4]:
books = cache.native_query("""
SELECT title.name, book.gutenbergbookid
FROM books book
JOIN book_authors book_author ON book_author.bookid = book.id
JOIN authors author ON author.id = book_author.authorid
JOIN titles title ON title.bookid = book.id
WHERE author.name = 'Tolstoy, Graf Leo'
""").fetchall()
books

[('Der lebende Leichnam: Drama in sechs Akten (zwölf Bildern)', 46086),
 ('Kasakat: Kaukasialainen kertomus', 50172),
 ("La Pensée de l'Humanité\nDernière oeuvre de L. Tolstoï", 43761),
 ("'The Kingdom of God Is Within You'\r\nChristianity Not as a Mystic Religion but as a New Theory of Life",
  43302),
 ('What Shall We Do?', 38690),
 ('Sevastopol', 47197),
 ('My Religion', 43794),
 ('Anna Karenina', 13214),
 ('War and Peace, Book 01: 1805', 28920),
 ('What Men Live By, and Other Tales', 6157),
 ('La guerre et la paix, Tome III', 17951),
 ('Katia', 44266),
 ('Isäntä ja renki', 52350),
 ('Kreuzer-sonaatti', 51573),
 ('La guerre et la paix, Tome II', 17950),
 ('The Bobbsey Twins at Cedar Camp', 67224),
 ('The inverted pyramid', 689),
 ('Reconstruction and the Constitution, 1866-1876', 17552),
 ('The Days of Bruce: A Story from Scottish History. Vol. 1', 243),
 ('Emerson Radio Model 39 Warranty Card', 41119),
 ("Heath's Modern Language Series: La Mère de la Marquise", 49522),
 ('Greetings

In [14]:
def safe_get_text_by_id(book_id):
    try:
        return gutenbergpy.textget.get_text_by_id(book_id)
    except Exception as e:
        return None

In [15]:
texts = [(title, text) for title, book_id in books if (text := safe_get_text_by_id(book_id)) is not None]

In [28]:
texts_df = pd.DataFrame(texts, columns=['title','text'])
texts_df

Unnamed: 0,title,text
0,Der lebende Leichnam: Drama in sechs Akten (zw...,b'The Project Gutenberg EBook of Der lebende L...
1,Kasakat: Kaukasialainen kertomus,"b'The Project Gutenberg EBook of Kasakat, by L..."
2,La Pensée de l'Humanité\nDernière oeuvre de L....,"b""*** START OF THE PROJECT GUTENBERG EBOOK 437..."
3,'The Kingdom of God Is Within You'\r\nChristia...,b'Project Gutenberg\'s The Kingdom of God is W...
4,What Shall We Do?,b'The Project Gutenberg EBook of What Shall We...
...,...,...
94,Modern Woman: Her Intentions,b'Project Gutenberg\'s Where Love Is There God...
95,A Week's Tramp in Dickens-Land\r\nTogether wit...,"b'The Project Gutenberg eBook, Thoughts Evoked..."
96,Essays in Experimental Logic,b'The Project Gutenberg EBook of Herr und Knec...
97,Young Jack Harkaway Fighting the Pirates of th...,b'The Project Gutenberg eBook of Fables for Ch...


In [29]:
texts_df.to_csv('text.csv')