In [13]:
from datetime import datetime
from pathlib import Path

import feedparser
import pandas as pd
from bs4 import BeautifulSoup

def parse_datetime(s):
    """Convert a feed timestamp string into a ``datetime``.

    Args:
        s: Timestamp text in the form ``'%a, %d %b %Y %H:%M:%S %z'``
            (for example ``'Mon, 01 Jan 2024 12:00:00 -0800'``), or a
            falsy value such as ``''`` or ``None``.

    Returns:
        The parsed ``datetime`` carrying the UTC offset from the string,
        or ``None`` when ``s`` is falsy.

    Raises:
        ValueError: If ``s`` is non-empty but does not match the format.
    """
    # Missing dates show up as empty values; pass them through as None.
    if not s:
        return None
    return datetime.strptime(s, '%a, %d %b %Y %H:%M:%S %z')

def parse_entry(entry):
    """Flatten one Goodreads RSS feed entry into a plain dict of book fields.

    Args:
        entry: A feed entry supporting ``entry[key]`` lookup for every
            Goodreads field read below.

    Returns:
        dict: Display-friendly column names mapped to the entry's values.
        Date fields are run through ``parse_datetime``, the book description
        is parsed as HTML and reduced to its text, and a null review is
        replaced with ``''``.

    Raises:
        KeyError: If ``entry`` lacks one of the fields read below.
    """
    # Fields copied through unchanged, in output-column order.
    # NOTE(review): 'User Status' is filled from the same feed field as
    # 'User Shelves' further down -- confirm this duplication is intended.
    leading_fields = (
        ('Title', 'title'),
        ('Author', 'author_name'),
        ('Publication Year', 'book_published'),
        ('User Rating', 'user_rating'),
        ('Average Rating', 'average_rating'),
        ('Book ID', 'book_id'),
        ('ISBN', 'isbn'),
        ('Number of Pages', 'num_pages'),
        ('User Status', 'user_shelves'),
        ('Book Image URL', 'book_image_url'),
    )
    record = {column: entry[key] for column, key in leading_fields}

    # Strip markup from the description, keeping only its text content.
    description_markup = BeautifulSoup(entry['book_description'], 'html.parser')
    record['Book Description'] = description_markup.get_text()

    trailing_fields = (
        ('Book Large Image URL', 'book_large_image_url'),
        ('ID', 'id'),
        ('Link', 'link'),
    )
    for column, key in trailing_fields:
        record[column] = entry[key]

    # Timestamp fields; empty values come back as None from parse_datetime.
    date_fields = (
        ('Published', 'published'),
        ('User Date Added', 'user_date_added'),
        ('User Date Created', 'user_date_created'),
        ('User Read At', 'user_read_at'),
    )
    for column, key in date_fields:
        record[column] = parse_datetime(entry[key])

    # Normalise a null review to an empty string.
    review = entry['user_review']
    if pd.isnull(review):
        review = ''
    record['User Review'] = review

    record['User Shelves'] = entry['user_shelves']
    return record


def get_goodreads_books(user_id):
    """Download a Goodreads user's review-list RSS feed as a DataFrame.

    Args:
        user_id: Goodreads user id, interpolated into the feed URL as-is.

    Returns:
        pandas.DataFrame: One row per feed entry, with the columns produced
        by ``parse_entry``.
    """
    # '%23ALL%23' is the URL-encoded form of '#ALL#', the shelf being requested.
    feed_url = f"https://www.goodreads.com/review/list_rss/{user_id}?shelf=%23ALL%23"
    parsed_feed = feedparser.parse(feed_url)

    records = []
    for item in parsed_feed.entries:
        records.append(parse_entry(item))
    return pd.DataFrame(records)

In [15]:
# Fetch the feed for soli's Goodreads user id and cache it as a CSV.
df = get_goodreads_books('78274842')
# to_csv does not create missing parent directories, so make sure ./books
# exists before writing (otherwise a fresh checkout fails with OSError).
Path('./books').mkdir(parents=True, exist_ok=True)
df.to_csv('./books/soli.csv', index=False)

In [16]:
import pandas as pd
# Reload the cached feed from disk.
# ISBN is read as text: left to dtype inference, an all-digit ISBN column
# would be parsed as a number and lose leading zeros (e.g. '0399563644').
books = pd.read_csv('./books/soli.csv', dtype={'ISBN': str})

In [17]:
# Show the first record as a dict; head(1) avoids converting every row of
# the frame to a dict just to display one of them.
books.head(1).to_dict('records')[0]

{'Title': 'The Book of Form and Emptiness',
 'Author': 'Ruth Ozeki',
 'Publication Year': 2021.0,
 'User Rating': 0,
 'Average Rating': 4.04,
 'Book ID': 57004637,
 'ISBN': '0399563644',
 'Number of Pages': 548.0,
 'User Status': 'to-read',
 'Book Image URL': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1623850374l/57004637._SY75_.jpg',
 'Book Description': '\nA brilliantly inventive new novel about loss, growing up, and our relationship with things, by the Booker Prize-finalist author of A Tale for the Time Being\nAfter the tragic death of his beloved musician father, fourteen-year-old Benny Oh begins to hear voices. The voices belong to the things in his house--a sneaker, a broken Christmas ornament, a piece of wilted lettuce. Although Benny doesn\'t understand what these things are saying, he can sense their emotional tone; some are pleasant, a gentle hum or coo, but others are snide, angry and full of pain. When his mother, Annabelle, develops a hoarding p