# Display Sample Records

In [1]:
import gzip
import json
import re
import os
import sys
import numpy as np
import pandas as pd

**Specify your directory here:**

In [2]:
# Directory that holds the goodreads_*.json.gz files; every load_data call
# below joins its file name onto this path.
DIR = './'

**This function loads a sample of records from a gzip-compressed, line-delimited JSON dataset file**

In [3]:
def load_data(file_name, head = 500):
    """Load records from a gzip-compressed, line-delimited JSON file.

    Each non-blank line of the file is parsed as one JSON document.

    Parameters
    ----------
    file_name : str
        Path of the ``.json.gz`` file to read.
    head : int or None, optional
        Maximum number of records to return. ``None`` reads the whole file;
        ``0`` (or a negative value) returns an empty list. Defaults to 500.

    Returns
    -------
    list
        The parsed records, in file order, at most ``head`` of them.

    Raises
    ------
    OSError
        If the file cannot be opened or is not valid gzip data.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    data = []
    with gzip.open(file_name) as fin:
        for line in fin:
            # Check the limit *before* parsing so exactly `head` records are
            # returned (checking after appending yields head + 1).
            if head is not None and len(data) >= head:
                break
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            data.append(json.loads(line))
    return data

**Load and display sample records of books/authors/works/series**

In [4]:
# Load a sample of each metadata table; the file names are listed in the same
# order as the variables they are unpacked into.
books, authors, works, series = [
    load_data(os.path.join(DIR, metadata_file))
    for metadata_file in (
        'goodreads_books.json.gz',
        'goodreads_book_authors.json.gz',
        'goodreads_book_works.json.gz',
        'goodreads_book_series.json.gz',
    )
]

In [5]:
# Print a banner and then one randomly chosen record for each metadata table.
for banner, records in (
    (' == sample record (books) ==', books),
    (' == sample record (authors) ==', authors),
    (' == sample record (works) ==', works),
    (' == sample record (series) ==', series),
):
    print(banner)
    display(np.random.choice(records))

 == sample record (books) ==


{'isbn': '1591935857',
 'text_reviews_count': '4',
 'series': [],
 'country_code': 'US',
 'language_code': '',
 'popular_shelves': [{'count': '2', 'name': 'picture-books'},
  {'count': '2', 'name': 'ducks'},
  {'count': '1', 'name': 'online-reading-in-the-stacks'},
  {'count': '1', 'name': 'nature'},
  {'count': '1', 'name': 'children-books'},
  {'count': '1', 'name': 'animal-books'},
  {'count': '1', 'name': '19709'},
  {'count': '1', 'name': '17909-books'},
  {'count': '1', 'name': 'to-read'},
  {'count': '1', 'name': 'outreach-books'},
  {'count': '1', 'name': 'books-for-teaching'},
  {'count': '1', 'name': 'picture-books-read'},
  {'count': '1', 'name': 'photographs'},
  {'count': '1', 'name': 'birds'},
  {'count': '1', 'name': 'ald_neighborhood-animals'}],
 'asin': '',
 'is_ebook': 'false',
 'average_rating': '4.29',
 'kindle_asin': '',
 'similar_books': [],
 'description': '',
 'format': 'Hardcover',
 'link': 'https://www.goodreads.com/book/show/27036533-jump-little-wood-ducks',


 == sample record (authors) ==


{'average_rating': '3.51',
 'author_id': '2943855',
 'text_reviews_count': '634',
 'name': 'Kat Menschik',
 'ratings_count': '4599'}

 == sample record (works) ==


{'books_count': '2',
 'reviews_count': '33',
 'original_publication_month': '',
 'default_description_language_code': '',
 'text_reviews_count': '4',
 'best_book_id': '378460',
 'original_publication_year': '',
 'original_title': 'The Wanting of Levine',
 'rating_dist': '5:7|4:4|3:2|2:0|1:0|total:13',
 'default_chaptering_book_id': '',
 'original_publication_day': '',
 'original_language_id': '',
 'ratings_count': '13',
 'media_type': '',
 'ratings_sum': '57',
 'work_id': '368291'}

 == sample record (series) ==


{'numbered': 'true',
 'note': '',
 'description': 'War Stories was a comic book series written by Garth Ennis.',
 'title': 'War Stories',
 'series_works_count': '5',
 'series_id': '834955',
 'primary_work_count': '4'}

**Load and display sample records of user-book interactions (shelves)**

In [6]:
# Load a sample of the deduplicated user-book interactions and show one
# record picked at random.
interactions = load_data(
    file_name=os.path.join(DIR, 'goodreads_interactions_dedup.json.gz'),
)
np.random.choice(a=interactions)

{'user_id': '8842281e1d1347389f2ab93d60773d4d',
 'book_id': '6565837',
 'review_id': 'c6c803a462ea21452ffc35b46093ada8',
 'is_read': False,
 'rating': 0,
 'review_text_incomplete': '',
 'date_added': 'Thu Aug 17 15:15:28 -0700 2017',
 'date_updated': 'Thu Aug 17 15:15:35 -0700 2017',
 'read_at': '',
 'started_at': ''}

**Load and display sample records of book reviews**

In [7]:
# Load a sample of the deduplicated book reviews and show one record picked
# at random.
reviews = load_data(
    file_name=os.path.join(DIR, 'goodreads_reviews_dedup.json.gz'),
)
np.random.choice(a=reviews)

{'user_id': '8842281e1d1347389f2ab93d60773d4d',
 'book_id': '18245960',
 'review_id': 'dfdbb7b0eb5a7e4c26d59a937e2e5feb',
 'rating': 5,
 'review_text': 'This is a special book. It started slow for about the first third, then in the middle third it started to get interesting, then the last third blew my mind. This is what I love about good science fiction - it pushes your thinking about where things can go. \n It is a 2015 Hugo winner, and translated from its original Chinese, which made it interesting in just a different way from most things I\'ve read. For instance the intermixing of Chinese revolutionary history - how they kept accusing people of being "reactionaries", etc. \n It is a book about science, and aliens. The science described in the book is impressive - its a book grounded in physics and pretty accurate as far as I could tell. Though when it got to folding protons into 8 dimensions I think he was just making stuff up - interesting to think about though. \n But what would 

**Load and display sample records of book reviews (with spoiler tags)**

In [8]:
# Load a sample of the spoiler-annotated reviews and show one record picked
# at random.
spoilers = load_data(
    file_name=os.path.join(DIR, 'goodreads_reviews_spoiler.json.gz'),
)
np.random.choice(a=spoilers)

{'user_id': '01ec1a320ffded6b2dd47833f2c8e4fb',
 'timestamp': '2016-01-14',
 'review_sentences': [[0, '3.5 - 4 Stars'],
  [0,
   'This is the very sexy and very sweet sequel to the short story, Hearts in Darkness.'],
  [0,
   'This book picks up shortly after Caden and Makenna first met in the darkened elevator.'],
  [0,
   'They have developed an almost-perfect relationship - and this book is verra verra steamy - but Caden has severe PTSD due to a very shocking incident in his past that has completely traumatized him.'],
  [0,
   "This is one of those stories where the hero doesn't think he is good enough for the heroine - she is smart, kind, beautiful, has a wonderful family, etc."],
  [0, '- so the hero must distance himself from her "for her own good."'],
  [0,
   'Makenna loves Caden but realizes that he is gun-shy and she is scared to make her true feelings known.'],
  [0, 'When she does, all hell breaks loose.'],
  [0,
   'This is a lovely, steamy story with lots of angst and a 

In [9]:
# Load a sample of the raw spoiler reviews (this rebinds `spoilers`), keep only
# those whose text contains the 'view spoiler' marker, and show one at random.
spoilers = load_data(
    file_name=os.path.join(DIR, 'goodreads_reviews_spoiler_raw.json.gz'),
)
np.random.choice(
    list(filter(lambda review: 'view spoiler' in review['review_text'], spoilers))
)

{'user_id': '8842281e1d1347389f2ab93d60773d4d',
 'book_id': '28684704',
 'review_id': '2ede853b14dc4583f96cf5d120af636f',
 'rating': 3,
 'review_text': 'A fun, fast paced science fiction thriller. I read it in 2 nights and couldn\'t put it down. The book is about the quantum theory of many worlds which states that all decisions we make throughout our lives basically create branches, and that each possible path through the decision tree can be thought of as a parallel world. And in this book, someone invents a way to switch between these worlds. This was nicely alluded to/foreshadowed in this quote: \n "I think about all the choices we\'ve made that created this moment. Us sitting here together at this beautiful table. Then I think of all the possible events that could have stopped this moment from ever happening, and it all feels, I don\'t know..." "What?" "So fragile." Now he becomes thoughtful for a moment. He says finally, "It\'s terrifying when you consider that every thought we ha