# Display Sample Records

In [1]:
import gzip
import json
import re
import os
import sys
import numpy as np
import pandas as pd

**Specify your directory here:**

In [2]:
# Base directory that is joined with each dataset file name before loading;
# './' means the files are looked up relative to the current working directory.
DIR = './'

**This function loads records from a gzip-compressed, line-delimited JSON dataset file (only the first `head` records by default)**

In [3]:
def load_data(file_name, head = 100):
    """Load records from a gzip-compressed file holding one JSON document per line.

    Args:
        file_name: Path of the gzip file to read.
        head: Maximum number of records to return, counted from the start
            of the file. Pass ``None`` to read the whole file.

    Returns:
        A list with the parsed JSON value of each line read, in file order.
        It holds at most ``head`` items when ``head`` is not ``None``.
    """
    data = []
    with gzip.open(file_name) as fin:
        for line in fin:
            # Test the limit *before* appending so that exactly `head`
            # records are returned (the previous post-append `count > head`
            # test let one extra record through).
            if head is not None and len(data) >= head:
                break
            data.append(json.loads(line))
    return data

**Load and display sample records of books/authors/works/series**

In [4]:
# Load the four metadata tables in one pass; the order of the file names
# matches the order of the names being unpacked on the left-hand side.
books, authors, works, series = [
    load_data(os.path.join(DIR, dataset_file))
    for dataset_file in (
        'goodreads_books.json.gz',
        'goodreads_book_authors.json.gz',
        'goodreads_book_works.json.gz',
        'goodreads_book_series.json.gz',
    )
]

In [5]:
# Print a header and show one randomly drawn record for each metadata table.
for header, records in (
    (' == sample record (books) ==', books),
    (' == sample record (authors) ==', authors),
    (' == sample record (works) ==', works),
    (' == sample record (series) ==', series),
):
    print(header)
    display(np.random.choice(records))

 == sample record (books) ==


{'isbn': '1934876569',
 'text_reviews_count': '6',
 'series': ['151854'],
 'country_code': 'US',
 'language_code': '',
 'popular_shelves': [{'count': '515', 'name': 'to-read'},
  {'count': '25', 'name': 'fantasy'},
  {'count': '11', 'name': 'owned'},
  {'count': '11', 'name': 'books-i-own'},
  {'count': '9', 'name': 'currently-reading'},
  {'count': '9', 'name': 'favorites'},
  {'count': '9', 'name': 'magic'},
  {'count': '9', 'name': 'avalon'},
  {'count': '8', 'name': 'young-adult'},
  {'count': '6', 'name': 'series'},
  {'count': '6', 'name': 'fiction'},
  {'count': '5', 'name': 'books'},
  {'count': '4', 'name': 'childrens-books'},
  {'count': '4', 'name': 'owned-books'},
  {'count': '4', 'name': 'adventure'},
  {'count': '4', 'name': 'middle-grade'},
  {'count': '4', 'name': 'children'},
  {'count': '4', 'name': 'avalon-web-of-magic'},
  {'count': '3', 'name': 'ya'},
  {'count': '3', 'name': 'want'},
  {'count': '3', 'name': 'teen'},
  {'count': '3', 'name': 'faeries'},
  {'count'

 == sample record (authors) ==


{'average_rating': '3.99',
 'author_id': '5807700',
 'text_reviews_count': '986',
 'name': 'V.L. Locey',
 'ratings_count': '3130'}

 == sample record (works) ==


{'books_count': '1',
 'reviews_count': '2147',
 'original_publication_month': '6',
 'default_description_language_code': '',
 'text_reviews_count': '105',
 'best_book_id': '22600550',
 'original_publication_year': '2014',
 'original_title': '',
 'rating_dist': '5:172|4:289|3:177|2:37|1:11|total:686',
 'default_chaptering_book_id': '',
 'original_publication_day': '27',
 'original_language_id': '',
 'ratings_count': '686',
 'media_type': 'book',
 'ratings_sum': '2632',
 'work_id': '41697569'}

 == sample record (series) ==


{'numbered': 'true',
 'note': '',
 'description': '',
 'title': 'William the Conqueror',
 'series_works_count': '2',
 'series_id': '152366',
 'primary_work_count': '2'}

**Load and display sample records of user-book interactions (shelves)**

In [6]:
# Load the leading records of the interactions file (load_data's default `head` limit applies).
interactions = load_data(os.path.join(DIR, 'goodreads_interactions_dedup.json.gz'))
# Final expression of the cell: one randomly chosen record, shown as the cell output.
np.random.choice(interactions)

{'user_id': '8842281e1d1347389f2ab93d60773d4d',
 'book_id': '45252',
 'review_id': '7c65da9bddf845f7811c5f88d6375449',
 'is_read': False,
 'rating': 0,
 'review_text_incomplete': '',
 'date_added': 'Tue Apr 11 15:50:33 -0700 2017',
 'date_updated': 'Tue Apr 11 15:50:34 -0700 2017',
 'read_at': '',
 'started_at': ''}

**Load and display sample records of book reviews**

In [7]:
# Load the leading records of the reviews file (load_data's default `head` limit applies).
reviews = load_data(os.path.join(DIR, 'goodreads_reviews_parsed.json.gz'))
# Final expression of the cell: one randomly chosen record, shown as the cell output.
np.random.choice(reviews)

{'user_id': '8842281e1d1347389f2ab93d60773d4d',
 'book_id': '13453029',
 'review_id': '46a6e1a14e8afc82d221fec0a2bd3dd0',
 'rating': 4,
 'review_text': "A fun fast paced book that sucks you in right away and doesn't let go. The remnants of humanity live in a 150 story silo below the earth and can't leave as the outside is now toxic. This is a bit of an odd thing to have happened, but it gives us a nice palette to think about things. \n Everything is rationed, including how many children you can have. People self-identify by their profession and where in the silo they live: farmers and mechanics in the lowers, IT in the mids, and professional class in the uppers. The working class wear color coded uniforms to easily distinguish them. \n The book was a lot about control. How to control a contained civilization, and give them hope (eg a live feed of the outside) and yet keep them in harmony and doing their functions. Strange mechanisms of control were built all over the silo: communicatio