# Goodreads Book Reviews Analysis - Numerical Data Exploration

## Project Overview
This project aims to analyze **Goodreads book reviews**, focusing on **1-star ratings** to understand patterns in harsh reviews. The analysis is divided into two parts:
1. **Numerical Data Analysis** (Current Stage) - Examining numerical factors such as star ratings, review counts, and genre distributions.
2. **Natural Language Processing (NLP) Analysis** (Next Stage) - Exploring book descriptions and text reviews to identify sentiment patterns.

## Adding dataset with text reviews

In [None]:
import pandas as pd
import json
import gzip

chunk_size= 10000
chunks= []

with gzip.open ("./Data/goodreads_reviews_dedup.json.gz", "rt", encoding="utf-8") as f:
    for i, line in enumerate(f): #read line by line
        chunks.append(json.loads(line)) #convert json to stionf dict

    #every chuck line, process data to write csv
        if (i + 1) % chunk_size == 0:
            df_chunk = pd.DataFrame(chunks)
            df_chunk.to_csv("goodreads_reviews", mode="a", index= False, header = (i < chunk_size))
            chunks = []
        
if chunks:
    df_chunk = pd.DataFrame(chunks)
    df_chunk.to_csv("goodreads_reviews", mode ="a", index=False, header=False) 


In [None]:
df_reviews = pd.read_csv("goodreads_reviews")

In [None]:
df_reviews.head()

In [None]:
df_reviews.info()

In [None]:
df_reviews['book_id'].duplicated().any()

In [None]:
import pandas as pd
import json
import gzip

chunk_size= 10000
chunks= []

with gzip.open ("./Data/goodreads_books.json.gz", "rt", encoding="utf-8") as f:
    for i, line in enumerate(f): #read line by line
        chunks.append(json.loads(line)) #convert json to stionf dict
         
    #every chuck line, process data to write csv
        if (i + 1) % chunk_size == 0:
            df_chunk = pd.DataFrame(chunks)
            df_chunk.to_csv("goodreads_books", mode="a", index= False, header = (i < chunk_size))
            chunks = []
        
if chunks:
    df_chunk = pd.DataFrame(chunks)
    df_chunk.to_csv("goodreads_books", mode ="a", index=False, header=False) 

In [None]:
df_books = pd.read_csv("goodreads_books")

In [None]:
df_books.head(10)

In [None]:
df_books.info()

In [None]:
print(df_books.columns)

In [None]:
df_merged = df_reviews.merge(df_books, on="book_id", how="inner")

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
df_merged.head(10)

In [None]:
print(df_merged.columns)

In [None]:
df_merged=df_merged.drop(columns=['user_id','date_added','read_at','started_at','date_updated','read_at','kindle_asin','work_id','n_comments','asin','similar_books','series','similar_books','publication_month','publication_day','edition_information','is_ebook'])


In [None]:
df_merged.info()

In [None]:
df_merged=df_merged.drop(columns=['format', 'num_pages', 'isbn13', 'link', 'title_without_series'])

In [None]:
df_merged['review_id'].duplicated().any()

In [None]:
(df_merged['text_reviews_count']== 0).any()

In [None]:
df_merged[df_merged['text_reviews_count'] == 0]
#?? maybe outdated text review count

In [None]:
df_merged[df_merged['rating'] == 0]
#reviews that have text but no star rating was left? I am choosing to leave these out of analysis

In [None]:
df_merged= df_merged[df_merged['rating'].notna() & (df_merged['rating'] !=0)]

In [None]:
#for this analysis I will only be focusing on english reviews
#removing nonenglish rows and rows with no text in review_text or description. I dont think this will hurt bc the df is so large
df_merged= df_merged.dropna(subset=['review_text','description'])

In [None]:
df_merged.head()

In [None]:
#cleaning popular shelves column
print(df_merged['popular_shelves'].iloc[0])

In [None]:
#seeing which shelves have the highest counts
import ast
from collections import Counter

#function that extracts shelf names from string lists of the shelf dictionaires
def shelf_names(shelves_str):
    shelves_list = ast.literal_eval(shelves_str) #convert the string to a list of dicts
    if isinstance(shelves_list, list):
        return [shelf['name'] for shelf in shelves_list if 'name' in shelf] #extract 'name' value from each dict if it exists
    return []

shelf_counter = Counter()

In [None]:
#very large operation (takes about 100 minutes to run)
for row in df_merged['popular_shelves'].dropna():
    shelf_counter.update(shelf_names(row))

print(shelf_counter.most_common(30))

In [None]:
import random

unique_shelves = list(shelf_counter.keys())
print(f"unique names: {len(unique_shelves)}")

In [None]:
print(shelf_counter.most_common(1000))

In [None]:
def normalize_shelf(name):
    return name.strip().lower().replace(" ", "-")

In [None]:
#Filtering shelf names

In [None]:
#cleaning the author column
print(df_merged['authors'].iloc[0])

In [None]:
#there is already a language code column but it's not through. Try lang detect to fill in missing
from langdetect import detect
df_merged['dec']

In [None]:
#checking for final cleaning steps to slim down dataset futher before splitting  then saving to a csv

In [None]:
#split df into managable chunks for further analysis

In [None]:
for star in range(0,6):
    df_star = df_merged[df_merged['rating'] == star]
    df_star.to_csv(f"{star}star_reviews.csv")

In [None]:
import zipfile
import os

csv_files = ["./Data/1star_reviews.csv"]

zip_path = "./Data/1star_reviews.zip"

with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for file in csv_files:
        arcname = os.path.basename(file)
        zipf.write(file,arcname=arcname)

zip_path

In [None]:
#assigning them to variables then checking size

df_5star = pd.read_csv("./Data/5star_reviews.csv")
df_5star.info()

In [None]:
df_4star = pd.read_csv("./Data/4star_reviews.csv")
df_4star.info()

In [None]:
df_3star = pd.read_csv("./Data/3star_reviews.csv")
df_3star.info()

In [None]:
df_2star = pd.read_csv("./Data/2star_reviews.csv")
df_2star.info()

In [1]:
import pandas as pd
import json
import gzip
import ast
from collections import Counter

In [None]:
pip install "numpy<2"

In [3]:
df_1star = pd.read_csv("./1star_reviews.csv")
df_1star.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 419874 entries, 0 to 419873
Data columns (total 20 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Unnamed: 0          419874 non-null  int64  
 1   book_id             419874 non-null  int64  
 2   review_id           419874 non-null  object 
 3   rating              419874 non-null  int64  
 4   review_text         419874 non-null  object 
 5   n_votes             419874 non-null  int64  
 6   isbn                328665 non-null  object 
 7   text_reviews_count  419874 non-null  float64
 8   country_code        419874 non-null  object 
 9   language_code       340979 non-null  object 
 10  popular_shelves     419874 non-null  object 
 11  average_rating      419874 non-null  float64
 12  description         419874 non-null  object 
 13  authors             419874 non-null  object 
 14  publisher           347484 non-null  object 
 15  publication_year    358879 non-nul

In [5]:
# taking a sample of the smallest rating dataset to test for cleaning
sample_1star= df_1star.sample(10000, random_state=42)

In [7]:
#cleaning popular shelves column
print(sample_1star['popular_shelves'].iloc[0])

[{'count': '587', 'name': 'to-read'}, {'count': '76', 'name': 'romance'}, {'count': '75', 'name': 'diana-palmer'}, {'count': '26', 'name': 'contemporary-romance'}, {'count': '20', 'name': 'currently-reading'}, {'count': '17', 'name': 'harlequin'}, {'count': '14', 'name': 'western'}, {'count': '13', 'name': 'contemporary'}, {'count': '10', 'name': 'long-tall-texans'}, {'count': '10', 'name': 'books-i-own'}, {'count': '9', 'name': 'fiction'}, {'count': '9', 'name': 'cowboy'}, {'count': '8', 'name': 'palmer-diana'}, {'count': '8', 'name': 'series'}, {'count': '7', 'name': 'palmer'}, {'count': '7', 'name': 'harlequin-romance'}, {'count': '6', 'name': 'western-romance'}, {'count': '5', 'name': 'owned'}, {'count': '5', 'name': 'default'}, {'count': '5', 'name': 'my-library'}, {'count': '4', 'name': 'kindle'}, {'count': '4', 'name': 'read-in-2010'}, {'count': '3', 'name': 'audio-books'}, {'count': '3', 'name': '2010-11'}, {'count': '3', 'name': 'harlequin-wishlist'}, {'count': '3', 'name': 'b

In [9]:
#seeing which shelves have the highest counts
#function that extracts shelf names from string lists of the shelf dictionaires
def shelf_names(shelves_str):
    shelves_list = ast.literal_eval(shelves_str) #convert the string to a list of dicts
    if isinstance(shelves_list, list):
        return [shelf['name'] for shelf in shelves_list if 'name' in shelf] #extract 'name' value from each dict if it exists
    return []

shelf_counter = Counter()

In [11]:
#very large operation (takes about 100 minutes to run)
for row in sample_1star['popular_shelves'].dropna():
    shelf_counter.update(shelf_names(row))

print(shelf_counter.most_common(60))

[('to-read', 9929), ('currently-reading', 9297), ('owned', 8457), ('fiction', 8311), ('favorites', 8263), ('books-i-own', 7961), ('kindle', 7382), ('ebook', 7213), ('library', 7085), ('owned-books', 6950), ('to-buy', 6593), ('ebooks', 6318), ('wish-list', 5941), ('default', 5726), ('contemporary', 5486), ('my-books', 5381), ('audiobook', 5368), ('adult', 5238), ('romance', 5224), ('audiobooks', 5077), ('i-own', 4903), ('my-library', 4853), ('did-not-finish', 4748), ('dnf', 4737), ('audio', 4647), ('abandoned', 4567), ('favourites', 4412), ('e-book', 4404), ('series', 4208), ('novels', 4206), ('read-in-2015', 4022), ('own-it', 3976), ('books', 3873), ('book-club', 3829), ('fantasy', 3827), ('e-books', 3779), ('read-in-2016', 3764), ('read-in-2014', 3740), ('adult-fiction', 3735), ('maybe', 3717), ('young-adult', 3559), ('read-in-2013', 3381), ('read-in-2017', 3149), ('mystery', 3113), ('have', 3004), ('novel', 2992), ('reviewed', 2966), ('borrowed', 2906), ('ya', 2884), ('audible', 2854

In [13]:
import random

unique_shelves = list(shelf_counter.keys())
print(f"unique names: {len(unique_shelves)}")

unique names: 92620


In [45]:
import ast

blacklist = {
    'reading_status': [
        'read', 'currently reading', 'dnf', 'unread', 'tbr', 'reread', 'finished', 'finish'
    ],
    'ownership': [
        'owned', 'own', 'buy', 'bought', 'borrow', 'library', 'kindle', 'ebook', 'epub', 
        'paperback', 'nook', 'hardcover', 'download', 'ibooks', 'kobo', 'scribd'
    ],
    'rating_review': [
        'star', 'favorite', 'favourite', 'review', 'recommend', 'amazing', 'must', 
        'best', 'loved', 'meh'
    ],
    'promotion_format': [
        'audiobook', 'audio', 'netgalley', 'gift', 'challenge', 'award', 'edition', 
        'collection', 'release', 'published', 'sequel', 'shelve', 'scan', 'pdf', 'giveaway'
    ],
    'proper_nouns': [
        'neal', 'stephenson', 'amy', 'kate', 'robert', 'emily', 'veronica', 'june', 'sophia', 'palmer', 'sarah'
    ],
    'misc': [
        'storage', 'location', 'page', 'purchase', 'bore', 'new', 'hold', 'mine', 
        'drop', 'theme', 'funny', 'didnt', 'purchased', 'print', 'amazon', 'first',
    ]
}


# Flatten into a set of lowercase blacklist words
blacklist_set = set()
for group in blacklist.values():
    blacklist_set.update(word.lower() for word in group)

In [17]:
import ast

def clean_name(name):
    return name.lower().replace('-', ' ').replace('_', ' ').strip()

def extract_shelves(shelves_str):
    try:
        shelves_list = ast.literal_eval(shelves_str)
    except:
        return []

    if isinstance(shelves_list, list):
        return [(clean_name(shelf['name']), int(shelf.get('count', 0)))
                for shelf in shelves_list if 'name' in shelf]
    return []

In [39]:
def remove_blacklisted_shelves(cleaned_shelves, blacklist_words):
    """
    Improved: Only removes shelves if any **individual word** matches blacklist.
    """
    result = []
    for name, count in cleaned_shelves:
        shelf_words = name.lower().replace('-', ' ').replace('_', ' ').split()
        if not any(word in blacklist_words for word in shelf_words):
            result.append((name, count))
    return result

In [55]:
genre_mapping = {
    'cowboys': 'cowboy',
    'chick lit': 'chick lit',
    'adult fiction': 'adult fiction',
    'cowboy western': 'cowboy western',
    'genre western': 'western',
    'romantic suspense': 'romantic suspense',
    'action': 'action',
    'series romance': 'romance',
    'genre romance': 'romance',
    'romance modern': 'modern romance',
    'science fiction': 'science fiction',
    'sci fi': 'science fiction',
    'scifi': 'science fiction',
    'post apocalyptic': 'post apocalyptic',
    'sf': 'science fiction',
    'sci fi fantasy': 'science fiction fantasy',
    'dystopia': 'dystopian',
    'apocalyptic': 'apocalyptic',
    'science': 'science',
    'speculative fiction': 'speculative fiction',
    'fantasy sci fi': 'science fiction fantasy',
    'apocalypse': 'apocalyptic',
    'space opera': 'space opera',
    'science fiction fantasy': 'science fiction fantasy',
    'hard sci fi': 'hard science fiction',
    'sff': 'science fiction fantasy',
    'post apocalypse': 'post apocalyptic',
    'sf fantasy': 'science fiction fantasy',
    'sci fi and fantasy': 'science fiction fantasy',
    'hard scifi': 'hard science fiction',
    'sciencefiction': 'science fiction',
    'regency romance': 'regency romance',
    'romance historical': 'historical romance',
    'mf': 'm f',
    'historical romances': 'historical romance',
    'historicals': 'historical',
    'humorous': 'humor',
    'humour': 'humor',
    'humour comedy': 'humor',
    'young adult': 'young adult',
    'ya': 'young adult',
    'fairies': 'fairies',
    'faeries': 'fairies',
    'faerie': 'fairies',
    'fey': 'fae',
    'ya fantasy': 'young adult fantasy',
    'paranormal romance': 'paranormal romance',
    'historical fantasy': 'historical fantasy',
    'historical fic': 'historical fiction',
    'supernatural': 'supernatural',
    'faries': 'fairies',
    'classic lit': 'classic literature',
    'british lit': 'british literature',
    'brit lit': 'british literature',
    'english lit': 'english literature',
    'lit': 'literature',
    'feminist': 'feminism',
    'ya books': 'young adult books',
    'ya fiction': 'young adult fiction',
    'ya': 'young adult',
    'non fiction': 'nonfiction',
    'non fic': 'nonfiction',
    'memoirs': 'memoir',
    'distopian': 'dystopian',
    'ya dystopian': 'young adult dystopian',
    'ya lit': 'young adult literature'
}

In [57]:
from collections import defaultdict

def map_genres(tag_list, genre_mapping):
    tag_counts = defaultdict(int)

    for tag, count in tag_list:
        tag_clean = tag.lower()
        mapped_tag = genre_mapping.get(tag_clean, tag_clean)
        tag_counts[mapped_tag] += count

    # Convert back to list of tuples sorted by count (optional)
    aggregated = sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)
    return aggregated

In [None]:
def apply_cleaning_pipeline(shelves_str, genre_mapping, blacklist_words):
    # Step 1: Parse shelves from string
    shelves = extract_shelves(shelves_str)

    # Step 2: Map to cleaned genres
    cleaned = map_genres(filtered, genre_mapping)
    
    # Step 3: Remove blacklisted terms
    filtered = remove_blacklisted_shelves(shelves, blacklist_words)


    return cleaned

In [43]:
sample_1star[['cleaned_shelves']].head(20)

Unnamed: 0,cleaned_shelves
41230,"[(romance, 76), (contemporary romance, 26), (harlequin, 17), (western, 14), (contemporary, 13), (long tall texans, 10), (fiction, 9), (cowboy, 9), (series, 8), (harlequin romance, 7), (western romance, 6), (default, 5), (2010 11, 3), (harlequin wishlist, 3), (books i have, 3), (adult, 3), (cowboys, 3), (virgin heroine, 3), (april, 3), (harlequim, 2), (did not finish, 2), (series in progress, 2), (chick lit, 2), (books, 2), (on my shelf, 2), (adult fiction, 2), (cowboy western, 2), (terjemahan, 2), (genre western, 2), (undecided, 2), (paper back, 2), (not interested, 2), (gramedia, 2), (romantic suspense, 2), (action, 2), (part of a series, 2), (series romance, 2), (harlequinromance, 2), (meaghan, 1), (mills and boon, 1), (0 contemporary, 1), (genre romance, 1), (meh, 1), (harlequins, 1), (angsty, 1), (on the shelf, 1), (shelf 4 front, 1), (blom check, 1), (primary, 1), (romance modern, 1), (long tall texan, 1), (hero grovels, 1), (brooding hero, 1), (long tall texans series bk 34, 1), (harlequin modern romance, 1), (e books, 1)]"
95710,"[(science fiction, 1975), (sci fi, 1674), (fiction, 1014), (scifi, 453), (post apocalyptic, 211), (sf, 179), (sci fi fantasy, 138), (abandoned, 128), (dystopian, 114), (audible, 111), (space, 105), (fantasy, 100), (dystopia, 95), (apocalyptic, 93), (science, 73), (speculative fiction, 70), (scifi fantasy, 68), (novels, 63), (did not finish, 63), (fantasy sci fi, 58), (adult, 57), (apocalypse, 57), (space opera, 51), (wish list, 48), (book club, 46), (bill gates, 44), (didn t finish, 43), (science fiction fantasy, 42), (hard sci fi, 38), (maybe, 36), (sff, 34), (adventure, 32), (gave up on, 31), (novel, 31), (e books, 29), (post apocalypse, 29), (sf fantasy, 28), (e book, 28), (survival, 28), (default, 27), (sci fi and fantasy, 26), (hard scifi, 23), (couldn t finish, 23), (epic, 23), (hard science fiction, 22), (to get, 22), (literature, 21), (adult fiction, 21), (standalone, 20), (fantasy scifi, 20), (english, 20), (sciencefiction, 20), (speculative, 20), (thriller, 19), (gave up, 18), (signed, 18), (future, 18), (21st century, 17), (near future, 17)]"
142885,"[(romance, 150), (historical romance, 80), (historical, 78), (historical fiction, 53), (regency, 53), (humor, 30), (fiction, 19), (regency romance, 14), (e book, 13), (comedy, 11), (romance historical, 10), (free, 9), (adult, 8), (freebie, 8), (freebies, 7), (chick lit, 6), (e books, 6), (amazon, 5), (england, 4), (wish list, 4), (cross dressing, 4), (guilty pleasures, 4), (d l carter, 3), (humorous, 3), (free books, 3), (maybe, 3), (gender bender, 3), (regency era, 3), (fantasy, 3), (amazon freebies, 3), (humour, 3), (historicals, 3), (friendship, 2), (default, 2), (laugh out loud, 2), (shelfari humor, 2), (humour comedy, 2), (humorous romance, 2), (period, 2), (m f, 2), (friends to lovers, 2), (bookbub, 2), (fun, 2), (misc, 2), (mf, 2), (fiction humor, 2), (historical romances, 2), (drama, 2), (family, 2), (love story, 2)]"
238919,"[(young adult, 640), (dystopian, 421), (dystopia, 345), (ya, 335), (science fiction, 222), (post apocalyptic, 186), (series, 182), (sci fi, 157), (survival, 120), (fiction, 109), (apocalyptic, 101), (teen, 71), (adventure, 61), (wish list, 52), (apocalypse, 51), (fantasy, 48), (arc, 42), (maybe, 39), (ya fiction, 39), (monument 14, 36), (signed, 33), (end of the world, 32), (first in series, 29), (ya dystopian, 27), (realistic fiction, 27), (my books, 25), (scifi, 24), (romance, 23), (ya books, 23), (young adult fiction, 21), (dystopian post apocalyptic, 21), (teen fiction, 20), (suspense, 20), (did not finish, 19), (post apocalypse, 19), (male pov, 19), (thriller, 18), (horror, 17), (action adventure, 17), (high school, 15), (sci fi fantasy, 15), (disaster, 15), (speculative fiction, 14), (future, 14), (ya lit, 14), (want, 14), (contemporary, 13), (have, 13), (action, 13), (meh, 12), (ya dystopia, 12), (distopian, 12), (book club, 12), (friendship, 12), (first in a series, 12), (colorado, 12), (e books, 12), (english, 11), (arcs, 11), (2013 books, 10)]"
172222,"[(fantasy, 84), (young adult, 65), (historical fiction, 29), (ya, 26), (fairies, 25), (series, 24), (faeries, 21), (romance, 20), (fae, 18), (paranormal, 17), (magic, 13), (historical, 13), (urban fantasy, 8), (giveaways, 7), (the faerie ring, 7), (wish list, 7), (adventure, 6), (faerie, 6), (fey, 6), (ya fantasy, 6), (paranormal romance, 6), (e books, 5), (arc, 5), (faerie ring, 5), (have, 4), (fairy, 4), (fiction, 4), (historical fantasy, 4), (october 2012, 4), (kiki hamilton, 4), (historical romance, 3), (freebies, 3), (royalty, 3), (historical fic, 3), (young adult fiction, 3), (e book, 3), (need to get, 3), (giveaway, 3), (want, 3), (giveaway entry, 3), (fey books, 3), (the faerie ring series, 3), (book series, 3), (ya paranormal, 3), (2012 src, 3), (meh, 2), (love triangle, 2), (regency, 2), (h, 2), (books, 2), (august 2012, 2), (need to finish series, 2), (free, 2), (next in series, 2), (supernatural, 2), (faries, 2), (need, 2), (the fey, 2), (series to finish, 2)]"
303205,"[(fiction, 972), (young adult, 380), (chick lit, 235), (ya, 216), (contemporary, 145), (coming of age, 129), (boarding school, 102), (contemporary fiction, 76), (adult fiction, 73), (adult, 68), (book club, 60), (realistic fiction, 59), (2006, 55), (high school, 47), (novels, 47), (general fiction, 43), (literary fiction, 35), (chicklit, 33), (bookclub, 30), (novel, 27), (school, 26), (abandoned, 25), (my books, 24), (2005, 24), (ya fiction, 22), (american, 22), (adolescence, 21), (teen, 21), (did not finish, 19), (romance, 19), (family, 17), (friendship, 17), (wish list, 17), (curtis sittenfeld, 15), (ya contemporary, 14), (default, 14), (ya books, 14), (chic lit, 14), (didn t finish, 14), (2000s, 13), (literature, 13), (literary, 13), (book club books, 13), (usa, 13), (gave up on, 12), (ya lit, 12), (modern fiction, 12), (prep school, 12), (relationships, 11), (bildungsroman, 11), (drama, 11), (fluff, 11), (lit, 11), (female author, 10), (realistic, 10), (21st century, 10), (couldn t finish, 10), (could not finish, 10), (female authors, 10), (coming, 9)]"
191077,"[(dystopian and post apocalyptic, 2), (books, 1), (sci fi, 1), (fiction, 1), (future, 1), (dystopian, 1), (arc physical, 1), (catalog, 1), (science fiction, 1), (r2r, 1), (wish list, 1), (young adult, 1)]"
226358,"[(science fiction, 51), (fiction, 20), (default, 16), (sci fi, 14), (military, 14), (military fiction, 11), (military science fiction, 7), (sf, 6), (military sci fi, 6), (alternate history, 6), (ringo, 5), (scifi, 5), (need, 3), (thriller, 3), (calibre, 3), (fantasy, 3), (john ringo, 3), (sci fi fantasy, 3), (science fiction and fantasy, 3), (baen, 3), (war, 3), (gave up, 2), (audible, 2), (not interested, 2), (teotwawki, 2), (abandoned, 2), (novels, 2), (apocalypse, 2), (warriors, 2), (shame on you, 2), (2006 2010, 2), (did not finish, 2), (apocalyptic, 2), (action adventure, 2), (military sf, 2), (easton press, 2), (listened to, 2), (post apocalypse, 2), (quit despite a valiant try, 1), (physical, 1), (appeal intellect, 1), (to sort, 1), (alan 2, 1), (inventory digital, 1), (inventory, 1), (digital drm free, 1), (conservative, 1), (somewhere out there, 1), (science, 1), (bookbub, 1), (physical books, 1), (chads books, 1), (martial, 1), (0 janesville, 1), (hardback, 1), (other, 1), (pa, 1), (ku, 1), (sfi, 1), (general fiction, 1), (book box 2, 1), (fiction drama, 1), (2000s, 1), (featured, 1), (scif fi, 1), (for sale, 1), (autobooks, 1), (aanwezig, 1), (na w, 1), (lemmed, 1), (survival, 1), (autographed, 1), (andy, 1)]"
119477,"[(mafia, 130), (romance, 57), (series, 40), (dark, 31), (cliffhanger, 29), (abuse, 29), (high school, 27), (contemporary, 26), (alpha male, 18), (mob, 17), (contemporary romance, 17), (young adult, 17), (mafia romance, 16), (love triangle, 15), (ya, 13), (dark romance, 10), (april 2016, 9), (made men, 9), (mafia mob, 9), (mob mafia, 9), (bad boys, 9), (sarah brianne, 8), (bad boy, 7), (part of a series, 7), (crime, 7), (bully, 6), (angst, 6), (abused, 6), (boring, 5), (meh, 5), (mystery, 5), (na, 5), (suspense, 5), (drama, 5), (bullying, 5), (mob romance, 5), (coming soon, 5), (tear jerker, 4), (anti hero, 4), (abused heroine, 4), (college, 4), (cliffy, 4), (not interested, 4), (part of series, 4), (realistic fiction, 4), (highschool, 3), (alpha males, 3), (love, 3), (multiple pov, 3), (disappointing, 3), (intense, 3), (betrayal, 3), (na ya, 3), (maybe, 3), (scarred, 3), (2016 books, 3), (doormat heroine, 3), (mafioso, 3), (series family n friends, 2), (english, 2), (série, 2), (lost interest, 2), (loss, 2), (family, 2), (meeeh, 2), (rom, 2)]"
175002,"[(fiction, 39), (feminism, 6), (arc, 6), (womens fiction, 5), (did not finish, 4), (book club, 4), (abandoned, 4), (chick lit, 4), (literature, 3), (novels, 3), (romance, 3), (bookshelf, 2), (aging, 2), (women, 2), (feminist, 2), (contemporary, 2), (humor, 2), (contemporary fiction, 2), (erotica, 2), (usa and canada, 1), (online, 1), (adult fiction, 1), (wishlist 2017, 1), (drama, 1), (psychology, 1), (set aside, 1), (sports zip, 1), (continuing series, 1), (abandonados, 1), (female main character, 1), (asian, 1), (mom, 1), (couldn t finish, 1), (personal, 1), (megafavorits, 1), (feminis me, 1), (duelo, 1), (jenners pick, 1), (bad girls book club, 1), (research, 1), (elle magazine books, 1), (october, 1), (acls, 1), (woman, 1), (bib ce, 1), (not digitally available, 1), (romaani ulkomainen, 1), (litfic, 1), (calibre, 1), (mad money books, 1), (giveaways, 1), (couldn t get through it, 1), (north america, 1), (book club books, 1), (book lovers never go to bed alone, 1), (arc received free, 1), (adult, 1), (matriarchy, 1)]"


In [None]:
#attempting to clean the html first
