In [1]:
import gzip
import json
import re
import os
import sys
import numpy as np
import pandas as pd

In [2]:
def load_data(file_name, head = 100000):
    count = 0
    data = []
    with gzip.open(file_name) as fin:
        for l in fin:
            d = json.loads(l)
            count += 1
            data.append(d)

            #break if reaches the 100th line
            # if (head is not None) and (count > head):
            #     break
    return data

In [10]:
def load_data_and_process(file_name, head=2300000, chunk_size=50000, limit=1000000, process_func=None):
    """
    Load data from a gzip-compressed JSON file in chunks and apply a function to each chunk.

    Parameters:
    - file_name: str, path to the gzip-compressed JSON file
    - head: int, number of lines to read (default is None, which reads all lines)
    - chunk_size: int, number of lines per chunk (default is 10000)
    - process_func: callable, function to apply to each chunk (default is None)

    Returns:
    - results: list, list of results from applying the process_func to each chunk
    """
    chunk_id = 0
    count = 0
    data = []
    results = []

    with gzip.open(file_name, 'rt') as fin:
        for l in fin:
            d = json.loads(l)
            count += 1
            data.append(d)

            # If the chunk size is reached, process the chunk
            if count % chunk_size == 0:
                if process_func is not None:
                    result = process_func(data)
                    results.append(result)
                data = []
                print(chunk_id)
                chunk_id += 1

            # Break if the head limit is reached
            if (head is not None) and (count >= head):
                break

            data_sum  = 0
            for res in results:
                data_sum += len(res)

            if data_sum > limit:
                break

        # Process the remaining data if any
        if data and process_func is not None:
            result = process_func(data)
            results.append(result)

    return results

In [4]:
def find_books_lang_id(books):
    result = []
    for book in books:
        if book['language_code'] == 'ind':
            result.append(book)
    return result

In [5]:
def find_books_lang_jp(books):
    result = []
    for book in books:
        if book['language_code'] == 'jpn':
            result.append(book)
    return result

In [6]:
def find_books_lang_en(books):
    result = []
    for book in books:
        if book['language_code'] == 'eng':
            result.append(book)
    return result

In [7]:
def find_books_lang_kr(books):
    result = []
    for book in books:
        if book['language_code'] == 'kor':
            result.append(book)
    return result

In [8]:
def find_author_by_id(authors, search_id):
    for author in authors:
        if author['author_id'] == search_id:
            return author
    return None

In [12]:
data_books = load_data_and_process('./goodreads_books.json.gz', process_func=find_books_lang_id)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45


In [13]:
books = [item for sublist in data_books for item in sublist]
print(len(books))
books = pd.DataFrame(books)

26583


In [14]:
books_dropped = books.drop(columns=["text_reviews_count","series","country_code","popular_shelves","asin","is_ebook","kindle_asin","similar_books","format","edition_information","book_id","ratings_count","work_id"])
books_dropped['author_id'] = books_dropped['authors'].apply(lambda x: x[0]['author_id'] if len(x) > 0 else None)


In [15]:
books_dropped.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26583 entries, 0 to 26582
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   isbn                  26583 non-null  object
 1   language_code         26583 non-null  object
 2   average_rating        26583 non-null  object
 3   description           26583 non-null  object
 4   link                  26583 non-null  object
 5   authors               26583 non-null  object
 6   publisher             26583 non-null  object
 7   num_pages             26583 non-null  object
 8   publication_day       26583 non-null  object
 9   isbn13                26583 non-null  object
 10  publication_month     26583 non-null  object
 11  publication_year      26583 non-null  object
 12  url                   26583 non-null  object
 13  image_url             26583 non-null  object
 14  title                 26583 non-null  object
 15  title_without_series  26583 non-null

In [16]:
authors = load_data('./goodreads_book_authors.json.gz')
authors = pd.DataFrame(authors)
books_dropped = books_dropped.merge(authors, on='author_id', how='left')

In [17]:
books_dropped.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26583 entries, 0 to 26582
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   isbn                  26583 non-null  object
 1   language_code         26583 non-null  object
 2   average_rating_x      26583 non-null  object
 3   description           26583 non-null  object
 4   link                  26583 non-null  object
 5   authors               26583 non-null  object
 6   publisher             26583 non-null  object
 7   num_pages             26583 non-null  object
 8   publication_day       26583 non-null  object
 9   isbn13                26583 non-null  object
 10  publication_month     26583 non-null  object
 11  publication_year      26583 non-null  object
 12  url                   26583 non-null  object
 13  image_url             26583 non-null  object
 14  title                 26583 non-null  object
 15  title_without_series  26583 non-null

In [18]:
books_dropped = books_dropped.drop(columns=['average_rating_y', 'text_reviews_count', 'ratings_count'])
books_dropped = books_dropped.drop(columns=['authors','author_id'])
books_dropped.rename(columns={'name' : 'author_name', 'average_rating_x' : 'average_rating'}, inplace=True)

In [19]:
import requests
from bs4 import BeautifulSoup

In [20]:
def get_genre_from_url(url):
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        content_tags = soup.find_all(class_='BookPageMetadataSection__genreButton')
        genre = [tag.get_text() for tag in content_tags]
        print(f" genre get from {url}")
        return {'url' : url,'genre': genre}
    except requests.RequestException as e:
        print(f"Request error for {url}: {e}")
        return {'url': url, 'genre': []}

In [21]:
books_dropped.to_csv('book_id_data.csv', index= False)

In [22]:
book_id_data = pd.read_csv('./book_id_data.csv')

In [23]:
book_id_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26583 entries, 0 to 26582
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   isbn                  8453 non-null   object 
 1   language_code         26583 non-null  object 
 2   average_rating        26582 non-null  float64
 3   description           20838 non-null  object 
 4   link                  26582 non-null  object 
 5   publisher             26006 non-null  object 
 6   num_pages             24502 non-null  float64
 7   publication_day       6975 non-null   float64
 8   isbn13                18660 non-null  object 
 9   publication_month     17604 non-null  float64
 10  publication_year      25762 non-null  float64
 11  url                   26582 non-null  object 
 12  image_url             26583 non-null  object 
 13  title                 26583 non-null  object 
 14  title_without_series  26583 non-null  object 
 15  author_name        

In [24]:
book_id_data.rename(columns={'isbn' : 'isbn10', 'title_without_series' : 'judul', 'publication_year' : 'tahun_terbit', 'num_pages' : 'jml_halaman', 'description' : 'deskripsi', 'author_name' : 'penulis', 'publisher' : 'penerbit', 'image_url' : 'image'}, inplace=True)
book_id_data['isbn'] = book_id_data['isbn13'].combine_first(book_id_data['isbn10'])
book_id_data_dropped = book_id_data.drop(columns=["isbn10","publication_day","isbn13","publication_month","link","title"])
book_id_data_dropped = book_id_data_dropped.dropna()

In [25]:
book_id_data_dropped.info()

<class 'pandas.core.frame.DataFrame'>
Index: 17682 entries, 2 to 26581
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   17682 non-null  object 
 1   average_rating  17682 non-null  float64
 2   deskripsi       17682 non-null  object 
 3   penerbit        17682 non-null  object 
 4   jml_halaman     17682 non-null  float64
 5   tahun_terbit    17682 non-null  float64
 6   url             17682 non-null  object 
 7   image           17682 non-null  object 
 8   judul           17682 non-null  object 
 9   penulis         17682 non-null  object 
 10  isbn            17682 non-null  object 
dtypes: float64(3), object(8)
memory usage: 1.6+ MB


In [2]:
import threading
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

In [27]:
max_threads = 20
results = []

In [28]:
with ThreadPoolExecutor(max_threads) as executor:
    future_to_url = {executor.submit(get_genre_from_url, url): url for url in book_id_data_dropped['url']}
    for future in as_completed(future_to_url):
        result = future.result()
        results.append(result)

 genre get from https://www.goodreads.com/book/show/1385724.Wayang_dan_Panggilan_Manusia
 genre get from https://www.goodreads.com/book/show/13224373-blood-beast
 genre get from https://www.goodreads.com/book/show/10225057-muslimah-nggak-gitu-deh
 genre get from https://www.goodreads.com/book/show/12361920-siapa-yang-menganyam-sarang-ini
 genre get from https://www.goodreads.com/book/show/10459243-ghosts-doctor-vol-01
 genre get from https://www.goodreads.com/book/show/2877339-his-secret-child---anak-rahasia
 genre get from https://www.goodreads.com/book/show/35210615-eren-s-play
 genre get from https://www.goodreads.com/book/show/8164648-a-man-to-call-my-own
 genre get from https://www.goodreads.com/book/show/15861985-masih-ada-kereta-yang-akan-lewat
 genre get from https://www.goodreads.com/book/show/24694893-beelzebub-vol-18
 genre get from https://www.goodreads.com/book/show/10399982-piyu
 genre get from https://www.goodreads.com/book/show/2863369-hello-darkness---selamat-datang-ke

In [29]:
genre_data = pd.DataFrame(results)
books_id_data = book_id_data_dropped.merge(genre_data, on='url', how='left')
books_id_data.info()
books_id_data.to_csv('Books_ID_Data_Fix.csv', index=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17682 entries, 0 to 17681
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   17682 non-null  object 
 1   average_rating  17682 non-null  float64
 2   deskripsi       17682 non-null  object 
 3   penerbit        17682 non-null  object 
 4   jml_halaman     17682 non-null  float64
 5   tahun_terbit    17682 non-null  float64
 6   url             17682 non-null  object 
 7   image           17682 non-null  object 
 8   judul           17682 non-null  object 
 9   penulis         17682 non-null  object 
 10  isbn            17682 non-null  object 
 11  genre           17682 non-null  object 
dtypes: float64(3), object(9)
memory usage: 1.6+ MB


In [3]:
books_id_data = pd.read_csv('Books_ID_Data_Fix.csv')

In [4]:
books_id_data.loc[books_id_data['penulis'] == 'Tere Liye']

Unnamed: 0,language_code,average_rating,deskripsi,penerbit,jml_halaman,tahun_terbit,url,image,judul,penulis,isbn,genre
76,ind,4.07,"Sebenarnya, apakah itu perasaan? Keinginan? Ra...",Mahaka Publishing,429.0,2011.0,https://www.goodreads.com/book/show/5652131-su...,https://images.gr-assets.com/books/1319506027m...,Sunset Bersama Rosie,Tere Liye,9786028357029,"['Romance', 'Novels', 'Fiction', 'Drama', 'Fam..."
317,ind,4.31,"Kami bertiga teman baik. Remaja, murid kelas s...",Gramedia Pustaka Utama,392.0,2017.0,https://www.goodreads.com/book/show/35278713-b...,https://images.gr-assets.com/books/1496315625m...,Bintang,Tere Liye,9786020351179,"['Fantasy', 'Fiction', 'Novels', 'Adventure', ..."
1049,ind,4.21,Buku yang indah ditulis dalam kesadaran ibadah...,Republika,248.0,2005.0,https://www.goodreads.com/book/show/3281773-ha...,https://s.gr-assets.com/assets/nophoto/book/11...,Hafalan Shalat Delisa,Tere Liye,9789793210605,"['Novels', 'Fiction', 'Indonesian Literature',..."
1205,ind,4.27,"Di negeri para bedebah, kisah fiksi kalah seru...",Gramedia Pustaka Utama,440.0,2012.0,https://www.goodreads.com/book/show/15721334-n...,https://images.gr-assets.com/books/1340606900m...,Negeri Para Bedebah,Tere Liye,9789792285529,"['Fiction', 'Novels', 'Action', 'Indonesian Li..."
1672,ind,4.27,Bidadari-Bidadari Surga bercerita tentang peng...,Republika,365.0,2008.0,https://www.goodreads.com/book/show/3971237-bi...,https://images.gr-assets.com/books/1303476056m...,Bidadari Bidadari Surga,Tere Liye,9789791102261,"['Novels', 'Fiction', 'Indonesian Literature',..."
2204,ind,3.86,Kumpulan 24 sajak dengan ilustrasi terbaik dar...,Gramedia Pustaka Utama,104.0,2016.0,https://www.goodreads.com/book/show/33000215-d...,https://images.gr-assets.com/books/1479377681m...,"Dikatakan atau Tidak Dikatakan, Itu Tetap Cinta",Tere Liye,9786020332833,"['Poetry', 'Romance', 'Indonesian Literature',..."
2446,ind,4.22,Selamat datang di dunia yang tidak pernah kali...,Penerbit Republika,519.0,2011.0,https://www.goodreads.com/book/show/10116769-e...,https://images.gr-assets.com/books/1294137264m...,"Eliana (Serial Anak-Anak Mamak, Buku 4)",Tere Liye,9786028987042,"['Novels', 'Fiction', 'Indonesian Literature',..."
4179,ind,4.12,Ada tujuh miliar penduduk bumi saat ini. Jika ...,Gramedia Pustaka Utama,512.0,2016.0,https://www.goodreads.com/book/show/31341459-k...,https://images.gr-assets.com/books/1470212047m...,"Kau, Aku, dan Sepucuk Angpau Merah",Tere Liye,9786020331614,"['Romance', 'Novels', 'Fiction', 'Indonesian L..."
5277,ind,4.17,Kapan terakhir kali kita memeluk ayah kita? Me...,Gramedia Pustaka Utama,304.0,2011.0,https://www.goodreads.com/book/show/11016697-a...,https://images.gr-assets.com/books/1488428261m...,Ayahku (Bukan) Pembohong,Tere Liye,9789792269055,"['Novels', 'Fiction', 'Indonesian Literature',..."
6715,ind,4.15,"Namaku Raib, usiaku 15 tahun, kelas sepuluh. A...",Gramedia Pustaka Utama,440.0,2014.0,https://www.goodreads.com/book/show/18759843-bumi,https://images.gr-assets.com/books/1383884931m...,Bumi,Tere Liye,9786020301129,"['Fantasy', 'Fiction', 'Novels', 'Indonesian L..."


In [5]:
books_id_data_sorted = books_id_data.sort_values(by='tahun_terbit', ascending=False)
books_id_data_clean = books_id_data_sorted.drop_duplicates(subset=['judul'], keep='first')
books_id_data_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 17152 entries, 213 to 8218
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   17152 non-null  object 
 1   average_rating  17152 non-null  float64
 2   deskripsi       17152 non-null  object 
 3   penerbit        17152 non-null  object 
 4   jml_halaman     17152 non-null  float64
 5   tahun_terbit    17152 non-null  float64
 6   url             17152 non-null  object 
 7   image           17152 non-null  object 
 8   judul           17152 non-null  object 
 9   penulis         17152 non-null  object 
 10  isbn            17152 non-null  object 
 11  genre           17152 non-null  object 
dtypes: float64(3), object(9)
memory usage: 1.7+ MB


In [6]:
books_id_data_clean.loc[books_id_data_clean['penulis'] == 'Tere Liye']

Unnamed: 0,language_code,average_rating,deskripsi,penerbit,jml_halaman,tahun_terbit,url,image,judul,penulis,isbn,genre
6807,ind,3.82,"""Pecinta sejati tidak akan pernah menyerah seb...",Mahaka,300.0,2017.0,https://www.goodreads.com/book/show/35470282-h...,https://images.gr-assets.com/books/1497839403m...,Harga Sebuah Percaya,Tere Liye,9786029474121,"['Novels', 'Fantasy', 'Fiction', 'Romance', 'I..."
7046,ind,3.91,Persahabatan selalu spesial. Dan sahabat terba...,Gramedia Pustaka Utama,128.0,2017.0,https://www.goodreads.com/book/show/34776576-a...,https://images.gr-assets.com/books/1491266728m...,#AboutFriends,Tere Liye,9786020342696,"['Poetry', 'Nonfiction', 'Self Help', 'Indones..."
317,ind,4.31,"Kami bertiga teman baik. Remaja, murid kelas s...",Gramedia Pustaka Utama,392.0,2017.0,https://www.goodreads.com/book/show/35278713-b...,https://images.gr-assets.com/books/1496315625m...,Bintang,Tere Liye,9786020351179,"['Fantasy', 'Fiction', 'Novels', 'Adventure', ..."
4179,ind,4.12,Ada tujuh miliar penduduk bumi saat ini. Jika ...,Gramedia Pustaka Utama,512.0,2016.0,https://www.goodreads.com/book/show/31341459-k...,https://images.gr-assets.com/books/1470212047m...,"Kau, Aku, dan Sepucuk Angpau Merah",Tere Liye,9786020331614,"['Romance', 'Novels', 'Fiction', 'Indonesian L..."
11654,ind,3.97,Dia bagai malaikat bagi keluarga kami. Merengk...,Gramedia Pustaka Utama,264.0,2016.0,https://www.goodreads.com/book/show/30741382-d...,https://images.gr-assets.com/books/1466740304m...,Daun Yang Jatuh Tak Pernah Membenci Angin,Tere Liye,9786020331607,"['Romance', 'Novels', 'Fiction', 'Indonesian L..."
9690,ind,4.31,"Namanya Ali, 15 tahun, kelas X. Jika saja oran...",Gramedia Pustaka Utama,390.0,2016.0,https://www.goodreads.com/book/show/30827710-m...,https://images.gr-assets.com/books/1467429925m...,Matahari,Tere Liye,9786020332116,"['Fantasy', 'Fiction', 'Novels', 'Adventure', ..."
10624,ind,4.51,"Terima kasih untuk kesempatan mengenalmu, itu ...",Pt. Putra Bangsa,524.0,2016.0,https://www.goodreads.com/book/show/32467509-t...,https://images.gr-assets.com/books/1475659479m...,Tentang Kamu,Tere Liye,9786020822341,"['Novels', 'Fiction', 'Romance', 'Indonesian L..."
13356,ind,4.07,Sehari menjetang pertunangan Tegar dengan Seka...,Mahaka Publishing,426.0,2016.0,https://www.goodreads.com/book/show/33632332-s...,https://images.gr-assets.com/books/1483021882m...,Sunset & Rosie,Tere Liye,9786029474084,"['Romance', 'Novels', 'Fiction', 'Drama', 'Fam..."
13266,ind,4.17,Kapan terakhir kali kita memeluk ayah kita? Me...,Gramedia Pustaka Utama,304.0,2016.0,https://www.goodreads.com/book/show/31683007-a...,https://images.gr-assets.com/books/1472197488m...,Ayahku (Bukan) Pembohong,Tere Liye,9786020331584,"['Novels', 'Fiction', 'Indonesian Literature',..."
2204,ind,3.86,Kumpulan 24 sajak dengan ilustrasi terbaik dar...,Gramedia Pustaka Utama,104.0,2016.0,https://www.goodreads.com/book/show/33000215-d...,https://images.gr-assets.com/books/1479377681m...,"Dikatakan atau Tidak Dikatakan, Itu Tetap Cinta",Tere Liye,9786020332833,"['Poetry', 'Romance', 'Indonesian Literature',..."


In [7]:
books_id_data_clean['tahun_terbit'] = books_id_data_clean['tahun_terbit'].astype(int)
books_id_data_clean['jml_halaman'] = books_id_data_clean['jml_halaman'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books_id_data_clean['tahun_terbit'] = books_id_data_clean['tahun_terbit'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books_id_data_clean['jml_halaman'] = books_id_data_clean['jml_halaman'].astype(int)


In [8]:
books_id_data_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 17152 entries, 213 to 8218
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   17152 non-null  object 
 1   average_rating  17152 non-null  float64
 2   deskripsi       17152 non-null  object 
 3   penerbit        17152 non-null  object 
 4   jml_halaman     17152 non-null  int32  
 5   tahun_terbit    17152 non-null  int32  
 6   url             17152 non-null  object 
 7   image           17152 non-null  object 
 8   judul           17152 non-null  object 
 9   penulis         17152 non-null  object 
 10  isbn            17152 non-null  object 
 11  genre           17152 non-null  object 
dtypes: float64(1), int32(2), object(9)
memory usage: 1.6+ MB


In [36]:
list(books_id_data_clean['genre'])

["['Indonesian Literature', 'Anthologies']",
 "['Romance', 'Chick Lit']",
 "['Indonesian Literature']",
 '[]',
 "['Romance']",
 '[]',
 "['Fantasy', 'Comics', 'Manga', 'Indonesian Literature']",
 "['Manga', 'Romance', 'Young Adult', 'Graphic Novels', 'Humor', 'Slice Of Life', 'Comedy']",
 "['Mystery', 'Fiction', 'Crime', 'Mystery Thriller', 'Detective', 'Audiobook', 'Murder Mystery']",
 "['Fiction', 'Indonesian Literature', 'Short Stories']",
 "['Romance', 'Contemporary Romance', 'Indonesian Literature', 'Chick Lit', 'Novels', 'Adult', 'Family']",
 "['Islam']",
 '[]',
 "['Manga', 'Horror']",
 "['Classics', 'Fantasy', 'Romance', 'Fairy Tales', 'Fiction', 'Beauty and The Beast', 'France']",
 "['Romance', 'Contemporary Romance', 'Fiction', 'Westerns', 'Western Romance', 'Chick Lit']",
 "['Romance', 'Drama', 'Young Adult', 'Fiction', 'Family', 'Indonesian Literature', 'Contemporary']",
 "['Childrens', 'Classics', 'Picture Books', 'Fiction', 'Animals', 'Fantasy', 'Short Stories']",
 '[]',
 "

In [37]:
import ast
books_id_data_clean['genre'] = books_id_data_clean['genre'].apply(ast.literal_eval)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  books_id_data_clean['genre'] = books_id_data_clean['genre'].apply(ast.literal_eval)


In [38]:
empty_list_mask = books_id_data_clean['genre'].apply(lambda x: len(x) == 0)

In [39]:
book_id_proceed_genre = books_id_data_clean[~empty_list_mask]

In [40]:
book_id_proceed_genre.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11753 entries, 213 to 8218
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   11753 non-null  object 
 1   average_rating  11753 non-null  float64
 2   deskripsi       11753 non-null  object 
 3   penerbit        11753 non-null  object 
 4   jml_halaman     11753 non-null  int32  
 5   tahun_terbit    11753 non-null  int32  
 6   url             11753 non-null  object 
 7   image           11753 non-null  object 
 8   judul           11753 non-null  object 
 9   penulis         11753 non-null  object 
 10  isbn            11753 non-null  object 
 11  genre           11753 non-null  object 
dtypes: float64(1), int32(2), object(9)
memory usage: 1.1+ MB


In [41]:
from collections import Counter
import ast
def split_genres(genre_str):
    try:
        genres = ast.literal_eval(genre_str)
        if isinstance(genres, list):
            return genres
        else:
            return genre_str
    except:
        return genre_str

# Fungsi untuk menghitung genre unik dan total buku
def count_unique_genres(df, col_name='genre_list'):
    # Menggabungkan semua list genre menjadi satu list besar
    valid_genres = df[col_name].dropna()
    valid_genres.info()
    all_genres = [genre for sublist in valid_genres for genre in sublist]

    # Menghitung frekuensi setiap genre
    genre_counts = Counter(all_genres)

    return genre_counts

In [42]:
book_id_proceed_genre['genre_list'] = book_id_proceed_genre['genre'].apply(split_genres)

# Hitung genre unik dan total buku
genre_counts = count_unique_genres(book_id_proceed_genre)

# Konversi hasil ke DataFrame
genre_counts_df = pd.DataFrame(genre_counts.items(), columns=['genre', 'jumlah_buku'])
genre_counts_df

<class 'pandas.core.series.Series'>
Index: 11753 entries, 213 to 8218
Series name: genre_list
Non-Null Count  Dtype 
--------------  ----- 
11753 non-null  object
dtypes: object(1)
memory usage: 183.6+ KB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  book_id_proceed_genre['genre_list'] = book_id_proceed_genre['genre'].apply(split_genres)


Unnamed: 0,genre,jumlah_buku
0,Indonesian Literature,2422
1,Anthologies,68
2,Romance,3863
3,Chick Lit,701
4,Fantasy,2126
...,...,...
544,Womens Studies,1
545,Noir,1
546,Gardening,1
547,The World,1


In [43]:
list(genre_counts_df['genre'])

['Indonesian Literature',
 'Anthologies',
 'Romance',
 'Chick Lit',
 'Fantasy',
 'Comics',
 'Manga',
 'Young Adult',
 'Graphic Novels',
 'Humor',
 'Slice Of Life',
 'Comedy',
 'Mystery',
 'Fiction',
 'Crime',
 'Mystery Thriller',
 'Detective',
 'Audiobook',
 'Murder Mystery',
 'Short Stories',
 'Contemporary Romance',
 'Novels',
 'Adult',
 'Family',
 'Islam',
 'Horror',
 'Classics',
 'Fairy Tales',
 'Beauty and The Beast',
 'France',
 'Westerns',
 'Western Romance',
 'Drama',
 'Contemporary',
 'Childrens',
 'Picture Books',
 'Animals',
 'Thriller',
 'Historical Romance',
 'Historical',
 'Historical Fiction',
 'Regency',
 'Regency Romance',
 'Erotica',
 'Erotic Romance',
 'Seinen',
 'Comics Manga',
 'Zombies',
 'Science Fiction',
 'Action',
 'Literature',
 'Magical Realism',
 'Magic',
 'Mythology',
 'India',
 'Indian Literature',
 'Japanese Literature',
 'Light Novel',
 'Ghosts',
 'Harlequin',
 'Parenting',
 'Komik',
 'Middle Grade',
 'Adventure',
 'Education',
 'Nonfiction',
 'Teaching

In [44]:
book_id_proceed_genre.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11753 entries, 213 to 8218
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   11753 non-null  object 
 1   average_rating  11753 non-null  float64
 2   deskripsi       11753 non-null  object 
 3   penerbit        11753 non-null  object 
 4   jml_halaman     11753 non-null  int32  
 5   tahun_terbit    11753 non-null  int32  
 6   url             11753 non-null  object 
 7   image           11753 non-null  object 
 8   judul           11753 non-null  object 
 9   penulis         11753 non-null  object 
 10  isbn            11753 non-null  object 
 11  genre           11753 non-null  object 
 12  genre_list      11753 non-null  object 
dtypes: float64(1), int32(2), object(10)
memory usage: 1.2+ MB


In [45]:
book_id_proceed_genre = book_id_proceed_genre.dropna()

In [46]:
book_id_proceed_genre.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11753 entries, 213 to 8218
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   11753 non-null  object 
 1   average_rating  11753 non-null  float64
 2   deskripsi       11753 non-null  object 
 3   penerbit        11753 non-null  object 
 4   jml_halaman     11753 non-null  int32  
 5   tahun_terbit    11753 non-null  int32  
 6   url             11753 non-null  object 
 7   image           11753 non-null  object 
 8   judul           11753 non-null  object 
 9   penulis         11753 non-null  object 
 10  isbn            11753 non-null  object 
 11  genre           11753 non-null  object 
 12  genre_list      11753 non-null  object 
dtypes: float64(1), int32(2), object(10)
memory usage: 1.2+ MB


In [58]:
def categorize_genres(genres):
    # Dictionary mapping original genres to their respective categories
    genre_categories = {
        'Fiction': ['Comics', 'Manga', 'Graphic Novels', 'Shojo', 'Romance', 'Fantasy', 'Young Adult', 'Slice Of Life', 'Comedy', 'Mystery', 'Fiction', 'Crime', 'Mystery Thriller', 'Detective', 'Murder Mystery', 'Short Stories', 'Contemporary Romance', 'Novels', 'Adult', 'Horror', 'Classics', 'Fairy Tales', 'Westerns', 'Western Romance', 'Drama', 'Contemporary', 'Childrens', 'Picture Books', 'Thriller', 'Historical Romance', 'Historical', 'Historical Fiction', 'Regency', 'Regency Romance', 'Erotica', 'Erotic Romance', 'Seinen', 'Comics Manga', 'Science Fiction', 'Action', 'Literature', 'Magical Realism', 'Magic', 'Mythology', 'Ghosts', 'Harlequin', 'Parenting', 'Middle Grade', 'Adventure', 'Nonfiction', 'Teaching', 'Psychology', 'Inspirational', 'True Story', 'Collections', 'New Adult', 'Realistic Fiction', 'Young Adult Contemporary', 'Dystopia', 'Young Adult Fantasy', 'Shonen', 'Gothic', 'Paranormal', 'Coming Of Age', 'Teen', 'Supernatural', 'Shojo', 'Books About Books', 'Adult Fiction'],
        'Nonfiction': ['Audiobook', 'Education', 'Teaching', 'School', 'Parenting', 'Psychology', 'Inspirational', 'True Story', 'Management', 'Self Help', 'History', 'Religion', 'Travel', 'Time Travel', 'Philosophy', 'Economics', 'Politics', 'Political Science', 'Biography', 'Autobiography', 'Memoir'],
        'Childrens': ['Childrens', 'Picture Books'],
        'Classics': ['Classics'],
        'Comics' : ['Comics', 'Manga', 'Graphic Novels', 'Shojo', 'Seinen', 'Comics Manga', 'Shonen', ],
        'Historical Fiction': ['Historical Fiction', 'Historical'],
        'Thriller': ['Thriller', 'Mystery Thriller', 'Suspense', 'Detective', 'Psychological Thriller'],
        'Adventure': ['Adventure'],
        'Humor': ['Humor'],
        'Fantasy': ['Fantasy', 'Supernatural', 'Paranormal', 'Magic', 'Fairy Tales', 'Witches', 'Dragons', 'Wicca', 'Fairies'],
        'Crime': ['Crime', 'Mystery'],
        'Mystery': ['Mystery', 'Mystery Thriller', 'Detective', 'Murder Mystery'],
        'Contemporary': ['Contemporary', 'Slice Of Life'],
        'Romance': ['Romance', 'Contemporary Romance', 'Romantic Suspense', 'Historical Romance', 'Regency Romance', 'Erotic Romance', 'Paranormal Romance', 'Category Romance', 'Love', 'Harlequin', 'Mills and Boon'],
        'Supernatural': ['Supernatural', 'Paranormal', 'Horror', 'Ghost Stories', 'Gothic'],
        'Horror': ['Horror'],
        'Paranormal': ['Paranormal'],
        'Adult': ['Adult'],
        'Young Adult': ['Young Adult', 'Teen'],
        'Chick Lit': ['Chick Lit'],
        'Literature': ['Literature', 'Books About Books'],
        'Novels': ['Novels', 'Novel'],
        'Drama': ['Drama'],
        'Business': ['Business'],
        'Philosophy': ['Philosophy'],
        'Action': ['Action'],
        'Slice Of Life': ['Slice Of Life'],
        'Islam': ['Islam'],
        'Christmas': ['Christmas'],
        'Poetry': ['Poetry'],
        'Sport': ['Sport']
        # Add more categories as needed
    }
    
    # Function to map genre to category
    def map_genre(genre):
        genres = []
        for category, genres_list in genre_categories.items():
            if genre in genres_list:
                genres.append(category)
        if len(genres) == 0:
            genres.append('Other')
        return genres # If genre does not match any category
    
    # Apply mapping function to each genre
    unflat = [map_genre(genre) for genre in genres]
    return [genre for sublist in unflat for genre in sublist]

# Apply categorization to the DataFrame
book_id_proceed_genre['New Genres'] = book_id_proceed_genre['genre'].apply(categorize_genres)

In [59]:
book_id_proceed_genre['New Genres'] 

213                                       [Other, Fiction]
15076               [Fiction, Chick Lit, Fiction, Romance]
13292                                     [Other, Fiction]
11747                          [Fiction, Fiction, Romance]
13291                   [Other, Fiction, Fiction, Fantasy]
                               ...                        
4018     [Other, Fiction, Fiction, Childrens, Fiction, ...
9697     [Other, Fiction, Fiction, Childrens, Fiction, ...
8329     [Other, Fiction, Fiction, Childrens, Fiction, ...
295        [Other, Nonfiction, Philosophy, Islam, Fiction]
8218     [Fiction, Fantasy, Supernatural, Fiction, Fict...
Name: New Genres, Length: 11753, dtype: object

In [64]:
def count_and_decide(genres):
    # Count the occurrences of 'a' and 'b'
    count_a = genres.count('Fiction')
    count_b = genres.count('Nonfiction')
    
    # Determine the more frequent word between 'a' and 'b'
    if count_a >= count_b:
        most_frequent = 'Fiction'
    elif count_b > count_a:
        most_frequent = 'Nonfiction'
     # or 'b', in case of a tie you can decide

    others = [word for word in genres if word != 'Fiction' and word != 'Nonfiction']

    if count_a == 0 and count_b == 0:
        return others

    return [most_frequent] + others

In [65]:
book_id_proceed_genre['New Genres'] = book_id_proceed_genre['New Genres'].apply(count_and_decide)

In [51]:
def unique(genres):
    return list(set(genres))

In [66]:
book_id_proceed_genre['New Genres']

213                                       [Fiction, Other]
15076                        [Fiction, Chick Lit, Romance]
13292                                     [Fiction, Other]
11747                                   [Fiction, Romance]
13291                            [Fiction, Other, Fantasy]
                               ...                        
4018                [Fiction, Other, Childrens, Adventure]
9697                [Fiction, Other, Childrens, Adventure]
8329                [Fiction, Other, Childrens, Adventure]
295                    [Fiction, Other, Philosophy, Islam]
8218     [Fiction, Fantasy, Supernatural, Fantasy, Supe...
Name: New Genres, Length: 11753, dtype: object

In [67]:
book_id_proceed_genre['New Genres'] = book_id_proceed_genre['New Genres'].apply(unique)

In [68]:
def check_others(genres):
    if len(genres) == 1:
        return genres
    
    try:
        genres.remove('Other')
        return genres
    except:
        return genres

In [69]:
book_id_proceed_genre['New Genres'] = book_id_proceed_genre['New Genres'].apply(check_others)

In [70]:
book_id_proceed_genre['New Genres'] 

213                                     [Fiction]
15076               [Fiction, Chick Lit, Romance]
13292                                   [Fiction]
11747                          [Fiction, Romance]
13291                          [Fiction, Fantasy]
                           ...                   
4018              [Fiction, Childrens, Adventure]
9697              [Fiction, Childrens, Adventure]
8329              [Fiction, Childrens, Adventure]
295                  [Fiction, Philosophy, Islam]
8218     [Supernatural, Fiction, Fantasy, Horror]
Name: New Genres, Length: 11753, dtype: object

In [71]:
book_id_proceed_genre.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11753 entries, 213 to 8218
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   11753 non-null  object 
 1   average_rating  11753 non-null  float64
 2   deskripsi       11753 non-null  object 
 3   penerbit        11753 non-null  object 
 4   jml_halaman     11753 non-null  int32  
 5   tahun_terbit    11753 non-null  int32  
 6   url             11753 non-null  object 
 7   image           11753 non-null  object 
 8   judul           11753 non-null  object 
 9   penulis         11753 non-null  object 
 10  isbn            11753 non-null  object 
 11  genre           11753 non-null  object 
 12  New Genres      11753 non-null  object 
dtypes: float64(1), int32(2), object(10)
memory usage: 1.2+ MB


In [73]:
book_id_proceed_genre = book_id_proceed_genre.drop(columns=['genre'])
book_id_proceed_genre = book_id_proceed_genre.rename(columns={'New Genres': 'genre'})

book_id_proceed_genre.info()


<class 'pandas.core.frame.DataFrame'>
Index: 11753 entries, 213 to 8218
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   11753 non-null  object 
 1   average_rating  11753 non-null  float64
 2   deskripsi       11753 non-null  object 
 3   penerbit        11753 non-null  object 
 4   jml_halaman     11753 non-null  int32  
 5   tahun_terbit    11753 non-null  int32  
 6   url             11753 non-null  object 
 7   image           11753 non-null  object 
 8   judul           11753 non-null  object 
 9   penulis         11753 non-null  object 
 10  isbn            11753 non-null  object 
 11  genre           11753 non-null  object 
dtypes: float64(1), int32(2), object(9)
memory usage: 1.1+ MB


In [74]:
list(book_id_proceed_genre['genre'])

[['Fiction'],
 ['Fiction', 'Chick Lit', 'Romance'],
 ['Fiction'],
 ['Fiction', 'Romance'],
 ['Fiction', 'Fantasy'],
 ['Fiction',
  'Slice Of Life',
  'Humor',
  'Contemporary',
  'Romance',
  'Young Adult'],
 ['Fiction', 'Crime', 'Thriller', 'Mystery'],
 ['Fiction'],
 ['Fiction', 'Chick Lit', 'Adult', 'Romance', 'Novels'],
 ['Fiction', 'Islam'],
 ['Supernatural', 'Fiction', 'Fantasy', 'Horror'],
 ['Fiction', 'Classics', 'Fantasy', 'Romance'],
 ['Fiction', 'Chick Lit', 'Romance'],
 ['Drama', 'Fiction', 'Contemporary', 'Romance', 'Young Adult'],
 ['Fiction', 'Classics', 'Childrens', 'Fantasy'],
 ['Fiction', 'Fantasy'],
 ['Fiction', 'Crime', 'Mystery', 'Thriller'],
 ['Adult', 'Fiction', 'Historical Fiction', 'Romance'],
 ['Fiction', 'Romance'],
 ['Fiction', 'Novels'],
 ['Adult', 'Fiction', 'Romance', 'Contemporary'],
 ['Fiction', 'Supernatural', 'Horror', 'Fantasy'],
 ['Fiction', 'Romance'],
 ['Fiction', 'Novels'],
 ['Fiction', 'Young Adult', 'Action'],
 ['Fiction'],
 ['Fiction',
  'Slice

In [84]:
book_id_proceed_genre.loc[1,'tahun_terbit'] = 2004

In [88]:
book_id_proceed_genre = pd.read_csv('Final_books_id_data.csv')

In [94]:
book_id_proceed_genre.loc[11751,'tahun_terbit'] = 2010

book_id                                                       11752
language_code                                                   ind
average_rating                                                 4.33
deskripsi         Inilah karya yang melambungkan nama Ibnul Jauz...
penerbit                                                Darul Uswah
jml_halaman                                                     618
tahun_terbit                                                    201
url               https://www.goodreads.com/book/show/10452056-s...
image             https://s.gr-assets.com/assets/nophoto/book/11...
judul                                               Shaid Al-Khatir
penulis                                                    bn ljwzy
isbn                                                     9798143108
genre                            ['Fiction', 'Philosophy', 'Islam']
Name: 11751, dtype: object

In [96]:
book_id_proceed_genre.loc[11752,'tahun_terbit'] = 2013

In [103]:
book_id_proceed_genre.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11753 entries, 1 to 11753
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   language_code   11753 non-null  object 
 1   average_rating  11753 non-null  float64
 2   deskripsi       11753 non-null  object 
 3   penerbit        11753 non-null  object 
 4   jml_halaman     11753 non-null  int64  
 5   tahun_terbit    11753 non-null  int64  
 6   url             11753 non-null  object 
 7   image           11753 non-null  object 
 8   judul           11753 non-null  object 
 9   penulis         11753 non-null  object 
 10  isbn            11753 non-null  object 
 11  genre           11753 non-null  object 
dtypes: float64(1), int64(2), object(9)
memory usage: 1.2+ MB


In [98]:
book_id_proceed_genre = book_id_proceed_genre.drop(columns=['book_id'])

In [102]:
book_id_proceed_genre['books_id'] = range(1, len(book_id_proceed_genre) + 1)
book_id_proceed_genre.set_index('books_id', inplace=True)

book_id_proceed_genre.to_csv('Final_books_id_data.csv')

In [None]:
{'isbn': '9799731232', 
 'text_reviews_count': '956', 
 'series': ['353465'], 
 'country_code': 'US', 
 'language_code': 'ind', 
 'popular_shelves': 
 [{'count': '9520', 'name': 'to-read'}, 
  {'count': '873', 'name': 'currently-reading'}, 
  {'count': '179', 'name': 'favorites'}, 
  {'count': '152', 'name': 'fiction'}, 
  {'count': '124', 'name': 'indonesia'}, 
  {'count': '74', 'name': 'indonesian'}, 
  {'count': '71', 'name': 'historical-fiction'}, 
  {'count': '48', 'name': 'literature'}, 
  {'count': '43', 'name': 'indonesian-literature'}, 
  {'count': '39', 'name': 'owned'}, 
  {'count': '34', 'name': 'history'}, 
  {'count': '34', 'name': 'sastra'}, 
  {'count': '32', 'name': 'sastra-indonesia'}, 
  {'count': '30', 'name': 'classics'}, 
  {'count': '28', 'name': 'novel'}, 
  {'count': '28', 'name': 'asia'}, 
  {'count': '27', 'name': 'pramoedya-ananta-toer'}, 
  {'count': '22', 'name': 'romance'}, 
  {'count': '20', 'name': 'novels'}, 
  {'count': '19', 'name': 'classic'}, 
  {'count': '18', 'name': 'pramoedya'}, 
  {'count': '17', 'name': 'punya'}, 
  {'count': '16', 'name': 'bumi-manusia'}, 
  {'count': '16', 'name': 'historical'}, 
  {'count': '14', 'name': 'fiksi'}, 
  {'count': '13', 'name': 'wish-list'}, 
  {'count': '13', 'name': 'pram'}, 
  {'count': '11', 'name': 'books-i-own'}, 
  {'count': '10', 'name': 'adult'}, 
  {'count': '9', 'name': 'asian-lit'}, 
  {'count': '9', 'name': 'to-buy'}, 
  {'count': '9', 'name': 'fiksi-indonesia'}, 
  {'count': '9', 'name': 'novel-indonesia'},
  {'count': '8', 'name': '1'}, 
  {'count': '8', 'name': 'sejarah'}, 
  {'count': '8', 'name': 'roman'}, 
  {'count': '8', 'name': 'indonesian-author'}, 
  {'count': '8', 'name': 'indonesian-books'}, 
  {'count': '7', 'name': 'school'}, 
  {'count': '7', 'name': 'colonialism'}, 
  {'count': '7', 'name': 'southeast-asia'}, 
  {'count': '7', 'name': 'owned-books'}, 
  {'count': '7', 'name': 'favorite'}, 
  {'count': '7', 'name': 'fiksi-sejarah'}, 
  {'count': '6', 'name': 'literary-fiction'}, 
  {'count': '6', 'name': 'world-literature'}, 
  {'count': '6', 'name': 'recommended'}, 
  {'count': '6', 'name': 'indonesian-fiction'}, 
  {'count': '5', 'name': 'series'}, 
  {'count': '5', 'name': '20th-century'}, 
  {'count': '5', 'name': 'indonesian-lit'}, 
  {'count': '5', 'name': 'drama'}, 
  {'count': '5', 'name': 'asian'}, 
  {'count': '5', 'name': 'bahasa-indonesia'}, 
  {'count': '4', 'name': 'around-the-world'}, 
  {'count': '4', 'name': 'contemporary'}, 
  {'count': '4', 'name': 'family'}, 
  {'count': '4', 'name': 'done'}, 
  {'count': '4', 'name': 'koleksi'}, 
  {'count': '4', 'name': 'politics'}, 
  {'count': '4', 'name': 'borrowed'}, 
  {'count': '4', 'name': 'classic-literature'},
  {'count': '4', 'name': 'sociology'}, 
  {'count': '4', 'name': 'banned-books'}, 
  {'count': '4', 'name': 'lokal'},
  {'count': '4', 'name': 'literary'}, 
  {'count': '4', 'name': 'my-favorite'}, 
  {'count': '3', 'name': 'adult-fiction'}, 
  {'count': '3', 'name': 'reading-the-world'}, 
  {'count': '3', 'name': 'owned-book'}, 
  {'count': '3', 'name': 'indo'}, 
  {'count': '3', 'name': 'translation'}, 
  {'count': '3', 'name': 'translated'}, 
  {'count': '3', 'name': 'coming-of-age'}, 
  {'count': '3', 'name': 'library'}, 
  {'count': '3', 'name': 'my-shelf'}, 
  {'count': '3', 'name': 'politic'}, 
  {'count': '3', 'name': 'sastra-klasik-indonesia'}, 
  {'count': '3', 'name': 'cultural'}, 
  {'count': '3', 'name': 'asian-literature'}, 
  {'count': '3', 'name': 'fiction-literature'}, 
  {'count': '3', 'name': 'waiting-list'}, 
  {'count': '3', 'name': 'political'}, 
  {'count': '3', 'name': 'indonesia-literature'}, 
  {'count': '3', 'name': 'not-mine'}, 
  {'count': '3', 'name': 'roman-sejarah'}, 
  {'count': '3', 'name': 'to-find'}, 
  {'count': '3', 'name': 'literature-sastra'}, 
  {'count': '3', 'name': 'non-western'}, 
  {'count': '3', 'name': 'tetralogi'}, 
  {'count': '3', 'name': 'favorit'}, 
  {'count': '3', 'name': '19th-century'}, 
  {'count': '3', 'name': 'asli-indonesia'}, 
  {'count': '3', 'name': 'pramudya'}, 
  {'count': '3', 'name': 'collection'}, 
  {'count': '3', 'name': 'fiksi-novel'}, 
  {'count': '3', 'name': 'sastraindonesia'}, 
  {'count': '3', 'name': 'my-collection'}, 
  {'count': '3', 'name': 'klasik'}, 
  {'count': '3', 'name': 'indonesianliterature'}], 
  'asin': '', 
  'is_ebook': 'false', 
  'average_rating': '4.38', 
  'kindle_asin': '', 
  'similar_books': ['1379444', '1334844', '984819', '1455669', '2016000', '16174176', '3492825', '6468957', '15995172', '1439798', '1283619', '1096114', '1455480', '1722607', '2090345', '1481902', '1492432', '1446004'], 
  'description': 'Roman Tetralogi Buru mengambil latar belakang dan cikal bakal nation Indonesia di awal abad ke-20. Dengan membacanya waktu kita dibalikkan sedemikian rupa dan hidup di era membibitnya pergerakan nasional mula-mula, juga pertautan rasa, kegamangan jiwa, percintaan, dan pertarungan kekuatan anonim para srikandi yang mengawal penyemaian bangunan nasional yang kemudian kelak melahirkan Indonesia modern.\nRoman bagian pertama; Bumi Manusia, sebagai periode penyemaian dan kegelisahan dimana Minke sebagai aktor sekaligus kreator adalah manusia berdarah priyayi yang semampu mungkin keluar dari kepompong kejawaannya menuju manusia yang bebas dan merdeka, di sudut lain membelah jiwa ke-Eropa-an yang menjadi simbol dan kiblat dari ketinggian pengetahuan dan peradaban.\nPram menggambarkan sebuah adegan antara Minke dengan ayahnya yang sangat sentimentil: Aku mengangkat sembah sebagaimana biasa aku lihat dilakukan punggawa terhadap kakekku dan nenekku dan orangtuaku, waktu lebaran. Dan yang sekarang tak juga kuturunkan sebelum Bupati itu duduk enak di tempatnya. Dalam mengangkat sembah serasa hilang seluruh ilmu dan pengetahuan yang kupelajari tahun demi tahun belakangan ini. Hilang indahnya dunia sebagaimana dijanjikan oleh kemajuan ilmu .... Sembah pengagungan pada leluhur dan pembesar melalui perendahan dan penghinaan diri! Sampai sedatar tanah kalau mungkin! Uh, anak-cucuku tak kurelakan menjalani kehinaan ini.\n"Kita kalah, Ma," bisikku.\n"Kita telah melawan, Nak, Nyo, sebaik-baiknya, sehormat-hormatnya."', 
  'format': 'Paperback', 
  'link': 'https://www.goodreads.com/book/show/1398034.Bumi_Manusia', 
  'authors': [{'author_id': '101823', 'role': ''}], 
  'publisher': 'Lentera Dipantara', 
  'num_pages': '535', 
  'publication_day': '', 
  'isbn13': '9789799731234', 
  'publication_month': '', 
  'edition_information': '', 
  'publication_year': '2005', 
  'url': 'https://www.goodreads.com/book/show/1398034.Bumi_Manusia', 
  'image_url': 'https://images.gr-assets.com/books/1464891625m/1398034.jpg', 
  'book_id': '1398034', 
  'ratings_count': '8834', 
  'work_id': '1881100', 
  'title': 'Bumi Manusia', 
  'title_without_series': 'Bumi Manusia'}
