# Import Library

In [5]:
!pip install google-play-scraper



In [10]:
from google_play_scraper import reviews, Sort
import pandas as pd

In [11]:
def scrape_reviews(app_id, review_count=30000, lang='id', country='id', filename='playstore_reviews.csv'):
    """
    Scrape reviews from Google Play Store, keep selected columns and save them to CSV.

    Parameters:
    app_id (str): The application ID from Google Play Store
    review_count (int): Number of reviews to request
    lang (str): Language code forwarded to the scraper (default 'id')
    country (str): Country code forwarded to the scraper (default 'id')
    filename (str): Path of the CSV file the selected columns are written to

    Returns:
    pandas.DataFrame: DataFrame with the columns 'id', 'content', 'score' and
    'date'. It can hold fewer rows than review_count, and it is empty (columns
    only) when the scraper returns no reviews.
    """
    print(f"Starting to scrape {review_count} reviews...")

    # reviews() returns a (review_list, continuation_token) tuple; the token is
    # unused here, so it is bound to a throwaway name
    result, _ = reviews(
        app_id,
        lang=lang,
        country=country,
        sort=Sort.MOST_RELEVANT,
        count=review_count
    )

    # Scraper field name -> column name used in the output
    selected_columns = {
        'reviewId': 'id',
        'content': 'content',
        'score': 'score',
        'at': 'date'
    }

    if result:
        # Keep only the required fields and rename them to the output headers
        df = pd.DataFrame(result)
        df_selected = df[list(selected_columns)].rename(columns=selected_columns)
    else:
        # An empty result yields a DataFrame without columns, so selecting the
        # fields would raise KeyError; build the empty frame with the expected
        # headers instead so callers and the CSV keep a stable schema
        print("Warning: no reviews were returned.")
        df_selected = pd.DataFrame(columns=list(selected_columns.values()))

    print("Saving to CSV...")
    # Save to CSV
    df_selected.to_csv(filename, index=False)
    print("Done!")

    print(f"\nTotal reviews scraped: {len(df_selected)}")

    return df_selected

# Scraping Dataset

Dalam notebook ini, kita akan melakukan scraping data dari Google Play Store menggunakan library `google-play-scraper` untuk mengambil hingga 150.000 review paling relevan dari aplikasi BPJS atau Jamsostek Mobile.

In [None]:
# Output CSV path and the Play Store ID of the app whose reviews are scraped
filename = 'ulasan_aplikasi_bpjs_mobile_100K.csv'
app_id = 'com.bpjstku'

# Request up to 150,000 reviews; the function also writes them to `filename`
reviews_df = scrape_reviews(
    app_id,
    review_count=150000,
    filename=filename,
)

# Sanity check: report the frame's dimensions, then preview its first rows
preview = reviews_df.head()
print(f"DataFrame shape: {reviews_df.shape}")
print("\nFirst few rows:")
print(preview)

Starting to scrape 150000 reviews...
Saving to CSV...
Done!

Total reviews scraped: 108000
DataFrame shape: (108000, 4)

First few rows:
                                     id  \
0  f0c8161b-3014-4d04-9e71-aa78520b5899   
1  00cd22a2-b8b1-424b-a0c0-090aa3d136ee   
2  493aade3-c29d-4a44-8454-c74a66643993   
3  a9cba0b7-17f4-42f0-b80b-b0f9cdc8c17b   
4  9d20718e-a57b-42b3-82f9-792b3206a1ed   

                                             content  score  \
0  Kenapa aplikasinya sering error? Kemarin² ga b...      1   
1  Aplikasi JMO (Jamsostek ketenagakerjaan ini, s...      5   
2  Dulu aplikasi ini bagus, tapi kenapa mau cek s...      2   
3  saat melakukan pengkinan data malah ke refresh...      1   
4  Katanya aplikasi intansi pemerintah, tapi laya...      1   

                 date  
0 2025-01-22 19:02:07  
1 2025-02-01 10:00:59  
2 2025-01-05 18:10:29  
3 2025-01-13 14:30:42  
4 2025-01-30 11:12:14  


Meskipun yang diminta 150.000 review, data yang berhasil diambil hanya 108.000 review. Kemungkinan besar hal ini karena Google Play Store hanya menyediakan sekitar 108.000 review paling relevan yang dapat diambil untuk aplikasi ini.