In [22]:
import csv
import json
import re
from datetime import datetime, timezone

import requests

def display_progress(page_num, total_pages):
    """Print a one-line status message for the page being scraped.

    ``page_num`` is a zero-based index; it is shown to the user one-based,
    alongside ``total_pages``.
    """
    current_page = page_num + 1
    print(f"Scraping page {current_page} of {total_pages}...")

# Extract shop ID and item ID from the product URL.  The regex below expects
# the "i.<shop_id>.<item_id>" segment that precedes the query string.
product_url = 'https://shopee.com.my/-Malaysia-3-Year-Warranty-Dark-Knight-DW5600-Petak-G-Sports-Men-Women-Unisex-Kid-Watch-Jam-Tangan-Lelaki-Perempuan-i.88257056.5972469036?sp_atk=8dc92c62-cebc-470d-84e8-f575ab6de5bf&xptdk=8dc92c62-cebc-470d-84e8-f575ab6de5bf'
match = re.search(r'i\.(\d+)\.(\d+)', product_url)
if not match:
    print("Invalid URL")
    # `exit()` is injected by the `site` module for interactive sessions and
    # is not guaranteed to exist; raising SystemExit is the portable form.
    raise SystemExit

shop_id, item_id = match.groups()

# Define the number of pages to scrape and the page size used both for the
# `limit` query parameter and for computing each page's offset.
num_pages_to_scrape = 5
ratings_per_page = 20

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the script forever.
request_timeout = 30

# Open CSV file for writing
with open('revshopee.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['Username', 'Date', 'Comment']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Loop through the desired number of pages
    for page_num in range(num_pages_to_scrape):
        display_progress(page_num, num_pages_to_scrape)

        # Fetch ratings data from Shopee API
        offset = page_num * ratings_per_page  # Calculate offset based on page number
        ratings_url = f'https://shopee.com.my/api/v2/item/get_ratings?filter=0&flag=1&itemid={item_id}&limit={ratings_per_page}&offset={offset}&shopid={shop_id}&type=0'
        response = requests.get(ratings_url, timeout=request_timeout)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        data = response.json()

        # A missing or null `data` / `ratings` entry is treated as "nothing
        # left to read" rather than crashing on a None lookup.
        # NOTE(review): presumably this is what the API returns past the last
        # page of reviews -- confirm against a real response.
        ratings = (data.get('data') or {}).get('ratings') or []
        if not ratings:
            break

        # Extract and write ratings to CSV
        for rating in ratings:
            author_username = rating['author_username']

            # `mtime` is interpreted as a Unix timestamp and rendered in UTC.
            # The timezone-aware call replaces `utcfromtimestamp`, which is
            # deprecated since Python 3.12, and yields the same date string.
            timestamp = rating['mtime']
            review_date = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%d-%m-%Y')

            # Combine all lines of the comment into one string; a null or
            # absent comment becomes an empty string instead of raising.
            comment_text = rating.get('comment') or ''
            full_comment = ' '.join(line.strip() for line in comment_text.split('\n'))

            # Write to CSV file
            writer.writerow({'Username': author_username, 'Date': review_date, 'Comment': full_comment})


print("Comments extracted and saved to revshopee.csv")

Scraping page 1 of 5...
Scraping page 2 of 5...
Scraping page 3 of 5...
Scraping page 4 of 5...
Scraping page 5 of 5...
Comments extracted and saved to revshopee.csv
