In [1]:
# Group Members: Ayman Fikry bin Asmajuda & Muhammad Khairin Asnawi bin Rosli
# Student ID: IS01081779 & IS01082068
# Platform: Shopee
# Product: Samsung Galaxy Z Flip 5

In [2]:
# Import libraries
import csv
import json
import re
from datetime import datetime, timezone

import requests

In [3]:
# Shopee product page to scrape; the trailing "i.<shop_id>.<item_id>" segment
# is what the scraper's regular expression extracts the two IDs from.
product_url = (
    "https://shopee.com.my/"
    "Samsung-Galaxy-Z-Flip-5-8GB-256GB-8GB-512GB-Original-Malaysia-New-Set-"
    "i.54618012.14397879422"
)

In [4]:
# Function to extract user reviews from Shopee website
def scrape_shopee_reviews(url, max_reviews=100, page_size=20, timeout=10, session=None):
    """Collect user reviews for a Shopee product through the site's ratings endpoint.

    The shop id and item id are read from the ``i.<shop_id>.<item_id>`` part of
    the product URL, then the ratings endpoint is queried page by page until
    either ``max_reviews`` have been requested or a short page signals the end.

    Args:
        url: Product page URL containing ``i.<shop_id>.<item_id>``.
        max_reviews: Upper bound on the number of reviews returned
            (default 100, i.e. five pages of 20).
        page_size: Number of reviews requested per call (default 20).
        timeout: Seconds to wait for each HTTP response (default 10).
        session: Optional object exposing ``get(url, timeout=...)`` whose result
            has ``raise_for_status()`` and ``json()`` (for example a
            ``requests.Session``). The ``requests`` module is used when omitted.

    Returns:
        A list of ``[reviewer_name, review_date, review_content]`` rows, where
        ``review_date`` is the UTC date formatted as ``YYYY-MM-DD`` (or an empty
        string when the entry carries no numeric ``mtime``). Returns ``None``
        when the ids cannot be found in ``url``.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
        Exception: Whatever the HTTP client raises for network failures, error
            status codes, or a body that is not valid JSON.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    # Extract shop id & item id from the URL
    match = re.search(r'i\.(\d+)\.(\d+)', url)
    if not match:
        # If shop id & item id not found, print and return failure
        print("Invalid URL")
        return None
    shop_id, item_id = match.group(1), match.group(2)

    # Fall back to the module-level client only when no session is injected
    http = requests if session is None else session

    reviews_data = []
    offset = 0

    while offset < max_reviews:
        # Never ask for more than what is still needed to reach max_reviews
        limit = min(page_size, max_reviews - offset)

        ratings_url = f'https://shopee.com.my/api/v2/item/get_ratings?filter=0&flag=1&itemid={item_id}&limit={limit}&offset={offset}&shopid={shop_id}&type=0'

        # A timeout keeps a stalled connection from hanging the notebook cell
        response = http.get(ratings_url, timeout=timeout)
        response.raise_for_status()
        payload = response.json()

        # The payload may carry a null "data" or null "ratings" entry; treat
        # both as "no reviews on this page" instead of crashing on indexing.
        body = payload.get('data') if isinstance(payload, dict) else None
        ratings = (body.get('ratings') if isinstance(body, dict) else None) or []

        for rating in ratings:
            mtime = rating.get('mtime')
            if isinstance(mtime, (int, float)):
                # Timezone-aware replacement for the deprecated utcfromtimestamp
                review_date = datetime.fromtimestamp(mtime, tz=timezone.utc).strftime('%Y-%m-%d')
            else:
                review_date = ''

            reviews_data.append([
                rating.get('author_username'),
                review_date,
                rating.get('comment'),
            ])

        # A short (or empty) page means there is nothing left to fetch
        if len(ratings) < limit:
            break

        offset += limit

    # Guard against an endpoint returning more rows than requested
    return reviews_data[:max_reviews]

In [5]:
# Function to save reviews data into CSV file
def save_to_csv(reviews_data, filename):
    """Write review rows to a UTF-8 CSV file and report the destination.

    Args:
        reviews_data: Iterable of rows; each row is written as one CSV record
            beneath the header line.
        filename: Path of the CSV file to create (overwritten if it exists).
    """
    header = ["Reviewer Name", "Review Date", "Review Content"]

    # newline='' lets the csv module control line endings itself
    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        csv_out = csv.writer(csv_file)
        csv_out.writerow(header)
        for row in reviews_data:
            csv_out.writerow(row)

    # Confirmation is printed only after the file has been closed
    print(f"Data saved to {filename}")
   

In [6]:
# Scrape reviews from the product URL. This issues live HTTP requests to Shopee;
# the result is a list of [name, date, comment] rows, or None if the URL has no
# "i.<shop_id>.<item_id>" segment.
reviews = scrape_shopee_reviews(product_url)

In [7]:
# Nothing scraped (None or an empty list) is reported; otherwise the rows are
# handed to save_to_csv for writing.
if not reviews:
    print("Failure to save data!")
else:
    save_to_csv(reviews, 'Shopee_Product_Reviews.csv')
    

Data saved to Shopee_Product_Reviews.csv
