In [1]:
import requests
import pandas as pd
import time

In [11]:
class UniqloAPIScraper:
    """Collect product listings from Uniqlo's paginated commerce API.

    Parsed rows accumulate in ``self.products_data`` as flat dictionaries and
    can be written to disk with :meth:`save_to_csv`.

    Args:
        base_url: Product-listing endpoint that accepts ``offset`` and
            ``limit`` query parameters.
        page_size: Number of items requested per page. Defaults to 10.
        delay: Seconds to pause after each successfully scraped page.
            Defaults to 2; a value of 0 or less disables the pause.
        timeout: Seconds to wait for each HTTP response before the request
            is abandoned. Defaults to 10.
    """

    # Placeholder stored whenever a field is absent from the API payload.
    MISSING = "N/A"

    def __init__(self, base_url: str, page_size: int = 10, delay: float = 2.0, timeout: float = 10.0):
        self.base_url = base_url
        self.page_size = page_size
        self.delay = delay
        self.timeout = timeout
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
        }
        self.products_data = []

    def fetch_data(self, page: int):
        """Fetch the raw product entries for one 1-based page.

        Args:
            page: 1-based page number; translated to ``offset``/``limit``.

        Returns:
            The list found at ``result.items`` in the JSON response, or an
            empty list when the request fails, the body is not valid JSON,
            or the expected structure is missing.
        """
        query = {"offset": (page - 1) * self.page_size, "limit": self.page_size}

        try:
            # A timeout keeps a stalled connection from hanging the whole scrape.
            response = requests.get(
                self.base_url,
                headers=self.headers,
                params=query,
                timeout=self.timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on Requests versions
            # whose decode error is not a RequestException subclass.
            print(f"Error fetching data for page {page}: {e}")
            return []

        # Walk the payload defensively: any level may be null or missing.
        result = data.get("result") if isinstance(data, dict) else None
        items = result.get("items") if isinstance(result, dict) else None
        return items if isinstance(items, list) else []

    @staticmethod
    def _dig(mapping, *keys, default="N/A"):
        """Return the nested value at ``keys``, or ``default`` if the path breaks."""
        current = mapping
        for key in keys:
            if not isinstance(current, dict) or key not in current:
                return default
            current = current[key]
        return current

    @classmethod
    def _first_code(cls, entries):
        """Return the ``code`` of the first entry, or the placeholder if there is none."""
        if isinstance(entries, list) and entries and isinstance(entries[0], dict):
            return entries[0].get("code", cls.MISSING)
        return cls.MISSING

    def _parse_product(self, product):
        """Flatten one raw API product entry into a CSV-ready row."""
        colors = product.get("colors")
        if not isinstance(colors, list):
            colors = []

        rate_counts = self._dig(product, "rating", "rateCount", default={})
        if not isinstance(rate_counts, dict):
            rate_counts = {}

        product_id = product.get("productId", self.MISSING)
        color_code = self._first_code(colors)
        size_code = self._first_code(product.get("sizes"))

        return {
            "Product Name": product.get("name", self.MISSING),
            "Price": self._dig(product, "prices", "base", "value", default=self.MISSING),
            "Currency": self._dig(product, "prices", "base", "currency", "code", default=self.MISSING),
            "Colors": [c.get("name", self.MISSING) for c in colors if isinstance(c, dict)],
            "URL": f'https://www.uniqlo.com/vn/vi/products/{product_id}?colorCode={color_code}&sizeCode={size_code}',
            "Rating (Average)": self._dig(product, "rating", "average", default=self.MISSING),
            # Ignore non-numeric counts so one bad entry cannot abort the run.
            "Total Ratings": sum(v for v in rate_counts.values() if isinstance(v, (int, float))),
        }

    def scrape(self, max_pages=None):
        """Collect product data page by page until an empty page is returned.

        Args:
            max_pages: Optional upper bound on the number of pages requested.
                ``None`` (the default) keeps requesting pages until one comes
                back empty.

        Note:
            ``fetch_data`` returns an empty list on request errors as well,
            so a failed request also ends the collection.
        """
        page = 1
        while max_pages is None or page <= max_pages:
            print(f"Fetching data for page {page}...")
            products = self.fetch_data(page)

            if not products:  # Stop if no products are found on the page
                print(f"No products found on page {page}. Stopping collection.")
                break

            self.products_data.extend(self._parse_product(product) for product in products)
            page += 1

            # Pause between requests to avoid overloading the server.
            if self.delay > 0:
                time.sleep(self.delay)

        print(f"Collection complete. Total products collected: {len(self.products_data)}")

    def save_to_csv(self, filename: str):
        """Write the collected rows to ``filename`` as UTF-8, comma-separated CSV.

        Prints a notice and returns without creating a file when nothing has
        been collected yet.
        """
        if not self.products_data:
            print("No data to save!")
            return
        df = pd.DataFrame(self.products_data)
        df.to_csv(filename, sep=',', index=False, encoding='utf-8')
        print(f"Data has been saved to {filename}")

In [12]:
if __name__ == "__main__":
    # Product-listing endpoint of the Vietnamese Uniqlo storefront.
    base_url = "https://www.uniqlo.com/vn/api/commerce/v3/vi/products"

    # Build the scraper, walk every page, then persist the rows as CSV.
    scraper = UniqloAPIScraper(base_url=base_url)
    scraper.scrape()
    scraper.save_to_csv(filename="uniqlo_products.csv")

Fetching data for page 1...
Fetching data for page 2...
Fetching data for page 3...
Fetching data for page 4...
Fetching data for page 5...
Fetching data for page 6...
Fetching data for page 7...
Fetching data for page 8...
Fetching data for page 9...
Fetching data for page 10...
Fetching data for page 11...
Fetching data for page 12...
Fetching data for page 13...
Fetching data for page 14...
Fetching data for page 15...
Fetching data for page 16...
Fetching data for page 17...
Fetching data for page 18...
Fetching data for page 19...
Fetching data for page 20...
Fetching data for page 21...
Fetching data for page 22...
Fetching data for page 23...
Fetching data for page 24...
Fetching data for page 25...
Fetching data for page 26...
Fetching data for page 27...
Fetching data for page 28...
Fetching data for page 29...
Fetching data for page 30...
Fetching data for page 31...
Fetching data for page 32...
Fetching data for page 33...
Fetching data for page 34...
Fetching data for page 