In [1]:
import requests
import json
import time
import csv
import copy

# Default language and market segments for the review API URLs.
LOCALE = "en"
MARKET = "us" # default `market` argument of fetch_all_reviews

# HTTP headers copied from the browser DevTools; sent with every request.
COMMON_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
    "Accept": "application/json, text/plain, */*",
    "Origin": "https://www.ikea.com",
    "Referer": "https://www.ikea.com/",
    "x-client-id": "a1047798-0fc4-446e-9616-0afe3256d0d7", # change when no longer available
}

# Template for the reviews POST body, adapted from the request payload seen in
# the browser DevTools. fetch_all_reviews deep-copies it and overwrites
# page.number for each request; page.size is also used there to detect the
# last page.
BASE_PAYLOAD = {
    "filter": {
        "and": [
            {"field": "sourceLangCode", "value": "en"},
        ],
        "not": [],
    },
    "sort": [
        {"field": "submissionOn", "direction": "desc"},
    ],
    "page": {"size": 20, "number": 1},
}


def fetch_all_reviews(product_id, market=MARKET, locale=LOCALE, timeout=30):
    """Download all review pages for one product and return them as a list.

    A GET is first sent to the rating endpoint (only its status code is
    logged), then the reviews endpoint is POSTed page by page using a copy of
    BASE_PAYLOAD until a page is empty or shorter than the page size.

    Paging stops early, keeping whatever was collected so far, when a POST
    fails at the network level, returns a non-200 status, or returns a body
    that is not valid JSON.

    Args:
        product_id: Product number inserted into the API URLs.
        market: Market segment of the URL (defaults to MARKET).
        locale: Language segment of the URL (defaults to LOCALE).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        list: The review dicts from every fetched page, each with an extra
        "productId" key set to ``product_id``.

    Raises:
        requests.RequestException: If the initial rating GET fails.
    """
    # NOTE(review): BASE_PAYLOAD filters on sourceLangCode "en" regardless of
    # `locale` -- confirm this is intended when a non-English locale is passed.
    rating_url = f"https://web-api.ikea.com/tugc/public/v5/rating/{market}/{locale}/{product_id}"
    reviews_url = f"https://web-api.ikea.com/tugc/public/v5/reviews/{market}/{locale}/{product_id}"

    # The POST headers are identical for every page, so build them once.
    post_headers = {**COMMON_HEADERS, "Content-Type": "application/json"}
    page_size = BASE_PAYLOAD["page"]["size"]
    all_reviews = []

    print(f"product_id={product_id}")

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        # First send a GET request for the product; only the status is logged.
        rating_resp = session.get(rating_url, headers=COMMON_HEADERS, timeout=timeout)
        print("GET rating status:", rating_resp.status_code)

        # Scrape reviews page by page.
        page_number = 1
        while True:
            payload = copy.deepcopy(BASE_PAYLOAD)
            payload["page"]["number"] = page_number

            print(f"\nFetching product {product_id} page {page_number} ...")
            try:
                resp = session.post(
                    reviews_url, headers=post_headers, json=payload, timeout=timeout
                )
            except requests.RequestException as exc:
                # Keep the pages already collected instead of losing them.
                print("POST failed:", exc)
                break
            print("POST status:", resp.status_code)

            # Anything other than 200 ends the scrape for this product.
            if resp.status_code != 200:
                print("Body:", resp.text[:500])
                break

            try:
                data = resp.json()
            except ValueError:
                # A 200 response whose body is not JSON.
                print("Body:", resp.text[:500])
                break
            print("Type of data:", type(data))

            # The body may be a bare list or a dict wrapping the list.
            if isinstance(data, list):
                reviews = data
            elif isinstance(data, dict):
                print("Top-level keys (dict):", list(data.keys()))
                reviews = (
                    data.get("data")
                    or data.get("reviews")
                    or data.get("items")
                    or []
                )
            else:
                # e.g. JSON null or a scalar: treat as "no reviews".
                reviews = []

            if page_number == 1 and reviews:
                print("Sample review:", json.dumps(reviews[0], ensure_ascii=False, indent=2))

            if not reviews:
                print("No more reviews, stop.")
                break

            # Tag every review with the product it belongs to.
            for review in reviews:
                review["productId"] = product_id

            all_reviews.extend(reviews)
            print(f"Got {len(reviews)} reviews, total {len(all_reviews)}")

            # A short page means there is nothing after it.
            if len(reviews) < page_size:
                print("Last page reached.")
                break

            page_number += 1
            time.sleep(0.5)  # small pause between page requests

    return all_reviews

In [2]:
def save_to_csv(reviews, filename="ikea_reviews_multi.csv"):
    """Write review dicts to a CSV file with a fixed set of columns.

    Each review contributes one row. Keys missing from a review become empty
    cells, and keys not listed in ``fields`` are dropped. Nested values (dicts
    and lists) are stored as JSON text so they can be read back with
    ``json.loads``; all other values are written as the csv module renders
    them.

    Args:
        reviews: List of review dicts. If empty, nothing is written.
        filename: Output path. The file is overwritten and encoded as UTF-8
            with a BOM ("utf-8-sig").

    Returns:
        None
    """
    if not reviews:
        print("no reviews got")
        return

    print("First review keys:", reviews[0].keys())

    fields = [
        'productId',
        'itemKey', 'id', 'type', 'sourceCountryCode', 'sourceLangCode',
        'title', 'text', 'primaryRating', 'secondaryRatings', 'isRecommended',
        'verifiedPurchase', 'verifiedReviewer', 'anonymousReviewer',
        'positiveFeedbacksCount', 'negativeFeedbacksCount', 'submissionOn',
        'response', 'hasMedia', 'medias', 'reviewer', 'updatedOn'
    ]

    def _cell(value):
        # str() of a dict/list is Python repr (single quotes, None/True),
        # which is not valid JSON; serialise nested values explicitly.
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False)
        return value

    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        writer.writerow(fields)
        for review in reviews:
            writer.writerow([_cell(review.get(key, "")) for key in fields])

    print(f"Saved {len(reviews)} reviews to {filename}")


In [3]:
if __name__ == "__main__":
    # Some of the mattresses with the highest number of visible reviews.
    PRODUCT_IDS = [
        "70511903",  # VESTERÖY Pocket spring mattress, medium firm/white, Twin
        "50511857",  # VALEVÅG Pocket spring mattress, medium firm/white, Queen
        "30511877",  # VÅGSTRANDA Pocket spring mattress, medium firm/white, Queen
        "60511913",  # ÅNNELAND Hybrid mattress, firm/white, Queen
        "80348520",  # KRUMMELUR Foam mattress for crib, 27 1/2x52
    ]

    # Scrape the products one after another and pool every review.
    all_reviews = []
    for pid in PRODUCT_IDS:
        reviews = fetch_all_reviews(pid)
        print(f"Product {pid} fetched {len(reviews)} reviews")
        all_reviews += reviews

    print("\nTotal fetched for all products:", len(all_reviews))

    # Only write the CSV when there is something to write.
    if not all_reviews:
        print("No reviews fetched for any product.")
    else:
        save_to_csv(all_reviews)



=== 预热 session, product_id=70511903 ===
GET rating status: 200

Fetching product 70511903 page 1 ...
POST status: 200
Type of data: <class 'list'>
Sample review: {
  "itemKey": {
    "itemNo": "00511906",
    "itemType": "art",
    "sourceItemNo": "00511906"
  },
  "id": "2463ad58-7811-4f29-8387-127fd3d13954",
  "type": "web-profile",
  "sourceCountryCode": "ca",
  "sourceLangCode": "en",
  "title": "not for Malm bedframe",
  "text": "this mattress is too low for the Malm bed frame",
  "primaryRating": {
    "ratingRange": 5,
    "ratingValue": 1
  },
  "secondaryRatings": null,
  "isRecommended": false,
  "verifiedPurchase": true,
  "verifiedReviewer": false,
  "anonymousReviewer": false,
  "positiveFeedbacksCount": 0,
  "negativeFeedbacksCount": 0,
  "submissionOn": "2025-11-10T17:18:23Z",
  "response": {
    "text": "Thank you for your review. We are sorry that this product did not meet your expectations. We will be sure to forward your comments to the appropriate department for ev