In [188]:
import requests
import pandas as pd
import json
import os
from dotenv import load_dotenv
from openai import OpenAI


# Load variables from a local .env file (if one exists) into the process
# environment; load_dotenv was imported but never called.
load_dotenv()

# Hand API_KEY to the client explicitly: the original discarded the result of
# os.getenv, so the key was never used. An unset or empty API_KEY becomes None,
# in which case the OpenAI client falls back to its default OPENAI_API_KEY
# environment variable, exactly as the bare OpenAI() call did.
client = OpenAI(api_key=os.getenv("API_KEY") or None)

In [191]:
def get_products(page=1, page_size=90, category="tents", timeout=30):
    """Request one page of an REI category listing in JSON form.

    Args:
        page: Page number; sent as the ``page`` query parameter and echoed in
            the ``referer`` header.
        page_size: Sent as the ``pagesize`` query parameter.
        category: Category slug used in the URL path (``/c/{category}``).
        timeout: Seconds passed to ``requests.get``. Without a timeout a
            stalled connection would block the kernel indefinitely.

    Returns:
        The ``requests.Response`` returned by ``requests.get``. The HTTP
        status is not checked here; callers decide how to handle errors.
    """
    # Browser-like headers (Chrome 125 on Windows) sent with the request.
    headers = {
        'authority': 'www.rei.com',
        'accept': '*/*',
        'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8,hu;q=0.7',
        'priority': 'u=1, i',
        'referer': f'https://www.rei.com/c/{category}?page={page}',
        'sec-ch-ua': '"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    }

    params = {
        'json': 'true',
        'page': page,
        'pagesize': page_size,
    }

    return requests.get(
        f'https://www.rei.com/c/{category}',
        params=params,
        headers=headers,
        timeout=timeout,
    )

def get_reviews(product_id, limit=12, offset=0, timeout=30):
    """Request a page of reviews for one product from the Bazaarvoice API.

    Args:
        product_id: Product id interpolated into the ``productid:eq:...`` filter.
        limit: Sent as the ``limit`` query parameter.
        offset: Sent as the ``offset`` query parameter.
        timeout: Seconds passed to ``requests.get``. Without a timeout a
            stalled connection would block the kernel indefinitely.

    Returns:
        The ``requests.Response`` returned by ``requests.get``. The HTTP
        status is not checked here.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,hu;q=0.7',
        'Connection': 'keep-alive',
        'Origin': 'https://www.rei.com',
        # NOTE(review): the Referer always names product 189281, whatever
        # product_id is requested -- confirm the API does not depend on it.
        'Referer': 'https://www.rei.com/product/189281/alpine-mountain-gear-weekender-tent-4',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'cross-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    params = {
        "resource": "reviews",
        "action": "REVIEWS_N_STATS",
        # A list value is sent by requests as a repeated "filter" parameter.
        "filter": [
            f"productid:eq:{product_id}",
            "contentlocale:eq:en*,en_US,en_US",
            "isratingsonly:eq:false"
        ],
        "filter_reviews": "contentlocale:eq:en*,en_US,en_US",
        "include": "authors,products",
        "filteredstats": "reviews",
        "Stats": "Reviews",
        "limit": limit,
        "offset": offset,
        "sort": "submissiontime:desc",
        # NOTE(review): hardcoded passkey -- presumably the public key the
        # site's front end uses; confirm, or move it to configuration.
        "passkey": "thvpbov9ywkkl4nkhbeq0wm1i",
        "apiversion": "5.5",
        "displaycode": "15372-en_us"
    }

    return requests.get(
        'https://api.bazaarvoice.com/data/reviews.json',
        headers=headers,
        params=params,
        timeout=timeout,
    )
    
def review_themes(review_json_string):
    """Ask the chat model to categorise the themes of one product review.

    Args:
        review_json_string: The review serialised as a JSON string; sent
            verbatim as the user message.

    Returns:
        The chat completion object returned by
        ``client.chat.completions.create``. The system prompt asks for the
        message content to be JSON of the form ``{"themes": [...]}``.
    """
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You will be provided a product review in the following json format:\n'{\"rating\": 1, \"title\": \"Poor flap zipper\", \"ReviewText\": \"After one use, I was unable to get the zipper to work properly with the front flap.  REI did take it back with no issues.\", \"OriginalProductName\": \"ALPS Mountaineering Camp Creek 6 Tent\"}'\n\nyou will categorise the themes related to the review outputted in a valid json format like so:\n{\"themes\":[\"theme1\", \"theme2\", \"theme3\"]}"
                    }
                ]
            },
            {
                "role": "user",
                "content": review_json_string
            }
        ],
        # JSON mode: the caller feeds the content straight into json.loads, so
        # ask the API to constrain the output to a valid JSON object instead
        # of relying on the prompt alone. The prompt already mentions "json",
        # which JSON mode requires.
        response_format={"type": "json_object"},
        temperature=1,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )

In [192]:
# Fetch the first listing page with the default arguments and collect its product ids.
product_response = get_products()
# Fail fast with an explicit HTTP error when the request was rejected or
# failed, instead of an opaque JSON decode error / KeyError further down.
product_response.raise_for_status()
product_response_json = product_response.json()

product_ids = [product["prodId"] for product in product_response_json["searchResults"]["results"]]

In [193]:
# Download the first page of reviews for the first ten products only, printing
# each product id as it is requested. The raw responses are kept; parsing
# happens in a later cell.
sampled_product_ids = product_ids[:10]
review_list = []
for product_id in sampled_product_ids:
    print(product_id)
    review_response = get_reviews(product_id)
    review_list.append(review_response)
    

189281
227740
204311
202274
189270
185632
189282
204308
202982
164811


In [194]:
# Flatten every review response into one list of trimmed-down review dicts.
list_of_clean_reviews = []
for review_response in review_list:
    review_response_json = review_response.json()
    reviews = review_response_json["Results"]
    clean_reviews = []
    for review in reviews:
        # Keep only the fields used downstream. Key order matters because it
        # determines the JSON sent to the model and the printed output.
        clean_reviews.append(
            {
                "review_id": review["Id"],
                "product_id": review["ProductId"],
                "rating": review["Rating"],
                "title": review["Title"],
                "ReviewText": review["ReviewText"],
                "OriginalProductName": review["OriginalProductName"],
            }
        )
    list_of_clean_reviews.extend(clean_reviews)

In [195]:
# Keep only the low-rated reviews (rating below 3). The selected dicts are the
# same objects held in list_of_clean_reviews, not copies.
bad_reviews = list(
    filter(lambda review: review["rating"] < 3, list_of_clean_reviews)
)

In [206]:
# Ask the model for the themes of each low-rated review and attach them to the
# review dict in place under "themes".
for review_payload in bad_reviews:
    print(review_payload)
    # Leave out any "themes" stored by an earlier run of this cell, so that
    # re-running it sends the model the same payload as the first run instead
    # of feeding the previous answer back in.
    model_input = {key: value for key, value in review_payload.items() if key != "themes"}
    review_payload_string = json.dumps(model_input)
    themes_response = review_themes(review_payload_string)
    theme_string = themes_response.choices[0].message.content
    try:
        theme_response = json.loads(theme_string)
        review_payload["themes"] = theme_response["themes"]
    except (TypeError, ValueError, KeyError) as error:
        # The completion may be empty, not valid JSON, or missing the "themes"
        # key. Report it and store an empty list so one bad answer neither
        # aborts the whole loop nor breaks later cells that read
        # item["themes"].
        print(f"Could not parse themes for review {review_payload.get('review_id')}: {error!r}")
        review_payload["themes"] = []
    
    

{'review_id': '346891208', 'product_id': '189281', 'rating': 1, 'title': 'Poor zipper', 'ReviewText': 'The zipper broke on 1st use, just separated in the middle. And we could not get it back on the track because there is no start anywhere.', 'OriginalProductName': 'Alpine Mountain Gear Weekender Tent 4', 'themes': ['zipper quality', 'durability', 'customer service']}
{'review_id': '256830169', 'product_id': '189281', 'rating': 1, 'title': 'Product delivered broken', 'ReviewText': 'Tent arrived with the shock-cord in one of the tent poles already snapped.  When I tried to assemble a second pole that shock-cord snapped too.  Good thing we set it up before attempting to go camping.  This was purchased via the REI Outlet.', 'OriginalProductName': 'Alpine Mountain Gear Weekender Tent 4'}
{'review_id': '345639673', 'product_id': '204311', 'rating': 1, 'title': 'Arrived Used and With Broken Parts', 'ReviewText': 'Ordered this for family camping trips. Thank goodness I put it together ahead of

In [220]:
# Inspect the low-rated reviews; each dict now also carries the "themes" list
# attached by the theme-categorisation loop.
bad_reviews

[{'review_id': '346891208',
  'product_id': '189281',
  'rating': 1,
  'title': 'Poor zipper',
  'ReviewText': 'The zipper broke on 1st use, just separated in the middle. And we could not get it back on the track because there is no start anywhere.',
  'OriginalProductName': 'Alpine Mountain Gear Weekender Tent 4',
  'themes': ['zipper quality', 'durability', 'customer service']},
 {'review_id': '256830169',
  'product_id': '189281',
  'rating': 1,
  'title': 'Product delivered broken',
  'ReviewText': 'Tent arrived with the shock-cord in one of the tent poles already snapped.  When I tried to assemble a second pole that shock-cord snapped too.  Good thing we set it up before attempting to go camping.  This was purchased via the REI Outlet.',
  'OriginalProductName': 'Alpine Mountain Gear Weekender Tent 4',
  'themes': ['quality control', 'shipping issues', 'product durability']},
 {'review_id': '345639673',
  'product_id': '204311',
  'rating': 1,
  'title': 'Arrived Used and With Bro

In [218]:
# Flatten the per-review theme lists into one list, keeping the review order
# and any duplicate labels.
list_of_themes = [theme for item in bad_reviews for theme in item["themes"]]

In [219]:
# Show the flattened theme labels from all low-rated reviews; duplicates and
# mixed casing are kept exactly as the model returned them.
list_of_themes

['zipper quality',
 'durability',
 'customer service',
 'quality control',
 'shipping issues',
 'product durability',
 'poor quality control',
 'missing parts',
 'customer service',
 'product availability',
 'product quality',
 'customer service',
 'return policy',
 'product comparison',
 'Assembly Difficulty',
 'Lack of Storage',
 'Packing Difficulty',
 'Waterproof Ability',
 'Return Policy',
 'difficulty in setup',
 'pole fit issues',
 'disappointment',
 'leaking zipper',
 'inadequate rain fly coverage',
 'not suitable for rainy conditions',
 'quality',
 'design',
 'expectations',
 'pricing',
 'customer service',
 'trust',
 'waterproofing',
 'reliability',
 'rain protection',
 'Poor Quality',
 'Limited Instructions',
 'Difficult Assembly',
 'Inadequate Rain Protection',
 'missing instructions',
 'poor customer support',
 'difficulty in setup',
 'Durability',
 'Quality',
 'Customer Service',
 'poor quality',
 'durability',
 'customer service',
 'Incomplete directions',
 'Issues with r