source: https://medium.com/@lucafiaschi/turning-customers-feedback-into-action-an-llm-blueprint-for-app-review-analysis-7f5d39d08f6e

In [3]:
import datetime
import functools
import json
import logging
import os
import time

import numpy as np
import openai
import pandas as pd
from google import genai
from joblib import Parallel, delayed

from yaml_helper import YamlParser

In [4]:
# Load the raw review export; later cells read its `rating` and `content` columns.
df = pd.read_csv("data/headhway_review_rating.csv")

In [5]:
# Write the reviews with a rating below 4 to a separate sample file, without the index.
df[df["rating"] < 4].to_csv("data/headhway_review_rating_sample.csv", index=False)

In [6]:
# From here on, work only with the reviews whose rating is below 4.
df = df[df["rating"] < 4]

In [7]:
# Display the filtered frame as the cell output (pandas truncates the middle rows).
df

Unnamed: 0,rating,content
0,1,this app is the worst! please avoid it at any ...
1,1,i was charged my membership fee 3 times for on...
2,1,i thought this was an app that would let me si...
3,1,this is a terrible company. they lure you in ...
4,1,content is very superficial .. and then the ty...
...,...,...
14069,1,Poor books.after a few nooksstarts yo think al...
14070,1,Took $120 out after i canceled before the tria...
14071,2,"Pretty sure the ""writers"" are AI. Lots of non-..."
14079,1,Too complicated and too expensive. You have to...


In [41]:
# Read the OpenAI credentials from a local YAML file instead of hardcoding the key here.
# NOTE(review): YamlParser.read() presumably returns a dict-like mapping — confirm in yaml_helper.
creds = YamlParser("creds/openai.yml").read()
OPENAI_API_KEY = creds["OPENAI_API_KEY"]

In [42]:
# Load the classification prompt template used for every review.
try:
    with open("prompts/message_sentiments_prompt.md", 'r', encoding='utf-8') as file:
        MARKDOWN_PROMPT = file.read()
except FileNotFoundError:
    print("Error: The file could not be found.")
    # Re-raise: carrying on would either hit a NameError on the print below or
    # silently reuse a stale MARKDOWN_PROMPT left in the kernel by an earlier run.
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

print(MARKDOWN_PROMPT)

## Classification Instructions  
You are a review classification expert. Given a customer review, analyze it according to the following steps:  
      
### Instructions Details  
    1. Content Screening:  
      - First, check if the review contains any content that should be flagged (inappropriate_content, hate_speech, spam, threat, private_information)  
      - If flagged content is found, note this in the metadata but continue with classification  
    2. Primary Classification:
      - For each relevant review, identify a specific category and subcategory that applies based on the hierarchy table posted below.  
      - Multiple classifications are not allowed. If the review touches multiple areas please select just the category that is most relevant.
    3. Evaluate Sentiment:  
      - Determine the sentiment of the review (positive, negative, neutral, mixed)  
      - If the review is negative, provide a list of key points, keywords
        - keywords must be from list of keyw

In [43]:
def retry_on_error(max_retries=5, sleep_time=5, backoff=1.0):
    """
    Decorator factory that retries a flaky call (e.g. an API request) on any exception.

    Parameters
    ----------
    max_retries : int
        Maximum number of attempts. With 0 the wrapped function is never called.
    sleep_time : float
        Seconds to wait after the first failed attempt.
    backoff : float
        Multiplier applied to the wait after every failed attempt. The default
        of 1.0 keeps the delay constant; 2.0 doubles it each time.

    Returns
    -------
    callable
        A decorator. The decorated function returns the wrapped function's
        result from the first successful attempt, or None once every attempt
        has raised. Each caught exception is printed.
    """

    def decorator_retry(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            delay = sleep_time
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    print(f"An error occurred: {e}")
                    # Waiting is pointless once the last attempt has failed.
                    if attempt < max_retries:
                        time.sleep(delay)
                        delay *= backoff
            return None

        return wrapper

    return decorator_retry

In [44]:
# --- Setup Logging ---
# One log file per calendar day; records go to the file only (console handler disabled).
date_string = datetime.datetime.now().date().isoformat()
log_file_path = f"logs/{date_string}.log"

# FileHandler raises FileNotFoundError if the target directory does not exist yet.
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

# NOTE: basicConfig is a no-op when the root logger already has handlers, so
# re-running this cell in the same kernel does not switch to a new file.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file_path),
        # logging.StreamHandler() # Also print to console
    ]
)

In [45]:
@retry_on_error()
def classify_topic(text, sleep_time=0.001, number=1):
    """
    Classify one review with the OpenAI chat API and return the parsed JSON reply.

    Parameters
    ----------
    text : str
        Review text, substituted into MARKDOWN_PROMPT via %-formatting
        (presumably a single ``%s`` placeholder — confirm in the prompt file).
    sleep_time : float
        Seconds to pause after a successful call.
    number : int
        Identifier written to the log next to the review text.

    Returns
    -------
    dict
        The model's JSON answer with the review text added under "content".
        Any exception (API error, reply that is not valid JSON, ...) is handled
        by the @retry_on_error decorator rather than propagated.
    """
    openai.api_key = OPENAI_API_KEY
    # Explicit one-element tuple so the substitution stays a single value
    # even if `text` itself is a tuple or a mapping.
    prompt = MARKDOWN_PROMPT % (text,)

    client = openai.OpenAI(api_key=OPENAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4.1",
        messages=[
            {"role": "user", "content": prompt}
        ],
        max_tokens=2048,
        n=1,
        temperature=0.1,
    )

    raw = response.choices[0].message.content.strip()
    # Chat models sometimes wrap JSON in a Markdown code fence (``` or ```json).
    # Strip it here instead of failing json.loads and burning a retry.
    if raw.startswith("```"):
        raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:]
        raw = raw.rsplit("```", 1)[0].strip()

    result = json.loads(raw)
    result['content'] = text
    time.sleep(sleep_time)
    # Lazy %-style arguments: the message is only built if the record is emitted.
    logging.info("%s - %s", number, result['content'])
    return result

In [None]:
res = []

# Iterate the `content` column directly instead of building a full row Series
# with iterrows(). Results are appended one at a time so that whatever has been
# classified so far is still in `res` if the run is interrupted.
# Entries may be None when classify_topic exhausted its retries.
for index, text in df["content"].items():
    res.append(classify_topic(text=text, number=index))

In [46]:
# res = Parallel(n_jobs=1, verbose=2)(delayed(classify_topic)(text=row.get('content'), number=index) for index, row in df.iterrows())

# df_result = pd.DataFrame(res)

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  37 tasks      | elapsed:   33.2s


An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29678, Requested 1430. Please try again in 2.216s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29638, Requested 1430. Please try again in 2.136s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29659, Requested 1430. Please try again in 2.178s. Visit https://platform.openai.com/acco

[Parallel(n_jobs=2)]: Done 158 tasks      | elapsed:  5.2min


An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29725, Requested 1291. Please try again in 2.032s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29726, Requested 1291. Please try again in 2.034s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29799, Requested 1291. Please try again in 2.18s. Visit https://platform.openai.com/accou

[Parallel(n_jobs=2)]: Done 361 tasks      | elapsed: 13.1min


An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29739, Requested 1261. Please try again in 2s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29751, Requested 1314. Please try again in 2.13s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}
An error occurred: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4.1 in organization org-YTnjieofFEpNu1s5z7Oh6TDq on tokens per min (TPM): Limit 30000, Used 29925, Requested 1220. Please try again in 2.29s. Visit https://platform.openai.com/account/ra

KeyboardInterrupt: 

In [None]:
# res = Parallel(n_jobs=1, verbose=2)(delayed(print)("try:\n" + text) for text in df.content)