source: https://medium.com/@lucafiaschi/turning-customers-feedback-into-action-an-llm-blueprint-for-app-review-analysis-7f5d39d08f6e

In [1]:
import json
import pandas as pd
import numpy as np

from joblib import Parallel, delayed
import datetime
import time

import openai
import os
from google import genai

from yaml_helper import YamlParser

In [136]:
# Load the raw review/rating export into a DataFrame.
df = pd.read_csv("data/headhway_review_rating.csv")

In [138]:
# Preview the reviews rated below 4 (displayed as the cell output).
df[df["rating"] < 4]

In [3]:
# Write the reviews rated below 4 to a sample CSV, without the index column.
df[df["rating"] < 4].to_csv(
    "data/headhway_review_rating_sample.csv",
    index=False,
)

In [4]:
# Keep only the reviews rated below 4 and renumber the rows from 0.
df = df[df["rating"] < 4].reset_index(drop=True)

In [103]:
# Rebind `df` to the contents of data/bad_df.csv; whatever `df` held before
# this cell is discarded.
df = pd.read_csv("data/bad_df.csv")

In [104]:
# Load the API keys from local YAML credential files.
# NOTE(review): YamlParser is a project helper; presumably .read() returns the
# parsed YAML as a dict keyed by setting name — confirm against yaml_helper.
creds = YamlParser("creds/openai.yml").read()
OPENAI_API_KEY = creds["OPENAI_API_KEY"]

# The Gemini key lives in its own file under the "GEMINI_KEY" entry.
GEMINI_API_KEY = YamlParser("creds/gemini.yml").read()["GEMINI_KEY"]

In [105]:
# Load the prompt template that classify_topic fills in for every review.
# Fail fast: if the template cannot be read, MARKDOWN_PROMPT would stay
# undefined and every later use would die with a confusing NameError, so the
# error is reported and then re-raised to stop the run here.
try:
    with open("prompts/message_sentiments_prompt.md", 'r', encoding='utf-8') as file:
        # MARKDOWN_PROMPT now holds the entire content of the file.
        MARKDOWN_PROMPT = file.read()
except FileNotFoundError:
    print("Error: The file could not be found.")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# Show the loaded template so it can be checked by eye.
print(MARKDOWN_PROMPT)

In [43]:
def retry_on_error(max_retries=5, sleep_time=5):
    """
    Build a decorator that retries a function when it raises.

    The wrapped function is called up to ``max_retries`` times. Any
    ``Exception`` is printed and, unless it was the last attempt, followed by
    a pause of ``sleep_time`` seconds before the next try.

    Args:
        max_retries: Maximum number of attempts before giving up.
        sleep_time: Seconds to wait between two consecutive attempts.

    Returns:
        A decorator. The decorated function returns the wrapped function's
        result on the first successful attempt, or ``None`` when every
        attempt failed (or when ``max_retries`` is not positive).
    """
    # Imported locally so the decorator does not rely on a file-level import.
    import functools

    def decorator_retry(func):
        # functools.wraps keeps func's __name__ / __doc__ on the wrapper, which
        # keeps logs, tracebacks and introspection pointing at the real function.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    print(f"An error occurred: {e}")
                    # Only wait when another attempt will follow; sleeping
                    # after the final failure just delays returning None.
                    if attempt < max_retries:
                        time.sleep(sleep_time)
            return None

        return wrapper

    return decorator_retry

In [44]:
# --- Setup Logging ---
import logging

# One log file per calendar day, named after the ISO date (logs/YYYY-MM-DD.log).
date_string = datetime.datetime.now().date().isoformat()
log_file_path = f"logs/{date_string}.log"
# FileHandler does not create missing parent directories, so make sure the
# log folder exists before the handler tries to open the file.
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the logged review text can contain non-ASCII
        # characters that a platform-default encoding may fail to write.
        logging.FileHandler(log_file_path, encoding="utf-8"),
        # logging.StreamHandler() # Also print to console
    ]
)

In [45]:
# Set the module-level API key and build an explicit client instance;
# classify_topic sends its chat-completion requests through `client`.
openai.api_key = OPENAI_API_KEY
client = openai.OpenAI(api_key=OPENAI_API_KEY)

# Disabled alternative: build the client with the Gemini key and a Google
# base URL instead of the default OpenAI endpoint.
# client = openai.OpenAI(
#     api_key=GEMINI_API_KEY,
#     base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
# )
def _parse_json_reply(raw):
    """Parse the model's reply as JSON, tolerating a Markdown code fence."""
    payload = (raw or "").strip()
    if payload.startswith("```"):
        # Drop the opening fence line (``` or ```json) and the closing fence.
        payload = payload.split("\n", 1)[1] if "\n" in payload else ""
        payload = payload.rsplit("```", 1)[0]
    return json.loads(payload)


@retry_on_error()
def classify_topic(text, sleep_time=0.001, number=1):
    """
    Classify one review by sending it to the chat model and parsing the reply.

    The review is substituted into MARKDOWN_PROMPT (a %-style template), sent
    as a single user message, and the reply is parsed as JSON.

    Args:
        text: Review text to substitute into the prompt template.
        sleep_time: Seconds to pause after a successful request.
        number: Identifier written to the log line for this review.

    Returns:
        The parsed JSON reply with the original review stored under the
        ``'content'`` key. Because of the ``retry_on_error`` decorator, any
        exception (API error, invalid JSON, ...) triggers a retry, and
        ``None`` is returned once all retries are used up.
    """
    # Wrap in a one-element tuple so the text is always treated as a single
    # substitution value, whatever its type.
    prompt = MARKDOWN_PROMPT % (text,)
    response = client.chat.completions.create(
        model="gpt-4.1",
        messages=[
            {"role": "user", "content": prompt}
        ],
        max_tokens=2048,
        n=1,
        temperature=0.1,
    )

    result = _parse_json_reply(response.choices[0].message.content)
    result['content'] = text
    time.sleep(sleep_time)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("%s - %s", number, result['content'])
    return result

In [None]:
# Classify the reviews one at a time, in row order. An explicit loop (rather
# than a comprehension) leaves the results gathered so far in `res` if the
# run is interrupted part-way through.
res = []

for index, row in df.iterrows():
    review_text = row.get('content')
    outcome = classify_topic(text=review_text, number=index)
    res.append(outcome)

In [46]:
# res = Parallel(n_jobs=1, verbose=2)(delayed(classify_topic)(text=row.get('content'), number=index) for index, row in df.iterrows())

# df_result = pd.DataFrame(res)

In [None]:
# res = Parallel(n_jobs=1, verbose=2)(delayed(print)("try:\n" + text) for text in df.content)