In [None]:
import pandas as pd
from prisma_llm import AI
import os

In [None]:
# Read the usability cases from CSV; the batch step later in this notebook
# reads the "description" column of this frame.
USABILITY_CASES_CSV = "./llm/usability_cases.csv"
usability_df = pd.read_csv(USABILITY_CASES_CSV)


In [None]:
# LLM client imported from prisma_llm; categorize_case sends its prompts
# through ai.chat(...).
ai = AI()

In [None]:
def build_subcategory_prompt(description):
    """Return the prompt asking the LLM for one case's usability sub-category.

    The prompt lists the known sub-categories, the answering rules, and then
    the case text wrapped in double quotes, ending with an ``Answer:`` cue.

    Args:
        description: The case description. It is interpolated with default
            string formatting, so non-string values are rendered via ``format``.

    Returns:
        str: The complete prompt text.
    """
    known_subcategories = (
        "Reset and Recovery",
        "Configuration and Setup",
        "User Interface Issues",
        "Integration Issues",
        "Error Messages",
        "Data Export and Import",
        "Feature Requests",
        "Manual or Documentation Issues",
        "Instructions Clarity Problems",
        "Unexpected Software Behavior",
    )
    bullet_list = "\n".join(f"- {label}" for label in known_subcategories)

    intro = (
        "\n"
        "I have a set of tech support case descriptions that have already been classified as **Usability** cases.\n"
        "\n"
        # The two spaces before the newline are part of the prompt text; keep them.
        "I want to identify the specific usability-related issues customers are reporting.  \n"
        "Please analyze each case carefully and classify it into **one of the following sub-categories**, or propose a new one if necessary:\n"
        "\n"
    )
    rules = (
        "\n"
        "\n"
        "**Important**:\n"
        "- Carefully read the case description and select the sub-category that best fits the issue.\n"
        '- If none of the above categories fit well, choose "**Other Usability Issue**" and optionally suggest a better-fitting sub-category.\n'
        "- If you suggest a new sub-category, please ensure it is relevant to usability issues and not a general tech support issue.\n"
        '- **Answer with only the sub-category name** (e.g., "Software User Interface Problems"). No explanations.\n'
        "\n"
        "---\n"
        "\n"
    )
    case_section = f'Case: "{description}"\n\nAnswer:\n'

    return intro + bullet_list + rules + case_section


In [None]:
def categorize_case(description):
    """Ask the LLM for the usability sub-category of a single case.

    Builds the prompt with ``build_subcategory_prompt`` and sends it through
    the notebook-level ``ai`` client.

    Args:
        description: The case description to classify.

    Returns:
        Whatever ``ai.chat`` returns for the prompt, passed through unchanged.
    """
    prompt = build_subcategory_prompt(description)
    return ai.chat(prompt)

In [None]:
def subcategorize_usability_cases_in_batches(
    usability_df, batch_size=5000, output_dir="./llm", categorizer=None
):
    """Sub-categorize usability cases batch by batch, writing one CSV per batch.

    For each consecutive slice of ``batch_size`` rows, the ``description``
    column is passed row by row to ``categorizer`` and the result is stored in
    a new ``sub_category`` column of a copy of the slice (``usability_df``
    itself is not modified). Each batch is written to
    ``<output_dir>/subcategorized_usability_cases_<start>_<end>.csv`` and a
    progress line plus the batch's ``sub_category`` value counts are printed.

    Args:
        usability_df: DataFrame with a ``description`` column.
        batch_size: Number of rows per batch; must be at least 1.
        output_dir: Directory for the per-batch CSV files. Created if missing.
        categorizer: Callable mapping one description to its sub-category.
            Defaults to ``categorize_case``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    if categorizer is None:
        categorizer = categorize_case

    # Fail on an unusable output location before any batch work is done,
    # rather than after a whole batch has already been categorized.
    os.makedirs(output_dir, exist_ok=True)

    total_cases = len(usability_df)
    for start in range(0, total_cases, batch_size):
        end = min(start + batch_size, total_cases)
        batch = usability_df.iloc[start:end].copy()

        # Apply the sub-categorization function to each case
        batch["sub_category"] = batch["description"].apply(categorizer)

        # Save the batch to a CSV file
        output_path = os.path.join(
            output_dir, f"subcategorized_usability_cases_{start}_{end}.csv"
        )
        batch.to_csv(output_path, index=False)

        # Print progress and distribution; report the path that was really
        # written so the log can be trusted when locating batch files.
        print(f"✅ Sub-categorized cases from {start} to {end} and saved to {output_path}")
        print(batch["sub_category"].value_counts())
        print("----------")


In [None]:
# Run the sub-categorization over all loaded cases in batches of up to 2,500
# rows (overriding the function's default of 5,000); each batch is written to
# its own CSV under ./llm/.
subcategorize_usability_cases_in_batches(usability_df, batch_size=2500)

In [None]:
# Merge the per-batch files into a single DataFrame.
# Only names of the exact form subcategorized_usability_cases_<start>_<end>.csv
# are picked up, so unrelated files sharing the prefix are ignored.
# NOTE: batch files left over from a run with a different batch_size still match
# and would duplicate rows; clear stale batch files before merging.
batch_prefix = "subcategorized_usability_cases_"
batch_suffix = ".csv"
indexed_files = []
for name in os.listdir("./llm/"):
    if not (name.startswith(batch_prefix) and name.endswith(batch_suffix)):
        continue
    bounds = name[len(batch_prefix):-len(batch_suffix)].split("_")
    if len(bounds) == 2 and all(bound.isdecimal() for bound in bounds):
        indexed_files.append((int(bounds[0]), int(bounds[1]), name))

# os.listdir returns names in arbitrary order, and a plain string sort would put
# "10000_..." before "2500_...", so order the batches numerically by row range.
subcategorized_files = [name for _, _, name in sorted(indexed_files)]
if not subcategorized_files:
    raise ValueError("No subcategorized_usability_cases_<start>_<end>.csv files found in ./llm/")

subcategorized_dfs = []
for file in subcategorized_files:
    df = pd.read_csv(os.path.join("./llm/", file))
    subcategorized_dfs.append(df)
subcategorized_usability_df = pd.concat(subcategorized_dfs, ignore_index=True)

# Save the final sub-categorized DataFrame to a CSV file
subcategorized_usability_df.to_csv("./llm/subcategorized_usability_cases.csv", index=False)
