In [1]:
from openai import OpenAI
import pandas as pd
from tqdm import tqdm
import os

In [2]:
# Read the OpenAI API key from the process environment. No .env loading is
# visible in this notebook, so the variable must already be set before the
# kernel starts; the lookup yields None when it is missing.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=OPENAI_API_KEY)

In [3]:
# Load the scraped case studies into the module-level frame that the helper
# functions below read and annotate; process_row reads each row's `text` column.
data = pd.read_csv(filepath_or_buffer='data/case_studies_scraped.csv')

In [4]:
def get_chat_answer(instruction):
    """Send one chat-completion request and return the reply text.

    Args:
        instruction: The chat messages to send, passed through unchanged as
            the `messages` argument (process_row supplies a list holding a
            single {'role': 'user', 'content': ...} dict).

    Returns:
        The `content` of the first choice's message.
        NOTE(review): the API types this field as optional, so it may be
        None for some responses - confirm callers tolerate that.

    Raises:
        IndexError: If the response contains no choices.
        Any exception raised by the OpenAI client propagates unchanged.
    """
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=instruction,
    )
    # Use the typed attributes of the response object directly rather than
    # coercing each level through dict(), which only works while the objects
    # happen to iterate as (field, value) pairs.
    return response.choices[0].message.content

def process_row(i, row, section, prompt):
    """Ask the model about one case study and return its answer.

    Args:
        i: Row label; returned untouched so the caller can match the answer
            to its row.
        row: Mapping-like row; only its 'text' entry is read.
        section: Name of the column being filled. Not used by this function.
        prompt: Instruction text placed in front of the case study.

    Returns:
        A tuple (i, answer). The answer is '' when the text has fewer than
        five whitespace-separated words (a missing value stringifies to a
        single word, so it is skipped as well); no request is sent then.
    """
    text = row['text']
    word_count = len(str(text).split())

    if word_count >= 5:
        user_message = {
            'role': 'user',
            'content': prompt + f'\nHere is a case study text: \n{text}',
        }
        return i, get_chat_answer([user_message])

    # Too little text to analyse: skip the API call entirely.
    return i, ''

def fill_section(section, prompt, row_limit=737, max_workers=4):
    """Fill column `section` of the module-level `data` frame with model answers.

    Rows are processed one at a time, in order; no threads are used.

    Args:
        section: Name of an existing column in `data` to write answers into.
        prompt: Instruction text sent with every case study. Newlines are
            replaced by spaces before it is used.
        row_limit: Upper bound of the slice `data[:row_limit]` to process.
            None processes every row.
        max_workers: Currently unused; kept so existing calls that pass it
            keep working.

    Returns:
        The `data` frame (modified in place), or None when `section` is not
        one of its columns (a message is printed in that case).

    Raises:
        Whatever process_row raises. Answers obtained before the failure are
        still written to `data`.
    """
    prompt = prompt.replace('\n', ' ')

    if section not in data.columns:
        print('Such column does not exist')
        return

    # Size the progress bar from the slice actually iterated, so row_limit=None
    # or a negative limit reports the right total instead of failing in min().
    rows = data[:row_limit]

    results = []
    try:
        for i, row in tqdm(rows.iterrows(), total=len(rows)):
            results.append(process_row(i, row, section, prompt))
    finally:
        # Write back whatever was collected, even if a request failed midway,
        # so completed (paid-for) answers are not thrown away.
        for i, answer in results:
            data.at[i, section] = answer

    return data

In [5]:
# Problem Solved: label each case study with the main problem the AI solution addressed.

# Create (or reset) the target column; fill_section refuses to run on a
# column name that is not already present in `data`.
data['problem_solved'] = None

# Prompt asking the model to pick exactly one label from a fixed list, or to
# give a short free-text description when none of the labels fits.
# Note: fill_section replaces the newlines below with spaces before sending.
prompt = """Analyze the following case study from a SaaS company that provides AI chatbot/assistant solutions. 

Your task: Identify the MAIN PROBLEM that the AI solution solved for the customer.

Choose ONE from this list:
- customer support overload
- high costs of customer support
- high response time
- lack of customer support
- lack of sales process
- low CSAT score
- low conversions
- low product visibility
- low sales
- low user engagement

If the main problem isn't listed above, provide a concise 2-3 word description.

Important:
1. Respond in lowercase only
2. Use the exact wording provided
3. Focus only on the primary problem being solved.
4. DO not provide any other information than the problem description."""


# Run the labelling; fill_section returns `data`, so as the last expression
# of the cell the updated frame is displayed.
fill_section('problem_solved', prompt)

100%|██████████| 14/14 [00:05<00:00,  2.70it/s]


Unnamed: 0,Competitor Name,Case study link,text,problem_solved
0,Floatbot,https://floatbot.ai/case-studies/utility-case-...,Utilities Company | Case Study | Floatbot New ...,customer support overload
1,Assembled,https://www.assembled.com/case-study/autodesk,Autodesk | Assembled ProductsAIÂ FORÂ SUPPORTA...,customer support overload
2,Aisera,https://content.aisera.com/case-studies/reputa...,,
3,Assembled,https://www.assembled.com/case-study/brooks-ru...,Brooks Running | Assembled ProductsAIÂ FORÂ SU...,high response time
4,Salesloft,https://www.salesloft.com/resources/case-studi...,Boosting Business Growth with AI: How Ulrich U...,customer support overload
5,Assembled,https://www.assembled.com/case-study/capital-o...,Capital on Tap | Assembled ProductsAIÂ FORÂ SU...,customer support overload
6,Assembled,https://www.assembled.com/case-study/classpass,Classpass | Assembled ProductsAIÂ FORÂ SUPPORT...,customer support overload
7,Intercom,https://www.intercom.com/customers/code-for-am...,Code for America uses conversational support t...,customer support overload
8,iAdvize,https://www.iadvize.com/en/success-story/vande...,Case Study | Consumer Electronics Retailer Boo...,low csat score
9,Assembled,https://www.assembled.com/case-study/dailypay,,


In [6]:
# Persist the annotated frame. index=False states explicitly that the row
# index is not written (the previous index=None only worked by being falsy),
# and the path is a plain string because it has no placeholders to format.
data.to_csv('data/case_studies_analyzed.csv', index=False)