In [7]:
## IMPORTS
import openai
import pandas as pd
import numpy as np
import json
from prompts import PRODUCT_PROMPT, ISSUE_TYPE_PROMPT, SERVICES_PROMPT, RELATIONSHIP_PROMPT, RESOLUTION_COMPLETION_PROMPT
import os 
from dotenv import load_dotenv
from tqdm import tqdm

# Load environment variables from the .env file into the process environment
load_dotenv()

# Read the OpenAI API key from the environment (None when the variable is unset)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Shared OpenAI client used by the extraction helpers defined in this notebook
client = openai.OpenAI(api_key=OPENAI_API_KEY)

In [8]:
# Base name (no extension) of the dataset; the Excel paths read and written by later cells are built from it
data_name = "twcs_structured_UniqueCount-4000_time-20250420-1907"

In [9]:
# Load the sampled conversations workbook. The path is assembled with
# os.path.join: the previous literal relied on the invalid escape sequences
# "\p" and "\s" and on hard-coded Windows separators. On Windows the
# resulting path is unchanged.
data = pd.read_excel(
    os.path.join('..', '..', 'data', 'processed', 'sample', f'{data_name}.xlsx')
)

In [10]:
## Product Extraction
def extract_product(text):
    """Send one conversation to the chat model under PRODUCT_PROMPT.

    Args:
        text: Conversation text, passed verbatim as the user message.

    Returns:
        The message content of the first completion choice, unparsed.
    """
    # NOTE(review): this is the only extractor that targets "gpt-4o"; the
    # sibling extractors use "gpt-4o-mini" -- confirm that is intentional.
    request = {
        "model": "gpt-4o",
        "messages": [
            {"role": "system", "content": PRODUCT_PROMPT},
            {"role": "user", "content": text},
        ],
        "temperature": 0,
        "top_p": 0.95,
    }
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

## Issue Type Extraction
def extract_issue_type(text):
    """Send one conversation to the chat model under ISSUE_TYPE_PROMPT.

    Args:
        text: Conversation text, passed verbatim as the user message.

    Returns:
        The message content of the first completion choice, unparsed.
    """
    system_message = {"role": "system", "content": ISSUE_TYPE_PROMPT}
    user_message = {"role": "user", "content": text}

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_message, user_message],
        temperature=0,
        top_p=0.95,
    )

    reply = completion.choices[0].message
    return reply.content

## Services Extraction
def extract_services(text):
    """Send one conversation to the chat model under SERVICES_PROMPT.

    Args:
        text: Conversation text, passed verbatim as the user message.

    Returns:
        The message content of the first completion choice, unparsed.
    """
    chat_messages = [
        {"role": "system", "content": SERVICES_PROMPT},
        {"role": "user", "content": text},
    ]
    sampling = {"temperature": 0, "top_p": 0.95}

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        **sampling,
    )
    return completion.choices[0].message.content

## Relationship Extraction
def extract_relationship(conversation_text, entities):
    """Ask the chat model, under RELATIONSHIP_PROMPT, for RDF triples linking the entities.

    Args:
        conversation_text: Conversation text embedded (single-quoted) in the
            user message.
        entities: Previously extracted entities, formatted into the user
            message as-is.

    Returns:
        The message content of the first completion choice, unparsed.
    """
    # The literal's leading whitespace is part of the prompt sent to the
    # model, so its lines are kept exactly as written.
    user_content = f"""
                Here is the conversation text: \n'{conversation_text}'.
                Extracted entities: \n{entities}.
                Identify relationships between these elements and provide RDF triples.
                """
    chat_messages = [
        {"role": "system", "content": RELATIONSHIP_PROMPT},
        {"role": "user", "content": user_content},
    ]

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        temperature=0,
        top_p=0.95,
    )
    return completion.choices[0].message.content

def resolution_completion(conversation_text, relationship_text):
    """Send a conversation and its triples to the chat model under RESOLUTION_COMPLETION_PROMPT.

    Args:
        conversation_text: Conversation text embedded (single-quoted) in the
            user message.
        relationship_text: Triples text, formatted into the user message as-is.

    Returns:
        The message content of the first completion choice, unparsed.
    """
    # The literal's leading whitespace is part of the prompt sent to the
    # model, so its lines are kept exactly as written.
    user_content = f"""
                Here is the conversation text: \n'{conversation_text}'.
                Triples:: \n{relationship_text}.
                """
    chat_messages = [
        {"role": "system", "content": RESOLUTION_COMPLETION_PROMPT},
        {"role": "user", "content": user_content},
    ]

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        temperature=0,
        top_p=0.95,
    )
    return completion.choices[0].message.content

In [11]:
def safe_json_load(value):
    """Parse a cell value into a dict, falling back to ``{}`` for anything else.

    Args:
        value: A JSON string, an already-parsed dict, or any other object
            (None, NaN, list, number, ...).

    Returns:
        ``value`` itself when it is a dict; the decoded object when ``value``
        is a string holding a JSON *object*; otherwise an empty dict.
        The result is always a dict, so callers can use ``.get`` safely.
    """
    if isinstance(value, dict):
        return value

    # None, NaN, pd.NA, lists, arrays and numbers all end up here. Checking the
    # type directly avoids `pd.isna`, which returns an array for list-likes
    # and makes `if pd.isna(value)` raise ValueError.
    if not isinstance(value, str):
        return {}

    text = value.strip()
    if not text:
        return {}

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return {}

    # Valid JSON is not necessarily an object ("[]", "42", "null", ...);
    # returning those would break the `.get` calls made on the result.
    return parsed if isinstance(parsed, dict) else {}

def process_dataframe(df):
    """Collect each row's extracted products, services and issue types into one JSON string.

    For every row the ``Product``, ``Services`` and ``Issue Type`` cells are
    parsed with ``safe_json_load``; the values under their ``product``,
    ``service`` and ``issue_type`` keys are gathered under ``products``,
    ``services`` and ``issue_types``. Missing or falsy values become ``[]``.

    Args:
        df: DataFrame that may hold ``Product``, ``Services`` and
            ``Issue Type`` columns; a missing column is treated as empty.

    Returns:
        A DataFrame with a single ``entities`` column of JSON strings. It
        carries ``df``'s index, so an index-aligned
        ``pd.concat([df, result], axis=1)`` pairs every row with its own
        entities even when ``df`` does not have a default RangeIndex. An
        empty ``df`` yields an empty frame that still has the column.
    """
    import math  # local import: only the sanitizer below needs it

    def _json_safe(value):
        """Recursively replace NaN/Infinity floats with None so strict JSON dumping succeeds."""
        if isinstance(value, float) and not math.isfinite(value):
            return None
        if isinstance(value, dict):
            return {key: _json_safe(item) for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [_json_safe(item) for item in value]
        return value

    entity_strings = []

    for _, row in df.iterrows():
        product_data = safe_json_load(row.get("Product", ""))
        service_data = safe_json_load(row.get("Services", ""))
        issue_data = safe_json_load(row.get("Issue Type", ""))

        entities = {
            "products": product_data.get("product", []) or [],
            "services": service_data.get("service", []) or [],
            "issue_types": issue_data.get("issue_type", []) or []
        }

        # Replace NaN/Infinity with None before dumping. `json.loads` accepts
        # a bare `NaN` token, and NaN is truthy so it survives the `or []`
        # above; `allow_nan=False` would then raise ValueError instead of
        # replacing it.
        entity_strings.append(json.dumps(_json_safe(entities), allow_nan=False))

    return pd.DataFrame({"entities": entity_strings}, index=df.index)


In [12]:

# Register `progress_apply` on pandas objects so each pass shows a progress bar
tqdm.pandas(desc="Processing structured conversations")

# Extract issue types, products, and services (one model call per row for each column)
data['Issue Type'] = data['structured_conversations'].progress_apply(extract_issue_type)
data['Product'] = data['structured_conversations'].progress_apply(extract_product)
data['Services'] = data['structured_conversations'].progress_apply(extract_services)

# Combine the three raw responses into one JSON `entities` string per row
processed_df = process_dataframe(data)

# Attach the entities by position with a plain column assignment.
# `pd.concat([data, processed_df], axis=1)` appended a *second* `entities`
# column whenever this cell was re-run (e.g. after an interruption), after
# which `row['entities']` returns a Series instead of a string.
data['entities'] = processed_df['entities'].to_numpy()

# Extract relationships with progress tracking
data['relationship'] = data.progress_apply(
    lambda row: extract_relationship(row['cleaned_conversations'], row['entities']), axis=1
)

# Extract resolution completion with progress tracking
data['resolution'] = data.progress_apply(
    lambda row: resolution_completion(row['cleaned_conversations'], row['relationship']), axis=1
)

Processing structured conversations: 100%|██████████| 5041/5041 [1:09:41<00:00,  1.21it/s]
Processing structured conversations: 100%|██████████| 5041/5041 [1:09:59<00:00,  1.20it/s]
Processing structured conversations: 100%|██████████| 5041/5041 [1:01:39<00:00,  1.36it/s]
Processing structured conversations:  35%|███▌      | 1788/5041 [1:07:58<2:03:39,  2.28s/it]


KeyboardInterrupt: 

In [14]:
# Write the extracted frame next to the other processed data. The path is
# assembled with os.path.join: the previous literal relied on the invalid
# escape sequences "\p" and "\e" and on hard-coded Windows separators. On
# Windows the resulting path is unchanged.
data.to_excel(
    os.path.join('..', '..', 'data', 'processed', 'extraction_output', f'{data_name}_extracted.xlsx'),
    index=False,
)

In [13]:
# Display the extracted DataFrame as the cell output
data

Unnamed: 0,user_id,conversations,company_name,cleaned_conversations,structured_conversations,Issue Type,Product,Services,entities
0,268784,Customer: @SpotifyCares hi I’ve just received ...,SpotifyCares,Customer hi Ive just received this offer from ...,"[{'Company_name': 'SpotifyCares'}, {'conversat...","{\n ""Company_name"": ""SpotifyCares"",\n ""issue...","{\n ""Company_name"": ""SpotifyCares"",\n ""produ...","{\n ""Company_name"": ""SpotifyCares"",\n ""servi...","{""products"": [], ""services"": [], ""issue_types""..."
1,268784,Customer: @British_Airways flew to Heathrow th...,British_Airways,Customer flew to Heathrow this morning and tho...,"[{'Company_name': 'British_Airways'}, {'conver...","{\n ""Company_name"": ""British_Airways"",\n ""is...","{\n ""Company_name"": ""British_Airways"",\n ""pr...","{\n ""Company_name"": ""British_Airways"",\n ""se...","{""products"": [], ""services"": [], ""issue_types""..."
2,526050,Customer: @116035 can bill pay be set up to pa...,BofA_Help,Customer can bill pay be set up to pay the ful...,"[{'Company_name': 'BofA_Help'}, {'conversation...","{\n ""Company_name"": ""BofA_Help"",\n ""issue_ty...","{\n ""Company_name"": ""BofA_Help"",\n ""product""...","{\n ""Company_name"": ""BofA_Help"",\n ""service""...","{""products"": [""bill pay"", ""credit card""], ""ser..."
3,526050,Customer: @116035 can bill pay be set up to pa...,BofA_Help,Customer can bill pay be set up to pay the ful...,"[{'Company_name': 'BofA_Help'}, {'conversation...","{\n ""Company_name"": ""BofA_Help"",\n ""issue_ty...","{\n ""Company_name"": ""BofA_Help"",\n ""product""...","{\n ""Company_name"": ""BofA_Help"",\n ""service""...","{""products"": [""bill pay"", ""credit card""], ""ser..."
4,526050,Customer: @116035 can bill pay be set up to pa...,BofA_Help,Customer can bill pay be set up to pay the ful...,"[{'Company_name': 'BofA_Help'}, {'conversation...","{\n ""Company_name"": ""BofA_Help"",\n ""issue_ty...","{\n ""Company_name"": ""BofA_Help"",\n ""product""...","{\n ""Company_name"": ""BofA_Help"",\n ""service""...","{""products"": [""bill pay"", ""credit card""], ""ser..."
...,...,...,...,...,...,...,...,...,...
5036,755527,Customer: @115858 whyyyyy is the “I️” messed u...,AppleSupport,Customer whyyyyy is the I messed up every time...,"[{'Company_name': 'AppleSupport'}, {'conversat...","{\n ""Company_name"": ""AppleSupport"",\n ""issue...","{\n ""Company_name"": ""AppleSupport"",\n ""produ...","{\n ""Company_name"": ""AppleSupport"",\n ""servi...","{""products"": [], ""services"": [""iOS""], ""issue_t..."
5037,663177,"Customer: Dear @115821 , \n\nYou told me my pa...",AmazonHelp,Customer Dear \n\nYou told me my package would...,"[{'Company_name': 'AmazonHelp'}, {'conversatio...","{\n ""Company_name"": ""AmazonHelp"",\n ""issue_t...","{\n ""Company_name"": ""AmazonHelp"",\n ""product...","{\n ""Company_name"": ""AmazonHelp"",\n ""service...","{""products"": [""AmazonPrime""], ""services"": [""Am..."
5038,559872,Customer: @AppleSupport my phone keeps freezin...,AppleSupport,Customer my phone keeps freezing with newest i...,"[{'Company_name': 'AppleSupport'}, {'conversat...","{\n ""Company_name"": ""AppleSupport"",\n ""issue...","{\n ""Company_name"": ""AppleSupport"",\n ""produ...","{\n ""Company_name"": ""AppleSupport"",\n ""servi...","{""products"": [""phone"", ""iOS update""], ""service..."
5039,273726,Customer: @131742 wer can I find a code for tr...,askvisa,Customer wer can I find a code for transap\nCo...,"[{'Company_name': 'askvisa'}, {'conversation':...","{\n ""Company_name"": ""askvisa"",\n ""issue_type...","{\n ""Company_name"": ""askvisa"",\n ""product"": ...","{\n ""Company_name"": ""askvisa"",\n ""service"": ...","{""products"": [], ""services"": [], ""issue_types""..."
