In [1]:
from transformers import pipeline
import pandas as pd
import torch

In [2]:
# Load the data
# Forward slashes keep this relative path usable on both Windows and POSIX, and
# avoid the invalid escape sequences ("\p", "\I", "\E") that the previous
# backslash-separated literal relied on Python passing through unchanged.
file_path = "../../Data/processed/Intents/ExtractedIntents_UniqueCount-10_time-20241222-1556.xlsx"
data = pd.read_excel(file_path)

# Extract the structured conversations and intents
structured_conversations = data['structured_conversations']
intents = data['intents']

In [3]:
# Build the Llama text-generation pipeline.
# The options are collected in one dict so they can be read at a glance.
model_id = "meta-llama/Llama-3.2-3B-Instruct"
pipeline_options = {
    "model": model_id,
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
}
pipe = pipeline("text-generation", **pipeline_options)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [22]:
def extract_relationships(conversation_text, entities, max_new_tokens: int = 256) -> str:
    """
    Extract relationships between entities and intents from structured conversation text.

    Builds a system + user chat prompt and sends it to the module-level
    text-generation pipeline ``pipe``. The reply is returned as raw text; it is
    not parsed or validated here.

    Args:
        conversation_text: The structured conversation to analyse.
        entities: The entities/intents already extracted for the conversation;
            interpolated into the prompt through its string form.
        max_new_tokens: Generation budget forwarded to the pipeline. Defaults to
            256, the value that used to be hard-coded; raise it if the returned
            list of triples is cut off.

    Returns:
        The ``content`` of the last message in the pipeline's first result, or
        "No relationships found." when the conversation text is missing/blank
        or the pipeline returns nothing.
    """
    no_result = "No relationships found."

    # Skip the expensive model call when there is nothing to analyse.
    # NaN (which is how pandas typically represents an empty cell) is the only
    # value that compares unequal to itself.
    if (
        conversation_text is None
        or conversation_text != conversation_text
        or not str(conversation_text).strip()
    ):
        return no_result

    # Define the system prompt for Llama
    system_prompt = """
    You are a relationship extraction model. Your task is to identify meaningful relationships between entities and intents in the provided structured conversations. 
    The relationships are represented as RDF triples (subject, predicate, object). 
    Common relationships include:
    - hasIssue: Links a product/service to an issue.
    - appliesTo: Links an issue to an intent.
    - resolvesWith: Links an issue to a resolution.
    - providedBy: Links a product/service to a provider.
    
    Provide your response in this format:
    [
        {"subject": "<entity>", "predicate": "<relationship>", "object": "<entity or intent>"}
    ]

    What you MUST do:
    - YOU HAVE TO USE OUTPUT STRUCTURE AND DO NOT ADD ANY EXTRA TEXT BEFORE OR AFTER THE OUTPUT STRUCTURE.
    - JUST ANSWER WITH OUTPUT STRUCTURE.
    """

    # Create messages for the pipeline
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": f"""
            Here is the conversation text: '{conversation_text}'.
            Extracted entities and intents: {entities}.
            Identify relationships between these elements and provide RDF triples.
            """,
        },
    ]

    # Generate relationships using Llama pipeline
    output = pipe(messages, max_new_tokens=max_new_tokens)
    if not output:
        return no_result

    # "generated_text" holds the whole chat; the last entry is the newly
    # generated message, whose "content" is the model's answer.
    reply = output[0]["generated_text"][-1]
    return reply.get("content", "")


In [23]:
# Number of rows to send through the model; set to None to process every row.
# Kept at 1 so this cell still handles only the first row, exactly as the
# former unconditional `break` at the end of the loop body did.
MAX_ROWS = 1

rows_to_process = data if MAX_ROWS is None else data.head(MAX_ROWS)

results = []

for _, row in rows_to_process.iterrows():
    conversation_text = row['structured_conversations']
    entities = row['intents']  # Assuming 'intents' contains the extracted entities

    # Extract relationships
    relationships = extract_relationships(conversation_text, entities)

    # Store the results
    results.append({"conversation": conversation_text, "entities": entities, "relationships": relationships})


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [27]:
# Show the relationships text stored for the first processed conversation
first_result = results[0]
print(first_result.get("relationships"))

[
    {"subject": "sprintcare", "predicate": "providedBy", "object": "Company_name"},
    {"subject": "sprintcare", "predicate": "hasIssue", "object": "poor service"},
    {"subject": "poor service", "predicate": "appliesTo", "object": "Complaint"},
    {"subject": "poor service", "predicate": "appliesTo", "object": "Request for Assistance"},
    {"subject": "account review", "predicate": "appliesTo", "object": "Request for Assistance"}
]


In [7]:
# # Save results to a JSON file
# import json

# output_path = "relationships_output.json"
# with open(output_path, "w") as f:
#     json.dump(results, f, indent=4)
