In [1]:
import openai
import json
from pydantic import BaseModel, ValidationError
from typing import List, Literal, Optional
from getpass import getpass

In [2]:
# Ask for the key interactively so it is never stored in the notebook source.
api_key = getpass(prompt="🔐 Enter your OpenAI API key: ")

# Shared OpenAI client used by the cells below.
client = openai.OpenAI(api_key=api_key)

🔐 Enter your OpenAI API key: ··········


In [10]:
# --- Pydantic Models ---

class Claim(BaseModel):
    """A single claim, identified only by its text."""

    # The claim's wording; this is the string a LinkedPremise refers back to.
    text: str

class Premise(BaseModel):
    """A single premise, identified only by its text."""

    # The premise's wording as it will be listed in the prompt.
    text: str

class LinkedPremise(BaseModel):
    """A premise paired with the text of the claim it was linked to, if any."""

    # Text of the premise being linked.
    premise_text: str
    # Text of the linked claim; None means the premise matched no claim.
    # The explicit default matters under Pydantic v2, where a bare
    # Optional[str] annotation is nullable but still *required*: without it,
    # a response that omits the key entirely would fail validation.
    linked_claim_text: Optional[str] = None

class LinkingOutput(BaseModel):
    """Top-level result of the linking step: one entry per linked premise."""

    # Mirrors the "linked_premises" array of the JSON the model is asked to return.
    linked_premises: List[LinkedPremise]

In [13]:
# --- Function to Link Premises to Claims ---

def _strip_code_fence(raw: str) -> str:
    """Return `raw` without a surrounding Markdown code fence, if it has one."""
    text = raw.strip()
    if len(text) >= 6 and text.startswith("```") and text.endswith("```"):
        text = text[3:-3]
        # Drop the rest of the opening fence line (e.g. a "json" language tag),
        # unless the JSON payload itself starts on that line.
        first_line, newline, remainder = text.partition("\n")
        if newline and not first_line.strip().startswith(("{", "[")):
            text = remainder
    return text.strip()


def link_premises_to_claims(
    claims: List[Claim],
    premises: List[Premise],
    model: str = "gpt-4",
    temperature: float = 0.2,
) -> LinkingOutput:
    """Ask the chat model to link each premise to the claim it supports or attacks.

    Args:
        claims: Candidate claims; their texts are listed under "CLAIMS:".
        premises: Premises to link; their texts are listed under "PREMISES:".
        model: Chat model name passed to the OpenAI API.
        temperature: Sampling temperature passed to the OpenAI API.

    Returns:
        The model's answer parsed and validated as a LinkingOutput. An empty
        `premises` list yields an empty result without calling the API.

    Raises:
        json.JSONDecodeError: The reply is empty or not valid JSON.
        pydantic.ValidationError: The JSON does not match LinkingOutput.
    """
    # Nothing to link: skip the (billable) API round-trip.
    if not premises:
        return LinkingOutput(linked_premises=[])

    system_prompt = """
You are an expert in argument mining. Your job is to link PREMISES to the CLAIMS they support or attack.

Instructions:
- You will receive a list of CLAIMS and a list of PREMISES.
- Link each premise to the single claim it supports or attacks.
- linked_claim_text must exactly match the text of one of the CLAIMS, or be null if the premise matches no claim.
- Return a valid JSON response in this format:

{
  "linked_premises": [
    {
      "premise_text": "...",
      "linked_claim_text": "..."
    }
  ]
}
Only return the JSON. Do not include explanations.
"""

    # Build the user prompt as one bullet per claim / premise.
    prompt_lines = ["CLAIMS:"]
    prompt_lines.extend(f"- {claim.text}" for claim in claims)
    prompt_lines.extend(["", "PREMISES:"])
    prompt_lines.extend(f"- {premise.text}" for premise in premises)
    user_prompt = "\n".join(prompt_lines)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
        temperature=temperature,
    )

    # `content` may be None (e.g. a refusal); treat that as an empty reply so
    # it surfaces as a JSONDecodeError instead of an uncaught TypeError.
    raw_output = response.choices[0].message.content or ""

    try:
        # Chat models often wrap JSON in ```json fences despite instructions.
        data = json.loads(_strip_code_fence(raw_output))
        # model_validate reports a non-object payload as a ValidationError,
        # whereas LinkingOutput(**data) would raise an uncaught TypeError.
        return LinkingOutput.model_validate(data)
    except (json.JSONDecodeError, ValidationError):
        print("❌ Failed to parse or validate OpenAI output.")
        print("Raw Output:\n", raw_output)
        raise

In [14]:
# --- Example Usage ---

if __name__ == "__main__":
    # Example inputs, kept as plain dicts so they mirror the JSON shape.
    claims_data = [
        {"text": "Remote work improves employee productivity."},
        {"text": "EVs have zero tailpipe emissions."},
        {"text": "AI-generated images mimic style but lack original thought."},
        {"text": "3D-printed organs with the help of AI could reduce transplant wait times."},
    ]
    premises_data = [
        {"text": "People working remotely often report fewer distractions and better focus."},
        {"text": "Most EVs are charged with electricity generated from fossil fuels."},
    ]

    # Validate the raw dicts into Pydantic model instances.
    claims = [Claim(**claim_fields) for claim_fields in claims_data]
    premises = [Premise(**premise_fields) for premise_fields in premises_data]

    # Run the linker and pretty-print the validated result.
    result = link_premises_to_claims(claims, premises)
    print(result.model_dump_json(indent=2))

{
  "linked_premises": [
    {
      "premise_text": "People working remotely often report fewer distractions and better focus.",
      "linked_claim_text": "Remote work improves employee productivity."
    },
    {
      "premise_text": "Most EVs are charged with electricity generated from fossil fuels.",
      "linked_claim_text": "EVs have zero tailpipe emissions."
    }
  ]
}


In [15]:
from google.colab import files
import json

# Serialize the linking result first, then write it to disk in one go.
serialized_result = json.dumps(result.model_dump(), indent=2)
with open("output.json", "w") as f:
    f.write(serialized_result)

# Trigger a browser download of the saved file (Colab only).
files.download("output.json")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>