In [1]:
%pip install -r "requirements_outlines.txt"

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import transformers
import accelerate
import outlines
import json
import ipywidgets
from outlines import from_transformers, Generator
from pydantic import BaseModel, Field, conlist
from typing import List, Literal, Annotated, Optional

In [6]:
# Single source of truth for the Hugging Face checkpoint, so the model weights
# and the tokenizer can never be loaded from two different ids by accident.
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

# Wrap the transformers model + tokenizer pair in an Outlines model object.
# device_map="auto" leaves the placement of the weights on the available
# devices to transformers.
model = from_transformers(
    transformers.AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto"),
    transformers.AutoTokenizer.from_pretrained(MODEL_ID),
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
import gc

import torch

# Release the loaded LLM so its memory can be reclaimed.
# WARNING: this removes the global `model`. The Generator cells further down
# need `model`, so run this cell only when you are done generating, or re-run
# the model-loading cell afterwards.
# NOTE(review): objects that still reference the model (e.g. a Generator built
# from it) presumably keep it alive until they are deleted too - confirm.
#
# pop() instead of `del model` keeps the cell re-runnable: `del` raises
# NameError when `model` does not exist (fresh kernel, or cell run twice).
globals().pop("model", None)
gc.collect()

# Release unused cached GPU memory and reset the peak-memory counters.
# Guarded so the cell also runs on machines without CUDA.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()


In [7]:
# Pydantic schema that pins down the structured output expected from the LLM
# for blame identification.
class BlameAnalysis(BaseModel):
    # Required: the sentence that was analyzed
    text: str = Field(..., description="The exact original sentence being analyzed")
    # Required: the blame / no-blame verdict
    blame: bool = Field(..., description="Whether blame is present in the sentence")
    # The two fields below default to None. The "must not be empty if
    # blame=true" wording lives only in the descriptions; nothing in this class
    # validates it.
    blamee: Optional[str] = Field(
        None,
        description="Who or what is being blamed (must not be empty if blame=true)",
    )
    arguments: Optional[str] = Field(
        None,
        description="What the blamee is being blamed for - the specific negative outcome (must not be empty if blame=true)",
    )

In [8]:
# Example inputs: five sentences that assign blame ...
blame_sentences = [
    "The government is responsible for the rising cost of healthcare.",
    "Corruption among politicians has led to the misuse of public funds.",
    "Corporate lobbying is to blame for weak environmental policies.",
    "The ruling party has failed to address income inequality.",
    "Ineffective leadership caused the crisis in public housing.",
]

# ... and five sentences on the same topics that do not.
neutral_sentences = [
    "Healthcare costs have been increasing steadily over the past decade.",
    "Income inequality continues to be a pressing social issue.",
    "Public housing availability has declined in recent years.",
    "Environmental policies remain a major topic of debate.",
    "The distribution of public funds is a central issue in current politics.",
]

# Blame examples first, then the neutral ones.
sentences = [*blame_sentences, *neutral_sentences]


In [9]:
# Build the generator used for blame identification: it pairs the loaded
# `model` with the BlameAnalysis schema as its output type.
generator = Generator(model, BlameAnalysis)

In [10]:
# Run blame identification on every sentence and keep ALL analyses.
# Each entry is the validated BlameAnalysis for one sentence, as a plain dict.
analyses = []
for sentence in sentences:
    prompt = f"""Perform blame identification on the following sentence.
    Sentence: {sentence}

    Rules:
    - Start by determining whether blame is present at all in the sentence
    - Identify who is being blamed, what they are being blamed for, and the arguments used
    - Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
    - The "text" field must be EXACTLY the sentence provided above - do not modify it
    - If blame=true, "blamee" must NOT be empty and "arguments" must contain the specific outcome they are blamed for
    - Do not leave arguments as an empty string

    Semantic roles:
    - Blamee: The patient receiving the blame (who or what is being blamed)
    - Argument: What is the blamee being blamed for (the negative outcome)

    Output your analysis in JSON format."""
    # `result` is the raw JSON text returned by the generator; it stays bound
    # after the loop (last sentence) for inspection in the next cell.
    # NOTE(review): 128 new tokens may be too few for long sentences; a
    # truncated JSON would fail validation below - confirm.
    result = generator(prompt, max_new_tokens=128)
    # Parse and validate the JSON against the schema before saving.
    result_out = BlameAnalysis.model_validate_json(result)
    analyses.append(result_out.model_dump())

# Write once, after the loop. Mode "w" still overwrites the file on every run
# (no duplicates across runs), but opening it inside the loop would replace
# the previous sentence's result each iteration and keep only the last one.
with open("result_blame.json", "w", encoding="utf-8") as f:
    json.dump(analyses, f, indent=2)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


KeyboardInterrupt: 

In [None]:
# Decode the most recent raw generator output and show it as indented JSON.
data = json.loads(result)
pretty = json.dumps(data, indent=2)
print(pretty)

In [None]:
# Structured output schema for semantic role labeling.
# Argument: one (role, text span) pair belonging to a predicate.
class Argument(BaseModel):
    # Required: the role label
    role: str = Field(
        ...,
        description="Semantic role (e.g., ARG0/Agent, ARG1/Patient, ARGM-TMP/Time, ARGM-LOC/Location)",
    )
    # Required: the words that fill the role
    text: str = Field(..., description="The text span filling this role")

# Predicate: one predicate of the sentence together with its roles.
class Predicate(BaseModel):
    # Required: the predicate itself
    predicate: str = Field(..., description="The main predicate/verb")
    # Required: ARG0
    agent: str = Field(
        ...,
        description="ARG0: The entity performing the action (who/what does it)",
    )
    # Only field with a default: an empty string when omitted
    patient: str = Field(
        "",
        description="ARG1: The entity affected by the action (who/what is affected)",
    )
    # Required: every labeled argument as an Argument entry
    arguments: List[Argument] = Field(
        ...,
        description="List of semantic arguments for this predicate",
    )

# Top-level result object: the sentence plus every predicate found in it.
class SemanticRoleLabeling(BaseModel):
    # Required: the analyzed sentence
    sentence: str = Field(..., description="The original sentence")
    # Required: one Predicate entry per predicate
    predicates: List[Predicate] = Field(
        ...,
        description="List of predicates with their semantic roles",
    )

# Create a structured generator
# NOTE: this rebinds the notebook-wide names `generator`, `prompt` and
# `result`. After this cell `generator` produces SemanticRoleLabeling output,
# so re-create the BlameAnalysis generator before re-running the blame cells.
generator = Generator(model, SemanticRoleLabeling)

# Example text for semantic role labeling
text = "John ate an apple in the park yesterday."

# Create the prompt
prompt = f"""Perform semantic role labeling on the following sentence. Identify all predicates and their semantic arguments.

Sentence: {text}

Common semantic roles:
- ARG0: Agent (who performs the action)
- ARG1: Patient/Theme (what is affected by the action)
- ARG2: Recipient, Beneficiary, or Instrument
- ARGM-TMP: Temporal (when)
- ARGM-LOC: Location (where)
- ARGM-MNR: Manner (how)
- ARGM-PRP: Purpose (why)

Analyze the sentence and output the semantic role labeling in the specified JSON format."""

# Generate structured output
result = generator(prompt, max_new_tokens=2048)

# Elsewhere in this notebook the generator output is handled as JSON text
# (json.loads / model_validate_json), so it has no `.sentence` attribute.
# Validate it into the schema first, then read the fields from the parsed
# object. Raises a pydantic validation error if the JSON does not match.
srl = SemanticRoleLabeling.model_validate_json(result)

# Print the results
print("Original sentence:", srl.sentence)
print("\nSemantic Role Labeling:")
for pred in srl.predicates:
    print(f"\nPredicate: {pred.predicate}")
    for arg in pred.arguments:
        print(f"  {arg.role}: {arg.text}")