In [6]:
%pip install -r "requirements_outlines.txt"

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [22]:
import transformers
import accelerate
import outlines
import json
import ipywidgets
from outlines import from_transformers, Generator
from pydantic import BaseModel, Field, conlist
from typing import List, Literal, Annotated, Optional

In [10]:
model = from_transformers(
    transformers.AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", device_map="auto"),
    transformers.AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct"),
)

Fetching 4 files: 100%|██████████| 4/4 [00:27<00:00,  6.95s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.73it/s]


In [None]:
model = from_transformers(
    transformers.AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0"),
    transformers.AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0"),
)

In [18]:
# Define the structured output schema for semantic role labeling
class Blame(BaseModel):
    blame: bool = Field(description="Is blame present in the given sentence?")
    blamee: str = Field(description="The patient that is being blamed for something (Who / What is affected)")
    arguments: Annotated[
        List[str],
        Field(min_length=1, max_length=3, description="At least one argument that specifies what the blamee is being blamed for, this must be shorter than the full sentence")
    ]
    text: str = Field(description="The exact substring from the sentence that shows the blame")
    #blame: bool = Field(description="Is blame present in the given sentence?")
    #blamee: str = Field(description="The patient that is being blamed for something (Who / What is affected)")
    #arguments: List[str] = Field(description="List of arguments that the blamee is being blamed for causing")
    #text: str = Field(description="Part of the original text that identifies the blame")

#class BlamerBlamee(BaseModel):
    #blamer: str = Field(description="Blamer: The agent blaming another patient")
    #blamee: str = Field(description="Blamee: The patient that is being blamed for something. (Who / What is affected)")
    #arguments: List[Blame] = Field(description="List of arguments that the blamee is being blamed for causing")

class Blaming(BaseModel):
    sentence: str = Field(description="The original sentence")
    arguments: Annotated[
        List[Blame],
        Field(min_length=1, max_length=1, description="List of who or what is being blamed, and for what they are being blamed")
    ]
    #arguments: conlist(Blame, min_items=0, max_items=2)# = Field(description="List of who or what is being blamed, and for what they are being blamed")

# Create a structured generator
generator = Generator(model, Blaming)

# Example text for semantic role labeling
text = "The immigrants are causing the loss of danish culture, but they are also very relevant when looking at increased turkish pizza places."

# Create the prompt
prompt = f"""Perform blame identification only on the following sentence, do not generate alternate sentences. 

Sentence: {text}

Rules:
- Start by looking into whether or not blame is present at all in the sentence 
- then identify who is being blamed, what they are being blamed for, and the arguments the person blaming them are using.
- Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
- The "text" field must be EXACTLY the sentence provided above - do not modify it
- Do NOT create alternative phrasings or opposite statements
- If blame=true, "blamee" must NOT be empty and "arguments" must contain the specific outcome they are blamed for (which could be "economic recession", "increased crime rates") NEVER pass an empty token here.
- Do not leave arguments as an empty string.

The relevant semantic roles:
- Blamee: The patient receiving the blame (who or what is being blamed for something)
- Argument: What is the blamee being blamed for.

Analyze the sentence and output your result as concisely as possible in the specified JSON format."""

# Generate structured output
result = generator(prompt, max_new_tokens=512)



Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [19]:
print(result)

{"sentence": "The immigrants are causing the loss of danish culture, but they are also very relevant when looking at increased turkish pizza places.", "arguments": [ {"blame": false, "blamee": "", "arguments": [""], "text": "The immigrants are causing the loss of danish culture, but they are also very relevant when looking at increased turkish pizza places."} ]}


In [20]:
data = json.loads(result)
print(json.dumps(data, indent=2))

{
  "sentence": "The immigrants are causing the loss of danish culture, but they are also very relevant when looking at increased turkish pizza places.",
  "arguments": [
    {
      "blame": false,
      "blamee": "",
      "arguments": [
        ""
      ],
      "text": "The immigrants are causing the loss of danish culture, but they are also very relevant when looking at increased turkish pizza places."
    }
  ]
}


In [25]:
# Define the Pydantic model for structured output
class BlameAnalysis(BaseModel):
    text: str = Field(description="The exact original sentence being analyzed")
    blame: bool = Field(description="Whether blame is present in the sentence")
    blamee: Optional[str] = Field(
        default=None,
        description="Who or what is being blamed (must not be empty if blame=true)"
    )
    arguments: Optional[str] = Field(
        default=None,
        description="What the blamee is being blamed for - the specific negative outcome (must not be empty if blame=true)"
    )

# Create a structured generator
generator = Generator(model, BlameAnalysis)

# Define your prompt
sentence = "The immigrants are causing the loss of danish culture, but they are also very relevant when looking at increased turkish pizza places"

prompt = f"""Perform blame identification on the following sentence.

Sentence: {sentence}

Rules:
- Start by determining whether blame is present at all in the sentence
- Identify who is being blamed, what they are being blamed for, and the arguments used
- Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
- The "text" field must be EXACTLY the sentence provided above - do not modify it
- If blame=true, "blamee" must NOT be empty and "arguments" must contain the specific outcome they are blamed for
- Do not leave arguments as an empty string

Semantic roles:
- Blamee: The patient receiving the blame (who or what is being blamed)
- Argument: What is the blamee being blamed for (the negative outcome)

Output your analysis in JSON format."""

# Generate structured output
result = generator(prompt, max_new_tokens=256)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [27]:
# Define the Pydantic model for structured output
class BlameAnalysis(BaseModel):
    text: str = Field(description="The exact original sentence being analyzed")
    blame: bool = Field(description="Whether blame is present in the sentence")
    blamee: Optional[str] = Field(
        default=None,
        description="Who or what is being blamed (must not be empty if blame=true)"
    )
    arguments: Optional[str] = Field(
        default=None,
        description="What the blamee is being blamed for - the specific negative outcome (must not be empty if blame=true)"
    )

In [33]:
prompt = f"""Perform blame identification on the following sentence.

Sentence: {current_sent}

Rules:
- Start by determining whether blame is present at all in the sentence
- Identify who is being blamed, what they are being blamed for, and the arguments used
- Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
- The "text" field must be EXACTLY the sentence provided above - do not modify it
- If blame=true, "blamee" must NOT be empty and "arguments" must contain the specific outcome they are blamed for
- Do not leave arguments as an empty string

Semantic roles:
- Blamee: The patient receiving the blame (who or what is being blamed)
- Argument: What is the blamee being blamed for (the negative outcome)

Output your analysis in JSON format."""

NameError: name 'current_sent' is not defined

In [29]:
sentences = [
    # Blame
    "The government is responsible for the rising cost of healthcare.",
    "Corruption among politicians has led to the misuse of public funds.",
    "Corporate lobbying is to blame for weak environmental policies.",
    "The ruling party has failed to address income inequality.",
    "Ineffective leadership caused the crisis in public housing.",

    # No blame
    "Healthcare costs have been increasing steadily over the past decade.",
    "Income inequality continues to be a pressing social issue.",
    "Public housing availability has declined in recent years.",
    "Environmental policies remain a major topic of debate.",
    "The distribution of public funds is a central issue in current politics."
]


In [31]:
generator = Generator(model, BlameAnalysis)

In [37]:
for sentence in sentences:
    prompt = f"""Perform blame identification on the following sentence.
    Sentence: {sentence}

    Rules:
    - Start by determining whether blame is present at all in the sentence
    - Identify who is being blamed, what they are being blamed for, and the arguments used
    - Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
    - The "text" field must be EXACTLY the sentence provided above - do not modify it
    - If blame=true, "blamee" must NOT be empty and "arguments" must contain the specific outcome they are blamed for
    - Do not leave arguments as an empty string

    Semantic roles:
    - Blamee: The patient receiving the blame (who or what is being blamed)
    - Argument: What is the blamee being blamed for (the negative outcome)

    Output your analysis in JSON format."""
    result = generator(prompt, max_new_tokens=128)
    data = json.loads(result)
    #print(json.dumps(data, indent=2))
    # Parsing json for saving
    result_out = BlameAnalysis.model_validate_json(result)
    # Appending to file to support multiple runs
    with open("result_blame.json", "a") as f:
       json.dump(result_out.model_dump(), f, indent=2)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [26]:
data = json.loads(result)
print(json.dumps(data, indent=2))

{
  "text": "The immigrants are causing the loss of danish culture, but they are also very relevant when looking at increased turkish pizza places",
  "blame": true,
  "blamee": "immigrants",
  "arguments": "loss of danish culture"
}


In [None]:
result_out = Blaming.model_validate_json(result)

In [None]:
# Saving to file chunk
with open("srl_result_blame.json", "w") as f:
       json.dump(result_out.model_dump(), f, indent=2)

In [25]:
print(result_out)

sentence='John ate an apple in the park yesterday' predicates=[Predicate(predicate='ARGM', arguments=[Argument(role='ARGM_LOC', text='in'), Argument(role='ARGM_MNR', text='at'), Argument(role='ARGM_PRP', text='the'), Argument(role='ARGM_LOC', text='park'), Argument(role='ARGM_MNR', text='in'), Argument(role='ARGM_PRP', text='the'), Argument(role='ARGM_LOC', text='last'), Argument(role='ARGM_MNR', text='night')])]


In [None]:
# Define the structured output schema for semantic role labeling
class Argument(BaseModel):
    role: str = Field(description="Semantic role (e.g., ARG0/Agent, ARG1/Patient, ARGM-TMP/Time, ARGM-LOC/Location)")
    text: str = Field(description="The text span filling this role")

class Predicate(BaseModel):
    predicate: str = Field(description="The main predicate/verb")
    agent: str = Field(description="ARG0: The entity performing the action (who/what does it)")
    patient: str = Field(description="ARG1: The entity affected by the action (who/what is affected)", default="")
    arguments: List[Argument] = Field(description="List of semantic arguments for this predicate")

class SemanticRoleLabeling(BaseModel):
    sentence: str = Field(description="The original sentence")
    predicates: List[Predicate] = Field(description="List of predicates with their semantic roles")

# Create a structured generator
generator = Generator(model, SemanticRoleLabeling)

# Example text for semantic role labeling
text = "John ate an apple in the park yesterday."

# Create the prompt
prompt = f"""Perform semantic role labeling on the following sentence. Identify all predicates and their semantic arguments.

Sentence: {text}

Common semantic roles:
- ARG0: Agent (who performs the action)
- ARG1: Patient/Theme (what is affected by the action)
- ARG2: Recipient, Beneficiary, or Instrument
- ARGM-TMP: Temporal (when)
- ARGM-LOC: Location (where)
- ARGM-MNR: Manner (how)
- ARGM-PRP: Purpose (why)

Analyze the sentence and output the semantic role labeling in the specified JSON format."""

# Generate structured output
result = generator(prompt, max_new_tokens=2048)

'''
# Print the results
print("Original sentence:", result.sentence)
print("\nSemantic Role Labeling:")
for pred in result.predicates:
    print(f"\nPredicate: {pred.predicate}")
    for arg in pred.arguments:
        print(f"  {arg.role}: {arg.text}")

'''