In [None]:
%pip install -r "requirements_outlines.txt"

In [4]:
import transformers
import accelerate
import outlines
import json
import pandas as pd
import torch
import tqdm
import gc
import ast
from outlines import from_transformers, Generator, models
from pydantic import BaseModel, Field
from typing import List, Optional

In [4]:
'''
This is a chunk for clearing model cache if it becomes necessary to switch to another model without having to reset
'''

# Delete the model object
del model
gc.collect()

# Clear PyTorch cache on GPU
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

# This is a comment to test git


In [5]:
def print_gpu_memory():
    if torch.cuda.is_available():
        print("Cuda available")
        print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        print(f"GPU memory reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")

# Call this before and after model loading
print_gpu_memory()

Cuda available
GPU memory allocated: 0.00 GB
GPU memory reserved: 0.00 GB


In [16]:
model = from_transformers(
    transformers.AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B", device_map="auto", dtype=torch.bfloat16),
    transformers.AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
)
#"meta-llama/Llama-3.2-1B"

In [3]:
'''
This is the DeepSeek 14b model, which at first glance seems to perform better than the Llama model. 
Definitely worth considering if this should be used instead.
'''

model = from_transformers(
    transformers.AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", device_map="auto", torch_dtype=torch.bfloat16),
    transformers.AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-14B")
)

`torch_dtype` is deprecated! Use `dtype` instead!
Fetching 4 files: 100%|██████████| 4/4 [00:34<00:00,  8.60s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.17s/it]


In [4]:
# Defining the pydantic class which ensures the structured output from the llm
class BlameAnalysis(BaseModel):
    text: str = Field(description="The exact original sentence being analyzed")
    blame: bool = Field(description="Whether blame is present in the sentence")
    blamee: Optional[str] = Field(
        default=None,
        description="Who or what is being blamed (must not be empty if blame=true)"
    )
    arguments: Optional[str] = Field(
        default=None,
        description="What the blamee is being blamed for - the specific negative outcome (must not be empty if blame=true)"
    )

In [24]:
# Defining the pydantic class which ensures the structured output from the llm
class BlameAnalysis(BaseModel):
    text: str = Field(description="The exact original sentence being analyzed, never generate new tokens here")
    blame: bool = Field(description="Whether blame is present in the sentence, you must be very certain in your response here")

In [5]:
text_data = pd.read_csv("/work/RuneEgeskovTrust#9638/Bachelor/Bachelor_project/annotation_data_translated_version_03_10.csv", encoding='utf-8')

In [43]:
hello = ast.literal_eval(text_data.loc[2]["da_segmented_text"])
hello[651]

''

In [6]:

paragraph_entry = {}
for i, text in enumerate(text_data["da_segmented_text"]): #check if i is sctually number


    da_segmented_sentences = ast.literal_eval(text_data.loc[i]["da_segmented_text"])

    sentece_entry = {}
    for p, sentence in enumerate(da_segmented_sentences):
        sentece_entry[p] = sentence
    
    paragraph_entry[i] = sentece_entry



In [7]:
paragraph_entry

{0: {0: 'Mødet er åbnet.',
  1: 'I henhold til grundloven er Folketinget i dag  trådt sammen til sit første møde i det nye folketingsår.',
  2: 'Som det medlem, der længst har været medlem af Tinget,  \xa0\xa0påhviler det mig, jf. Folketingets forretningsordens § 2,  stk. 3, at lede valget af Tingets Præsidium og  tingsekretærer.  \xa0\xa0\xa0\xa0\xa0Til formand for Folketinget har samtlige Tingets  medlemmer indstillet hr. Erling Olsen, Socialdemokratiet.',
  3: 'Da  der således ikke foreligger andre indstillinger, vil jeg  betragte indstillingen som vedtaget.',
  4: '(Ophold).',
  5: 'Den er  vedtaget.',
  6: 'Til næstformænd har Tingets fire største partier - ud  over det parti, som formanden tilhører - udpeget følgende  medlemmer:  \xa0\xa0\xa0\xa0\xa0Til første næstformand hr. Ivar Hansen, Venstre, til  anden næstformand hr. Henning Grove, Det Konservative  Folkeparti, til tredje næstformand fru Margrete Auken,  Socialistisk Folkeparti, og til fjerde næstformand fru  Elisabeth Arn

In [25]:
generator = Generator(model, BlameAnalysis)

In [48]:
json_out = {}

# Iterate through outer keys (0 to 36000)
for outer_key, inner_dict in tqdm.tqdm(paragraph_entry.items(), desc="LLama blame (GPU)"):
    json_out[outer_key] = {}  # Initialize nested dict for this outer key
    
    # Iterate through inner keys (0, 1, 2, ...)
    for inner_key, sentence in inner_dict.items():
        if sentence.strip() == "":
            pass
        else:
            prompt = f"""Perform blame identification on the following sentence.
            Sentence: {sentence}

            Rules:
            - Start by determining whether blame is present at all in the sentence
            - Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
            - The "text" field must be EXACTLY the sentence provided above - do not modify it

            Output your analysis in JSON format."""
            
            with torch.no_grad():
                result = generator(prompt, max_new_tokens=256, use_cache=False)
            
            result_out = BlameAnalysis.model_validate_json(result)
            
            # Store in nested structure matching input
            json_out[outer_key][inner_key] = result_out.model_dump()
            
            with open("result_blame.json", "a") as f:
                json.dump({
                    "paragraph": outer_key,
                    "sentence": inner_key,
                    "result": result_out.model_dump()
                }, f, indent=2)
                f.write("\n")

# Save complete nested structure
with open("result_blame_complete.json", "w") as f:
    json.dump(json_out, f, indent=2)

torch.cuda.empty_cache()

LLama blame (GPU):   0%|          | 0/36314 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
LLama blame (GPU):   0%|          | 1/36314 [00:04<45:48:30,  4.54s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end gener

ValidationError: 1 validation error for BlameAnalysis
  Invalid JSON: EOF while parsing a string at line 1 column 514 [type=json_invalid, input_value='{"text":"Erhvervsministe...00 - 20.00 - 21.00 - 22', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid

In [47]:
paragraph_entry[2][676]

''

In [28]:
from accelerate import Accelerator
import torch
import tqdm
import json
from itertools import islice

# 1. Initialize accelerator
accelerator = Accelerator()  # auto-detects GPUs, mixed precision, etc.
device = accelerator.device

# 2. Move your model to the accelerator
#generator = generator.to(device)  # or use pipeline(..., device_map='auto')
generator = accelerator.prepare(generator)

# 3. Helper for batching
def batch_iterable(iterable, batch_size):
    it = iter(iterable)
    while batch := list(islice(it, batch_size)):
        yield batch

# 4. Flatten input data
flat_sentences = []
for outer_key, inner_dict in paragraph_entry.items():
    for inner_key, sentence in inner_dict.items():
        flat_sentences.append((outer_key, inner_key, sentence))

# 5. Batched inference
BATCH_SIZE = 16
json_out = {}

for batch in tqdm.tqdm(batch_iterable(flat_sentences, BATCH_SIZE)):
    prompts = [
        f"""Perform blame identification on the following sentence.
        Sentence: {sentence}

        Rules:
        - Start by determining whether blame is present at all in the sentence
        - Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
        - The "text" field must be EXACTLY the sentence provided above - do not modify it

        Output your analysis in JSON format. /no_think"""
        for (_, _, sentence) in batch
    ]

    with torch.no_grad():
        # accelerator handles distributing model and data automatically
        results = []
        for p in prompts:
            result = generator(p, max_new_tokens=512, use_cache=False)
            results.append(result)


    for (outer_key, inner_key, _), result in zip(batch, results):
        result_out = BlameAnalysis.model_validate_json(result)
        json_out.setdefault(outer_key, {})[inner_key] = result_out.model_dump()

        # incremental write
        with open("result_blame.json", "a") as f:
            json.dump({
                "paragraph": outer_key,
                "sentence": inner_key,
                "result": result_out.model_dump()
            }, f, indent=2)
            f.write("\n")

# save full results
with open("result_blame_complete.json", "w") as f:
    json.dump(json_out, f, indent=2)


0it [00:00, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
0it [00:06, ?it/s]


KeyboardInterrupt: 

In [20]:
import torch
import tqdm
import json
import time
import threading
from queue import Queue
from itertools import islice
from accelerate import Accelerator

# ---------- CONFIG ----------
BATCH_SIZE = 8                      # Adjust based on GPU memory
SAVE_INTERVAL = 200                 # Write every 200 results
CHECKPOINT_INTERVAL_SEC = 300       # Also checkpoint full dict every 5 min
OUTPUT_FILE = "result_blame.json"
CHECKPOINT_FILE = "result_blame_complete.json"
# -----------------------------

# ---------- SETUP ----------
accelerator = Accelerator()
device = accelerator.device

#generator = generator.to(device)
generator = accelerator.prepare(generator)

# Flatten nested dict structure
flat_sentences = []
for outer_key, inner_dict in paragraph_entry.items():
    for inner_key, sentence in inner_dict.items():
        flat_sentences.append((outer_key, inner_key, sentence))

def batch_iterable(iterable, batch_size):
    it = iter(iterable)
    while batch := list(islice(it, batch_size)):
        yield batch

# ---------- ASYNC WRITER ----------
write_queue = Queue()

def writer_thread():
    """Background writer to handle async JSON appends."""
    with open(OUTPUT_FILE, "a") as f:
        while True:
            batch = write_queue.get()
            if batch is None:
                break
            for item in batch:
                json.dump(item, f)
                f.write("\n")
            f.flush()

writer = threading.Thread(target=writer_thread, daemon=True)
writer.start()

# ---------- MAIN LOOP ----------
json_out = {}
buffer = []
last_checkpoint_time = time.time()

for batch in tqdm.tqdm(
    batch_iterable(flat_sentences, BATCH_SIZE),
    total=len(flat_sentences) // BATCH_SIZE,
    desc="LLama blame (multi-GPU)"
):
    prompts = [
        f"""Perform blame identification on the following sentence.
        Sentence: {sentence}

        Rules:
        - Start by determining whether blame is present at all in the sentence
        - Set blame=true ONLY if someone/something is being blamed for causing a negative outcome
        - The "text" field must be EXACTLY the sentence provided above - do not modify it

        Output your analysis in JSON format. /no_think"""
        for (_, _, sentence) in batch
    ]

    # Outlines generator only supports single-string input → iterate
    results = []
    with torch.no_grad():
        for p in prompts:
            result = generator(p, max_new_tokens=256, use_cache=False)
            results.append(result)

    # Process and buffer results
    for (outer_key, inner_key, _), result in zip(batch, results):
        result_out = BlameAnalysis.model_validate_json(result)
        json_out.setdefault(outer_key, {})[inner_key] = result_out.model_dump()

        buffer.append({
            "paragraph": outer_key,
            "sentence": inner_key,
            "result": result_out.model_dump()
        })

        # Flush to async writer when buffer fills
        if len(buffer) >= SAVE_INTERVAL:
            write_queue.put(buffer.copy())
            buffer.clear()
            torch.cuda.empty_cache()

    # Periodic checkpoint of full dict
    if (time.time() - last_checkpoint_time) > CHECKPOINT_INTERVAL_SEC:
        with open(CHECKPOINT_FILE, "w") as f:
            json.dump(json_out, f, indent=2)
        last_checkpoint_time = time.time()

# ---------- FINAL FLUSH ----------
if buffer:
    write_queue.put(buffer.copy())

write_queue.put(None)  # tell writer to stop
writer.join()

# Final save of complete structure
with open(CHECKPOINT_FILE, "w") as f:
    json.dump(json_out, f, indent=2)

torch.cuda.empty_cache()
print("✅ Processing complete — results written to disk.")


LLama blame (multi-GPU):   0%|          | 0/49877 [00:00<?, ?it/s]

LLama blame (multi-GPU):   0%|          | 5/49877 [00:53<148:09:07, 10.69s/it]


ValidationError: 1 validation error for BlameAnalysis
  Invalid JSON: EOF while parsing a string at line 1 column 891 [type=json_invalid, input_value='{"text": "Rules: Rules: ...s: Rules: Rules: Rules:', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid