In [1]:
from processing.adversarial_llm_ner import AdversarialLLMNER
import json
from processing.mistral_processor import MistralProvider
from processing.ollama_processor import LlamaProvider

# Sample sentence used by every experiment in this notebook.
# Adjacent literals are concatenated by the parser, so the value is one single-spaced line.
text = (
    "In 2020, Sarah Mitchell, the CEO of GlobalTech Innovations, announced a major "
    "restructuring within the company. This decision, made after months of "
    "deliberations, aimed at addressing the impact of the COVID-19 pandemic on the "
    "business. While some employees feared job cuts, others praised the move as "
    "necessary for long-term survival. The restructuring plan was executed in "
    "stages and primarily affected the marketing and sales departments."
)

# Single-model LLM NER

In [25]:
# Single-model run: hand the sample text to the Mistral provider
# (the second positional argument is passed as None).
mistral_provider = MistralProvider()
result = mistral_provider.run(text, None)

# Keep both parts of the result under their own names for later inspection.
masked_text = result["masked_text"]
mapping = result["mapping"]

print("Masked Text:\n", masked_text)
print("\nMapping:")
# Each entry is printed value-first, i.e. "<value>: <key>".
for map_key, map_value in mapping.items():
    print(f"{map_value}: {map_key}")

Masked Text:
 In 2020, [PER_1] Mitchell, the CEO of GlobalTech Innovations, announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of the COVID-19 pandemic on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the marketing and sales departments.

Mapping:
"Sarah Mitchell": "PER_1"


In [12]:
# Single-model run: hand the same sample text to the Llama provider.
ollama_provider = LlamaProvider()
result = ollama_provider.run(text)

# Masked text first ...
print("MASKED TEXT")
masked_text = result["masked_text"]
print(masked_text)

# ... then the raw mapping object returned alongside it.
mappings = result["mapping"]
print("MAPPINGS", mappings, sep="\n")

MASKED TEXT
In [DATE_1], [PER_1], the CEO of [ORG_1], announced a major restructuring within the [ORG_1]. This decision, made after months of deliberations, aimed at addressing the impact of the [EVENT_1] on the [ORG_1]. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the [PRODUCT].
MAPPINGS
{'2020': 'DATE_1', 'Sarah Mitchell': 'PER_1', 'GlobalTech Innovations': 'ORG_1', 'COVID-19 pandemic': 'EVENT_1'}


# Adversarial LLM NER

### Example: adversarial prompt framed as "You are a PII attacker..." (the adversarial model refuses to answer)

In [2]:
# Adversarial loop with llama3.2 in both roles, capped at three steps.
adversarial = AdversarialLLMNER(
    model_anonymizer="llama3.2",
    model_adversarial="llama3.2",
    max_steps=3,
)
result = adversarial.refine_anonymization(text)

# Header line followed by the result pretty-printed as JSON (non-ASCII kept as-is).
print(
    "Best anonymization result:",
    json.dumps(result, indent=2, ensure_ascii=False),
    sep="\n",
)


--- Step 1 ---
trial 0
Anonymized: {'anonymized_text': 'In 2020, Sarah Mitchell, the CEO of GlobalTech Innovations, announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of the COVID-19 pandemic on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the marketing and sales departments.None', 'mapping': {}}
Reidentified guess: I cannot provide information that could be used to identify or locate an individual. Is there anything else I can help you with?
Overlap Score: 0
Best anonymization result:
{
  "step": 1,
  "anonymized_text": "In 2020, Sarah Mitchell, the CEO of GlobalTech Innovations, announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of the COVID-19 pandemic on the business. While some empl

## Llama (anonymizer) + Llama (adversary)

In [2]:
# Adversarial loop: "llama3.5" as both anonymizer and adversary, capped at three steps.
adversarial = AdversarialLLMNER(
    model_anonymizer="llama3.5",
    model_adversarial="llama3.5",
    max_steps=3,
)
result = adversarial.refine_anonymization(text)

# Header line followed by the result pretty-printed as JSON (non-ASCII kept as-is).
print(
    "Best anonymization result:",
    json.dumps(result, indent=2, ensure_ascii=False),
    sep="\n",
)


--- Step 1 ---
trial 0
====
RESULT
{'anonymized_text': '[PER_1], the CEO of [ORG_1], announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of [EVENT_1] on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the [LOC_1], [LOC_2].', 'mapping': {'Sarah Mitchell': 'PER_1', 'GlobalTech Innovations': 'ORG_1', 'COVID-19 pandemic': 'EVENT_1'}}
====
Anonymized: {'anonymized_text': '[PER_1], the CEO of [ORG_1], announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of [EVENT_1] on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the [LOC_1], [LOC_2].', 'mapping': {'Sarah Mitchell'

In [4]:
# Display the 'mapping' entry of the current `result` (last assigned by a refine_anonymization call).
result['mapping']

{'Sarah Mitchell': 'PER_1',
 'GlobalTech Innovations': 'ORG_1',
 'CEO': '[TITLE]',
 '2020': 'DATE_1',
 'COVID-19 pandemic': 'EVENT_1'}

## Llama (anonymizer) + Mistral (adversary)

In [5]:
# Adversarial loop: "llama3.5" anonymizes, "mistral" plays the adversary, capped at three steps.
adversarial = AdversarialLLMNER(
    model_anonymizer="llama3.5",
    model_adversarial="mistral",
    max_steps=3,
)
result = adversarial.refine_anonymization(text)

# Header line followed by the result pretty-printed as JSON (non-ASCII kept as-is).
print(
    "Best anonymization result:",
    json.dumps(result, indent=2, ensure_ascii=False),
    sep="\n",
)


--- Step 1 ---
trial 0
====
RESULT
{'anonymized_text': '[PER_1] announced a major [EVENT] within the [ORG_1]. This decision, made after months of deliberations, aimed at addressing the impact of the [LOC_2] on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The [ EVENT ] plan was executed in stages and primarily affected the [ PRODUCT ] and [ PRODUCT ] departments.', 'mapping': {'2020': 'DATE_1', 'Sarah Mitchell': 'PER_1', 'GlobalTech Innovations': 'ORG_1', 'COVID-19 pandemic': '[LOC_2]', 'job cuts': '[EVENT]', 'months of deliberations': '[EVENT]', 'long-term survival': '[EVENT]', 'stages': '[EVENT]', 'marketing': '[PRODUCT]', 'sales': '[PRODUCT]'}}
====
Anonymized: {'anonymized_text': '[PER_1] announced a major [EVENT] within the [ORG_1]. This decision, made after months of deliberations, aimed at addressing the impact of the [LOC_2] on the business. While some employees feared job cuts, others praised the move as neces

In [6]:
# Display the 'mapping' entry of the current `result` (last assigned by a refine_anonymization call).
result['mapping']

{'2020': 'DATE_1',
 'Sarah Mitchell': 'PER_1',
 'GlobalTech Innovations': 'ORG_1',
 'COVID-19 pandemic': '[LOC_2]',
 'job cuts': '[EVENT]',
 'months of deliberations': '[EVENT]',
 'long-term survival': '[EVENT]',
 'stages': '[EVENT]',
 'marketing': '[PRODUCT]',
 'sales': '[PRODUCT]'}

## Mistral (anonymizer) + Llama (adversary)

In [7]:
# Adversarial loop: "mistral" anonymizes, "llama3.5" plays the adversary, capped at three steps.
adversarial = AdversarialLLMNER(
    model_anonymizer="mistral",
    model_adversarial="llama3.5",
    max_steps=3,
)
result = adversarial.refine_anonymization(text)

# Header line followed by the result pretty-printed as JSON (non-ASCII kept as-is).
print(
    "Best anonymization result:",
    json.dumps(result, indent=2, ensure_ascii=False),
    sep="\n",
)


--- Step 1 ---
trial 0
====
RESULT
{'anonymized_text': 'In 2020, Sarah Mitchell, the CEO of GlobalTech Innovations, announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of the COVID-19 pandemic on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the marketing and sales departments.', 'mapping': {}}
====
Anonymized: {'anonymized_text': 'In 2020, Sarah Mitchell, the CEO of GlobalTech Innovations, announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of the COVID-19 pandemic on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the marketing and sales departments.', 'map

In [8]:
# Display the "mapping" entry of the current `result` (last assigned by a refine_anonymization call).
result["mapping"]

{'Sarah Mitchell': 'PER_1',
 'GlobalTech Innovations': 'ORG_1',
 '2020': 'DATE_1'}

In [10]:
# Adversarial loop: "mistral" as both anonymizer and adversary, capped at three steps.
adversarial = AdversarialLLMNER(
    model_anonymizer="mistral",
    model_adversarial="mistral",
    max_steps=3,
)
result = adversarial.refine_anonymization(text)

# Header line followed by the result pretty-printed as JSON (non-ASCII kept as-is).
print(
    "Best anonymization result:",
    json.dumps(result, indent=2, ensure_ascii=False),
    sep="\n",
)


--- Step 1 ---
trial 0
====
RESULT
{'anonymized_text': 'In [DATE_1], [PER_1], the CEO of [ORG_1], announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of the [EVENT_1] on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructuring plan was executed in stages and primarily affected the [LOC_1] and [LOC_2] departments.', 'mapping': {'2020': 'DATE_1', 'Sarah Mitchell': 'PER_1', 'GlobalTech Innovations': 'ORG_1', 'COVID-19 pandemic': 'EVENT_1', '[LOC_1]': 'LOC_1', '[LOC_2]': 'LOC_2'}}
====
Anonymized: {'anonymized_text': 'In [DATE_1], [PER_1], the CEO of [ORG_1], announced a major restructuring within the company. This decision, made after months of deliberations, aimed at addressing the impact of the [EVENT_1] on the business. While some employees feared job cuts, others praised the move as necessary for long-term survival. The restructur

In [11]:
# Display the "mapping" entry of the current `result` (last assigned by a refine_anonymization call).
result["mapping"]

{'2020': 'DATE_1',
 'Sarah Mitchell': 'PER_2',
 'GlobalTech Innovations': 'ORG_1',
 'COVID-19 pandemic': 'EVENT_1'}

# Summary and visualization

In [26]:
# Sample sentence that the recorded entity mappings below refer to.
# Built from adjacent string literals instead of backslash continuations: a
# backslash inside a string literal joins the physical lines *including* the
# next line's indentation, which silently injected runs of spaces into the text.
text = (
    "In 2020, Sarah Mitchell, the CEO of GlobalTech Innovations, "
    "announced a major restructuring within the company. "
    "This decision, made after months of deliberations, "
    "aimed at addressing the impact of the COVID-19 pandemic on the business. "
    "While some employees feared job cuts, others praised the move as necessary "
    "for long-term survival. The restructuring plan was executed in stages and "
    "primarily affected the marketing and sales departments."
)

# Entity -> tag mappings copied by hand from the runs recorded earlier in the notebook.
# Key order is kept exactly as recorded.

# Single-model runs.
llama = {
    "2020": "DATE_1",
    "Sarah Mitchell": "PER_1",
    "GlobalTech Innovations": "ORG_1",
    "COVID-19 pandemic": "EVENT_1",
}

mistral = {
    "Sarah Mitchell": "PER_1",
}

# Adversarial runs, named <anonymizer>_<adversary>.
llama_llama = {
    "Sarah Mitchell": "PER_1",
    "GlobalTech Innovations": "ORG_1",
    "CEO": "[TITLE]",
    "2020": "DATE_1",
    "COVID-19 pandemic": "EVENT_1",
}

llama_mistral = {
    "2020": "DATE_1",
    "Sarah Mitchell": "PER_1",
    "GlobalTech Innovations": "ORG_1",
    "COVID-19 pandemic": "[LOC_2]",
    "job cuts": "[EVENT]",
    "months of deliberations": "[EVENT]",
    "long-term survival": "[EVENT]",
    "stages": "[EVENT]",
    "marketing": "[PRODUCT]",
    "sales": "[PRODUCT]",
}

mistral_llama = {
    "Sarah Mitchell": "PER_1",
    "GlobalTech Innovations": "ORG_1",
    "2020": "DATE_1",
}

mistral_mistral = {
    "2020": "DATE_1",
    "Sarah Mitchell": "PER_2",
    "GlobalTech Innovations": "ORG_1",
    "COVID-19 pandemic": "EVENT_1",
}

# Display name paired with its mapping; the two parallel lists are derived
# from these pairs so they always stay aligned.
named_experiments = [
    ("LLaMA", llama),
    ("Mistral", mistral),
    ("LLaMA & LLaMA", llama_llama),
    ("LLaMA & MISTRAL", llama_mistral),
    ("MISTRAL & LLaMA", mistral_llama),
    ("MISTRAL & MISTRAL", mistral_mistral),
]
list_of_experiments = [run_mapping for _, run_mapping in named_experiments]
list_of_names = [run_name for run_name, _ in named_experiments]



In [27]:
import re
from IPython.display import HTML, display

def create_html_with_entities(text, entity_mappings):
    """Render ``text`` as an HTML page with every mapped entity highlighted.

    Every occurrence of a key of ``entity_mappings`` that is not embedded in a
    larger word is wrapped in a ``<mark>`` element, followed by the key's label
    in a small bold ``<span>``. When keys overlap, the longest key wins.

    Args:
        text: Plain text to render. It is HTML-escaped before being embedded.
        entity_mappings: Dict mapping an entity's surface string to the label
            shown next to it. Empty-string keys are ignored.

    Returns:
        str: A complete HTML document containing the highlighted text.
    """
    import html  # local import so this cell does not depend on edits to the import cell

    def render_mark(entity, label):
        # Same markup as before; only the interpolated values are escaped.
        return f'<mark class="entity" style="background: #bfe1d9; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">{html.escape(entity, quote=False)} <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">{html.escape(str(label), quote=False)}</span></mark>'

    # Regex alternation picks the first alternative that matches, so the keys
    # must be ordered longest-first or a short key that prefixes a longer one
    # ("Sarah" vs "Sarah Mitchell") would shadow it.
    ordered_keys = sorted((key for key in entity_mappings if key), key=len, reverse=True)

    if ordered_keys:
        # Lookarounds instead of \b: identical for keys that start/end with a
        # word character, but they also work for keys such as "[LOC_1]" whose
        # edges are punctuation (where \b would demand an adjacent word char).
        entity_pattern = re.compile(
            r'(?<!\w)(?:' + '|'.join(re.escape(key) for key in ordered_keys) + r')(?!\w)'
        )
        pieces = []
        cursor = 0
        for match in entity_pattern.finditer(text):
            # Escape the untouched text between matches so characters such as
            # '<' or '&' in the input cannot break the generated page.
            pieces.append(html.escape(text[cursor:match.start()], quote=False))
            matched_entity = match.group(0)
            # The match is exactly one of the keys, so a direct lookup suffices.
            pieces.append(render_mark(matched_entity, entity_mappings[matched_entity]))
            cursor = match.end()
        pieces.append(html.escape(text[cursor:], quote=False))
        html_content = ''.join(pieces)
    else:
        # Nothing to highlight: avoid building a pattern that matches the empty string.
        html_content = html.escape(text, quote=False)

    full_html = f"""
    <!DOCTYPE html>
    <html lang="en">
     <body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: ltr">
      <figure style="margin-bottom: 6rem">
       <div class="entities" style="line-height: 2.5; direction: ltr">
        {html_content}
       </div>
      </figure>
     </body>
    </html>
    """
    return full_html

In [28]:
# One highlighted HTML page per experiment (zip keeps the two parallel lists in step).
visualizations = [
    create_html_with_entities(text, run_mapping)
    for run_mapping, _ in zip(list_of_experiments, list_of_names)
]

# Stitch the pages together: a heading per experiment, a rule after each page.
sections = [
    f"<h2>{title}</h2>" + page + "<hr>"
    for title, page in zip(list_of_names, visualizations)
]
combined_html = (
    "<html><head><title>Entity Visualizations</title></head><body>"
    + "".join(sections)
    + "</body></html>"
)

# Save a standalone copy next to the notebook, then render it inline as the cell output.
with open("combined_visualizations.html", "w", encoding="utf-8") as out_file:
    out_file.write(combined_html)
HTML(combined_html)
