## We can extract NE from a text

In [None]:
from gliner import GLiNER
from collections import Counter, defaultdict
import re
import json
from tqdm import tqdm

# Load model (change model name if needed)
model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1").cuda()

def smart_split_text(text, max_chunk_len=1024):
    sentence_endings = re.compile(r'([.!?]["\']?\s+)')
    sentences = []
    start = 0
    for match in sentence_endings.finditer(text):
        end = match.end()
        sentences.append(text[start:end])
        start = end
    if start < len(text):
        sentences.append(text[start:])
    chunks = []
    current_chunk = ''
    for sent in sentences:
        if len(current_chunk) + len(sent) <= max_chunk_len:
            current_chunk += sent
        else:
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sent
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

def process_in_chunks(text, func, *args, **kwargs):
    chunks = smart_split_text(text)
    results = []
    for chunk in tqdm(chunks):
        result = func(chunk, *args, **kwargs)
        results.append(result)
    return results

def deduplicate_entities_majority_voting(entities):
    entity_types = defaultdict(list)
    for ent in entities:
        entity_types[ent['text']].append(ent['label'])
    deduped = []
    for text, labels in entity_types.items():
        most_common = Counter(labels).most_common(1)[0][0]
        deduped.append({'text': text, 'label': most_common})
    return deduped



Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 11014.45it/s]


In [None]:
text = """In 1492, Christopher Columbus sailed from Spain to the New World. Queen Isabella supported his journey. The Santa Maria was his flagship."""
entity_types = "person, location, date, organization, artifact"  # You can change this list
threshold = 0.5  # You can adjust the threshold

print("Entered fast entity extraction task")
label_list = [l.strip() for l in entity_types.split(",") if l.strip()]
print("Calling process_in_chunks")
chunk_results = process_in_chunks(text, model.predict_entities, label_list, threshold=threshold)
print("process_in_chunks finished")
entities = [ent for chunk in chunk_results for ent in chunk]
print(f"Entities found: {entities}")
unique_entities = deduplicate_entities_majority_voting(entities)
print(f"Unique entities after deduplication: {unique_entities}")
if unique_entities:
    print("### Extracted Entities")
    for entity in unique_entities:
        print(f"{entity['text']}  ➔  {entity['label']}")
else:
    print("No entities found.")

In [33]:
# ---- Load text from dummy.txt ----
with open("dummy.txt", "r", encoding="utf-8") as f:
    text = f.read()

entity_types = "person, location, date, organization, artifact"  # You can change this list
threshold = 0.85  # You can adjust the threshold

print("Entered fast entity extraction task")
label_list = [l.strip() for l in entity_types.split(",") if l.strip()]
print("Calling process_in_chunks")
chunk_results = process_in_chunks(text, model.predict_entities, label_list, threshold=threshold)
print("process_in_chunks finished")
entities = [ent for chunk in chunk_results for ent in chunk]
print(f"Entities found: {entities}")
unique_entities = deduplicate_entities_majority_voting(entities)
print(f"Unique entities after deduplication: {unique_entities}")
if unique_entities:
    print("### Extracted Entities")
    for entity in unique_entities:
        print(f"{entity['text']}  ➔  {entity['label']}")
else:
    print("No entities found.")

Entered fast entity extraction task
Calling process_in_chunks


  0%|          | 0/59 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 59/59 [02:29<00:00,  2.54s/it]

process_in_chunks finished
Entities found: [{'start': 85, 'end': 93, 'text': 'Arrakeen', 'label': 'location', 'score': 0.961354672908783}, {'start': 302, 'end': 310, 'text': 'Arrakeen', 'label': 'location', 'score': 0.957675576210022}, {'start': 720, 'end': 732, 'text': 'Houses Minor', 'label': 'organization', 'score': 0.9432129859924316}, {'start': 985, 'end': 993, 'text': 'Arrakeen', 'label': 'location', 'score': 0.9608456492424011}, {'start': 108, 'end': 115, 'text': 'Caladan', 'label': 'location', 'score': 0.9300065636634827}, {'start': 173, 'end': 180, 'text': 'Caladan', 'label': 'location', 'score': 0.9545857906341553}, {'start': 300, 'end': 307, 'text': 'Caladan', 'label': 'location', 'score': 0.9620287418365479}, {'start': 366, 'end': 373, 'text': 'Arrakis', 'label': 'location', 'score': 0.9455699920654297}, {'start': 534, 'end': 541, 'text': 'Arrakis', 'label': 'location', 'score': 0.9488928914070129}, {'start': 801, 'end': 815, 'text': 'Gurney Halleck', 'label': 'person', 'sc




## Can we improve the entities list using post processing ?

### LLMs

In [None]:
# For each unique entity, collect all sentences where it appears

# Split text into sentences


sentences = re.split(r'(?<=[.!?])\s+', text)

# Build a dict: entity_text -> [sentences]
entity_sentences = {ent['text']: [] for ent in unique_entities}
for sent in sentences:
    for ent in unique_entities:
        if ent['text'] in sent:
            entity_sentences[ent['text']].append(sent)

# Example: print sentences for each entity
for ent, sents in entity_sentences.items():
    print(f"\nEntity: {ent}")
    for s in sents:
        print(f"  - {s}")


Entity: Arrakeen
  - The Duke Leto Atreides leaned against a parapet of the landing control tower outside Arrakeen.
  - To his left, the lights of Arrakeen glowed in the haze – yellow .
  - He saw lights of a moving vehicle coming toward the landing field from Arrakeen.
  - Over the exit of the Arrakeen landing field, crudely carved as though with a poor instrument, there was an inscription that Muad’Dib was to repeat many times.

On that first day when Muad’Dib rode through the streets of Arrakeen with his family, some of the people along the way recalled the legends and the prophecy and they ventured to shout: “Mahdi!” But their shout was more a question than a statement, for as yet they could only hope he was the one foretold as the Lisan al-Gaib, the Voice from the Outer World.

Entity: Houses Minor
  - Nor the Houses Minor who controlled the interior trade of Arrakis .

Entity: Caladan
  - He looked up and out of the dust at the unwinking stars, thought: Around one of those littl

In [38]:
# --- Handle case-insensitive duplicates before substring relations ---
deduped = {}
for ent in unique_entities:
    key = (ent['text'].lower(), ent['label'].lower())
    if key in deduped:
        # Optionally, keep the longer text as canonical
        if len(ent['text']) > len(deduped[key]['text']):
            deduped[key]['text'] = ent['text']
    else:
        deduped[key] = {'text': ent['text'], 'label': ent['label']}
unique_entities = list(deduped.values())

# Now find substring relations on deduplicated entities
substring_relations = []
entity_texts = [ent['text'] for ent in unique_entities]

for i, ent1 in enumerate(entity_texts):
    for j, ent2 in enumerate(entity_texts):
        if i != j and (ent1 in ent2 or ent2 in ent1):
            substring_relations.append((ent1, ent2))

if substring_relations:
    print("Entities with substring relations:")
    for ent1, ent2 in substring_relations:
        print(f'"{ent1}" <-> "{ent2}"')
else:
    print("No substring relations found among entities.")

Entities with substring relations:
"Arrakeen" <-> "Arrakeen landing field"
"Gurney Halleck" <-> "Halleck"
"Gurney Halleck" <-> "Gurney"
"The Harkonnens" <-> "Harkonnens"
"The Harkonnens" <-> "Harkonnen"
"Halleck" <-> "Gurney Halleck"
"Gurney" <-> "Gurney Halleck"
"Hawat" <-> "Thufir Hawat"
"Thufir" <-> "Thufir Hawat"
"Harkonnens" <-> "The Harkonnens"
"Harkonnens" <-> "Harkonnen"
"Leto" <-> "Duke Leto Atreides"
"Arrakeen landing field" <-> "Arrakeen"
"Thufir Hawat" <-> "Hawat"
"Thufir Hawat" <-> "Thufir"
"Duncan Idaho" <-> "Idaho"
"Idaho" <-> "Duncan Idaho"
"Harkonnen machine" <-> "Harkonnen"
"Duke Leto Atreides" <-> "Leto"
"Harkonnen" <-> "The Harkonnens"
"Harkonnen" <-> "Harkonnens"
"Harkonnen" <-> "Harkonnen machine"


In [36]:
# Initialize Mistral client and API key
import os
from mistralai import Mistral

# Set your API key (make sure to set the environment variable or paste your key here)
api_key = os.environ.get("MISTRAL_API_KEY", "YOUR_MISTRAL_API_KEY_HERE")
mistral_model = "mistral-large-latest"
mistral_client = Mistral(api_key=api_key)

def are_entities_equivalent_mistral(ent1, ent2, context):
    prompt = (
        f"Given the following context, do the entities '{ent1}' and '{ent2}' refer to the same thing? "
        "If one of the entities is a title (like 'the Duke'), try to determine if it refers to the other entity in particular. "
        "If yes, answer 'yes' and explain why. If not, answer 'no' and do not explain. "
        "Start your answer with 'yes' or 'no'.\n"
        f"Context:\n{context}\n"
        f"Entity 1: {ent1}\nEntity 2: {ent2}\n"
        "Answer:"
    )
    response = mistral_client.chat.complete(
        model=mistral_model,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()

In [40]:
import time

print(f"unique_entities: {unique_entities}")
# Build a mapping from entity text to its group (for fusion)
entity_groups = {ent['text']: {ent['text']} for ent in unique_entities}

# For each substring relation, use Mistral to decide if they should be fused
for ent1, ent2 in substring_relations:
    context_sents = set(entity_sentences.get(ent1, []) + entity_sentences.get(ent2, []))
    context = " ".join(context_sents)
    # wait a few seconds to avoid rate limits
    time.sleep(2)
    answer = are_entities_equivalent_mistral(ent1, ent2, context)
    print(f"{ent1} <-> {ent2}: {answer}")
    print('----------------------------------------------------------')
    if answer.lower().startswith("yes"):
        # Merge groups
        group1 = entity_groups[ent1]
        group2 = entity_groups[ent2]
        merged = group1.union(group2)
        for ent in merged:
            entity_groups[ent] = merged

# Deduplicate groups
fused_groups = []
seen = set()
for group in entity_groups.values():
    group_tuple = tuple(sorted(group))
    if group_tuple not in seen:
        seen.add(group_tuple)
        fused_groups.append(group)

# For each group, keep the entity with the most words as canonical
fused_entities = []
for group in fused_groups:
    canonical = max(group, key=lambda x: len(x.split()))
    # Get the most common label among the group
    labels = [ent['label'] for ent in unique_entities if ent['text'] in group]
    from collections import Counter
    label = Counter(labels).most_common(1)[0][0]
    fused_entities.append({'text': canonical, 'aliases': list(group), 'label': label})

# Print fused entities
for ent in fused_entities:
    print(f"{ent['text']} (aliases: {ent['aliases']}) ➔ {ent['label']}")

unique_entities: [{'text': 'Arrakeen', 'label': 'location'}, {'text': 'Houses Minor', 'label': 'organization'}, {'text': 'Caladan', 'label': 'location'}, {'text': 'Arrakis', 'label': 'location'}, {'text': 'Gurney Halleck', 'label': 'person'}, {'text': 'The Harkonnens', 'label': 'organization'}, {'text': 'The Duke', 'label': 'person'}, {'text': 'Halleck', 'label': 'person'}, {'text': 'Gurney', 'label': 'person'}, {'text': 'Hawat', 'label': 'person'}, {'text': 'Thufir', 'label': 'person'}, {'text': 'Harkonnens', 'label': 'organization'}, {'text': 'Leto', 'label': 'person'}, {'text': 'Arrakeen landing field', 'label': 'location'}, {'text': 'Muad’Dib', 'label': 'person'}, {'text': 'Paul', 'label': 'person'}, {'text': 'map board', 'label': 'artifact'}, {'text': 'Thufir Hawat', 'label': 'person'}, {'text': 'Duncan Idaho', 'label': 'person'}, {'text': 'Idaho', 'label': 'person'}, {'text': 'Guild', 'label': 'organization'}, {'text': 'garrison village', 'label': 'location'}, {'text': 'Landsraad

saving the aliases enables better understanding of the entities

This is API dependent let us try Gliner multitask 

In [None]:
from gliner import GLiNER
from gliner.multitask import GLiNERQuestionAnswerer
model_id = 'knowledgator/gliner-multitask-v1.0'
model = GLiNER.from_pretrained(model_id).cuda()


Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 28170.70it/s]


In [None]:
answerer = GLiNERQuestionAnswerer(model=model)
# Build a mapping from entity text to its group (for fusion)
entity_groups = {ent['text']: {ent['text']} for ent in unique_entities}

for ent1, ent2 in substring_relations:
    context_sents = set(entity_sentences.get(ent1, []) + entity_sentences.get(ent2, []))
    context = " ".join(context_sents)
    # Formulate a question for the QA model
    question = f"Do the entities '{ent1}' and '{ent2}' refer to the same thing in this context?"
    qa_result = answerer(context, questions=question)
    answer = qa_result[0]['answer'].strip().lower() if qa_result and 'answer' in qa_result[0] else ""
    print(f"{ent1} <-> {ent2}: {answer}")
    print('----------------------------------------------------------')
    if answer.startswith("yes"):
        # Merge groups
        group1 = entity_groups[ent1]
        group2 = entity_groups[ent2]
        merged = group1.union(group2)
        for ent in merged:
            entity_groups[ent] = merged

# Deduplicate groups
fused_groups = []
seen = set()
for group in entity_groups.values():
    group_tuple = tuple(sorted(group))
    if group_tuple not in seen:
        seen.add(group_tuple)
        fused_groups.append(group)

# For each group, keep the entity with the most words as canonical
fused_entities = []
from collections import Counter
for group in fused_groups:
    canonical = max(group, key=lambda x: len(x.split()))
    labels = [ent['label'] for ent in unique_entities if ent['text'] in group]
    label = Counter(labels).most_common(1)[0][0]
    fused_entities.append({'text': canonical, 'aliases': list(group), 'label': label})

# Print fused entities
for ent in fused_entities:
    print(f"{ent['text']} (aliases: {ent['aliases']}) ➔ {ent['label']}")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Arrakeen <-> Arrakeen landing field: 
----------------------------------------------------------
Gurney Halleck <-> Halleck: 
----------------------------------------------------------
Gurney Halleck <-> Gurney: 
----------------------------------------------------------
The Harkonnens <-> Harkonnens: 
----------------------------------------------------------
The Harkonnens <-> Harkonnen: 
----------------------------------------------------------


KeyboardInterrupt: 

In [21]:
from gliner.multitask import GLiNERQuestionAnswerer

answerer = GLiNERQuestionAnswerer(model=model)
entity_groups = {ent['text']: {ent['text']} for ent in unique_entities}

for ent1, ent2 in substring_relations:
    context_sents = set(entity_sentences.get(ent1, []) + entity_sentences.get(ent2, []))
    context = " ".join(context_sents)
    question = f"Do the entities '{ent1}' and '{ent2}' refer to the same thing in this context? Answer yes or no."
    qa_result = answerer(context, questions=question)
    answer = qa_result[0]['answer'].strip().lower() if qa_result and 'answer' in qa_result[0] else ""
    print(f"{ent1} <-> {ent2}: {answer}")
    print('----------------------------------------------------------')
    if answer.startswith("yes"):
        group1 = entity_groups[ent1]
        group2 = entity_groups[ent2]
        merged = group1.union(group2)
        for ent in merged:
            entity_groups[ent] = merged

# Deduplicate groups
fused_groups = []
seen = set()
for group in entity_groups.values():
    group_tuple = tuple(sorted(group))
    if group_tuple not in seen:
        seen.add(group_tuple)
        fused_groups.append(group)

# For each group, keep the entity with the most words as canonical
fused_entities = []
from collections import Counter
for group in fused_groups:
    canonical = max(group, key=lambda x: len(x.split()))
    labels = [ent['label'] for ent in unique_entities if ent['text'] in group]
    label = Counter(labels).most_common(1)[0][0]
    fused_entities.append({'text': canonical, 'aliases': list(group), 'label': label})

for ent in fused_entities:
    print(f"{ent['text']} (aliases: {ent['aliases']}) ➔ {ent['label']}")

Arrakeen <-> Arrakeen landing field: 
----------------------------------------------------------
Gurney Halleck <-> Halleck: 
----------------------------------------------------------
Gurney Halleck <-> Gurney: 
----------------------------------------------------------
The Harkonnens <-> Harkonnens: 
----------------------------------------------------------
The Harkonnens <-> Harkonnen: 
----------------------------------------------------------


KeyboardInterrupt: 

Unfortunately the Question answerer cannon answer those questions or the information extrcators can't answer the questions. We go back to the output of mistral now and try to deduce relations between characters

In [41]:
def extract_json(text):
    # Try to extract JSON block from markdown or plain output
    match = re.search(r'```json\s*(\[[\s\S]*?\])\s*```', text)
    if match:
        return json.loads(match.group(1))
    match = re.search(r'(\[[\s\S]*?\])', text)
    if match:
        return json.loads(match.group(1))
    raise ValueError("No JSON found in text")

def batch_find_relations_mistral(main_ent, other_ents, entity_sentences, max_context_words=1000):
    prompt = (
        "Given the following entity pairs and their context, for each pair, state the most likely relation between them. "
        "If there is no clear relation, answer 'no relation'.\n"
        "Output the answer as a list of JSON objects, one per pair, with keys: entity1, entity2, relation.\n"
        "Example:\n"
        "[{\"entity1\": \"A\", \"entity2\": \"B\", \"relation\": \"friend\"}, {\"entity1\": \"A\", \"entity2\": \"C\", \"relation\": \"no relation\"}]\n"
    )
    for ent2 in other_ents:
        context_sents = set(entity_sentences.get(main_ent, []) + entity_sentences.get(ent2, []))
        context = " ".join(context_sents)
        # Truncate context to max_context_words
        context_words = context.split()
        if len(context_words) > max_context_words:
            context = " ".join(context_words[:max_context_words])
        prompt += f"\nEntity 1: {main_ent}\nEntity 2: {ent2}\nContext: {context}\nRelation:"
    response = mistral_client.chat.complete(
        model=mistral_model,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()

relations_found = []
n = len(fused_entities)
for i in range(n - 1):
    main_ent = fused_entities[i]['text']
    other_ents = [fused_entities[j]['text'] for j in range(i + 1, n)]
    time.sleep(2)  # To avoid rate limits
    batch_relations = batch_find_relations_mistral(main_ent, other_ents, entity_sentences)
    print(f"Relations for {main_ent}:\n{batch_relations}\n{'-'*60}")
    try:
        parsed = extract_json(batch_relations)
        relations_found.extend(parsed)
    except Exception as e:
        print("Failed to parse Mistral output:", e)
        print(batch_relations)

# Now relations_found is a list of dicts: {'entity1': ..., 'entity2': ..., 'relation': ...}

Relations for Arrakeen landing field:
```json
[
  {"entity1": "Arrakeen landing field", "entity2": "Houses Minor", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "Caladan", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "Arrakis", "relation": "located on"},
  {"entity1": "Arrakeen landing field", "entity2": "Gurney Halleck", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "The Harkonnens", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "The Duke", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "Thufir Hawat", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "Duke Leto Atreides", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "Muad’Dib", "relation": "no relation"},
  {"entity1": "Arrakeen landing field", "entity2": "Paul", "relation": "no relation"},
  {"entity1": "Arra

The LLM is capable of analyzing the text, context and coming up with relation between entities while being zero shot.

## Extracting relations 

Lead #1 Gliner multitask

In [None]:
from gliner import GLiNER
from gliner.multitask import GLiNERRelationExtractor
# Load multitask GLiNER model and relation extractor
model_id = 'knowledgator/gliner-multitask-v1.0'
model = GLiNER.from_pretrained(model_id).cuda()
relation_extractor = GLiNERRelationExtractor(model=model)

In [1]:

# Provided fiction text
text = """
The Duke Leto Atreides leaned against a parapet of the landing control tower outside Arrakeen. The night’s first moon, an oblate silver coin, hung well above the southern horizon. Beneath it, the jagged cliffs of the Shield Wall shone like parched icing through a dust haze. To his left, the lights of Arrakeen glowed in the haze – yellow . . . white . . . blue.

He thought of the notices posted now above his signature all through the populous places of the planet: “Our Sublime Padishah Emperor has charged me to take possession of this planet and end all dispute.”

The ritualistic formality of it touched him with a feeling of loneliness. Who was fooled by that fatuous legalism? Not the Fremen, certainly. Nor the Houses Minor who controlled the interior trade of Arrakis . . . and were Harkonnen creatures almost to a man .

They have tried to take the life of my son!

The rage was difficult to suppress.

He saw lights of a moving vehicle coming toward the landing field from Arrakeen. He hoped it was the guard and troop carrier bringing Paul. The delay was galling even though he knew it was prompted by caution on the part of Hawat’s lieutenant.

They have tried to take the life of my son!

He shook his head to drive out the angry thoughts, glanced back at the field where five of his own frigates were posted around the rim like monolithic sentries.

Better a cautiousdelay  than . . .

The lieutenant was a good one, he reminded himself. A man marked for advancement, completely loyal.

“Our Sublime Padishah Emperor . . . ”

If the people of this decadent garrison city could only see the Emperor’s private note to his “Noble Duke” – the disdainful allusions to veiled men and women: ” – but what else is one to expect of barbarians whose dearest dream is to live outside the ordered security of the faufreluches?”

The Duke felt in this moment that his own dearest dream was to end all class distinctions and never again think of deadly order. He looked up and out of the dust at the unwinking stars, thought: Around one of those little lights circles Caladan . . . but I’ll never again see my home . The longing for Caladan was a sudden pain in his breast. He felt that it did not come from within himself, but that it reached out to him from Caladan. He could not bring himself to call this dry wasteland of Arrakis his home, and he doubted he ever would.

I must mask my feelings , he thought. For the boy’s sake. If ever he’s to have a home, this must be it. I may think of Arrakis as a hell I’ve reached before death, but he must find here that which will inspire him. There must be something .

A wave of self-pity, immediately despised and rejected, swept through him, and for some reason he found himself recalling two lines from a poem Gurney Halleck often repeated –

“My lungs taste the air of Time

Blown past falling sands . . . ”

Well, Gurney would find plenty of falling sands here, the Duke thought. The central wastelands beyond those moon-frosted cliffs were desert – barren rock, dunes, and blowing dust, an uncharted dry wilderness with here and there along its rim and perhaps scattered through it, knots of Fremen. If anything could buy a future for the Atreides line, the Fremen just might do it.

Provided the Harkonnens hadn’t managed to infect even the Fremen with their poisonous schemes.

They have tried to take the life of my son!

A scraping metal racket vibrated through the tower, shook the parapet beneath his arms. Blast shutters dropped in front of him, blocking the view.

Shuttle’s coming in , he thought. Time to go down and get to work . He turned to the stairs behind him, headed down to the big assembly room, trying to remain calm as he descended, to prepare his face for the coming encounter.

They have tried to take the life of my son!

The men were already boiling in from the field when he reached the yellow-domed room. They carried their spacebags over their shoulders, shouting and roistering like students returning from vacation.

“Hey! Feel that under your dogs? That’s gravity, man!” “How many G’s does this place pull? Feels heavy.” “Nine-tenths of a G by the book.”

The crossfire of thrown words filled the big room.

“Did you get a good look at this hole on the way down? Where’s all the loot this place’s supposed to have?” “The Harkonnens took it with ’em!” “Me for a hot shower and a soft bed!” “Haven’t you heard, stupid? No showers down here. You scrub your ass with sand!” “Hey! Can it! The Duke!”

The Duke stepped out of the stair entry into a suddenly silent room.

Gurney Halleck strode along at the point of the crowd, bag over one shoulder, the neck of his nine-string baliset clutched in the other hand. They were long-fingered hands with big thumbs, full of tiny movements that drew such delicate music from the baliset.

The Duke watched Halleck, admiring the ugly lump of a man, noting the glass-splinter eyes with their gleam of savage understanding. Here was a man who lived outside the faufreluches while obeying their every precept. What was it Paul had called him? “Gurney, the valorous .”
"""

# Fiction labels as relation types
relations = [
    "parent", "child", "spouse", "sibling", "friend", "enemy", "mentor",
    "ruler of", "member of", "located in", "artifact owned by", "killed by",
    "rescued by", "no relation"
]
entities = ["person", "organization", "location", "artifact", "group", "event", "object", "fac", "gpe"]



# Extract relations
predictions = relation_extractor(text, entities=entities, relations=relations)
print("Extracted relations:")
for rel in predictions:
    print(rel)

  from .autonotebook import tqdm as notebook_tqdm
Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 18486.16it/s]
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Extracted relations:
[]


In [None]:
# 2. Extract relations from a sample historical text
text = """
The French Revolution (French: Révolution française [ʁevɔlysjɔ̃ fʁɑ̃sɛːz]) was a period of political and societal change in France which began with the Estates General of 1789 and ended with the Coup of 18 Brumaire on 9 November 1799. Many of the revolution's ideas are considered fundamental principles of liberal democracy,[1] and its values remain central to modern French political discourse.[2]

The causes of the revolution were a combination of social, political, and economic factors which the ancien régime ("old regime") proved unable to manage. A financial crisis and widespread social distress led to the convocation of the Estates General in May 1789, its first meeting since 1614. The representatives of the Third Estate broke away and re-constituted themselves as a National Assembly in June. The Storming of the Bastille in Paris on 14 July was followed by radical measures by the Assembly, among them the abolition of feudalism, state control over the Catholic Church, and a declaration of rights. The next three years were dominated by a struggle for political control. King Louis XVI's attempted flight to Varennes in June 1791 further discredited the monarchy, and military defeats after the outbreak of the French Revolutionary Wars in April 1792 led to an armed insurrection on 10 August 1792. The monarchy was replaced by the French First Republic in September, and Louis XVI was executed in January 1793.

After another revolt in June 1793, the constitution was suspended, and political power passed from the National Convention to the Committee of Public Safety, dominated by radical Jacobins led by Maximilien Robespierre. About 16,000 people were sentenced by the Revolutionary Tribunal and executed in the Reign of Terror, which ended in July 1794 with the Thermidorian Reaction. Weakened by external threats and internal opposition, the Committee of Public Safety was replaced in November 1795 by the Directory. Its instability ended in the coup of 18 Brumaire and the establishment of the Consulate, with Napoleon Bonaparte as First Consul.
"""

relations = [
    "parent", "child", "spouse", "sibling", "successor", "predecessor", "ruler of", "member of",
    "founder of", "participant in", "allied with", "opposed by", "treaty with", "battle with",
    "located in", "event date", "died in", "born in"
]
entities = ["person", "organization", "location", "event", "date", "country", "city"]

predictions = relation_extractor(text, entities=entities, relations=relations)
print("Extracted relations:")
for rel in predictions:
    print(rel)



Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 4985.96it/s]


: 

Lead # 2 Glirel

In [None]:
!pip install glirel spacy

In [None]:
import spacy
from spacy.language import Language
import glirel
from glirel.spacy_integration import SpacyGLiRELWrapper

#might need to be commented when ran multiple times
@Language.factory("glirel")
def create_glirel_component(nlp, name, pretrained_model_name_or_path="jackboyla/glirel-large-v0", batch_size=1, threshold=0.3):
    return SpacyGLiRELWrapper(
        pretrained_model_name_or_path=pretrained_model_name_or_path,
        batch_size=batch_size,
        threshold=threshold,
    )

# Load a blank spaCy model or an existing one
nlp = spacy.load('en_core_web_sm')

# Add the GLiREL component to the pipeline
nlp.add_pipe(
        "glirel", 
        after="ner",
        # config={"pretrained_model_name_or_path": "jackboyla/glirel-large-v0",
        # "batch_size": 1,
        # "threshold": 0.0}
    )

# Now you can use the pipeline with the GLiREL component
text = """
The Duke Leto Atreides leaned against a parapet of the landing control tower outside Arrakeen. The night’s first moon, an oblate silver coin, hung well above the southern horizon. Beneath it, the jagged cliffs of the Shield Wall shone like parched icing through a dust haze. To his left, the lights of Arrakeen glowed in the haze – yellow . . . white . . . blue.

He thought of the notices posted now above his signature all through the populous places of the planet: “Our Sublime Padishah Emperor has charged me to take possession of this planet and end all dispute.”

The ritualistic formality of it touched him with a feeling of loneliness. Who was fooled by that fatuous legalism? Not the Fremen, certainly. Nor the Houses Minor who controlled the interior trade of Arrakis . . . and were Harkonnen creatures almost to a man .

They have tried to take the life of my son!

The rage was difficult to suppress.

He saw lights of a moving vehicle coming toward the landing field from Arrakeen. He hoped it was the guard and troop carrier bringing Paul. The delay was galling even though he knew it was prompted by caution on the part of Hawat’s lieutenant.

They have tried to take the life of my son!

He shook his head to drive out the angry thoughts, glanced back at the field where five of his own frigates were posted around the rim like monolithic sentries.

Better a cautiousdelay  than . . .

The lieutenant was a good one, he reminded himself. A man marked for advancement, completely loyal.

“Our Sublime Padishah Emperor . . . ”

If the people of this decadent garrison city could only see the Emperor’s private note to his “Noble Duke” – the disdainful allusions to veiled men and women: ” – but what else is one to expect of barbarians whose dearest dream is to live outside the ordered security of the faufreluches?”

The Duke felt in this moment that his own dearest dream was to end all class distinctions and never again think of deadly order. He looked up and out of the dust at the unwinking stars, thought: Around one of those little lights circles Caladan . . . but I’ll never again see my home . The longing for Caladan was a sudden pain in his breast. He felt that it did not come from within himself, but that it reached out to him from Caladan. He could not bring himself to call this dry wasteland of Arrakis his home, and he doubted he ever would.

I must mask my feelings , he thought. For the boy’s sake. If ever he’s to have a home, this must be it. I may think of Arrakis as a hell I’ve reached before death, but he must find here that which will inspire him. There must be something .

A wave of self-pity, immediately despised and rejected, swept through him, and for some reason he found himself recalling two lines from a poem Gurney Halleck often repeated –

“My lungs taste the air of Time

Blown past falling sands . . . ”

Well, Gurney would find plenty of falling sands here, the Duke thought. The central wastelands beyond those moon-frosted cliffs were desert – barren rock, dunes, and blowing dust, an uncharted dry wilderness with here and there along its rim and perhaps scattered through it, knots of Fremen. If anything could buy a future for the Atreides line, the Fremen just might do it.

Provided the Harkonnens hadn’t managed to infect even the Fremen with their poisonous schemes.

They have tried to take the life of my son!

A scraping metal racket vibrated through the tower, shook the parapet beneath his arms. Blast shutters dropped in front of him, blocking the view.

Shuttle’s coming in , he thought. Time to go down and get to work . He turned to the stairs behind him, headed down to the big assembly room, trying to remain calm as he descended, to prepare his face for the coming encounter.

They have tried to take the life of my son!

The men were already boiling in from the field when he reached the yellow-domed room. They carried their spacebags over their shoulders, shouting and roistering like students returning from vacation.

“Hey! Feel that under your dogs? That’s gravity, man!” “How many G’s does this place pull? Feels heavy.” “Nine-tenths of a G by the book.”

The crossfire of thrown words filled the big room.

“Did you get a good look at this hole on the way down? Where’s all the loot this place’s supposed to have?” “The Harkonnens took it with ’em!” “Me for a hot shower and a soft bed!” “Haven’t you heard, stupid? No showers down here. You scrub your ass with sand!” “Hey! Can it! The Duke!”

The Duke stepped out of the stair entry into a suddenly silent room.

Gurney Halleck strode along at the point of the crowd, bag over one shoulder, the neck of his nine-string baliset clutched in the other hand. They were long-fingered hands with big thumbs, full of tiny movements that drew such delicate music from the baliset.

The Duke watched Halleck, admiring the ugly lump of a man, noting the glass-splinter eyes with their gleam of savage understanding. Here was a man who lived outside the faufreluches while obeying their every precept. What was it Paul had called him? “Gurney, the valorous .”
"""

fiction_labels = {
    "glirel_labels": {
        "parent": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "child": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "spouse": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "sibling": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "friend": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "enemy": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "mentor": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "ruler of": {"allowed_head": ["PERSON"], "allowed_tail": ["LOC", "ORG", "GPE"]},
        "member of": {"allowed_head": ["PERSON"], "allowed_tail": ["ORG", "GROUP"]},
        "located in": {"allowed_head": ["LOC", "FAC", "ORG"], "allowed_tail": ["LOC", "GPE"]},
        "artifact owned by": {"allowed_head": ["OBJECT", "FAC"], "allowed_tail": ["PERSON"]},
        "killed by": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "rescued by": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "no relation": {},
    }
}

# Add the labels to the pipeline at inference time
docs = list( nlp.pipe([(text, fiction_labels)], as_tuples=True) )
relations = docs[0][0]._.relations

print('Number of relations:', len(relations))

sorted_data_desc = sorted(relations, key=lambda x: x['score'], reverse=True)
print("\nDescending Order by Score:")
for item in sorted_data_desc:
    print(f"{item['head_text']} --> {item['label']} --> {item['tail_text']} | score: {item['score']}")




Number of relations: 31

Descending Order by Score:
['Gurney', 'Halleck'] --> ruler of --> ['Arrakeen'] | score: 0.5722787976264954
['Gurney'] --> ruler of --> ['Arrakeen'] | score: 0.5722787976264954
['Shuttle'] --> ruler of --> ['Arrakeen'] | score: 0.5722787976264954
['Gurney', 'Halleck'] --> ruler of --> ['Arrakeen'] | score: 0.5722787976264954
['Paul'] --> ruler of --> ['Arrakeen'] | score: 0.5722787976264954
['Gurney', 'Halleck'] --> ruler of --> ['Arrakeen'] | score: 0.5492805242538452
['Gurney'] --> ruler of --> ['Arrakeen'] | score: 0.5492805242538452
['Shuttle'] --> ruler of --> ['Arrakeen'] | score: 0.5492805242538452
['Gurney', 'Halleck'] --> ruler of --> ['Arrakeen'] | score: 0.5492805242538452
['Paul'] --> ruler of --> ['Arrakeen'] | score: 0.5492805242538452
['Gurney', 'Halleck'] --> ruler of --> ['Arrakeen'] | score: 0.5448113083839417
['Gurney'] --> ruler of --> ['Arrakeen'] | score: 0.5448113083839417
['Shuttle'] --> ruler of --> ['Arrakeen'] | score: 0.54481130838394

In [25]:
import spacy
from spacy.language import Language
import glirel
from glirel.spacy_integration import SpacyGLiRELWrapper

#might need to be commented when ran multiple times
# @Language.factory("glirel")
# def create_glirel_component(nlp, name, pretrained_model_name_or_path="jackboyla/glirel-large-v0", batch_size=1, threshold=0.3):
#     return SpacyGLiRELWrapper(
#         pretrained_model_name_or_path=pretrained_model_name_or_path,
#         batch_size=batch_size,
#         threshold=threshold,
#     )

# Load a blank spaCy model or an existing one
nlp = spacy.load('en_core_web_sm')

# Add the GLiREL component to the pipeline
nlp.add_pipe(
        "glirel", 
        after="ner",
        # config={"pretrained_model_name_or_path": "jackboyla/glirel-large-v0",
        # "batch_size": 1,
        # "threshold": 0.0}
    )

# Now you can use the pipeline with the GLiREL component
text = """
The French Revolution (French: Révolution française [ʁevɔlysjɔ̃ fʁɑ̃sɛːz]) was a period of political and societal change in France which began with the Estates General of 1789 and ended with the Coup of 18 Brumaire on 9 November 1799. Many of the revolution's ideas are considered fundamental principles of liberal democracy,[1] and its values remain central to modern French political discourse.[2]

The causes of the revolution were a combination of social, political, and economic factors which the ancien régime ("old regime") proved unable to manage. A financial crisis and widespread social distress led to the convocation of the Estates General in May 1789, its first meeting since 1614. The representatives of the Third Estate broke away and re-constituted themselves as a National Assembly in June. The Storming of the Bastille in Paris on 14 July was followed by radical measures by the Assembly, among them the abolition of feudalism, state control over the Catholic Church, and a declaration of rights. The next three years were dominated by a struggle for political control. King Louis XVI's attempted flight to Varennes in June 1791 further discredited the monarchy, and military defeats after the outbreak of the French Revolutionary Wars in April 1792 led to an armed insurrection on 10 August 1792. The monarchy was replaced by the French First Republic in September, and Louis XVI was executed in January 1793.

After another revolt in June 1793, the constitution was suspended, and political power passed from the National Convention to the Committee of Public Safety, dominated by radical Jacobins led by Maximilien Robespierre. About 16,000 people were sentenced by the Revolutionary Tribunal and executed in the Reign of Terror, which ended in July 1794 with the Thermidorian Reaction. Weakened by external threats and internal opposition, the Committee of Public Safety was replaced in November 1795 by the Directory. Its instability ended in the coup of 18 Brumaire and the establishment of the Consulate, with Napoleon Bonaparte as First Consul.
"""

historical_labels = {
    "glirel_labels": {
        "parent": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "child": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "spouse": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "sibling": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "successor": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "predecessor": {"allowed_head": ["PERSON"], "allowed_tail": ["PERSON"]},
        "ruler of": {"allowed_head": ["PERSON"], "allowed_tail": ["LOC", "ORG", "GPE"]},
        "member of": {"allowed_head": ["PERSON"], "allowed_tail": ["ORG", "GROUP"]},
        "founder of": {"allowed_head": ["PERSON"], "allowed_tail": ["ORG", "GPE"]},
        "participant in": {"allowed_head": ["PERSON", "ORG"], "allowed_tail": ["EVENT"]},
        "allied with": {"allowed_head": ["PERSON", "ORG"], "allowed_tail": ["PERSON", "ORG"]},
        "opposed by": {"allowed_head": ["PERSON", "ORG"], "allowed_tail": ["PERSON", "ORG"]},
        "treaty with": {"allowed_head": ["ORG", "GPE"], "allowed_tail": ["ORG", "GPE"]},
        "battle with": {"allowed_head": ["ORG", "GPE"], "allowed_tail": ["ORG", "GPE"]},
        "located in": {"allowed_head": ["LOC", "FAC", "ORG"], "allowed_tail": ["LOC", "GPE"]},
        "event date": {"allowed_head": ["EVENT"], "allowed_tail": ["DATE"]},
        "died in": {"allowed_head": ["PERSON"], "allowed_tail": ["LOC", "GPE"]},
        "born in": {"allowed_head": ["PERSON"], "allowed_tail": ["LOC", "GPE"]},
        "no relation": {},
    }
}

# Add the labels to the pipeline at inference time
docs = list( nlp.pipe([(text, fiction_labels)], as_tuples=True) )
relations = docs[0][0]._.relations

print('Number of relations:', len(relations))

sorted_data_desc = sorted(relations, key=lambda x: x['score'], reverse=True)
print("\nDescending Order by Score:")
for item in sorted_data_desc:
    print(f"{item['head_text']} --> {item['label']} --> {item['tail_text']} | score: {item['score']}")


Number of relations: 40

Descending Order by Score:
['Maximilien', 'Robespierre'] --> member of --> ['Directory'] | score: 0.6324959993362427
['Maximilien', 'Robespierre'] --> member of --> ['the', 'National', 'Convention'] | score: 0.5678336024284363
['Maximilien', 'Robespierre'] --> member of --> ['a', 'National', 'Assembly'] | score: 0.5542224645614624
['Louis', 'XVI'] --> member of --> ['Directory'] | score: 0.5370921492576599
['Maximilien', 'Robespierre'] --> member of --> ['the', 'Estates', 'General'] | score: 0.5157949328422546
['Maximilien', 'Robespierre'] --> member of --> ['Assembly'] | score: 0.514499306678772
['Louis', 'XVI', "'s"] --> member of --> ['Directory'] | score: 0.5016480088233948
['Maximilien', 'Robespierre'] --> member of --> ['the', 'Committee', 'of', 'Public', 'Safety'] | score: 0.48483604192733765
['Louis', 'XVI'] --> member of --> ['a', 'National', 'Assembly'] | score: 0.4706806540489197
['Maximilien', 'Robespierre'] --> member of --> ['the', 'Committee', 'o

Lead #3 Iter

In [None]:
!pip install git+https://github.com/fleonce/iter

In [1]:
# --- ITERForRelationExtraction usage for fiction text ---

from iter import ITERForRelationExtraction

# Load the ITER relation extraction model and move to CUDA
model = ITERForRelationExtraction.from_pretrained("fleonce/iter-conll04-deberta-large").cuda()
tokenizer = model.tokenizer

# Example fiction text
text = """
The Duke Leto Atreides leaned against a parapet of the landing control tower outside Arrakeen. The night’s first moon, an oblate silver coin, hung well above the southern horizon. Beneath it, the jagged cliffs of the Shield Wall shone like parched icing through a dust haze. To his left, the lights of Arrakeen glowed in the haze – yellow . . . white . . . blue.

He thought of the notices posted now above his signature all through the populous places of the planet: “Our Sublime Padishah Emperor has charged me to take possession of this planet and end all dispute.”

The ritualistic formality of it touched him with a feeling of loneliness. Who was fooled by that fatuous legalism? Not the Fremen, certainly. Nor the Houses Minor who controlled the interior trade of Arrakis . . . and were Harkonnen creatures almost to a man .

They have tried to take the life of my son!

The rage was difficult to suppress.

He saw lights of a moving vehicle coming toward the landing field from Arrakeen. He hoped it was the guard and troop carrier bringing Paul. The delay was galling even though he knew it was prompted by caution on the part of Hawat’s lieutenant.

They have tried to take the life of my son!

He shook his head to drive out the angry thoughts, glanced back at the field where five of his own frigates were posted around the rim like monolithic sentries.

Better a cautiousdelay  than . . .

The lieutenant was a good one, he reminded himself. A man marked for advancement, completely loyal.

“Our Sublime Padishah Emperor . . . ”

If the people of this decadent garrison city could only see the Emperor’s private note to his “Noble Duke” – the disdainful allusions to veiled men and women: ” – but what else is one to expect of barbarians whose dearest dream is to live outside the ordered security of the faufreluches?”

The Duke felt in this moment that his own dearest dream was to end all class distinctions and never again think of deadly order. He looked up and out of the dust at the unwinking stars, thought: Around one of those little lights circles Caladan . . . but I’ll never again see my home . The longing for Caladan was a sudden pain in his breast. He felt that it did not come from within himself, but that it reached out to him from Caladan. He could not bring himself to call this dry wasteland of Arrakis his home, and he doubted he ever would.

I must mask my feelings , he thought. For the boy’s sake. If ever he’s to have a home, this must be it. I may think of Arrakis as a hell I’ve reached before death, but he must find here that which will inspire him. There must be something .

A wave of self-pity, immediately despised and rejected, swept through him, and for some reason he found himself recalling two lines from a poem Gurney Halleck often repeated –

“My lungs taste the air of Time

Blown past falling sands . . . ”

Well, Gurney would find plenty of falling sands here, the Duke thought. The central wastelands beyond those moon-frosted cliffs were desert – barren rock, dunes, and blowing dust, an uncharted dry wilderness with here and there along its rim and perhaps scattered through it, knots of Fremen. If anything could buy a future for the Atreides line, the Fremen just might do it.

Provided the Harkonnens hadn’t managed to infect even the Fremen with their poisonous schemes.

They have tried to take the life of my son!

A scraping metal racket vibrated through the tower, shook the parapet beneath his arms. Blast shutters dropped in front of him, blocking the view.

Shuttle’s coming in , he thought. Time to go down and get to work . He turned to the stairs behind him, headed down to the big assembly room, trying to remain calm as he descended, to prepare his face for the coming encounter.

They have tried to take the life of my son!

The men were already boiling in from the field when he reached the yellow-domed room. They carried their spacebags over their shoulders, shouting and roistering like students returning from vacation.

“Hey! Feel that under your dogs? That’s gravity, man!” “How many G’s does this place pull? Feels heavy.” “Nine-tenths of a G by the book.”

The crossfire of thrown words filled the big room.

“Did you get a good look at this hole on the way down? Where’s all the loot this place’s supposed to have?” “The Harkonnens took it with ’em!” “Me for a hot shower and a soft bed!” “Haven’t you heard, stupid? No showers down here. You scrub your ass with sand!” “Hey! Can it! The Duke!”

The Duke stepped out of the stair entry into a suddenly silent room.

Gurney Halleck strode along at the point of the crowd, bag over one shoulder, the neck of his nine-string baliset clutched in the other hand. They were long-fingered hands with big thumbs, full of tiny movements that drew such delicate music from the baliset.

The Duke watched Halleck, admiring the ugly lump of a man, noting the glass-splinter eyes with their gleam of savage understanding. Here was a man who lived outside the faufreluches while obeying their every precept. What was it Paul had called him? “Gurney, the valorous .”
"""

# Tokenize and move tensors to CUDA
encodings = tokenizer(
    text,
    return_tensors="pt"
)
encodings = {k: v.cuda() for k, v in encodings.items()}

# Generate predictions
generation_output = model.generate(
    encodings["input_ids"],
    attention_mask=encodings["attention_mask"],
)

# Print entities
print("Entities:")
print(generation_output.entities)

# Print relations between entities
print("Relations:")
print(generation_output.links)

  from .autonotebook import tqdm as notebook_tqdm
Token indices sequence length is longer than the specified maximum sequence length for this model (1155 > 512). Running this sequence through the model will result in indexing errors


Entities:
[[((7204, 83522, 620, 368, 23796), 2, 'Duke Leto Atreides', 'Peop'), ((72266, 83783), 0, 'Arrakeen', 'Loc'), ((14076, 3285), 0, 'Shield Wall', 'Loc'), ((72266, 83783), 0, 'Arrakeen', 'Loc'), ((72266, 83783, 67089, 267, 262, 29247, 377, 3237, 323, 323, 323, 881, 323, 323, 323, 1707, 260, 383, 708, 265, 262, 12601, 1430, 394, 764, 315, 4767, 305, 390, 262, 32988, 1536, 265, 262, 3568, 294, 317, 6374, 63501, 14594, 4615, 4954, 12620), 1, 'Arrakeen glowed in the haze – yellow . . . white . . . blue. He thought of the notices posted now above his signature all through the populous places of the planet: “Our Sublime Padishah Emperor', 'Org'), ((100268, 1319), 3, 'Fremen', 'Other'), ((24498, 14370), 1, 'Houses Minor', 'Org'), ((75762, 30632), 0, 'Arrakis', 'Loc'), ((72266, 83783), 0, 'Arrakeen', 'Loc'), ((1722,), 2, 'Paul', 'Peop'), ((97611, 297), 2, 'Hawat', 'Peop'), ((97611, 297, 276, 268, 23797, 260, 450, 286, 1367, 264, 413, 262, 432, 265, 312, 1441, 300, 383, 10829, 315, 761, 2

Unfortunately Iter's output is hardly human readable as there's very few instructions on how to use it.

## Summary experimentation

In [None]:
from gliner import GLiNER
from gliner.multitask import GLiNERQuestionAnswerer

model_id = 'knowledgator/gliner-multitask-v1.0'
model = GLiNER.from_pretrained(model_id)


Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 7525.67it/s]
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


[[{'answer': 'SpaceX', 'score': 0.9981264472007751}]]


In [7]:
from gliner.multitask import GLiNERSummarizer

summarizer = GLiNERSummarizer(model=model)
text = """
The Duke Leto Atreides leaned against a parapet of the landing control tower outside Arrakeen. The night’s first moon, an oblate silver coin, hung well above the southern horizon. Beneath it, the jagged cliffs of the Shield Wall shone like parched icing through a dust haze. To his left, the lights of Arrakeen glowed in the haze – yellow . . . white . . . blue.

He thought of the notices posted now above his signature all through the populous places of the planet: “Our Sublime Padishah Emperor has charged me to take possession of this planet and end all dispute.”

The ritualistic formality of it touched him with a feeling of loneliness. Who was fooled by that fatuous legalism? Not the Fremen, certainly. Nor the Houses Minor who controlled the interior trade of Arrakis . . . and were Harkonnen creatures almost to a man .

They have tried to take the life of my son!

The rage was difficult to suppress.

He saw lights of a moving vehicle coming toward the landing field from Arrakeen. He hoped it was the guard and troop carrier bringing Paul. The delay was galling even though he knew it was prompted by caution on the part of Hawat’s lieutenant.

They have tried to take the life of my son!

He shook his head to drive out the angry thoughts, glanced back at the field where five of his own frigates were posted around the rim like monolithic sentries.

Better a cautiousdelay  than . . .

The lieutenant was a good one, he reminded himself. A man marked for advancement, completely loyal.

“Our Sublime Padishah Emperor . . . ”

If the people of this decadent garrison city could only see the Emperor’s private note to his “Noble Duke” – the disdainful allusions to veiled men and women: ” – but what else is one to expect of barbarians whose dearest dream is to live outside the ordered security of the faufreluches?”

The Duke felt in this moment that his own dearest dream was to end all class distinctions and never again think of deadly order. He looked up and out of the dust at the unwinking stars, thought: Around one of those little lights circles Caladan . . . but I’ll never again see my home . The longing for Caladan was a sudden pain in his breast. He felt that it did not come from within himself, but that it reached out to him from Caladan. He could not bring himself to call this dry wasteland of Arrakis his home, and he doubted he ever would.

I must mask my feelings , he thought. For the boy’s sake. If ever he’s to have a home, this must be it. I may think of Arrakis as a hell I’ve reached before death, but he must find here that which will inspire him. There must be something .

A wave of self-pity, immediately despised and rejected, swept through him, and for some reason he found himself recalling two lines from a poem Gurney Halleck often repeated –

“My lungs taste the air of Time

Blown past falling sands . . . ”

Well, Gurney would find plenty of falling sands here, the Duke thought. The central wastelands beyond those moon-frosted cliffs were desert – barren rock, dunes, and blowing dust, an uncharted dry wilderness with here and there along its rim and perhaps scattered through it, knots of Fremen. If anything could buy a future for the Atreides line, the Fremen just might do it.

Provided the Harkonnens hadn’t managed to infect even the Fremen with their poisonous schemes.

They have tried to take the life of my son!

A scraping metal racket vibrated through the tower, shook the parapet beneath his arms. Blast shutters dropped in front of him, blocking the view.

Shuttle’s coming in , he thought. Time to go down and get to work . He turned to the stairs behind him, headed down to the big assembly room, trying to remain calm as he descended, to prepare his face for the coming encounter.

They have tried to take the life of my son!

The men were already boiling in from the field when he reached the yellow-domed room. They carried their spacebags over their shoulders, shouting and roistering like students returning from vacation.

“Hey! Feel that under your dogs? That’s gravity, man!” “How many G’s does this place pull? Feels heavy.” “Nine-tenths of a G by the book.”

The crossfire of thrown words filled the big room.

“Did you get a good look at this hole on the way down? Where’s all the loot this place’s supposed to have?” “The Harkonnens took it with ’em!” “Me for a hot shower and a soft bed!” “Haven’t you heard, stupid? No showers down here. You scrub your ass with sand!” “Hey! Can it! The Duke!”

The Duke stepped out of the stair entry into a suddenly silent room.

Gurney Halleck strode along at the point of the crowd, bag over one shoulder, the neck of his nine-string baliset clutched in the other hand. They were long-fingered hands with big thumbs, full of tiny movements that drew such delicate music from the baliset.

The Duke watched Halleck, admiring the ugly lump of a man, noting the glass-splinter eyes with their gleam of savage understanding. Here was a man who lived outside the faufreluches while obeying their every precept. What was it Paul had called him? “Gurney, the valorous .”
"""
summary = summarizer(text)
print(summary)



['They were long-fingered hands with big thumbs, full of tiny movements that drew such delicate']


The sumarizer struggles with large text. let's break it down

In [11]:
# Fix: summarizer(chunk) may return a list, so flatten or join if needed

with open("dummy.txt", "r", encoding="utf-8") as f:
    text = f.read()

def split_text_into_chunks(text, max_words=1024):
    sentences = text.split('. ')
    chunks = []
    current_chunk = ""
    for sent in sentences:
        test_chunk = (current_chunk + ". " + sent).strip() if current_chunk else sent
        if len(test_chunk) <= max_words:
            current_chunk = test_chunk
        else:
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sent
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

chunks = split_text_into_chunks(text, max_words=1024)

summaries = []
for chunk in chunks:
    summary = summarizer(chunk)
    # If summary is a list, join its elements; else, use as is
    if isinstance(summary, list):
        summaries.append(" ".join(str(s) for s in summary))
    else:
        summaries.append(str(summary))

final_summary = "\n".join(summaries)
print(final_summary)

The Duke Leto Atreides leaned against a parapet of the landing control tower outside Arrakeen. He saw lights of a moving vehicle coming toward the landing field from Arrakeen
He hoped it was the guard and troop carrier bringing Paul. They have tried to take the life of my son! The Duke felt in this moment that his own dearest dream was to end all class distinctions and never again think of deadly order
Well, Gurney would find plenty of falling sands here, the Duke thought
The central wastelands beyond those moon-frosted cliffs were desert – barren rock, dunes, and blowing dust, an uncharted dry wilderness with here and there along its rim and perhaps scattered through it, knots of Fremen. The men were already boiling in from the field when he reached the yellow-domed room
They carried their spacebags over their shoulders, shouting and roistering like students returning from vacation. Gurney Halleck strode along at the point of the crowd, bag over one shoulder, the neck of his nine-stri

It's not areal summarizer but we know it struggles gains a big quantity of text

In [12]:
# Assume model is already loaded as 'model'
summarizer = GLiNERSummarizer(model=model)

# Short text to summarize directly (no chunking needed)
text = """
The French Revolution (French: Révolution française [ʁevɔlysjɔ̃ fʁɑ̃sɛːz]) was a period of political and societal change in France which began with the Estates General of 1789 and ended with the Coup of 18 Brumaire on 9 November 1799. Many of the revolution's ideas are considered fundamental principles of liberal democracy,[1] and its values remain central to modern French political discourse.[2]

The causes of the revolution were a combination of social, political, and economic factors which the ancien régime ("old regime") proved unable to manage. A financial crisis and widespread social distress led to the convocation of the Estates General in May 1789, its first meeting since 1614. The representatives of the Third Estate broke away and re-constituted themselves as a National Assembly in June. The Storming of the Bastille in Paris on 14 July was followed by radical measures by the Assembly, among them the abolition of feudalism, state control over the Catholic Church, and a declaration of rights. The next three years were dominated by a struggle for political control. King Louis XVI's attempted flight to Varennes in June 1791 further discredited the monarchy, and military defeats after the outbreak of the French Revolutionary Wars in April 1792 led to an armed insurrection on 10 August 1792. The monarchy was replaced by the French First Republic in September, and Louis XVI was executed in January 1793.

After another revolt in June 1793, the constitution was suspended, and political power passed from the National Convention to the Committee of Public Safety, dominated by radical Jacobins led by Maximilien Robespierre. About 16,000 people were sentenced by the Revolutionary Tribunal and executed in the Reign of Terror, which ended in July 1794 with the Thermidorian Reaction. Weakened by external threats and internal opposition, the Committee of Public Safety was replaced in November 1795 by the Directory. Its instability ended in the coup of 18 Brumaire and the establishment of the Consulate, with Napoleon Bonaparte as First Consul.
"""

summary = summarizer(text)
# If summary is a list, join its elements; else, use as is
if isinstance(summary, list):
    print(" ".join(str(s) for s in summary))
else:
    print(str(summary))

The French Revolution (French: Révolution française [ʁevɔlysjɔ̃ fʁɑ̃sɛːz]) was a period of political and societal change in France which began with the Estates General of 1789 and ended with the Coup of 18 Brumaire on 9 November 1799. Its instability ended in the coup of 18 Brumaire and the establishment of the Consulate, with Napoleon Bonaparte as First Consul.
