In [48]:
import re
from collections import defaultdict, Counter

In [49]:
class ConllEditor:
    def __init__(self, file_path):
        self.file_path = file_path
        self.data = self._load_data()

    def _load_data(self):
        with open(self.file_path, 'r') as f:
            lines = f.readlines()
        return [line.strip() for line in lines]

    def view_annotations(self):
        annotations = [line for line in self.data if line and not line.startswith("-DOCSTART-")]
        for annotation in annotations:
            print(annotation)
    
    def label_stats(self):
        label_counter = Counter()
        for line in self.data:
            if line and not line.startswith("-DOCSTART-"):
                label = line.split()[-1]
                label_counter[label] += 1
        for label, count in label_counter.items():
            print(f"Label: {label}, Count: {count}")
    
    def search_by_label(self, label):
        matches = [line for line in self.data if line.endswith(label)]
        for match in matches:
            print(match)
    
    def remove_label(self, label_to_remove):
        new_data = []
        for line in self.data:
            if line.endswith(label_to_remove):
                new_data.append(re.sub(rf"\s{label_to_remove}$", " O", line))
            else:
                new_data.append(line)
        self.data = new_data
        print(f"Label '{label_to_remove}' removed.")
    
    def merge_labels(self, labels_to_merge, new_label):
        new_data = []
        for line in self.data:
            if any(line.endswith(label) for label in labels_to_merge):
                new_data.append(re.sub(rf"\s({'|'.join(labels_to_merge)})$", f" {new_label}", line))
            else:
                new_data.append(line)
        self.data = new_data
        print(f"Labels {labels_to_merge} merged into '{new_label}'.")
    
    def rename_labels(self, label_mapping):
        new_data = []
        for line in self.data:
            if line and not line.startswith("-DOCSTART-"):
                parts = line.split()
                if parts:
                    label = parts[-1]
                    if label in label_mapping:
                        parts[-1] = label_mapping[label]
                    new_data.append(" ".join(parts))
            else:
                new_data.append(line)
        self.data = new_data
        print(f"Labels renamed according to {label_mapping}.")

    def save(self, output_path):
        with open(output_path, 'w') as f:
            f.write("\n".join(self.data) + "\n")
        print(f"Updated file saved to {output_path}")

In [50]:
# Initialize the editor with the CoNLL file path
editor = ConllEditor(r'c:\Users\Sakib Ahmed\Desktop\yours.conll')

In [51]:
# 1. View Annotations
editor.view_annotations()

IntentÃ³ -X- _ O
un -X- _ O
sistema -X- _ O
de -X- _ O
centro -X- _ O
que -X- _ O
naturalmente -X- _ O
no -X- _ O
era -X- _ O
el -X- _ O
mismo -X- _ O
que -X- _ O
ahora -X- _ O
, -X- _ O
puntualizÃ³ -X- _ O
, -X- _ O
pero -X- _ O
que -X- _ O
logrÃ³ -X- _ O
una -X- _ O
sÃ­ntesis -X- _ O
entre -X- _ O
dos -X- _ O
posiciones -X- _ O
dialÃ©cticas -X- _ O
e -X- _ O
intentÃ³ -X- _ O
evitar -X- _ O
choques -X- _ O
entre -X- _ O
posiciones -X- _ O
contrarias -X- _ O
. -X- _ O
Por -X- _ O
otra -X- _ O
parte -X- _ O
Narbona -X- _ B-PER
criticÃ³ -X- _ O
" -X- _ O
el -X- _ O
retraso -X- _ O
de -X- _ O
las -X- _ O
obras -X- _ O
para -X- _ O
la -X- _ O
construcciÃ³n -X- _ O
de -X- _ O
la -X- _ O
planta -X- _ O
de -X- _ O
secado -X- _ O
tÃ©rmico -X- _ O
de -X- _ O
lodos -X- _ O
de -X- _ O
la -X- _ O
Depuradora -X- _ B-MISC
Sur -X- _ I-MISC
" -X- _ O
. -X- _ O
- -X- _ O
- -X- _ O
Ara -X- _ B-PER
, -X- _ O
que -X- _ O
calificÃ³ -X- _ O
de -X- _ O
" -X- _ O
crÃ­tico -X- _ O
" -X- _ O
el -X- _ O
momento 

In [52]:
# 2. Label Statistics
editor.label_stats()

Label: O, Count: 182
Label: B-PER, Count: 6
Label: B-MISC, Count: 3
Label: I-MISC, Count: 4
Label: B-ORG, Count: 4
Label: I-ORG, Count: 4
Label: I-PER, Count: 2
Label: B-LOC, Count: 4
Label: I-LOC, Count: 4


In [53]:
# 3. Search Annotations with a specific label
editor.search_by_label('B-PER')

Narbona -X- _ B-PER
Ara -X- _ B-PER
Fernando -X- _ B-PER
Solari -X- _ B-PER
Kiko -X- _ B-PER
Anna -X- _ B-PER


In [54]:
# 4. Remove specific label
editor.remove_label('B-PER')

Label 'B-PER' removed.


In [55]:
# Rechecking Label Statistics
editor.label_stats()

Label: O, Count: 188
Label: B-MISC, Count: 3
Label: I-MISC, Count: 4
Label: B-ORG, Count: 4
Label: I-ORG, Count: 4
Label: I-PER, Count: 2
Label: B-LOC, Count: 4
Label: I-LOC, Count: 4


In [56]:
# 5. Merge multiple labels into one
editor.merge_labels(['B-MISC', 'I-MISC', 'B-ORG'], 'C-MISC')

Labels ['B-MISC', 'I-MISC', 'B-ORG'] merged into 'C-MISC'.


In [57]:
# Rechecking Label Statistics
editor.label_stats()

Label: O, Count: 188
Label: C-MISC, Count: 11
Label: I-ORG, Count: 4
Label: I-PER, Count: 2
Label: B-LOC, Count: 4
Label: I-LOC, Count: 4


In [60]:
# 6. Rename labels based on JSON mapping
editor.rename_labels({
    'I-PER':'A-MISC',
    'B-LOC':'A-LOC'
})

Labels renamed according to {'I-PER': 'A-MISC', 'B-LOC': 'A-LOC'}.


In [61]:
# Rechecking Label Statistics
editor.label_stats()

Label: O, Count: 188
Label: C-MISC, Count: 11
Label: I-ORG, Count: 4
Label: A-MISC, Count: 2
Label: A-LOC, Count: 4
Label: I-LOC, Count: 4


In [62]:
# Save the updated CoNLL file
editor.save('updated_conll_file.conll')

Updated file saved to updated_conll_file.conll
