In [6]:
import nltk

def read_ann_file(ann_file_path):
    annotations = []
    
    with open(ann_file_path, 'r',encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')  # Split by tab
            
            if len(parts) == 2:
                # Entity annotation (e.g., T1 background_claim 2417 2522 complicated)
                annotation_id, annotation_details = parts
                try:
                    entity_type, start, end, text = annotation_details.split()
                    annotations.append({
                        "id": annotation_id,
                        "type": entity_type,
                        "start": int(start),
                        "end": int(end),
                        "text": text
                    })
                except ValueError:
                    print(f"Skipping line due to unexpected format (Entity): {line.strip()}")
                    continue  # Skip this line
            
            elif len(parts) == 3:
                # Relation annotation (e.g., R1 supports Arg1:T4 Arg2:T5)
                annotation_id, relation_type, args = parts
                arguments = args.split()
                if len(arguments) != 2:
                    print(f"Skipping line due to unexpected argument count (Relation): {line.strip()}")
                    continue  # Skip this line
                
                # Split the arguments into pairs
                relation_args = {}
                for arg in arguments:
                    if ':' in arg:
                        key, value = arg.split(':')
                        relation_args[key] = value
                    else:
                        print(f"Skipping invalid argument (Relation): {arg}")
                        continue  # Skip invalid arguments
                
                annotations.append({
                    "id": annotation_id,
                    "relation_type": relation_type,
                    "arguments": relation_args
                })
    
    return annotations


In [7]:
file="A01.ann"
annotation=read_ann_file(file)

Skipping line due to unexpected argument count (Relation): T1	background_claim 2417 2522	complicated 3D character models are widely used in fields of entertainment, virtual reality, medicine etc
Skipping line due to unexpected argument count (Relation): T2	background_claim 2524 2640	The range of breathtaking realistic 3D models is only limited by the creativity of artists and resolution of devices
Skipping line due to unexpected argument count (Relation): T4	background_claim 2855 2897	a production cannot afford major revisions
Skipping line due to unexpected argument count (Relation): T5	own_claim 2961 3041	providing a flexible and efficient solution to animation remains an open problem
Skipping line due to unexpected format (Entity): R1	supports Arg1:T4 Arg2:T5
Skipping line due to unexpected argument count (Relation): T6	background_claim 3211 3307	Skeleton Subspace Deformation (SSD) is the predominant approach to character skinning at present
Skipping line due to unexpected argument 

In [3]:
import nltk

def read_ann_file(ann_file_path):
    annotations = []
    
    with open(ann_file_path, 'r',encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')  # Split by tab
            
            if len(parts) == 2:
                # Entity annotation (e.g., T1 background_claim 2417 2522 complicated)
                annotation_id, annotation_details = parts
                try:
                    entity_type, start, end, text = annotation_details.split()
                    annotations.append({
                        "id": annotation_id,
                        "type": entity_type,
                        "start": int(start),
                        "end": int(end),
                        "text": text
                    })
                except ValueError:
                    print(f"Skipping line due to unexpected format (Entity): {line.strip()}")
                    continue  # Skip this line
            
            elif len(parts) == 3:
                # Relation annotation (e.g., R1 supports Arg1:T4 Arg2:T5)
                annotation_id, relation_type, args = parts
                arguments = args.split()
                if len(arguments) != 2:
                    print(f"Skipping line due to unexpected argument count (Relation): {line.strip()}")
                    continue  # Skip this line
                
                # Split the arguments into pairs
                relation_args = {}
                for arg in arguments:
                    if ':' in arg:
                        key, value = arg.split(':')
                        relation_args[key] = value
                    else:
                        print(f"Skipping invalid argument (Relation): {arg}")
                        continue  # Skip invalid arguments
                
                annotations.append({
                    "id": annotation_id,
                    "relation_type": relation_type,
                    "arguments": relation_args
                })
    
    return annotations


In [4]:
file="A01.ann"
annotation=read_ann_file(file)

Skipping line due to unexpected argument count (Relation): T1	background_claim 2417 2522	complicated 3D character models are widely used in fields of entertainment, virtual reality, medicine etc
Skipping line due to unexpected argument count (Relation): T2	background_claim 2524 2640	The range of breathtaking realistic 3D models is only limited by the creativity of artists and resolution of devices
Skipping line due to unexpected argument count (Relation): T4	background_claim 2855 2897	a production cannot afford major revisions
Skipping line due to unexpected argument count (Relation): T5	own_claim 2961 3041	providing a flexible and efficient solution to animation remains an open problem
Skipping line due to unexpected format (Entity): R1	supports Arg1:T4 Arg2:T5
Skipping line due to unexpected argument count (Relation): T6	background_claim 3211 3307	Skeleton Subspace Deformation (SSD) is the predominant approach to character skinning at present
Skipping line due to unexpected argument 

In [5]:
annotation

[{'id': 'T24', 'relation_type': 'data 6583 6605', 'arguments': {}},
 {'id': 'T45', 'relation_type': 'background_claim 8802 8814', 'arguments': {}},
 {'id': 'T53', 'relation_type': 'data 10411 10425', 'arguments': {}},
 {'id': 'T116', 'relation_type': 'data 6364 6383', 'arguments': {}},
 {'id': 'T124', 'relation_type': 'data 4998 5012', 'arguments': {}},
 {'id': 'T169', 'relation_type': 'data 26269 26277', 'arguments': {}},
 {'id': 'T182', 'relation_type': 'data 2906 2924', 'arguments': {}},
 {'id': 'T183', 'relation_type': 'data 2928 2948', 'arguments': {}}]

In [9]:
def read_ann_file(ann_file_path):
    annotations = []
    
    with open(ann_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:  # Skip empty lines
                continue
            try:
                parts = line.split('\t')
                
                if len(parts) == 2:
                    # Entity annotation (e.g., T1 Person 0 4 John)
                    annotation_id, annotation_details = parts
                    entity_details = annotation_details.split()
                    if len(entity_details) == 4:
                        entity_type, start, end, text = entity_details
                        annotations.append({
                            "id": annotation_id,
                            "type": entity_type,
                            "start": int(start),
                            "end": int(end),
                            "text": text
                        })
                    else:
                        print(f"Skipping malformed entity: {line}")
                
                elif len(parts) == 3:
                    # Relation annotation (e.g., R1 Located_in Arg1:T1 Arg2:T2)
                    annotation_id, relation_type, args = parts
                    arguments = args.split()
                    annotations.append({
                        "id": annotation_id,
                        "relation_type": relation_type,
                        "arguments": arguments
                    })
                
                else:
                    print(f"Skipping malformed line: {line}")
            except Exception as e:
                print(f"Error processing line: {line}\n{str(e)}")
    
    return annotations


In [14]:
import nltk

def read_ann_file(ann_file_path):
    annotations = []
    
    with open(ann_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')
            if len(parts) == 2:
                # Entity annotation (e.g., T1 Person 0 4 John)
                annotation_id, annotation_details = parts
                entity_type, start, end, text = annotation_details.split()
                annotations.append({
                    "id": annotation_id,
                    "type": entity_type,
                    "start": int(start),
                    "end": int(end),
                    "text": text
                })
            elif len(parts) == 3:
                # Relation annotation (e.g., R1 Located_in Arg1:T1 Arg2:T2)
                annotation_id, relation_type, args = parts
                arguments = args.split()
                annotations.append({
                    "id": annotation_id,
                    "relation_type": relation_type,
                    "arguments": arguments
                })
    
    return annotations


In [15]:
file="A01.ann"
annotation=read_ann_file(file)

ValueError: not enough values to unpack (expected 4, got 3)

In [11]:
annotation

[{'id': 'T1',
  'relation_type': 'background_claim 2417 2522',
  'arguments': ['complicated',
   '3D',
   'character',
   'models',
   'are',
   'widely',
   'used',
   'in',
   'fields',
   'of',
   'entertainment,',
   'virtual',
   'reality,',
   'medicine',
   'etc']},
 {'id': 'T2',
  'relation_type': 'background_claim 2524 2640',
  'arguments': ['The',
   'range',
   'of',
   'breathtaking',
   'realistic',
   '3D',
   'models',
   'is',
   'only',
   'limited',
   'by',
   'the',
   'creativity',
   'of',
   'artists',
   'and',
   'resolution',
   'of',
   'devices']},
 {'id': 'T4',
  'relation_type': 'background_claim 2855 2897',
  'arguments': ['a', 'production', 'cannot', 'afford', 'major', 'revisions']},
 {'id': 'T5',
  'relation_type': 'own_claim 2961 3041',
  'arguments': ['providing',
   'a',
   'flexible',
   'and',
   'efficient',
   'solution',
   'to',
   'animation',
   'remains',
   'an',
   'open',
   'problem']},
 {'id': 'T6',
  'relation_type': 'background_claim 

In [16]:
import nltk

def read_ann_file(ann_file_path):
    annotations = []
    
    with open(ann_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')
            
            if len(parts) == 2:
                # Entity annotation (e.g., T1 Person 0 4 John)
                annotation_id, annotation_details = parts
                annotation_details_parts = annotation_details.split()
                
                # Skip lines that don't have exactly 4 parts
                if len(annotation_details_parts) != 4:
                    continue
                
                entity_type, start, end, text = annotation_details_parts
                annotations.append({
                    "id": annotation_id,
                    "type": entity_type,
                    "start": int(start),
                    "end": int(end),
                    "text": text
                })
            elif len(parts) == 3:
                # Relation annotation (e.g., R1 Located_in Arg1:T1 Arg2:T2)
                annotation_id, relation_type, args = parts
                arguments = args.split()
                annotations.append({
                    "id": annotation_id,
                    "relation_type": relation_type,
                    "arguments": arguments
                })
    
    return annotations


In [17]:
file="A01.ann"
ann=read_ann_file(file)