In [19]:
import pandas as pd # We use pandas to manipulate the data
import json  # Load the data in json
import numpy as np

In [20]:
json_path = 'data/round2/accord-r2-b_annotations.json'
majority_vote = 2

classes_output = 'data/round2/accord-r1-b-classes.csv'
spans_output = 'data/round2/accord-r1-b-tags.csv'

In [21]:
results = json.load(open(json_path))  # Downloaded results
examples = results['examples']

In [22]:
# any change to the column order would affect class_columm_backward calculation in sentence level
Examples = pd.DataFrame(examples,columns=['example_id', 'content','metadata'])
Examples.head()

Unnamed: 0,example_id,content,metadata
0,eeb4239f-3819-475c-ad20-756bed80c4ff,"In reference heat loss calculations, the value...",{'ID': '151_Finnish_EnergyEfficiency'}
1,efd2d371-97de-4d5e-af06-e404ff71f316,The impact of the ground and crawl space on he...,{'ID': '91_Finnish_EnergyEfficiency'}
2,efe70fd9-5cdd-4586-9ebc-73c2960b1e00,"If a fire door is kept open in normal use, it ...",{'ID': '120_Finnish_FireSafety'}
3,f19c920a-bd5a-49ac-a111-69fb944271b4,The net heating energy demand for ventilation ...,{'ID': '86_Finnish_EnergyEfficiency'}
4,f439dcde-3cf9-4b40-b555-a2107ae5559f,If the circulation piping is located insi...,{'ID': '104_Finnish_EnergyEfficiency'}


# Sentence Level
https://guide.lighttag.io/in-depth/working_with_results/#Classifications

In [5]:
all_classifications = sum(map(lambda x:x['classifications'],examples),[])
all_classifications[0]

{'correct': None,
 'class_id': '83c313b8-6590-4a77-93e7-ef9f91a6cc93',
 'reviewed': False,
 'classname': 'complete-regulatory',
 'example_id': '1e98cae3-27a1-4a20-82c8-7d0d1310da11',
 'classified_by': [{'annotator': 'hansi.hettiarachchi@bcu.ac.uk',
   'timestamp': '2023-03-30T08:55:51.012+00:00',
   'annotator_id': 1},
  {'annotator': 'amna.dridi@bcu.ac.uk',
   'timestamp': '2023-03-29T19:04:46.024+00:00',
   'annotator_id': 3}],
 'definition_id': '48c2f559-a29a-4cf1-b904-7d080c3bee7e'}

In [6]:
Classifications = pd.io.json.json_normalize(all_classifications,meta=['example_id','classname'],record_path='classified_by')
Classifications.head()

  Classifications = pd.io.json.json_normalize(all_classifications,meta=['example_id','classname'],record_path='classified_by')


Unnamed: 0,annotator,timestamp,annotator_id,example_id,classname
0,hansi.hettiarachchi@bcu.ac.uk,2023-03-30T08:55:51.012+00:00,1,1e98cae3-27a1-4a20-82c8-7d0d1310da11,complete-regulatory
1,amna.dridi@bcu.ac.uk,2023-03-29T19:04:46.024+00:00,3,1e98cae3-27a1-4a20-82c8-7d0d1310da11,complete-regulatory
2,amna.dridi@bcu.ac.uk,2023-03-29T19:38:04.74+00:00,3,5dad4733-1522-4d40-b38e-815629c33073,complete-regulatory
3,hansi.hettiarachchi@bcu.ac.uk,2023-03-30T12:51:09.489+00:00,1,5dad4733-1522-4d40-b38e-815629c33073,complete-regulatory
4,amna.dridi@bcu.ac.uk,2023-03-29T19:53:06.397+00:00,3,fbfb9d28-29e2-4066-9012-db971816ef3c,other


In [7]:
ClassPivot = Classifications.pivot_table(index='example_id',columns='classname',values='annotator_id',aggfunc=len).fillna(0)
ClassPivot.head()

classname,complete-regulatory,other
example_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0cdc1d50-e346-469a-8528-546e35cf5b95,2.0,0.0
1e98cae3-27a1-4a20-82c8-7d0d1310da11,2.0,0.0
1f9970ac-b9a8-4e15-bdc9-7a247e0356b5,2.0,0.0
40b22cfb-7d06-4d18-972b-2706d689d552,0.0,2.0
48361556-520b-4a1f-9338-11d5292a6360,2.0,0.0


In [8]:
ExamplesWithClasses = pd.merge(Examples,ClassPivot,on='example_id')
ExamplesWithClasses.head()

Unnamed: 0,example_id,content,metadata,complete-regulatory,other
0,1e98cae3-27a1-4a20-82c8-7d0d1310da11,The structure of the exterior wall against the...,{'ID': '10_Finnish_Health-Humidity'},2.0,0.0
1,5dad4733-1522-4d40-b38e-815629c33073,in an evacuation area where the maximum number...,{'ID': '8_Finnish_FireSafety'},2.0,0.0
2,fbfb9d28-29e2-4066-9012-db971816ef3c,The selection of relevant critical situations ...,{'ID': '1_UK_Construction'},0.0,2.0
3,48361556-520b-4a1f-9338-11d5292a6360,A building with dwellings or accommodatio...,{'ID': '4_Finnish_AcousticEnvironment'},2.0,0.0
4,8315bdda-89cb-4351-94ce-31b074d3c4af,Distances may be exceeded by 20 per cent in a ...,{'ID': '6_Finnish_FireSafety'},2.0,0.0


In [9]:
class_columm_backward = -(ExamplesWithClasses.shape[1] - (ExamplesWithClasses.columns.get_loc("metadata") + 1))
class_columm_backward

-2

In [10]:
Classes = ExamplesWithClasses.iloc[:,class_columm_backward:]
Classes.head(5)

Unnamed: 0,complete-regulatory,other
0,2.0,0.0
1,2.0,0.0
2,0.0,2.0
3,2.0,0.0
4,2.0,0.0


In [11]:
ExamplesWithClasses['class'] = Classes.idxmax(axis=1)
ExamplesWithClasses.head(5)

Unnamed: 0,example_id,content,metadata,complete-regulatory,other,class
0,1e98cae3-27a1-4a20-82c8-7d0d1310da11,The structure of the exterior wall against the...,{'ID': '10_Finnish_Health-Humidity'},2.0,0.0,complete-regulatory
1,5dad4733-1522-4d40-b38e-815629c33073,in an evacuation area where the maximum number...,{'ID': '8_Finnish_FireSafety'},2.0,0.0,complete-regulatory
2,fbfb9d28-29e2-4066-9012-db971816ef3c,The selection of relevant critical situations ...,{'ID': '1_UK_Construction'},0.0,2.0,other
3,48361556-520b-4a1f-9338-11d5292a6360,A building with dwellings or accommodatio...,{'ID': '4_Finnish_AcousticEnvironment'},2.0,0.0,complete-regulatory
4,8315bdda-89cb-4351-94ce-31b074d3c4af,Distances may be exceeded by 20 per cent in a ...,{'ID': '6_Finnish_FireSafety'},2.0,0.0,complete-regulatory


In [12]:
ExamplesWithClasses['vote'] = Classes.max(axis=1)
ExamplesWithClasses.head(5)

Unnamed: 0,example_id,content,metadata,complete-regulatory,other,class,vote
0,1e98cae3-27a1-4a20-82c8-7d0d1310da11,The structure of the exterior wall against the...,{'ID': '10_Finnish_Health-Humidity'},2.0,0.0,complete-regulatory,2.0
1,5dad4733-1522-4d40-b38e-815629c33073,in an evacuation area where the maximum number...,{'ID': '8_Finnish_FireSafety'},2.0,0.0,complete-regulatory,2.0
2,fbfb9d28-29e2-4066-9012-db971816ef3c,The selection of relevant critical situations ...,{'ID': '1_UK_Construction'},0.0,2.0,other,2.0
3,48361556-520b-4a1f-9338-11d5292a6360,A building with dwellings or accommodatio...,{'ID': '4_Finnish_AcousticEnvironment'},2.0,0.0,complete-regulatory,2.0
4,8315bdda-89cb-4351-94ce-31b074d3c4af,Distances may be exceeded by 20 per cent in a ...,{'ID': '6_Finnish_FireSafety'},2.0,0.0,complete-regulatory,2.0


In [31]:
# ExamplesWithClasses.to_csv(classes_output, encoding='utf-8', index=False)

In [13]:
ExamplesWithClasses['decision'] = np.where(ExamplesWithClasses['vote'] >= majority_vote, 'finalised', 'pending')
ExamplesWithClasses.sort_values(by=['example_id'], inplace=True, ignore_index=True)
ExamplesWithClasses.head()

Unnamed: 0,example_id,content,metadata,complete-regulatory,other,class,vote,decision
0,0cdc1d50-e346-469a-8528-546e35cf5b95,The wall ties should have a horizontal spacing...,{'ID': '2_UK_Construction'},2.0,0.0,complete-regulatory,2.0,finalised
1,1e98cae3-27a1-4a20-82c8-7d0d1310da11,The structure of the exterior wall against the...,{'ID': '10_Finnish_Health-Humidity'},2.0,0.0,complete-regulatory,2.0,finalised
2,1f9970ac-b9a8-4e15-bdc9-7a247e0356b5,"The width of an exit must be at least 1,200 mi...",{'ID': '7_Finnish_FireSafety'},2.0,0.0,complete-regulatory,2.0,finalised
3,40b22cfb-7d06-4d18-972b-2706d689d552,Areas belonging to fire load category le...,{'ID': '5_Finnish_FireSafety'},0.0,2.0,other,2.0,finalised
4,48361556-520b-4a1f-9338-11d5292a6360,A building with dwellings or accommodatio...,{'ID': '4_Finnish_AcousticEnvironment'},2.0,0.0,complete-regulatory,2.0,finalised


In [14]:
ExamplesWithClasses.to_csv(classes_output, encoding='utf-8', index=False)

# Span Level
https://guide.lighttag.io/in-depth/working_with_results/#Annotations

In [23]:
all_annotations = sum(map(lambda x:x['annotations'],examples),[])

In [24]:
Annotations = pd.DataFrame(all_annotations)
Annotations.pivot_table(index='value',columns='tag',values='example_id',aggfunc=len).fillna(0).head()

tag,object,property,quality,value
value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
15,0.0,0.0,0.0,1.0
15 of the sum total of the floor plate areas of the buildings storeys that are fully or in part above the ground,0.0,0.0,0.0,1.0
2.0 m3/(h m2),0.0,0.0,0.0,1.0
200 m2,0.0,0.0,0.0,1.0
25,0.0,0.0,0.0,1.0


In [25]:
IAAData = pd.io.json.json_normalize(all_annotations,meta=['tagged_token_id','example_id','tag','value', 'start', 'end'],
                                                          record_path='annotated_by')
IAAData.head()

  IAAData = pd.io.json.json_normalize(all_annotations,meta=['tagged_token_id','example_id','tag','value', 'start', 'end'],


Unnamed: 0,annotator,timestamp,annotator_id,tagged_token_id,example_id,tag,value,start,end
0,maria.hedblom@ju.se,2023-04-28T11:32:53.116+00:00,4,e462555a-a413-469f-b3b1-4a165a3ee2ba,eeb4239f-3819-475c-ad20-756bed80c4ff,object,buildings ventilation system,130,158
1,pooyan.parsafard@gmail.com,2023-04-27T22:13:27.199+00:00,6,e462555a-a413-469f-b3b1-4a165a3ee2ba,eeb4239f-3819-475c-ad20-756bed80c4ff,object,buildings ventilation system,130,158
2,hansi.hettiarachchi@bcu.ac.uk,2023-04-28T18:57:08.095+00:00,1,e8668ccd-0035-47a5-acaa-fbf9f045d2d5,eeb4239f-3819-475c-ad20-756bed80c4ff,quality,In reference heat loss calculations,0,35
3,maria.hedblom@ju.se,2023-04-28T11:32:15.284+00:00,4,10edc284-0858-45a4-98ea-4131f5f686ba,eeb4239f-3819-475c-ad20-756bed80c4ff,property,annual efficiency of heat recovery,59,93
4,hansi.hettiarachchi@bcu.ac.uk,2023-04-28T18:57:01.385+00:00,1,31628a5b-2adf-4407-bb99-df9b09ab62d6,eeb4239f-3819-475c-ad20-756bed80c4ff,object,ventilation system,140,158


In [26]:
Annotations = IAAData[['example_id', 'tagged_token_id', 'annotator_id', 'start', 'end', 'value', 'tag']]
Annotations.head()

Unnamed: 0,example_id,tagged_token_id,annotator_id,start,end,value,tag
0,eeb4239f-3819-475c-ad20-756bed80c4ff,e462555a-a413-469f-b3b1-4a165a3ee2ba,4,130,158,buildings ventilation system,object
1,eeb4239f-3819-475c-ad20-756bed80c4ff,e462555a-a413-469f-b3b1-4a165a3ee2ba,6,130,158,buildings ventilation system,object
2,eeb4239f-3819-475c-ad20-756bed80c4ff,e8668ccd-0035-47a5-acaa-fbf9f045d2d5,1,0,35,In reference heat loss calculations,quality
3,eeb4239f-3819-475c-ad20-756bed80c4ff,10edc284-0858-45a4-98ea-4131f5f686ba,4,59,93,annual efficiency of heat recovery,property
4,eeb4239f-3819-475c-ad20-756bed80c4ff,31628a5b-2adf-4407-bb99-df9b09ab62d6,1,140,158,ventilation system,object


In [27]:
Temp1 = Annotations.groupby('tagged_token_id').annotator_id.apply(set).reset_index()

Temp2 = Annotations.groupby('tagged_token_id').annotator_id.count().reset_index()
Temp2 = Temp2.rename(columns={"annotator_id": "vote"})

Vote = pd.merge(Temp1,Temp2,on='tagged_token_id')
Vote.head()

Unnamed: 0,tagged_token_id,annotator_id,vote
0,01b35266-4c3f-47b8-b822-4c3f23777d42,{4},1
1,02ba1bcb-aa03-40ae-9d63-3eddb0102d11,{6},1
2,0359f9a0-73bb-4101-834b-6d7539c392b1,{4},1
3,03b58272-bdd1-4182-9860-06d6df39111a,{4},1
4,0506bc11-8fe3-4eba-8f65-ad379c5c06da,"{4, 6}",2


In [28]:
Spans = Annotations.drop(['annotator_id'], axis=1)
Spans.drop_duplicates(inplace=True)
print(Spans.shape)
Spans.head()

(247, 6)


Unnamed: 0,example_id,tagged_token_id,start,end,value,tag
0,eeb4239f-3819-475c-ad20-756bed80c4ff,e462555a-a413-469f-b3b1-4a165a3ee2ba,130,158,buildings ventilation system,object
2,eeb4239f-3819-475c-ad20-756bed80c4ff,e8668ccd-0035-47a5-acaa-fbf9f045d2d5,0,35,In reference heat loss calculations,quality
3,eeb4239f-3819-475c-ad20-756bed80c4ff,10edc284-0858-45a4-98ea-4131f5f686ba,59,93,annual efficiency of heat recovery,property
4,eeb4239f-3819-475c-ad20-756bed80c4ff,31628a5b-2adf-4407-bb99-df9b09ab62d6,140,158,ventilation system,object
5,eeb4239f-3819-475c-ad20-756bed80c4ff,793914fd-7ea1-45f9-990b-0baaa34f41ec,130,139,buildings,object


In [29]:
SpanVotes = pd.merge(Spans, Vote, on='tagged_token_id')
print(SpanVotes.shape)
SpanVotes.head(20)

(247, 8)


Unnamed: 0,example_id,tagged_token_id,start,end,value,tag,annotator_id,vote
0,eeb4239f-3819-475c-ad20-756bed80c4ff,e462555a-a413-469f-b3b1-4a165a3ee2ba,130,158,buildings ventilation system,object,"{4, 6}",2
1,eeb4239f-3819-475c-ad20-756bed80c4ff,e8668ccd-0035-47a5-acaa-fbf9f045d2d5,0,35,In reference heat loss calculations,quality,{1},1
2,eeb4239f-3819-475c-ad20-756bed80c4ff,10edc284-0858-45a4-98ea-4131f5f686ba,59,93,annual efficiency of heat recovery,property,{4},1
3,eeb4239f-3819-475c-ad20-756bed80c4ff,31628a5b-2adf-4407-bb99-df9b09ab62d6,140,158,ventilation system,object,{1},1
4,eeb4239f-3819-475c-ad20-756bed80c4ff,793914fd-7ea1-45f9-990b-0baaa34f41ec,130,139,buildings,object,{1},1
5,eeb4239f-3819-475c-ad20-756bed80c4ff,7a0d3f18-a514-4043-bbfa-f8ef610b0d91,3,35,reference heat loss calculations,quality,{6},1
6,eeb4239f-3819-475c-ad20-756bed80c4ff,7f1ae410-339c-45c8-bb31-c4fb9373bae8,59,122,annual efficiency of heat recovery from ventil...,property,"{1, 6}",2
7,eeb4239f-3819-475c-ad20-756bed80c4ff,89f9f8f8-e677-458c-b6e0-8f0b7ee0c625,3,35,reference heat loss calculations,object,{4},1
8,eeb4239f-3819-475c-ad20-756bed80c4ff,e2d1b75a-f90c-4637-a880-6448770535a3,168,170,55,value,"{1, 4, 6}",3
9,efd2d371-97de-4d5e-af06-e404ff71f316,d349f07c-9f86-47db-a512-b4fdfac2c856,44,53,heat loss,property,{4},1


In [30]:
ExamplesWithAnnotations = pd.merge(Examples,SpanVotes,on='example_id')
ExamplesWithAnnotations.sort_values(by=['example_id', 'start'], inplace=True, ignore_index=True)
ExamplesWithAnnotations.head()

Unnamed: 0,example_id,content,metadata,tagged_token_id,start,end,value,tag,annotator_id,vote
0,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},8bd0ea1f-7fff-42fc-ab0a-8ce9bd6f40f7,4,11,various,quality,{6},1
1,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},f0f867fc-95a8-4b37-baba-ce4082b0c3b5,12,19,storeys,object,"{4, 6}",2
2,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},24ada91a-9e8e-402a-a63c-f798424c752c,12,19,storeys,property,{1},1
3,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},c3d675ef-0834-4c90-90c1-82970da830f7,21,37,basement storeys,property,{1},1
4,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},46e053e9-d2a2-4375-80ea-1aa449bd76cf,21,29,basement,quality,{6},1


In [31]:
# ExamplesWithAnnotations['decision'] = np.where(ExamplesWithAnnotations['vote'] >= majority_vote, 'finalised', 'pending')
# ExamplesWithAnnotations.sort_values(by=['example_id', 'start'], inplace=True, ignore_index=True)

FinalisedAnnotations = ExamplesWithAnnotations[ExamplesWithAnnotations['vote'] >= majority_vote].copy()
FinalisedAnnotations.sort_values(by=['example_id', 'start'], inplace=True, ignore_index=True)
FinalisedAnnotations['decision'] = ['finalised' for i in range(0, FinalisedAnnotations.shape[0])]
print(FinalisedAnnotations.shape)
FinalisedAnnotations.head()

(59, 11)


Unnamed: 0,example_id,content,metadata,tagged_token_id,start,end,value,tag,annotator_id,vote,decision
0,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},f0f867fc-95a8-4b37-baba-ce4082b0c3b5,12,19,storeys,object,"{4, 6}",2,finalised
1,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},d44fb976-39fc-4371-a076-1414fc1735f8,46,51,attic,object,"{1, 4, 6}",3,finalised
2,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},c5c40d8a-938a-4919-98c8-39663ff437d6,95,165,made into separate fire compartments (fire com...,quality,"{1, 6}",2,finalised
3,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},8300e08e-e84c-405c-b8be-907376d7bc7d,215,228,fire class P3,object,"{1, 4}",2,finalised
4,103f99ad-884b-4bae-9f22-ccb1c5bc3d26,In an open garage of no more than five storeys...,{'ID': '110_Finnish_FireSafety'},8e43a172-80ca-49be-90e1-a725ffeed872,6,10,open,quality,"{1, 4, 6}",3,finalised


In [32]:
PendingAnnotations = ExamplesWithAnnotations[ExamplesWithAnnotations['vote'] < majority_vote].copy()
PendingAnnotations.sort_values(by=['example_id', 'start'], inplace=True, ignore_index=True)
PendingAnnotations['decision'] = ['pending' for i in range(0, PendingAnnotations.shape[0])]
print(PendingAnnotations.shape)
PendingAnnotations.head(10)

(188, 11)


Unnamed: 0,example_id,content,metadata,tagged_token_id,start,end,value,tag,annotator_id,vote,decision
0,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},8bd0ea1f-7fff-42fc-ab0a-8ce9bd6f40f7,4,11,various,quality,{6},1,pending
1,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},24ada91a-9e8e-402a-a63c-f798424c752c,12,19,storeys,property,{1},1,pending
2,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},c3d675ef-0834-4c90-90c1-82970da830f7,21,37,basement storeys,property,{1},1,pending
3,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},46e053e9-d2a2-4375-80ea-1aa449bd76cf,21,29,basement,quality,{6},1,pending
4,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},53b05c45-b304-43b7-b159-f649cf8f00f8,21,37,basement storeys,object,{4},1,pending
5,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},ab239ad4-425d-4e7c-bbd8-bfe101eb42e4,30,37,storeys,object,{6},1,pending
6,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},bef52f33-110d-424a-9899-ab9eb9d485c6,57,86,P1 and P2 fire class building,object,{4},1,pending
7,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},edab285d-b575-438c-ba51-eca3cf0cce37,57,59,P1,object,{6},1,pending
8,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},69fb373c-c9f1-40bc-8bd7-0dc3e59fe910,57,77,P1 and P2 fire class,quality,{1},1,pending
9,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},575eefad-f9c4-4312-9932-52db3ac27010,64,66,P2,object,{6},1,pending


In [33]:
# recognise annotations that are already covered by majority votes
for index, row in PendingAnnotations.iterrows():
  if row['example_id'] != id:
    id = row['example_id']
    temp_df = FinalisedAnnotations[FinalisedAnnotations['example_id'] == row['example_id']]
    ranges = []
    for temp_index, temp_row in temp_df.iterrows():
      ranges.append(range(temp_row['start'], temp_row['end']))
  r1 = range(row['start'], row['end'])
  has_overlap = False
  # if a pending annotation gets overlaps with a finished annotation, mark the pending as covered
  for r in ranges:
    overlaps = set(r1).intersection(r)
    if len(overlaps) > 0:
      has_overlap = True
      break
  if has_overlap:
    PendingAnnotations.at[index, 'decision'] = 'covered'

PendingAnnotations.head(10)

Unnamed: 0,example_id,content,metadata,tagged_token_id,start,end,value,tag,annotator_id,vote,decision
0,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},8bd0ea1f-7fff-42fc-ab0a-8ce9bd6f40f7,4,11,various,quality,{6},1,pending
1,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},24ada91a-9e8e-402a-a63c-f798424c752c,12,19,storeys,property,{1},1,covered
2,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},c3d675ef-0834-4c90-90c1-82970da830f7,21,37,basement storeys,property,{1},1,pending
3,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},46e053e9-d2a2-4375-80ea-1aa449bd76cf,21,29,basement,quality,{6},1,pending
4,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},53b05c45-b304-43b7-b159-f649cf8f00f8,21,37,basement storeys,object,{4},1,pending
5,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},ab239ad4-425d-4e7c-bbd8-bfe101eb42e4,30,37,storeys,object,{6},1,pending
6,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},bef52f33-110d-424a-9899-ab9eb9d485c6,57,86,P1 and P2 fire class building,object,{4},1,pending
7,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},edab285d-b575-438c-ba51-eca3cf0cce37,57,59,P1,object,{6},1,pending
8,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},69fb373c-c9f1-40bc-8bd7-0dc3e59fe910,57,77,P1 and P2 fire class,quality,{1},1,pending
9,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},575eefad-f9c4-4312-9932-52db3ac27010,64,66,P2,object,{6},1,pending


In [34]:
ProcessedAnnotations = pd.concat([FinalisedAnnotations, PendingAnnotations])
ProcessedAnnotations.sort_values(by=['example_id', 'start'], inplace=True, ignore_index=True)
ProcessedAnnotations.head()

Unnamed: 0,example_id,content,metadata,tagged_token_id,start,end,value,tag,annotator_id,vote,decision
0,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},8bd0ea1f-7fff-42fc-ab0a-8ce9bd6f40f7,4,11,various,quality,{6},1,pending
1,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},f0f867fc-95a8-4b37-baba-ce4082b0c3b5,12,19,storeys,object,"{4, 6}",2,finalised
2,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},24ada91a-9e8e-402a-a63c-f798424c752c,12,19,storeys,property,{1},1,covered
3,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},c3d675ef-0834-4c90-90c1-82970da830f7,21,37,basement storeys,property,{1},1,pending
4,00f47e54-1414-4199-8909-3a9cc10f9309,"The various storeys, basement storeys and the ...",{'ID': '101_Finnish_FireSafety'},46e053e9-d2a2-4375-80ea-1aa449bd76cf,21,29,basement,quality,{6},1,pending


In [35]:
ProcessedAnnotations.to_csv(spans_output, encoding='utf-8', index=False)