In [None]:
import os

# Record the directory the notebook kernel is currently running from.
current_working_directory = os.getcwd()

# Echo it to the cell output (the saved run shows /content).
print(current_working_directory)

/content


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every annotation path used by the
# later cells lives under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the shared "Annotated" folder, then each annotator's "Folktales v2 <name>"
# folder in turn. The print("=" * 50) calls are separators between the listings.
!ls /content/drive/Shareddrives/Folklore/'Folklore and Fauna Project'/data/Annotated
print("=" * 50)
!ls /content/drive/Shareddrives/Folklore/'Folklore and Fauna Project'/data/Annotated/'Folktales v2 Emily'
print("=" * 50)
!ls /content/drive/Shareddrives/Folklore/'Folklore and Fauna Project'/data/Annotated/'Folktales v2 Kasey'
print("=" * 50)
!ls /content/drive/Shareddrives/Folklore/'Folklore and Fauna Project'/data/Annotated/'Folktales v2 Keer'
print("=" * 50)
!ls /content/drive/Shareddrives/Folklore/'Folklore and Fauna Project'/data/Annotated/'Folktales v2 Joyce'

'Folktales v2 Emily'  'Folktales v2 Joyce.tar.gz'  'Folktales v2 Keer'
'Folktales v2 Joyce'  'Folktales v2 Kasey'	   'Folktales v2_Keer.tar.gz'
annotation.conf  Cherokee  Filipino  Japanese  Maori  Seneca
annotation.conf  annotation.confï€ºZone.Identifier  Cherokee  Filipino  Japanese	Maori  Seneca
annotation.conf  Cherokee  Filipino  Japanese  Maori  Seneca
annotation.conf  Cherokee  Filipino  Japanese  Maori  Seneca


In [None]:
def dict_of_annotations(path):
  '''
  Parse one .ann file into a mapping from flora/fauna label to its motifs.

  Lines starting with 'T' declare a tag: the first whitespace-separated field
  is the tag id and the second is the flora/fauna label. Lines starting with
  'A' attach a motif to a tag: the third field is the tag id and the last
  field is the motif value. All other lines are ignored.

  path: filepath for the .ann file
  return: {flora/fauna: tuple(protag/antag, good/evil)}
          The tuple holds the motif values in the order they appear in the file.
          It has a single element when the annotator only added one of the two
          motifs, and the value is None when the label has no motif at all.
  '''
  file_dict = {}  # dict to return: label -> motif tuple (or None)
  tag_dict = {}  # dict mapping each tag id to its flora/fauna label
  motif_tuple = ()  # motifs collected so far for the tag currently being read
  prev_attributed_tag = None  # tag id of the previous 'A' line, to detect when the tag changes

  with open(path, 'r') as file:
    for line in file:
      fields = line.split()

      if line[0] == 'T':
        # Register the label without resetting motifs that an earlier mention
        # of the same label already received.
        file_dict.setdefault(fields[1], None)
        tag_dict[fields[0]] = fields[1]

      elif line[0] == 'A':
        curr_attributed_tag = fields[2]
        # edgecase: the previous tag only got one of the two motifs, so its
        # half-filled tuple is still pending; store it before starting the new tag
        if prev_attributed_tag != curr_attributed_tag and len(motif_tuple) != 0:
          file_dict[tag_dict[prev_attributed_tag]] = motif_tuple
          motif_tuple = ()
        # first motif for this tag
        if len(motif_tuple) == 0:
          motif_tuple = (fields[-1],)
        # second motif: the pair is complete, store it and start over
        elif len(motif_tuple) == 1:
          motif_tuple += (fields[-1],)
          file_dict[tag_dict[curr_attributed_tag]] = motif_tuple
          motif_tuple = ()
        prev_attributed_tag = curr_attributed_tag

  # edgecase: the file ended while a single-motif tuple was still pending;
  # without this flush the last annotated tag would be reported as None
  if motif_tuple:
    file_dict[tag_dict[prev_attributed_tag]] = motif_tuple

  return file_dict

In [None]:
import os
import pprint

# Exploratory cell: walk one annotator's folder and print every directory visited.

# (first annotator, second annotator) pairs; not used by the loop in this cell
annotator_pairs = [("Emily", "Kasey"), ("Keer", "Joyce")]
# names of the per-culture sub-folders; not used by the loop in this cell
cultures = ["Cherokee", "Filipino", "Maori", "Seneca", "Japanese"]
# left empty here
ann_file_dict = {}

# root of Emily's annotation tree on the mounted Drive
ann_dir = "/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily"

for root, dirs, files in os.walk(ann_dir):
  print(root)
  for file in files:
    # full path of each file; computed but not used further in this cell
    filename = os.path.join(root, file)



/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Filipino
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Cherokee
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Seneca
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Japanese
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Maori


In [None]:
import os
import pprint

# (first annotator, second annotator) pairs whose annotations are compared
annotator_pairs = [("Emily", "Kasey"), ("Keer", "Joyce")]
# names of the per-culture sub-folders
cultures = ["Cherokee", "Filipino", "Maori", "Seneca", "Japanese"]
# annotator pair -> list of (first annotator's .ann path, second annotator's .ann path)
ann_file_dict = {}

# folder holding one "Folktales v2 <annotator>" tree per annotator
annotated_root = "/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated"

for annotator_pair in annotator_pairs:
    ann_file_dict[annotator_pair] = []
    ann_dir = os.path.join(annotated_root, f"Folktales v2 {annotator_pair[0]}")
    partner_dir = os.path.join(annotated_root, f"Folktales v2 {annotator_pair[1]}")
    for root, dirs, files in os.walk(ann_dir):
        for file in files:
            if file.endswith(".ann"):
                filename = os.path.join(root, file)
                # Re-root the same relative path under the partner's folder. A plain
                # str.replace() on the full path would also rewrite the annotator's
                # name if it happened to occur in a sub-folder or story filename.
                partner_filename = os.path.join(partner_dir, os.path.relpath(filename, ann_dir))
                ann_file_dict[annotator_pair].append((filename, partner_filename))

def fix_annotation(annotation):
  '''
  Expand a single-motif annotation into a full (role, alignment) pair.

  annotation: sequence whose first element is the one motif value that was annotated
  return: (role, "Neutral") when the value is a role,
          ("Default", alignment) when the value is an alignment,
          None when the value is neither
  '''
  motif = annotation[0]

  # only the role was annotated: fill in the neutral alignment
  if motif in ("Protagonist", "Default", "Antagonist"):
    return (motif, "Neutral")

  # only the alignment was annotated: fill in the default role
  if motif in ("Good", "Neutral", "Evil"):
    return ("Default", motif)

  return None

#cohen's kappa
def count_agreements(file1, file2, verbose = True):
  '''
  Count how often two annotation files agree on role and on alignment.

  file1, file2: filepaths of the two .ann files to compare
  verbose: when true, print file1, every compared entry and the final counts
  return: (role_agreements, alignment_agreements, total), where total is the
          number of flora/fauna labels that were actually compared
  '''
  if verbose:
    print(file1)

  unmarked = ('Default', 'Neutral')  # value assumed when an annotator gave no motif
  first = dict_of_annotations(file1)
  second = dict_of_annotations(file2)

  role_agreements = alignment_agreements = total = 0

  for key in set(first.keys()) | set(second.keys()):
    # .get() is falsy both for a label the annotator never tagged and for one tagged without motifs
    motifs1 = first.get(key)
    motifs2 = second.get(key)

    # neither annotator gave this label any motif: nothing to compare
    if not motifs1 and not motifs2:
      continue

    # one annotator skipped it: assume the default values for that side
    if not motifs1:
      motifs1 = unmarked
    if not motifs2:
      motifs2 = unmarked

    # only one of the two motifs was given: fill in the missing half
    if len(motifs1) == 1:
      motifs1 = fix_annotation(motifs1)
    if len(motifs2) == 1:
      motifs2 = fix_annotation(motifs2)

    # both sides ended up fully default: not counted
    if motifs1 == unmarked and motifs2 == unmarked:
      continue

    if verbose:
      print(key, motifs1, motifs2)

    role_agreements += int(motifs1[0] == motifs2[0])
    alignment_agreements += int(motifs1[1] == motifs2[1])
    total += 1

  if verbose:
    print(role_agreements, alignment_agreements, total)
  return (role_agreements, alignment_agreements, total)

# note: had to delete a file because Joyce had it but Keer didn't
def view_disagreements(file1, file2):
  '''
  Pretty-print the motifs of both annotators for every flora/fauna label in file1.

  file1, file2: filepaths of the two .ann files to compare
  return: None; the comparison is printed, one list per label:
          [label, motifs from file1, motifs from file2] when file2 has the label,
          [label, motifs from file1] otherwise.
          Labels that appear only in file2 are not printed.
  '''
  ann_dict1 = dict_of_annotations(file1)
  ann_dict2 = dict_of_annotations(file2)

  for key, motifs1 in ann_dict1.items():
    if key in ann_dict2:
      pprint.pprint([key, motifs1, ann_dict2[key]])
    else:  # edgecase: annotator adds additional tag + attribute even though they're not supposed to
      pprint.pprint([key, motifs1])

import numpy as np
# Sum the agreement counts over all files of each annotator pair and print a kappa per motif.
for annotator_pair in annotator_pairs:
  print(annotator_pair)
  agreement_triple = np.array([0, 0, 0])  # role_agreements, alignment_agreements, total
  for file_pair in ann_file_dict[annotator_pair]:
    # A story that only the first annotator has cannot be compared; report and
    # skip it instead of letting open() abort the whole run.
    if not os.path.exists(file_pair[1]):
      print("Skipping, no matching file from second annotator: ", file_pair[0])
      continue
    # Only files whose text contains "A1" are counted
    with open(file_pair[0], "r") as f:
      has_attributes = "A1" in f.read()
    if has_attributes:
      agreement_triple += np.array(count_agreements(file_pair[0], file_pair[1]))

  print("Role, Alignment, Total: ", agreement_triple)

  # kappa: (p_o - p_e) / (1 - p_e), with p_e fixed at 1/3 (three categories per motif)
  # NOTE(review): textbook Cohen's kappa estimates p_e from each annotator's label
  # frequencies rather than using a fixed value - confirm the fixed 1/3 is intended.
  if agreement_triple[2] == 0:
    # nothing was compared, so the observed agreement is undefined
    role_kappa = alignment_kappa = float("nan")
  else:
    role_kappa = (agreement_triple[0]/agreement_triple[2] - 1/3) / (2/3)
    alignment_kappa = (agreement_triple[1]/agreement_triple[2] - 1/3) / (2/3)
  print("Role kappa: " , role_kappa)
  print("Alignment kappa: " , alignment_kappa)


('Emily', 'Kasey')
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Filipino/HowtheTinguianLearnedtoPlant.ann
Canine ('Default', 'Neutral') ('Default', 'Evil')
Fish ('Default', 'Neutral') ('Default', 'Good')
2 0 2
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Filipino/Sogsogot.ann
0 0 0
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Filipino/The Boy who became a Stone.ann
Bird ('Antagonist', 'Neutral') ('Default', 'Neutral')
0 1 1
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Filipino/The Carabao and the Shell.ann
Ungulate ('Protagonist', 'Neutral') ('Protagonist', 'Neutral')
1 1 1
/content/drive/Shareddrives/Folklore/Folklore and Fauna Project/data/Annotated/Folktales v2 Emily/Filipino/The Dogedog.ann
Crocodilia ('Default', 'Neutral') ('Default', 'Good')
Insect ('Default', 'Neutral') 