In [1]:
import ast
from collections import Counter

# Ground-truth entities, one set per PII category. Each member is the triple
# (tup[0], tup[1], tup[3]) of a parsed line, i.e. the category label (tup[2])
# is dropped once the entity has been routed to its set.
name_set_true = set()
url_set_true = set()
email_set_true = set()
phoneNum_set_true = set()

# Ground-truth label -> set that collects it. Labels missing from this table
# are still counted in `true_categories` but are not stored in any set.
_true_sets_by_category = {
    'NAME_STUDENT': name_set_true,
    'URL_PERSONAL': url_set_true,
    'EMAIL': email_set_true,
    'PHONE_NUM': phoneNum_set_true,
}

# processing true categories
true_categories = Counter()
with open('data/pii_entities.txt', 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # ast.literal_eval('') raises SyntaxError, so tolerate blank lines
            # (e.g. a trailing empty line at the end of the file).
            continue

        # convert the string to a tuple
        tup = ast.literal_eval(line)

        # increment this category's count in dictionary
        category = tup[2]
        true_categories[category] += 1

        target_set = _true_sets_by_category.get(category)
        if target_set is not None:
            target_set.add((tup[0], tup[1], tup[3]))

# print categories
print("All true entites summary:")
for key, count in true_categories.items():
    print(f"{key}: {count}")

All true entites summary:
NAME_STUDENT: 4394
URL_PERSONAL: 352
EMAIL: 111
PHONE_NUM: 14


In [2]:
def calculate_metrics(synthetic_set, analyzed_set):
    """Count span-level true positives, false positives and false negatives.

    Both arguments are iterables of ``(doc_idx, text, (start, end))`` triples.
    Entities are compared on ``(doc_idx, start, end)`` only: the middle
    element (the entity text) is ignored, so two entities covering the same
    span in the same document match even if their text differs.

    Args:
        synthetic_set: the reference (ground-truth) entities.
        analyzed_set: the detected entities.

    Returns:
        A ``(tp_count, fp_count, fn_count)`` tuple of ints.
    """
    def span_keys(entities):
        # Reduce each entity to its location, dropping the text.
        return {(doc, begin, stop) for doc, _, (begin, stop) in entities}

    expected = span_keys(synthetic_set)
    found = span_keys(analyzed_set)

    hits = len(expected & found)      # detected and expected
    spurious = len(found - expected)  # detected but not expected
    missed = len(expected - found)    # expected but not detected

    return hits, spurious, missed

def compute_precision_recall_f1_f5(tp, fp, fn):
    """Derive precision, recall, F1 and F5 from raw TP / FP / FN counts.

    Any ratio whose denominator would be zero is reported as ``0`` instead of
    raising. F5 is the F-beta score with beta = 5.

    Returns:
        A ``(precision, recall, f1, f5)`` tuple.
    """
    predicted_total = tp + fp
    actual_total = tp + fn

    if predicted_total > 0:
        precision = tp / predicted_total
    else:
        precision = 0

    if actual_total > 0:
        recall = tp / actual_total
    else:
        recall = 0

    if precision + recall > 0:
        product = precision * recall
        f1 = 2 * product / (precision + recall)
        f5 = (1 + 5**2) * product / ((5**2 * precision) + recall)
    else:
        # Both scores are undefined when precision and recall are both zero.
        f1 = 0
        f5 = 0

    return precision, recall, f1, f5


In [3]:
def evaluate_detected_entities(input_detected_entities):
    """Score a file of detected entities against the ground-truth sets.

    Each non-blank line of ``input_detected_entities`` is parsed with
    ``ast.literal_eval``; the code reads indices 0, 1, 2 (category) and 3 of
    the resulting tuple. Entities labelled 'PERSON', 'URL', 'EMAIL_ADDRESS'
    and 'PHONE_NUMBER' are collected as ``(tup[0], tup[1], tup[3])``; other
    labels are only counted.

    Side effects:
        * Rebinds the module-level globals ``name_set_detected``,
          ``url_set_detected``, ``email_set_detected`` and
          ``phoneNum_set_detected`` to fresh sets for this file.
        * Prints a per-category count and one metrics line per category.

    Relies on the module-level ground-truth sets (``name_set_true``,
    ``url_set_true``, ``email_set_true``, ``phoneNum_set_true``) and on
    ``calculate_metrics`` / ``compute_precision_recall_f1_f5`` being defined.

    Args:
        input_detected_entities: path of the detections file to score.

    Returns:
        None.
    """
    global name_set_detected
    global url_set_detected
    global email_set_detected
    global phoneNum_set_detected

    name_set_detected = set()
    url_set_detected = set()
    email_set_detected = set()
    phoneNum_set_detected = set()

    # Detector label -> set that collects it.
    detected_sets = {
        'PERSON': name_set_detected,
        'URL': url_set_detected,
        'EMAIL_ADDRESS': email_set_detected,
        'PHONE_NUMBER': phoneNum_set_detected,
    }

    # processing detected categories
    detected_categories = Counter()
    with open(input_detected_entities, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                # ast.literal_eval('') raises SyntaxError; skip blank lines.
                continue

            # convert the string to a tuple
            tup = ast.literal_eval(line)

            # increment this category's count in dictionary
            category = tup[2]
            detected_categories[category] += 1

            target_set = detected_sets.get(category)
            if target_set is not None:
                target_set.add((tup[0], tup[1], tup[3]))

    print("All detected entites summary:")
    # print categories
    for key, count in detected_categories.items():
        print(f"{key}: {count}")

    # (report label, ground-truth set, detected set) in report order.
    comparisons = (
        ("NAME_STUDENT", name_set_true, name_set_detected),
        ("URL_PERSONAL", url_set_true, url_set_detected),
        ("EMAIL", email_set_true, email_set_detected),
        ("PHONE_NUM", phoneNum_set_true, phoneNum_set_detected),
    )

    # Compute every row before printing so a failure leaves no partial table.
    rows = []
    for label, true_set, detected_set in comparisons:
        tp, fp, fn = calculate_metrics(true_set, detected_set)
        precision, recall, f1, f5 = compute_precision_recall_f1_f5(tp, fp, fn)
        rows.append((label, precision, recall, f1, f5, tp, fp, fn))

    print("\nSummary of Precision, Recall, F1 Score, and F5 Score")
    for label, precision, recall, f1, f5, tp, fp, fn in rows:
        # Labels are left-justified to 16 columns so the "--" separators align.
        print(f"{label:<16}-- Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, F5 Score: {f5:.4f}, TP: {tp}, FP: {fp}, FN: {fn}")

In [35]:
# Score the detections stored in output/pii_detected_lg.txt against the ground-truth sets.
evaluate_detected_entities('output/pii_detected_lg.txt')

All detected entites summary:
PERSON: 18623
URL: 4254
EMAIL_ADDRESS: 132
PHONE_NUMBER: 85

Summary of Precision, Recall, F1 Score, and F5 Score
NAME_STUDENT    -- Precision: 0.1623, Recall: 0.6878, F1 Score: 0.2626, F5 Score: 0.6116, TP: 3022, FP: 15601, FN: 1372
URL_PERSONAL    -- Precision: 0.0691, Recall: 0.8352, F1 Score: 0.1277, F5 Score: 0.5856, TP: 294, FP: 3960, FN: 58
EMAIL           -- Precision: 0.8333, Recall: 0.9910, F1 Score: 0.9053, F5 Score: 0.9838, TP: 110, FP: 22, FN: 1
PHONE_NUM       -- Precision: 0.1647, Recall: 1.0000, F1 Score: 0.2828, F5 Score: 0.8368, TP: 14, FP: 71, FN: 0


In [38]:
# Score the detections stored in output/pii_detected_trf.txt against the ground-truth sets.
evaluate_detected_entities('output/pii_detected_trf.txt')

All detected entites summary:
PERSON: 15330
URL: 4254
EMAIL_ADDRESS: 132
PHONE_NUMBER: 85

Summary of Precision, Recall, F1 Score, and F5 Score
NAME_STUDENT    -- Precision: 0.2367, Recall: 0.8257, F1 Score: 0.3679, F5 Score: 0.7535, TP: 3628, FP: 11702, FN: 766
URL_PERSONAL    -- Precision: 0.0686, Recall: 0.8295, F1 Score: 0.1268, F5 Score: 0.5816, TP: 292, FP: 3962, FN: 60
EMAIL           -- Precision: 0.8333, Recall: 0.9910, F1 Score: 0.9053, F5 Score: 0.9838, TP: 110, FP: 22, FN: 1
PHONE_NUM       -- Precision: 0.1647, Recall: 1.0000, F1 Score: 0.2828, F5 Score: 0.8368, TP: 14, FP: 71, FN: 0


In [40]:
# Score the detections stored in others/pii_detected_trf_threshold.txt against the ground-truth sets.
evaluate_detected_entities('others/pii_detected_trf_threshold.txt')

All detected entites summary:
PERSON: 15330
URL: 1122
EMAIL_ADDRESS: 132
PHONE_NUMBER: 9

Summary of Precision, Recall, F1 Score, and F5 Score
NAME_STUDENT    -- Precision: 0.2367, Recall: 0.8257, F1 Score: 0.3679, F5 Score: 0.7535, TP: 3628, FP: 11702, FN: 766
URL_PERSONAL    -- Precision: 0.2602, Recall: 0.8295, F1 Score: 0.3962, F5 Score: 0.7652, TP: 292, FP: 830, FN: 60
EMAIL           -- Precision: 0.8333, Recall: 0.9910, F1 Score: 0.9053, F5 Score: 0.9838, TP: 110, FP: 22, FN: 1
PHONE_NUM       -- Precision: 0.4444, Recall: 0.2857, F1 Score: 0.3478, F5 Score: 0.2897, TP: 4, FP: 5, FN: 10


In [42]:
# Score the detections stored in output/pii_detected_trf_filtered.txt against the ground-truth sets.
evaluate_detected_entities('output/pii_detected_trf_filtered.txt')

All detected entites summary:
PERSON: 15330
URL: 1126
EMAIL_ADDRESS: 132
PHONE_NUMBER: 85

Summary of Precision, Recall, F1 Score, and F5 Score
NAME_STUDENT    -- Precision: 0.2367, Recall: 0.8257, F1 Score: 0.3679, F5 Score: 0.7535, TP: 3628, FP: 11702, FN: 766
URL_PERSONAL    -- Precision: 0.2593, Recall: 0.8295, F1 Score: 0.3951, F5 Score: 0.7649, TP: 292, FP: 834, FN: 60
EMAIL           -- Precision: 0.8333, Recall: 0.9910, F1 Score: 0.9053, F5 Score: 0.9838, TP: 110, FP: 22, FN: 1
PHONE_NUM       -- Precision: 0.1647, Recall: 1.0000, F1 Score: 0.2828, F5 Score: 0.8368, TP: 14, FP: 71, FN: 0


In [None]:
# TODO: Run the preceding cells and then this cell to score output/pii_detected_gpt.txt.
# The cells after this one do not need to be run.
evaluate_detected_entities('output/pii_detected_gpt.txt')

In [5]:
def print_likely_successful_entities(true_set, detected_set, category):
    """Print the false-negative entities and flag likely near-misses.

    A ground-truth entity counts as a false negative here when the exact
    ``(doc_idx, text, (start, end))`` triple is absent from ``detected_set``.
    For each one, the detections of the same document are searched for a span
    that shares its start and ends later, or shares its end and starts
    earlier (i.e. the detection strictly contains the true span on one side).
    The first such detection is printed next to the entity.

    NOTE(review): membership is tested on the full triple including the text,
    whereas ``calculate_metrics`` compares spans only -- confirm this
    difference is intended.

    Args:
        true_set: ground-truth ``(doc_idx, text, (start, end))`` triples.
        detected_set: detected triples of the same shape.
        category: label used only in the printed summary line.

    Returns:
        None. Output is printed, ordered by document index.
    """
    # Index detections by document once, so each false negative is compared
    # only with detections from its own document instead of the whole set.
    detected_by_doc = {}
    for entity_detected in detected_set:
        detected_by_doc.setdefault(entity_detected[0], []).append(entity_detected)

    report_rows = []  # (doc_idx, text to print)
    count = 0

    for entity_true in true_set:
        if entity_true in detected_set:
            continue

        idx, _, (start, end) = entity_true
        partner = None
        for entity_detected in detected_by_doc.get(idx, ()):
            _, _, (start_d, end_d) = entity_detected
            if (start == start_d and end < end_d) or (end == end_d and start > start_d):
                partner = entity_detected
                break  # No need to check further once a match is found

        if partner is None:
            report_rows.append((idx, str(entity_true)))
        else:
            count += 1
            report_rows.append((idx, f"{entity_true} ---- {partner}"))

    # Sort on the numeric document index directly rather than re-parsing it
    # out of the formatted text; the sort is stable, so ties keep their order.
    report_rows.sort(key=lambda row: row[0])

    print(f"Out of {len(report_rows)} FN entities for {category} category, {count} entities might have likely been successfully detected by Presidio.")
    for _, text in report_rows:
        print(text)


In [6]:
# NOTE: name_set_detected is the module-level global rebound by the most recent
# evaluate_detected_entities(...) call, so this report reflects whichever file was scored last.
print_likely_successful_entities(name_set_true, name_set_detected, 'NAME_STUDENT')

Out of 1372 FN entities for NAME_STUDENT category, 901 entities might have likely been successfully detected by Presidio.
(7, 'Nathalie Sylla', (52, 66)) ---- (7, 'Nathalie Sylla\n\n', (52, 68))
(80, 'Karol Ferreira', (2, 16)) ---- (80, 'Karol Ferreira\n\nREFLECTION - VISUALIZATION\n\nChallenge\n\nWorking', (2, 64))
(88, 'Rakesh Singh', (61, 73)) ---- (88, 'Rakesh Singh\n\nChallenge', (61, 84))
(99, 'Francesco Boscolo', (38, 55)) ---- (99, 'Francesco Boscolo\n\nChallenge', (38, 66))
(109, 'Michael', (3336, 3343)) ---- (109, 'Michael  -', (3336, 3346))
(109, 'Michael', (1300, 1307)) ---- (109, 'Michael  -', (1300, 1310))
(109, 'Michael', (71, 78)) ---- (109, 'Michael  -', (71, 81))
(112, 'Francisco Ferreira', (30, 48))
(113, 'Rita', (0, 4))
(123, 'Stefano Lovato', (156, 170)) ---- (123, 'Stefano Lovato\n\nMDI-191', (156, 179))
(161, 'Juan Farid', (3758, 3768)) ---- (161, 'Juan Farid  ', (3758, 3770))
(202, 'Danny Long', (0, 10)) ---- (202, 'Danny Long Reflection', (0, 21))
(204, 'Deiby'

In [7]:
# NOTE: url_set_detected is the module-level global rebound by the most recent
# evaluate_detected_entities(...) call, so this report reflects whichever file was scored last.
print_likely_successful_entities(url_set_true, url_set_detected, 'URL_PERSONAL')

Out of 58 FN entities for URL_PERSONAL category, 26 entities might have likely been successfully detected by Presidio.
(1309, 'https://www.hall.biz/wp-contenthome.html', (2581, 2621))
(3202, 'tps://www.facebook.com/bclark', (169, 198))
(3202, 'https://www.youtube.com/channel/UC1ElAcppeuhfet nYZqnhEXw', (200, 257))
(3515, 'https://www.youtube.com/watch?v=mYxoZaftuNN', (2166, 2209)) ---- (3515, 'https://www.youtube.com/watch?v=mYxoZaftuNN.', (2166, 2210))
(3592, 'https://www.peterson.net/tag/app/listmain.php', (222, 267)) ---- (3592, 'https://www.peterson.net/tag/app/listmain.php.', (222, 268))
(5358, 'https://schaefer.biz/posts/search/appsearch.php', (1543, 1590))
(5861, 'https://www.stevens.biz/wp-contentindex.jsp', (3227, 3270))
(6257, 'http://mcneil.org/list/taghomepage.htm', (721, 759)) ---- (6257, 'http://mcneil.org/list/taghomepage.htm.', (721, 760))
(7183, 'http://bailey-perry.com/posts/tag/categoriesfaq.htm', (3411, 3462)) ---- (7183, 'http://bailey-perry.com/posts/tag/categorie

In [8]:
# NOTE: email_set_detected is the module-level global rebound by the most recent
# evaluate_detected_entities(...) call, so this report reflects whichever file was scored last.
print_likely_successful_entities(email_set_true, email_set_detected, 'EMAIL')

Out of 1 FN entities for EMAIL category, 0 entities might have likely been successfully detected by Presidio.
(11699, 'srpe…r@....kelsey21@gmail.com', (6157, 6186))


In [9]:
# NOTE: phoneNum_set_detected is the module-level global rebound by the most recent
# evaluate_detected_entities(...) call, so this report reflects whichever file was scored last.
print_likely_successful_entities(phoneNum_set_true, phoneNum_set_detected, 'PHONE_NUM')

Out of 0 FN entities for PHONE_NUM category, 0 entities might have likely been successfully detected by Presidio.


In [10]:
import ast

def _index_entities_by_doc(entities):
    """Group ``(doc_idx, text, (start, end))`` triples into ``{doc_idx: [triple, ...]}``."""
    by_doc = {}
    for entity in entities:
        by_doc.setdefault(entity[0], []).append(entity)
    return by_doc


def update_detected_entities(true_name_set, true_url_set, detected_file, output_file, indent=4):
    """Snap over-long PERSON / URL detections back to the ground-truth span.

    Every line of ``detected_file`` is parsed as a
    ``(doc_idx, text, category, (start, end))`` tuple. For 'PERSON' and 'URL'
    detections, the ground-truth entities of the same document are searched
    for a span that shares the detection's start and ends earlier, or shares
    its end and starts later. On the first hit the line is replaced by the
    ground-truth text and span (category label kept); otherwise the original
    line is copied unchanged. Lines of any other category, and blank lines,
    are copied unchanged.

    Args:
        true_name_set: ground-truth triples compared with 'PERSON' detections.
        true_url_set: ground-truth triples compared with 'URL' detections.
        detected_file: path of the detections file to read.
        output_file: path the updated detections are written to.
        indent: number of spaces prefixed to each rewritten line.

    Returns:
        None. Prints how many lines of each category were rewritten.
    """
    with open(detected_file, 'r') as file:
        detected_entities = file.readlines()

    # Index the ground truth per document once, so each detection is compared
    # only with entities from its own document.
    truth_by_category = {
        'PERSON': _index_entities_by_doc(true_name_set),
        'URL': _index_entities_by_doc(true_url_set),
    }
    update_counts = {'PERSON': 0, 'URL': 0}
    indent_space = ' ' * indent
    updated_entities = []

    for line in detected_entities:
        stripped = line.strip()
        if not stripped:
            # ast.literal_eval('') raises SyntaxError; pass blank lines through.
            updated_entities.append(line)
            continue

        idx, _, category, (start, end) = ast.literal_eval(stripped)

        replacement = None
        candidates = truth_by_category.get(category, {}).get(idx, ())
        for _, entity_text_true, (start_true, end_true) in candidates:
            # Check for possible matches where detected entity is longer
            if (start == start_true and end > end_true) or (end == end_true and start < start_true):
                replacement = (idx, entity_text_true, category, (start_true, end_true))
                break

        if replacement is None:
            updated_entities.append(line)
        else:
            # repr() escapes quotes, backslashes and newlines in the text, so
            # the written line is always a literal ast.literal_eval can read
            # back. Wrapping the raw text in '...' by hand is not.
            updated_entities.append(f"{indent_space}{replacement!r}\n")
            update_counts[category] += 1

    # Save updated entities to the new file
    with open(output_file, 'w') as file:
        file.writelines(updated_entities)

    count_name_updates = update_counts['PERSON']
    count_url_updates = update_counts['URL']
    print(f"Out of {len(detected_entities)} entities, {count_name_updates} 'PERSON' and {count_url_updates} 'URL' were updated as likely successful detections.")
    print(f"Updated detected entities saved to {output_file}")

# Rewrite the PERSON / URL detections of output/pii_detected_lg.txt that over-extend a
# ground-truth span, and save the result to others/pii_entities_detected_updated.txt.
update_detected_entities(name_set_true, url_set_true, 'output/pii_detected_lg.txt', 'others/pii_entities_detected_updated.txt')


Out of 23094 entities, 894 'PERSON' and 30 'URL' were updated as likely successful detections.
Updated detected entities saved to output/pii_entities_detected_updated.txt


In [11]:
# Score the detections stored in others/pii_entities_detected_updated.txt against the ground-truth sets.
evaluate_detected_entities('others/pii_entities_detected_updated.txt')

All detected entites summary:
PERSON: 18623
URL: 4254
EMAIL_ADDRESS: 132
PHONE_NUMBER: 85

Summary of Precision, Recall, F1 Score, and F5 Score
NAME_STUDENT    -- Precision: 0.2102, Recall: 0.8908, F1 Score: 0.3401, F5 Score: 0.7921, TP: 3914, FP: 14707, FN: 480
URL_PERSONAL    -- Precision: 0.0753, Recall: 0.9091, F1 Score: 0.1391, F5 Score: 0.6375, TP: 320, FP: 3930, FN: 32
EMAIL           -- Precision: 0.8333, Recall: 0.9910, F1 Score: 0.9053, F5 Score: 0.9838, TP: 110, FP: 22, FN: 1
PHONE_NUM       -- Precision: 0.1647, Recall: 1.0000, F1 Score: 0.2828, F5 Score: 0.8368, TP: 14, FP: 71, FN: 0


Deadline: August 26, 2024 (aiming for a presentation to Ken)
1. Post processing - AL
2. Other models - YW, JS
3. Random sampling (2000 cases) or stratified sampling for each category - JS
4. find mappings for ID_NUM, STREET_ADDRESS, USERNAME - JS

EDM paper: sequence labeling. If we prompt GPT to do BIO tagging, performance might not be good.
Why do we use a GPT model? What about other models?
Let's do mini first.

In [12]:
import ast

def check_url_personal_starts_with_http(file_path: str):
    """Report 'URL_PERSONAL' entities whose text does not begin with 'http'.

    Every line of ``file_path`` is parsed as a
    ``(idx, text, category, (start, end))`` tuple. Offending entities are
    printed one per line under a header; if there are none, a single
    confirmation line is printed instead.
    """
    offenders = []

    with open(file_path, 'r') as handle:
        for raw in handle:
            record = ast.literal_eval(raw.strip())
            _, text, label, (_, _) = record

            # Skip everything that is not a URL_PERSONAL lacking the prefix.
            if label != 'URL_PERSONAL' or text.startswith('http'):
                continue
            offenders.append(record)

    if not offenders:
        print("All 'URL_PERSONAL' entities start with 'http'.")
        return

    print("URLs not starting with 'http':")
    for record in offenders:
        print(record)

# List the ground-truth URL_PERSONAL entities in data/pii_entities.txt that do not start with 'http'.
check_url_personal_starts_with_http('data/pii_entities.txt')


URLs not starting with 'http':
(3202, 'tps://www.facebook.com/bclark', 'URL_PERSONAL', (169, 198))
(16435, 'madehttps://smith.org/main/list/tagsprivacy.htm', 'URL_PERSONAL', (3581, 3628))


In [13]:
import ast

def count_url_entities(file_path: str):
    """Print how many 'URL' entities in a file do / do not contain 'http'.

    Every line of ``file_path`` is parsed as a
    ``(idx, text, category, (start, end))`` tuple; only entities whose
    category is exactly 'URL' are tallied.
    """
    # tally[True]  -> URL entities whose text contains 'http'
    # tally[False] -> URL entities whose text does not
    tally = {True: 0, False: 0}

    with open(file_path, 'r') as handle:
        for raw in handle:
            _, text, label, (_, _) = ast.literal_eval(raw.strip())
            if label == 'URL':
                tally['http' in text] += 1

    with_http = tally[True]
    without_http = tally[False]

    print(f"Total 'URL' entities: {with_http + without_http}")
    print(f"Total 'URL' entities containing 'http': {with_http}")
    print(f"Total 'URL' entities not containing 'http': {without_http}")

# Count the 'URL' detections in others/pii_entities_detected_updated.txt with and without 'http'.
count_url_entities('others/pii_entities_detected_updated.txt')


Total 'URL' entities: 4254
Total 'URL' entities containing 'http': 1094
Total 'URL' entities not containing 'http': 3160


In [14]:
import ast

def remove_non_http_urls(input_file: str, output_file: str):
    """Copy a detections file, dropping 'URL' entities that lack 'http'.

    Every line of ``input_file`` is parsed as a
    ``(idx, text, category, (start, end))`` tuple. A line is kept verbatim
    unless its category is 'URL' and its text does not contain 'http'.
    The kept lines are written to ``output_file`` and a confirmation line is
    printed.
    """
    def should_keep(raw):
        _, text, label, (_, _) = ast.literal_eval(raw.strip())
        return label != 'URL' or 'http' in text

    with open(input_file, 'r') as source:
        kept_lines = [raw for raw in source if should_keep(raw)]

    with open(output_file, 'w') as sink:
        sink.writelines(kept_lines)

    print(f"Filtered entities saved to {output_file}")

# Drop the 'URL' detections without 'http' from others/pii_entities_detected_updated.txt
# and save the remainder to others/pii_entities_detected_updated2.txt.
remove_non_http_urls('others/pii_entities_detected_updated.txt', 'others/pii_entities_detected_updated2.txt')


Filtered entities saved to output/pii_entities_detected_updated2.txt


In [15]:
# Score the detections stored in others/pii_entities_detected_updated2.txt against the ground-truth sets.
evaluate_detected_entities('others/pii_entities_detected_updated2.txt')

All detected entites summary:
PERSON: 18623
URL: 1094
EMAIL_ADDRESS: 132
PHONE_NUMBER: 85

Summary of Precision, Recall, F1 Score, and F5 Score
NAME_STUDENT    -- Precision: 0.2102, Recall: 0.8908, F1 Score: 0.3401, F5 Score: 0.7921, TP: 3914, FP: 14707, FN: 480
URL_PERSONAL    -- Precision: 0.2936, Recall: 0.9091, F1 Score: 0.4438, F5 Score: 0.8413, TP: 320, FP: 770, FN: 32
EMAIL           -- Precision: 0.8333, Recall: 0.9910, F1 Score: 0.9053, F5 Score: 0.9838, TP: 110, FP: 22, FN: 1
PHONE_NUM       -- Precision: 0.1647, Recall: 1.0000, F1 Score: 0.2828, F5 Score: 0.8368, TP: 14, FP: 71, FN: 0


In [24]:
# Score the detections stored in output/pii_detected_trf.txt against the ground-truth sets.
evaluate_detected_entities('output/pii_detected_trf.txt')

All detected entites summary:
PERSON: 15330
URL: 4254
EMAIL_ADDRESS: 132
PHONE_NUMBER: 85

Summary of Precision, Recall, F1 Score, and F5 Score
NAME_STUDENT    -- Precision: 0.2367, Recall: 0.8257, F1 Score: 0.3679, F5 Score: 0.7535, TP: 3628, FP: 11702, FN: 766
URL_PERSONAL    -- Precision: 0.0686, Recall: 0.8295, F1 Score: 0.1268, F5 Score: 0.5816, TP: 292, FP: 3962, FN: 60
EMAIL           -- Precision: 0.8333, Recall: 0.9910, F1 Score: 0.9053, F5 Score: 0.9838, TP: 110, FP: 22, FN: 1
PHONE_NUM       -- Precision: 0.1647, Recall: 1.0000, F1 Score: 0.2828, F5 Score: 0.8368, TP: 14, FP: 71, FN: 0


In [31]:
# import ast

# def normalize_entity(entity_str):
#     """Helper function to normalize entities by stripping whitespace."""
#     entity = ast.literal_eval(entity_str.strip())
#     idx, entity_text, category, positions = entity
#     # Return a tuple with normalized text (no lowercase conversion)
#     return (idx, entity_text.strip(), category.strip(), positions)

# def compare_entities_limited(file1_path, file2_path):
#     # Read entities from the first file
#     entities1 = set()
#     with open(file1_path, 'r') as file1:
#         for line in file1:
#             entities1.add(normalize_entity(line))

#     # Read entities from the second file
#     entities2 = set()
#     with open(file2_path, 'r') as file2:
#         for line in file2:
#             entities2.add(normalize_entity(line))

#     # Compare the entities
#     only_in_file1 = entities1 - entities2
#     only_in_file2 = entities2 - entities1
#     common_entities = entities1 & entities2

#     # Display the comparison results
#     print(f"Entities only in {file1_path}: {len(only_in_file1)}")
#     for entity in sorted(only_in_file1):
#         print(entity)

#     print(f"\nEntities only in {file2_path}: {len(only_in_file2)}")
#     for entity in sorted(only_in_file2):
#         print(entity)

#     print(f"\nCommon entities in both files: {len(common_entities)}")
#     for entity in sorted(common_entities):
#         print(entity)

# # Example usage:
# compare_entities_limited('output/pii_detected_lg.txt', 'output/pii_detected_trf.txt')


Entities only in output/pii_detected_lg.txt: 8343
(2, 'VISUALIZATION', 'PERSON', (0, 13))
(4, 'Henry Acosta', 'PERSON', (36, 48))
(5, 'https://www.greatplacetowork.com/resources/blog/why-is-diversity-inclusion-in-the-workplace-important', 'PERSON', (4150, 4251))
(7, "Dessine-moi  l'intelligence", 'PERSON', (292, 319))
(7, 'Nathalie Sylla', 'PERSON', (52, 68))
(10, 'Diego Estrada', 'PERSON', (0, 13))
(10, 'Diego Estrada', 'PERSON', (2386, 2399))
(15, 'A. XYZ', 'PERSON', (2834, 2840))
(15, 'A. XYZ', 'PERSON', (5861, 5867))
(15, 'A. XYZ', 'PERSON', (8888, 8894))
(15, 'A. XYZ', 'PERSON', (11925, 11931))
(23, 'Cap', 'PERSON', (2158, 2161))
(33, 'Learning Oﬃcer', 'PERSON', (2404, 2418))
(37, 'Wann', 'PERSON', (895, 899))
(37, 'wie', 'PERSON', (911, 914))
(44, 'Centreno', 'PERSON', (376, 384))
(46, 'INSIGHT', 'PERSON', (2034, 2041))
(56, 'Nadine Born', 'PERSON', (53, 64))
(59, 'MindMap', 'PERSON', (1248, 1255))
(65, 'Cotton', 'PERSON', (486, 492))
(66, 'd. Skills  e. Egos  f. Team', 'PERSON',