/
build_classes_lookup.py
37 lines (32 loc) · 1.02 KB
/
build_classes_lookup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""
Script that builds the lookup table mapping nouns to DBpedia ontology classes (noun -> class URI) and saves it as JSON.
Author: Fernando Casabán Blasco
"""
# Input text file that main() reads line by line, one class entry per line.
CLASSES_FILE_PATH = "datasets/dbo_classes.txt"
# Prefix that main() removes from each entry to obtain the bare class name.
CLASS_PREFIX = "http://dbpedia.org/ontology/"
# Destination JSON file where main() writes the resulting lookup table.
OUTPUT_FILE = "datasets/classes_lookup.json"
import json
import re
def format_string(text):
    """Convert a CamelCase name into lowercase, space-separated words.

    Example: "IceHockeyPlayer" -> "ice hockey player".

    A space is inserted before every uppercase letter except the first
    character of the string, then the whole result is lowercased. Because
    each uppercase letter starts a new word, runs of capitals are split
    letter by letter ("ABc" -> "a bc").

    Args:
        text: The name to convert; may be empty or start with a lowercase
            letter.

    Returns:
        The lowercase, space-separated form of ``text``.
    """
    pieces = []
    for index, letter in enumerate(text):
        # Only separate words *inside* the string: the previous version
        # always sliced off the first character, which corrupted inputs
        # that did not begin with an uppercase letter.
        if index > 0 and letter.isupper():
            pieces.append(" ")
        pieces.append(letter)
    return "".join(pieces).lower()
def main():
    """Build the noun -> DBpedia class lookup table and save it as JSON.

    Reads one class entry per line from CLASSES_FILE_PATH, removes
    CLASS_PREFIX from each entry, turns the remaining name into a
    lowercase, space-separated key with format_string(), and writes the
    mapping (key -> full class entry) to OUTPUT_FILE as indented JSON.
    If two entries produce the same key, the later one wins.
    """
    with open(CLASSES_FILE_PATH, encoding="utf-8") as cfile:
        # Strip surrounding whitespace (this also handles "\r\n" endings)
        # and skip blank lines so they do not create a bogus "" -> "" entry.
        stripped_lines = (line.strip() for line in cfile)
        dbo_classes = [entry for entry in stripped_lines if entry]

    lookup = {}
    for dbo_class in dbo_classes:
        # Drop the ontology prefix to keep only the CamelCase class name.
        class_name = dbo_class.replace(CLASS_PREFIX, "", 1)
        lookup[format_string(class_name)] = dbo_class

    # The context manager closes the output file even if json.dump raises.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as save_file:
        json.dump(lookup, save_file, indent=4)
# Run the build only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()