In [1]:
import csv 
import json

from unidecode import unidecode
"""Used to normalize names to ascii, since German names have umlauts and other special characters"""

In [11]:
# Column names of the CSV file, listed in the same left-to-right order as the
# cells of each row: rows are matched to these names by position.
headers = [
    "name", "link", "rating", "budget", "closed_days",
    "work", "wifi", "dietary_restrictions", "card_payment", "size",
    "opening_times",
    "distance_walking", "distance_biking", "distance_transit",
    "nearest_subway",
]
"""Headers for the CSV file. We use these to map the JSON keys, as well as to build the predicates in the KB."""

# Columns that are dropped from every row while the CSV is parsed, so they
# appear neither in the JSON dump nor as predicates in the KB.
ignored = [
    "link", "opening_times",
    "distance_biking", "distance_transit",
    "nearest_subway", "closed_days",
]
"""Keys that are ignored in the CSV file, since they are not used in the KB."""


In [19]:
# Parse ./data/data.csv into a list of dicts (one per CSV row, keyed by the
# non-ignored header names) and dump that list to ./data/data.json.
#
# The CSV's own header line is not treated specially here, so it ends up as
# data[0]; the cell that writes the KB skips that first element.
data = []

# newline='' is the mode the csv module documents for reader input, so that
# line endings embedded in quoted fields are handled by the parser itself.
# NOTE(review): the encoding is left at the platform default, as before —
# confirm the file's encoding if this is ever run on a non-UTF-8 locale.
with open('./data/data.csv', 'r', newline='') as f:
    reader = csv.reader(f)

    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; there is no name to
            # normalize, so skip it instead of failing with a KeyError.
            continue

        # Pair each cell with its header by position and keep only the
        # columns that are not ignored. zip() stops at the shorter input, so
        # surplus trailing cells (e.g. from a trailing comma) are dropped
        # instead of raising an IndexError.
        parsed_row = {
            header: cell
            for header, cell in zip(headers, row)
            if header not in ignored
        }

        # Normalize the name by converting it to lowercase, replacing spaces with underscores,
        # removing periods and converting special characters to ascii
        parsed_row["name"] = unidecode(parsed_row["name"].replace(" ", "_")).lower().replace(".", "")

        data.append(parsed_row)

with open('./data/data.json', 'w') as f:
    json.dump(data, f)

In [20]:
# Fallback value per column, used when a row's cell for that column is empty.
# The value is stored exactly as it is written to the KB, including the
# double quotes.
defaults = dict(size='"M"')
"""Default values for the predicates in the KB. We use these to fill in missing values in the CSV file."""

In [24]:
# Write the knowledge base ./kb.pl: for every parsed row, one `cafe(Name).`
# fact followed by one `key(Name, Value).` fact per column, with a blank line
# between cafes.

# Budget ranges as they appear in the CSV, mapped to the integer midpoint of
# the range. Any other budget value is written unchanged.
budget_midpoints = {"1-10": 5, "10-20": 15, "20-30": 25}

# Single-letter codes that are written as double-quoted strings. Each code
# maps to itself: "M" previously became "L", which mislabelled every
# medium-sized cafe as large.
quoted_codes = {code: f'"{code}"' for code in ("Y", "N", "S", "M", "L")}

with open('./kb.pl', 'w') as f:
    # data[0] holds the CSV header row rather than a cafe, so start at data[1].
    for row in data[1:]:
        for key, value in row.items():
            if key == "name":
                # `cafe/1` is a special predicate that is used to indicate that a place is a cafe
                f.write(f"cafe({value}).\n")
                # NOTE(review): control falls through to the generic write
                # below, so a `name(X, X).` fact is emitted as well. Kept for
                # backward compatibility — confirm it is intended (name/2 is a
                # built-in predicate in some Prolog systems).
            elif key == "budget":
                value = budget_midpoints.get(value, value)
            elif value in quoted_codes:
                value = quoted_codes[value]
            else:
                # Values containing a hyphen are wrapped in double quotes;
                # afterwards spaces are replaced with underscores (inside the
                # quotes too).
                if "-" in value:
                    value = f'"{value}"'

                value = value.replace(" ", "_")

            # Fill in the default only when the cell was empty in the CSV.
            if key in defaults and value == "":
                value = defaults[key]

            f.write(f"{key}({row['name']}, {value}).\n")

        f.write("\n")