In [6]:
from __future__ import print_function

import itertools
import json
import os
import re
import urllib
import urllib.parse
import urllib.request
import uuid

import rdflib
from rdflib import Namespace
from rdflib.namespace import DC, DCTERMS, DOAP, FOAF, SKOS, OWL, RDF, RDFS, VOID, XMLNS, XSD
from tqdm import tqdm

In [7]:
def add_node(node):
    """Add ``node`` to the global ``nodes`` collection by calling ``nodes.add``.

    ``nodes`` is not created here; it has to be bound at module level (to an
    object with an ``add`` method) before this function is called.
    NOTE: a later cell rebinds ``nodes`` to a list, which has no ``add`` method,
    so calling this after that point fails until ``nodes`` is reset to a set.
    """
    nodes.add(node)

def read_nodes(path):
    """Register the head and tail entity of every triple in a split file.

    The file at ``path`` is read as UTF-8 text with one tab-separated
    ``head<TAB>relation<TAB>tail`` triple per line. Each head and tail is
    passed to ``add_node``; the relation column is ignored.

    Args:
        path: Location of the tab-separated triple file.

    Raises:
        ValueError: If a non-blank line does not hold exactly three
            tab-separated fields.
    """
    # Stream the file instead of materialising it twice with readlines().
    with open(path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # A blank (typically trailing) line would otherwise fail to unpack.
                continue
            head, _rel, tail = line.split("\t")
            add_node(head)
            add_node(tail)
        
def divide_chunks(l, n):
    """Lazily yield consecutive slices of ``l`` that hold at most ``n`` items.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``;
    an empty ``l`` yields nothing.
    """
    offsets = range(0, len(l), n)
    yield from (l[start:start + n] for start in offsets)
        
def load_json_file(path):
    """Return the Python object decoded from the JSON document stored at ``path``."""
    with open(path) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def load_id_tsv(path):
    """Load a UTF-8 ``id<TAB>value`` file into a dictionary.

    Each line is stripped of surrounding whitespace and split at its first
    tab, so a value may itself contain tabs. Blank lines are skipped. When an
    id occurs more than once, the last occurrence wins.

    Args:
        path: Location of the tab-separated file.

    Returns:
        dict mapping each id string to its value string.

    Raises:
        ValueError: If a non-blank line contains no tab separator.
    """
    idValueDict = {}
    with open(path, encoding="utf-8") as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # A blank (typically trailing) line carries no entry.
                continue
            # Split once only: everything after the first tab is the value.
            key, separator, value = line.partition("\t")
            if not separator:
                raise ValueError(
                    "Expected 'id<TAB>value' on line %d of %s" % (line_number, path)
                )
            idValueDict[key] = value
    return idValueDict

In [8]:
# https://developers.google.com/knowledge-graph/

root = r"workspace/labels"

# Build the split paths with os.path.join: the backslash-separated literals
# used previously only resolve on Windows.
data_dir = os.path.join("workspace", "data", "FB15-237")
train_path = os.path.join(data_dir, "train.txt")
test_path = os.path.join(data_dir, "test.txt")
valid_path = os.path.join(data_dir, "valid.txt")

# read_nodes() fills the global `nodes` set, so it is reset here to keep the
# cell re-runnable.
nodes = set()
read_nodes(train_path)
read_nodes(test_path)
read_nodes(valid_path)

# Sort so the entity order (and therefore the request batches built from it)
# is the same on every run; plain list(set) order depends on string hashing.
nodes = sorted(nodes)

print(len(nodes))

14541


## Download files


In [9]:
# Read the API key from a local file. The handle is closed by the context
# manager, and surrounding whitespace (e.g. a trailing newline) is stripped so
# it does not end up URL-encoded inside the `key` parameter.
with open('.api_key') as key_file:
    api_key = key_file.read().strip()

service_url = 'https://kgsearch.googleapis.com/v1/entities:search'

# Number of entity ids sent per request (also passed as the `limit` parameter).
limit = 500

# Accumulates the `itemListElement` entries of every response.
result = {
    'result': []
}

for node_list in tqdm(divide_chunks(nodes, limit)):
    params = [
        ('limit', limit),
        ('indent', True),
        ('key', api_key),
    ]
    # One repeated `ids` query parameter per entity in this batch.
    params.extend(('ids', node) for node in node_list)

    url = service_url + '?' + urllib.parse.urlencode(params)
    # Close the HTTP response deterministically instead of leaving it to the GC.
    with urllib.request.urlopen(url) as reply:
        response = json.loads(reply.read())

    result['result'].extend(response['itemListElement'])

with open(root + "//fb.json", 'w') as outfile:
    json.dump(result, outfile, indent=4, sort_keys=True)

30it [00:40,  1.33s/it]


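Labels and descriptions for entities that are missing from the API response are retrieved from https://github.com/yao8839836/kg-bert/blob/master/data/FB15K/ (the graph-building cell reads `FB15k_mid2name.txt` and `FB15k_mid2description.txt` from `workspace/labels`).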

## Create graph

In [21]:
# Namespace used to mint entity IRIs, and the graph that collects the triples.
kg = Namespace("http://g.co/kg")
g = rdflib.Graph()

# Register the prefixes on the graph in one pass.
for _prefix, _namespace in (("kg", kg), ("rdf", RDF), ("rdfs", RDFS)):
    g.bind(_prefix, _namespace)

In [25]:

# Entities that have not received a label yet. A set gives O(1) removal, and
# `discard` tolerates ids that the API returned more than once or that are not
# part of the dataset (list.remove raised ValueError in those cases).
missing = set(nodes)

jsonfile = load_json_file(root + "//fb.json")
for item in tqdm(jsonfile["result"]):
    entity = item["result"]
    term = entity["@id"].replace("kg:","")
    subject = kg.term(term)  # computed once and reused for every triple below
    missing.discard(term)

    g.add((
        subject,
        RDFS.label,
        rdflib.Literal(entity["name"], datatype=XSD.string)
    ))

    if "@type" in entity:
        # NOTE(review): types are stored as string literals, not IRIs; kept
        # unchanged because consumers of the file may rely on this shape.
        for type_ in entity["@type"]:
            g.add((
                subject,
                RDF.type,
                rdflib.Literal(type_, datatype=XSD.string)
            ))

    # Prefer the detailed article body; fall back to the short description.
    if "detailedDescription" in entity:
        g.add((
            subject,
            RDFS.comment,
            rdflib.Literal(entity["detailedDescription"]["articleBody"], datatype=XSD.string)
        ))
    elif "description" in entity:
        g.add((
            subject,
            RDFS.comment,
            rdflib.Literal(entity["description"], datatype=XSD.string)
        ))


# Fallback: label the entities the API did not return from the local TSV files.
alt_labels = load_id_tsv(root + "/FB15k_mid2name.txt")
alt_descriptions = load_id_tsv(root + "/FB15k_mid2description.txt")
for entity_id in sorted(missing):  # sorted only to make the iteration order stable
    label = alt_labels.get(entity_id, None)
    description = alt_descriptions.get(entity_id, "")
    assert label is not None, "Again missing " + entity_id

    subject = kg.term(entity_id)
    g.add((
        subject,
        RDFS.label,
        rdflib.Literal(label.replace("_", " "), datatype=XSD.string)
    ))
    # Drop the "@en" tag and escaped newlines, unescape quotes, then cut the
    # first and last character (the surrounding quotes of the raw value).
    g.add((
        subject,
        RDFS.comment,
        rdflib.Literal(description.replace("@en","").replace("\\n", "").replace("\\\"", "\"")[1:-1], datatype=XSD.string)
    ))

g.serialize(os.path.abspath(r"workspace/graphs/fb15k237.ttl"),format="turtle")

100%|██████████| 13948/13948 [00:03<00:00, 4219.00it/s]


In [None]:
# Dictionary keyed by slash-delimited relation path strings (some keys chain two
# paths with a "."); every value in this literal is the empty string.
# No other cell visible in this notebook reads `relations`.
# NOTE(review): presumably placeholders for per-relation labels still to be
# filled in — confirm before relying on the values.
relations = {
    "/organization/endowed_organization/endowment./measurement_unit/dated_money_value/currency": "",
    "/location/hud_county_place/place": "",
    "/education/educational_institution_campus/educational_institution": "",
    "/film/actor/film./film/performance/special_performance_type": "",
    "/olympics/olympic_sport/athletes./olympics/olympic_athlete_affiliation/country": "",
    "/tv/tv_program/program_creator": "",
    "/education/educational_degree/people_with_this_degree./education/education/institution": "",
    "/location/statistical_region/religions./location/religion_percentage/religion": "",
    "/award/ranked_item/appears_in_ranked_lists./award/ranking/list": "",
    "/film/film/produced_by": "",
    "/people/profession/specialization_of": "",
    "/olympics/olympic_games/medals_awarded./olympics/olympic_medal_honor/medal": "",
    "/people/marriage_union_type/unions_of_this_type./people/marriage/location_of_ceremony": "",
    "/music/instrument/instrumentalists": "",
    "/music/performance_role/track_performances./music/track_contribution/role": "",
    "/sports/professional_sports_team/draft_picks./sports/sports_league_draft_pick/draft": "",
    "/celebrities/celebrity/celebrity_friends./celebrities/friendship/friend": "",
    "/people/cause_of_death/people": "",
    "/sports/sports_league/teams./sports/sports_league_participation/team": "",
    "/location/statistical_region/gni_per_capita_in_ppp_dollars./measurement_unit/dated_money_value/currency": "",
    "/award/award_nominated_work/award_nominations./award/award_nomination/nominated_for": "",
    "/people/person/employment_history./business/employment_tenure/company": "",
    "/base/culturalevent/event/entity_involved": "",
    "/award/award_winning_work/awards_won./award/award_honor/award": "",
    "/people/person/gender": "",
    "/travel/travel_destination/how_to_get_here./travel/transportation/mode_of_transportation": "",
    "/film/film/edited_by": "",
    "/film/actor/dubbing_performances./film/dubbing_performance/language": "",
    "/base/schemastaging/organization_extra/phone_number./base/schemastaging/phone_sandbox/service_language": "",
    "/award/award_nominee/award_nominations./award/award_nomination/award_nominee": "",
    "/people/person/profession": "",
    "/location/country/capital": "",
    "/base/localfood/seasonal_month/produce_available./base/localfood/produce_availability/seasonal_months": "",
    "/sports/sports_position/players./american_football/football_historical_roster_position/position_s": "",
    "/soccer/football_team/current_roster./soccer/football_roster_position/position": "",
    "/people/person/spouse_s./people/marriage/type_of_union": "",
    "/people/person/religion": "",
    "/tv/tv_program/regular_cast./tv/regular_tv_appearance/actor": "",
    "/soccer/football_team/current_roster./sports/sports_team_roster/position": "",
    "/award/award_winner/awards_won./award/award_honor/award_winner": "",
    "/award/award_category/winners./award/award_honor/ceremony": "",
    "/olympics/olympic_participating_country/medals_won./olympics/olympic_medal_honor/medal": "",
    "/film/film/distributors./film/film_film_distributor_relationship/film_distribution_medium": "",
    "/base/schemastaging/organization_extra/phone_number./base/schemastaging/phone_sandbox/service_location": "",
    "/base/americancomedy/celebrity_impressionist/celebrities_impersonated": "",
    "/tv/non_character_role/tv_regular_personal_appearances./tv/tv_regular_personal_appearance/person": "",
    "/tv/tv_program/genre": "",
    "/medicine/disease/notable_people_with_this_condition": "",
    "/music/group_member/membership./music/group_membership/role": "",
    "/education/educational_institution/campuses": "",
    "/location/country/second_level_divisions": "",
    "/language/human_language/countries_spoken_in": "",
    "/film/person_or_entity_appearing_in_film/films./film/personal_film_appearance/type_of_appearance": "",
    "/base/schemastaging/organization_extra/phone_number./base/schemastaging/phone_sandbox/contact_category": "",
    "/sports/sport/pro_athletes./sports/pro_sports_played/athlete": "",
    "/tv/tv_program/languages": "",
    "/government/politician/government_positions_held./government/government_position_held/legislative_sessions": "",
    "/tv/tv_writer/tv_programs./tv/tv_program_writer_relationship/tv_program": "",
    "/music/record_label/artist": "",
    "/base/locations/continents/countries_within": "",
    "/sports/sports_team/sport": "",
    "/base/biblioness/bibs_location/state": "",
    "/soccer/football_player/current_team./sports/sports_team_roster/team": "",
    "/time/event/instance_of_recurring_event": "",
    "/business/business_operation/industry": "",
    "/government/politician/government_positions_held./government/government_position_held/jurisdiction_of_office": "",
    "/location/administrative_division/country": "",
    "/film/film/featured_film_locations": "",
    "/sports/sports_team/roster./baseball/baseball_roster_position/position": "",
    "/film/film/release_date_s./film/film_regional_release_date/film_regional_debut_venue": "",
    "/organization/organization/child./organization/organization_relationship/child": "",
    "/education/educational_institution/school_type": "",
    "/food/food/nutrients./food/nutrition_fact/nutrient": "",
    "/business/business_operation/operating_income./measurement_unit/dated_money_value/currency": "",
    "/tv/tv_personality/tv_regular_appearances./tv/tv_regular_personal_appearance/program": "",
    "/film/film/dubbing_performances./film/dubbing_performance/actor": "",
    "/tv/tv_network/programs./tv/tv_network_duration/program": "",
    "/location/hud_foreclosure_area/estimated_number_of_mortgages./measurement_unit/dated_integer/source": "",
    "/military/military_conflict/combatants./military/military_combatant_group/combatants": "",
    "/film/film/personal_appearances./film/personal_film_appearance/person": "",
    "/location/statistical_region/rent50_2./measurement_unit/dated_money_value/currency": "",
    "/people/ethnicity/geographic_distribution": "",
    "/people/person/languages": "",
    "/film/special_film_performance_type/film_performance_type./film/performance/film": "",
    "/user/tsegaran/random/taxonomy_subject/entry./user/tsegaran/random/taxonomy_entry/taxonomy": "",
    "/celebrities/celebrity/sexual_relationships./celebrities/romantic_relationship/celebrity": "",
    "/olympics/olympic_participating_country/medals_won./olympics/olympic_medal_honor/olympics": "",
    "/location/statistical_region/places_exported_to./location/imports_and_exports/exported_to": "",
    "/film/film/runtime./film/film_cut/film_release_region": "",
    "/education/educational_degree/people_with_this_degree./education/education/major_field_of_study": "",
    "/location/statistical_region/gdp_real./measurement_unit/adjusted_money_value/adjustment_currency": "",
    "/award/award_ceremony/awards_presented./award/award_honor/honored_for": "",
    "/music/artist/origin": "",
    "/base/petbreeds/city_with_dogs/top_breeds./base/petbreeds/dog_city_relationship/dog_breed": "",
    "/award/award_category/nominees./award/award_nomination/nominated_for": "",
    "/people/deceased_person/place_of_death": "",
    "/medicine/symptom/symptom_of": "",
    "/location/statistical_region/gdp_nominal./measurement_unit/dated_money_value/currency": "",
    "/music/performance_role/regular_performances./music/group_membership/role": "",
    "/film/film/other_crew./film/film_crew_gig/film_crew_role": "",
    "/education/university/domestic_tuition./measurement_unit/dated_money_value/currency": "",
    "/film/film/distributors./film/film_film_distributor_relationship/region": "",
    "/people/person/places_lived./people/place_lived/location": "",
    "/base/eating/practicer_of_diet/diet": "",
    "/tv/tv_producer/programs_produced./tv/tv_producer_term/program": "",
    "/government/legislative_session/members./government/government_position_held/legislative_sessions": "",
    "/business/business_operation/assets./measurement_unit/dated_money_value/currency": "",
    "/people/person/spouse_s./people/marriage/location_of_ceremony": "",
    "/people/person/nationality": "",
    "/broadcast/content/artist": "",
    "/baseball/baseball_team/team_stats./baseball/baseball_team_stats/season": "",
    "/film/film/prequel": "",
    "/business/job_title/people_with_this_title./business/employment_tenure/company": "",
    "/user/alexander/philosophy/philosopher/interests": "",
    "/government/legislative_session/members./government/government_position_held/district_represented": "",
    "/film/film/music": "",
    "/organization/organization_member/member_of./organization/organization_membership/organization": "",
    "/sports/professional_sports_team/draft_picks./sports/sports_league_draft_pick/school": "",
    "/music/group_member/membership./music/group_membership/group": "",
    "/music/performance_role/regular_performances./music/group_membership/group": "",
    "/award/award_category/disciplines_or_subjects": "",
    "/location/location/adjoin_s./location/adjoining_relationship/adjoins": "",
    "/education/field_of_study/students_majoring./education/education/major_field_of_study": "",
    "/film/film/cinematography": "",
    "/award/award_nominee/award_nominations./award/award_nomination/award": "",
    "/film/film/written_by": "",
    "/film/film/film_festivals": "",
    "/film/film/language": "",
    "/film/film/estimated_budget./measurement_unit/dated_money_value/currency": "",
    "/film/film/other_crew./film/film_crew_gig/crewmember": "",
    "/people/ethnicity/languages_spoken": "",
    "/olympics/olympic_participating_country/athletes./olympics/olympic_athlete_affiliation/olympics": "",
    "/people/ethnicity/people": "",
    "/organization/organization/headquarters./location/mailing_address/state_province_region": "",
    "/government/political_party/politicians_in_this_party./government/political_party_tenure/politician": "",
    "/base/popstra/celebrity/friendship./base/popstra/friendship/participant": "",
    "/film/film/film_art_direction_by": "",
    "/location/location/time_zones": "",
    "/film/director/film": "",
    "/location/administrative_division/first_level_division_of": "",
    "/medicine/disease/risk_factors": "",
    "/education/educational_institution/students_graduates./education/education/student": "",
    "/base/aareas/schema/administrative_area/capital": "",
    "/location/location/partially_contains": "",
    "/base/biblioness/bibs_location/country": "",
    "/award/hall_of_fame/inductees./award/hall_of_fame_induction/inductee": "",
    "/people/person/spouse_s./people/marriage/spouse": "",
    "/organization/organization/headquarters./location/mailing_address/country": "",
    "/base/schemastaging/person_extra/net_worth./measurement_unit/dated_money_value/currency": "",
    "/sports/sports_position/players./sports/sports_team_roster/position": "",
    "/education/university/international_tuition./measurement_unit/dated_money_value/currency": "",
    "/film/actor/film./film/performance/film": "",
    "/base/aareas/schema/administrative_area/administrative_parent": "",
    "/film/film/costume_design_by": "",
    "/travel/travel_destination/climate./travel/travel_destination_monthly_climate/month": "",
    "/government/government_office_category/officeholders./government/government_position_held/jurisdiction_of_office": "",
    "/film/film/release_date_s./film/film_regional_release_date/film_release_region": "",
    "/sports/sports_team/roster./american_football/football_historical_roster_position/position_s": "",
    "/military/military_combatant/military_conflicts./military/military_combatant_group/combatants": "",
    "/organization/organization/headquarters./location/mailing_address/citytown": "",
    "/dataworld/gardening_hint/split_to": "",
    "/education/field_of_study/students_majoring./education/education/student": "",
    "/film/film/executive_produced_by": "",
    "/sports/pro_athlete/teams./sports/sports_team_roster/team": "",
    "/influence/influence_node/peers./influence/peer_relationship/peers": "",
    "/film/film/release_date_s./film/film_regional_release_date/film_release_distribution_medium": "",
    "/sports/sports_team/colors": "",
    "/olympics/olympic_games/sports": "",
    "/location/hud_county_place/county": "",
    "/base/popstra/location/vacationers./base/popstra/vacation_choice/vacationer": "",
    "/music/performance_role/guest_performances./music/recording_contribution/performance_role": "",
    "/location/country/form_of_government": "",
    "/award/award_category/category_of": "",
    "/tv/tv_program/tv_producer./tv/tv_producer_term/producer_type": "",
    "/education/educational_institution/students_graduates./education/education/major_field_of_study": "",
    "/sports/sports_team/roster./basketball/basketball_roster_position/position": "",
    "/base/aareas/schema/administrative_area/administrative_area_type": "",
    "/location/country/official_language": "",
    "/base/saturdaynightlive/snl_cast_member/seasons./base/saturdaynightlive/snl_season_tenure/cast_members": "",
    "/tv/tv_program/country_of_origin": "",
    "/base/popstra/celebrity/breakup./base/popstra/breakup/participant": "",
    "/time/event/locations": "",
    "/organization/organization_founder/organizations_founded": "",
    "/sports/sports_team/roster./american_football/football_roster_position/position": "",
    "/common/topic/webpage./common/webpage/category": "",
    "/award/award_ceremony/awards_presented./award/award_honor/award_winner": "",
    "/sports/sports_league_draft/picks./sports/sports_league_draft_pick/school": "",
    "/film/film/film_production_design_by": "",
    "/film/film/story_by": "",
    "/award/award_winning_work/awards_won./award/award_honor/award_winner": "",
    "/people/deceased_person/place_of_burial": "",
    "/government/politician/government_positions_held./government/government_position_held/basic_title": "",
    "/base/popstra/celebrity/dated./base/popstra/dated/participant": "",
    "/education/university/local_tuition./measurement_unit/dated_money_value/currency": "",
    "/award/award_category/winners./award/award_honor/award_winner": "",
    "/education/university/fraternities_and_sororities": "",
    "/organization/organization/place_founded": "",
    "/tv/tv_producer/programs_produced./tv/tv_producer_term/producer_type": "",
    "/user/jg/default_domain/olympic_games/sports": "",
    "/location/capital_of_administrative_division/capital_of./location/administrative_division_capital_relationship/administrative_division": "",
    "/olympics/olympic_games/participating_countries": "",
    "/base/marchmadness/ncaa_basketball_tournament/seeds./base/marchmadness/ncaa_tournament_seed/team": "",
    "/film/film/production_companies": "",
    "/organization/non_profit_organization/registered_with./organization/non_profit_registration/registering_agency": "",
    "/music/artist/contribution./music/recording_contribution/performance_role": "",
    "/influence/influence_node/influenced_by": "",
    "/organization/role/leaders./organization/leadership/organization": "",
    "/base/x2010fifaworldcupsouthafrica/world_cup_squad/current_world_cup_squad./base/x2010fifaworldcupsouthafrica/current_world_cup_squad/current_club": "",
    "/film/film_subject/films": "",
    "/music/artist/track_contributions./music/track_contribution/role": "",
    "/american_football/football_team/current_roster./sports/sports_team_roster/position": "",
    "/film/film_set_designer/film_sets_designed": "",
    "/music/genre/artists": "",
    "/award/award_nominee/award_nominations./award/award_nomination/nominated_for": "",
    "/education/educational_institution/colors": "",
    "/people/person/sibling_s./people/sibling_relationship/sibling": "",
    "/music/genre/parent_genre": "",
    "/people/person/place_of_birth": "",
    "/music/instrument/family": "",
    "/film/film/genre": "",
    "/sports/sports_position/players./sports/sports_team_roster/team": "",
    "/education/educational_degree/people_with_this_degree./education/education/student": "",
    "/government/governmental_body/members./government/government_position_held/legislative_sessions": "",
    "/location/us_county/county_seat": "",
    "/film/film/country": "",
    "/sports/sports_team_location/teams": "",
    "/ice_hockey/hockey_team/current_roster./sports/sports_team_roster/position": "",
    "/media_common/netflix_genre/titles": "",
    "/film/film/film_format": "",
    "/business/business_operation/revenue./measurement_unit/dated_money_value/currency": "",
    "/olympics/olympic_sport/athletes./olympics/olympic_athlete_affiliation/olympics": "",
    "/location/statistical_region/gdp_nominal_per_capita./measurement_unit/dated_money_value/currency": "",
    "/base/popstra/celebrity/canoodled./base/popstra/canoodled/participant": "",
    "/award/award_winning_work/awards_won./award/award_honor/honored_for": "",
    "/film/film_distributor/films_distributed./film/film_film_distributor_relationship/film": "",
    "/user/ktrueman/default_domain/international_organization/member_states": "",
    "/location/location/contains": ""
}

In [26]:

def read_set(path, typ, out=None):
    """Write one split annotation line per triple found in ``path``.

    The file at ``path`` is read as UTF-8 text with one tab-separated
    ``head<TAB>relation<TAB>tail`` triple per line. For every triple a line of
    the form ``<<<head> <rel> <tail>>> obl:split obl:<typ> .`` is written, with
    each component prefixed by ``http://g.co/kg``.

    NOTE(review): the ``obl:`` prefix is not bound on the graph by any cell
    visible in this notebook — confirm the target file declares it.

    Args:
        path: Location of the tab-separated triple file.
        typ: Split name appended after ``obl:`` (e.g. ``'train'``).
        out: Writable text stream; defaults to the module-level ``outfile``.

    Raises:
        ValueError: If a non-blank line does not hold exactly three
            tab-separated fields.
    """
    target = outfile if out is None else out
    with open(path, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # A blank (typically trailing) line would otherwise fail to unpack.
                continue
            head,rel,tail = line.split("\t")
            target.write(f"<<<http://g.co/kg{head}> <http://g.co/kg{rel}> <http://g.co/kg{tail}>>> obl:split obl:{typ} . " + "\n")


# Append to the Turtle file written by the graph cell. The context manager
# closes the handle even when one of the reads fails.
# NOTE: the file is opened in append mode, so re-running this cell without
# regenerating the Turtle file first appends the same lines again.
with open(r"workspace/graphs/fb15k237.ttl", 'a', encoding="utf-8") as outfile:
    read_set(train_path, 'train')
    read_set(test_path, 'test')
    read_set(valid_path, 'valid')