In [3]:
# SPARQL query preparation: create the single endpoint client that the query
# cells in this notebook reuse through the global name `endpoint`.
# NOTE(review): the endpoint URL is hard-coded (plain HTTP, fixed IP address);
# consider moving it to one configuration constant or an environment variable.
from SPARQLWrapper import SPARQLWrapper, JSON
endpoint = SPARQLWrapper("http://114.212.81.217:8890/sparql/")

In [15]:
_XSD_DATETIME = "http://www.w3.org/2001/XMLSchema#dateTime"
# Number of leading characters stripped from entity IRIs to keep only the id.
_ENTITY_PREFIX_LEN = len("http://www.wikidata.org/entity/")  # == 31


def _build_query(relation, r_type):
    """Return the SPARQL text for `relation`; raise ValueError for an unknown `r_type`."""
    # NOTE(review): the p:/ps:/pq: prefixes are not declared in the query text;
    # presumably the endpoint predefines them -- confirm before changing servers.
    if r_type == "temporal fact":
        return '''
                PREFIX wd: <http://www.wikidata.org/entity/>
                PREFIX wdt: <http://www.wikidata.org/prop/direct/>
                SELECT DISTINCT ?e1 ?e2 ?st ?en ?time ?s
                WHERE
                {{
                    ?e1 p:{} ?s.
                    ?s ps:{} ?e2.
                    ?s a <http://wikiba.se/ontology#Statement>.
                    ?e1 a <http://wikiba.se/ontology#Item>.

                    {{
                        ?s pq:P580 ?st.
                        OPTIONAL {{?s pq:P582 ?en.}}
                        OPTIONAL {{?s pq:P585 ?time.}}
                    }}
                    UNION
                    {{
                        ?s pq:P582 ?en.
                        OPTIONAL {{?s pq:P580 ?st.}}
                        OPTIONAL {{?s pq:P585 ?time.}}
                    }}
                    UNION
                    {{
                        ?s pq:P585 ?time.
                        OPTIONAL {{?s pq:P580 ?st.}}
                        OPTIONAL{{?s pq:P582 ?en.}}
                    }}
                }}
                '''.format(relation,relation)
    if r_type == "time property":
        return '''
                PREFIX wd: <http://www.wikidata.org/entity/>
                PREFIX wdt: <http://www.wikidata.org/prop/direct/>
                SELECT DISTINCT ?e1 ?e2 ?st ?en ?s
                WHERE
                {{
                    ?e1 p:{} ?s.
                    ?s ps:{} ?e2.

                    OPTIONAL {{?s pq:P580 ?st.}}
                    OPTIONAL {{?s pq:P582 ?en.}}
                }}
                '''.format(relation,relation)
    raise ValueError("unknown r_type: %r" % (r_type,))


def _date_of(binding):
    """Return the part of a typed-literal value before 'T', or None if the binding is not a typed literal."""
    if binding["type"] != "typed-literal":
        return None
    if binding["datatype"] != _XSD_DATETIME:
        # Report the datatype of the binding actually being inspected.
        print("DatatypeAlert!type:%s" % binding["datatype"])
    return binding["value"].split("T")[0]


def _time_span(row, r_type, t_type):
    """Derive (keep, t1, t2) for one result row.

    keep is False when the row must be dropped. t1/t2 are None when the
    qualifiers give no time at all (the caller may still take the time from a
    date-valued object).
    """
    if t_type == "point" and "time" in row:
        day = _date_of(row["time"])
        return day is not None, day, day
    if t_type != "period":
        return True, None, None

    span = []
    for key in ("st", "en"):
        if key in row:
            day = _date_of(row[key])
            if day is None:
                return False, None, None  # unexpected datatype of time, drop the row
            span.append(day)
        else:
            span.append("null")
    t1, t2 = span

    # Neither start nor end time exists: fall back to the point in time.
    if t1 == "null" and t2 == "null":
        if "time" in row:
            day = _date_of(row["time"])
            return day is not None, day, day
        if r_type == "temporal fact":
            return False, None, None  # no point in time either
    return True, t1, t2


def _object_value(binding):
    """Return (value, is_date) for the tail binding, or None for an unsupported binding type."""
    kind = binding["type"]
    if kind == "uri":
        return binding["value"][_ENTITY_PREFIX_LEN:], False
    if kind == "literal":
        return binding["value"], False
    if kind == "typed-literal":
        return _date_of(binding), True
    return None


def get_all_item_with_relations_list(r_list,r_type="temporal fact",result_path="",t_type="period"):
    """Query the endpoint for each relation, write the facts to CSV and return per-relation statistics.

    For every relation id in `r_list` the function runs one SPARQL query on the
    global `endpoint`, converts each result row into a fact
    (subject, relation, object, t1, t2) and writes the facts to
    `<result_path>/<relation>res.csv`, one comma-separated line per fact.

    Args:
        r_list: iterable of relation ids (e.g. "P26") substituted into the query.
        r_type: "temporal fact" or "time property"; selects the query and
            whether rows without any time qualifier are dropped.
        result_path: output directory; created if missing, "" means the
            current working directory.
        t_type: "period" (use start/end, fall back to point in time) or
            "point" (use the point in time only).

    Returns:
        A list with one dict per relation holding the keys "r", "cnt",
        "st_cnt", "en_cnt", "st_en_pair_ent", "point_cnt" and "dup_cnt".

    Raises:
        ValueError: if `r_type` is not one of the two supported values.
    """
    import os
    from collections import Counter

    if result_path:
        os.makedirs(result_path, exist_ok=True)

    relation_stat=[]
    for relation in r_list:
        endpoint.setQuery(_build_query(relation, r_type))
        endpoint.setReturnFormat(JSON)
        results = endpoint.query().convert()['results']['bindings']
        print("%s Query complete!" % relation)

        # Sorting by statement IRI keeps the output order stable.
        results.sort(key=lambda row: row['s']['value'])
        rows_per_statement = Counter(row['s']['value'] for row in results)

        cnt = st_cnt = en_cnt = point_cnt = st_en_pair_cnt = dup_cnt = 0
        query_res=[]
        for row in results:
            # In period mode every row of a statement that yields more than one
            # row is discarded and counted as a duplicate.
            if t_type == "period" and rows_per_statement[row["s"]["value"]] > 1:
                dup_cnt += 1
                continue

            keep, t1, t2 = _time_span(row, r_type, t_type)
            if not keep:
                continue

            tail = _object_value(row["e2"])
            if tail is None:
                continue  # unexpected datatype of object
            o_value, o_is_date = tail
            if o_is_date:
                # A date-valued object is itself the time of the fact.
                t1 = t2 = o_value
            if t1 is None:
                continue  # no time information at all for this row

            has_st = "st" in row
            has_en = "en" in row
            st_cnt += has_st
            en_cnt += has_en
            st_en_pair_cnt += (has_st and has_en)
            point_cnt += ("time" in row)
            cnt += 1
            query_res.append({
                "s": row["e1"]["value"][_ENTITY_PREFIX_LEN:],
                "p": relation,
                "o": o_value,
                "t1": t1,
                "t2": t2,
            })

        relation_stat.append({
            "r": relation,
            "cnt": cnt,
            "st_cnt": st_cnt,
            "en_cnt": en_cnt,
            "st_en_pair_ent": st_en_pair_cnt,
            "point_cnt": point_cnt,
            "dup_cnt": dup_cnt,
        })

        with open(os.path.join(result_path, "%sres.csv" % relation),"w") as f:
            for tf in query_res:
                f.write("%s,%s,%s,%s,%s\n" %(tf["s"],tf["p"],tf["o"],tf["t1"],tf["t2"]))
    return relation_stat

In [11]:
# Trial run on a single relation (P286); the CSV is written under raw/test/
# and the returned statistics list is displayed as the cell result.
get_all_item_with_relations_list(["P286"],"temporal fact","raw/test/")

P286 Query complete!
duplication detected:http://www.wikidata.org/entity/statement/Q1003357-1b481d03-4b87-ff3d-2b45-9019ed48d700
duplication detected:http://www.wikidata.org/entity/statement/Q1003357-1b481d03-4b87-ff3d-2b45-9019ed48d700
duplication detected:http://www.wikidata.org/entity/statement/Q1262822-d80eefa7-4df3-d56a-ecf3-7d20c4f0618c
duplication detected:http://www.wikidata.org/entity/statement/Q1262822-d80eefa7-4df3-d56a-ecf3-7d20c4f0618c
duplication detected:http://www.wikidata.org/entity/statement/Q16201949-5c3af888-464c-58fb-b44c-056410df3eef
duplication detected:http://www.wikidata.org/entity/statement/Q16201949-5c3af888-464c-58fb-b44c-056410df3eef
duplication detected:http://www.wikidata.org/entity/statement/Q28230355-9c67a634-4fb1-07c1-a724-0706c02a9a69
duplication detected:http://www.wikidata.org/entity/statement/Q28230355-9c67a634-4fb1-07c1-a724-0706c02a9a69
duplication detected:http://www.wikidata.org/entity/statement/Q4512-2BE24BAF-E11D-4050-BF1F-68A231A5E606
duplic

[{'r': 'P286',
  'cnt': 2909,
  'st_cnt': 2829,
  'en_cnt': 1672,
  'st_en_pair_ent': 1628,
  'point_cnt': 36,
  'dup_cnt': 16}]

In [8]:
# import optional relation info
# The first line of the TSV is skipped as a header. For every other line the
# relation id is the first tab-separated column without its first 32
# characters and its last character.
# NOTE(review): presumably the column looks like
# <http://www.wikidata.org/entity/Pxxx> -- confirm against the file.
optional_relation_list=[]
with open("property final.tsv") as tr_file:
    next(tr_file, None)  # header line
    for line in tr_file:
        if not line.strip():
            continue  # a blank line would otherwise yield an empty relation id
        relation=line.split('\t')[0][32:-1]
        optional_relation_list.append(relation)
# print(optional_relation_list)

In [12]:
# required relations
# Passed to phase 1 as "temporal fact" relations.
common_relation_list = [
    'P26',
    'P108',
    'P54',
    'P286',
]
# Passed to phase 2 as "time property" relations.
timeobj_relation_list = [
    'P569',
    'P570',
]


In [16]:
# phase 1 for temporal relations in required relations (all period)
stat_info=get_all_item_with_relations_list(common_relation_list,"temporal fact","raw/required/common/")
print("Phase 1 done.")

# Write one tab-separated statistics row per relation; the header names one
# column for each of the seven values in a row.
statfilename="PHASE1_RELATION_STAT.txt"
with open(statfilename,"w") as f:
    f.write("?relation\t?count\t?st_count\t?en_count\t?st_en_pairs_count\t?point_count\t?dup_count\n")
    for this_r_stat in stat_info:
        f.write("%s\t%d\t%d\t%d\t%d\t%d\t%d\n" %(this_r_stat["r"],this_r_stat["cnt"],this_r_stat["st_cnt"],this_r_stat["en_cnt"],this_r_stat["st_en_pair_ent"],this_r_stat["point_cnt"],this_r_stat["dup_cnt"]))

P26 Query complete!
P108 Query complete!
P54 Query complete!
P286 Query complete!
Phase 1 done.


In [17]:
# phase 2 for time property in required relations
stat_info=get_all_item_with_relations_list(timeobj_relation_list,"time property","raw/required/time_prop")
print("Phase 2 done.")

# Write one tab-separated statistics row per relation; the `with` block closes
# the file even if a write fails.
statfilename="PHASE2_RELATION_STAT.txt"
with open(statfilename,"w") as f:
    f.write("?relation\t?count\t?st_count\t?en_count\t?st_en_pairs_count\t?point_count\n")
    for this_r_stat in stat_info:
        f.write("%s\t%d\t%d\t%d\t%d\t%d\n" %(this_r_stat["r"],this_r_stat["cnt"],this_r_stat["st_cnt"],this_r_stat["en_cnt"],this_r_stat["st_en_pair_ent"],this_r_stat["point_cnt"]))

P569 Query complete!
P570 Query complete!
Phase 2 done.


In [6]:
# phase 3 for all optional relations (suppose they are all period)
stat_info=get_all_item_with_relations_list(optional_relation_list,"temporal fact","raw/optional/common")
print("Phase 3 done.")

# Write one tab-separated statistics row per relation; the `with` block closes
# the file even if a write fails.
statfilename="PHASE3_RELATION_STAT.txt"
with open(statfilename,"w") as f:
    f.write("?relation\t?count\t?st_count\t?en_count\t?st_en_pairs_count\t?point_count\n")
    for this_r_stat in stat_info:
        f.write("%s\t%d\t%d\t%d\t%d\t%d\n" %(this_r_stat["r"],this_r_stat["cnt"],this_r_stat["st_cnt"],this_r_stat["en_cnt"],this_r_stat["st_en_pair_ent"],this_r_stat["point_cnt"]))

P6 Query complete!
P17 Query complete!
P19 Query complete!
P20 Query complete!
P26 Query complete!
P27 Query complete!
P30 Query complete!
P35 Query complete!
P36 Query complete!
P37 Query complete!
P38 Query complete!
P39 Query complete!
P47 Query complete!
P50 Query complete!
P53 Query complete!
P54 Query complete!
P57 Query complete!
P58 Query complete!
P69 Query complete!
P85 Query complete!
P97 Query complete!
P102 Query complete!
P108 Query complete!
P119 Query complete!
P122 Query complete!
P127 Query complete!
P131 Query complete!
P150 Query complete!
P159 Query complete!
P161 Query complete!
P166 Query complete!
P170 Query complete!
P175 Query complete!
P180 Query complete!
P190 Query complete!
P197 Query complete!
P241 Query complete!
P276 Query complete!
P286 Query complete!
P411 Query complete!
P457 Query complete!
P463 Query complete!
P512 Query complete!
P521 Query complete!
P530 Query complete!
P551 Query complete!
P610 Query complete!
P612 Query complete!
P647 Query com

In [1]:
# merge files into dataset
import os
def convert_time2value(raw_time_str):
    """Turn a dashed date string into a compact digit string.

    "null" is returned unchanged. Otherwise every "-" is removed and, when the
    input started with "-", a single leading "-" is put back in front.
    """
    if raw_time_str == "null":
        return raw_time_str
    sign = "-" if raw_time_str[:1] == "-" else ""
    digits = "".join(raw_time_str.split("-"))
    return sign + digits
    
# Merge every "*res.csv" file found in the result directories into one TSV
# dataset, dropping exact duplicate lines within each file and reporting facts
# whose start time lies after their end time.
dir_list=["raw/required/common/","raw/required/time_prop/"]
r_list=[]
dataset_name="required_relations"
dataset_version="alpha-3"
with open(dataset_name+"_"+dataset_version+".tsv","w") as merged_f:
    for each_dir in dir_list:
        # sorted() makes the order of the merged file reproducible;
        # os.listdir returns entries in arbitrary order.
        for each_obj in sorted(os.listdir(each_dir)):
            if not each_obj.endswith("res.csv"):
                continue
            duplicate_chk_set=set()
            with open(each_dir+each_obj,"r") as read_f:
                for eachline in read_f:
                    if eachline in duplicate_chk_set:
                        # print("Duplicate line found:%s" % eachline)
                        continue
                    line_info=eachline.strip().split(",")
                    if len(line_info)<5:
                        continue  # blank or truncated line, nothing to merge
                    # Subject and relation are the first two fields and the two
                    # times are the last two; everything in between is the
                    # object, which may itself contain commas.
                    subj,pred=line_info[0],line_info[1]
                    st_t,en_t=line_info[-2],line_info[-1]
                    obj=",".join(line_info[2:-2])
                    if st_t!="null" and en_t!="null":
                        st_bc=st_t.startswith("-")
                        en_bc=en_t.startswith("-")
                        # NOTE(review): this compares the date strings
                        # lexicographically; it assumes fixed-width,
                        # zero-padded years -- confirm.
                        if ((not (st_bc and en_bc)) and st_t>en_t) or (st_bc and en_bc and st_t<en_t):
                            print("Time inversion found:%s" %eachline)
                    duplicate_chk_set.add(eachline)
                    merged_f.write("<%s>\t<%s>\t<%s>\t<%s>\t<%s>\t<true>\t<1>\n" %(subj,pred,obj,convert_time2value(st_t),convert_time2value(en_t)))
            print(each_dir+each_obj)
            print(len(duplicate_chk_set))


Time inversion found:Q11091470,P26,Q504372,1927-01-01,0019-01-01

Time inversion found:Q11881,P26,Q234525,1844-06-26,1821-03-03

Time inversion found:Q13405913,P26,Q64086,1710-09-12,1710-02-27

Time inversion found:Q154691,P26,Q7517008,1976-01-01,0001-01-01

Time inversion found:Q15993800,P26,Q15993882,1944-01-01,1902-01-01

Time inversion found:Q208229,P26,Q466286,2006-11-18,2000-01-01

Time inversion found:Q20977780,P26,Q74042,1959-03-07,1959-01-01

Time inversion found:Q21207211,P26,Q651948,1396-01-01,1395-07-12

Time inversion found:Q21549628,P26,Q354863,1906-02-01,1906-01-01

Time inversion found:Q22084549,P26,Q1911638,1963-01-11,1963-01-01

Time inversion found:Q22234426,P26,Q543182,1921-01-01,0127-01-01

Time inversion found:Q234525,P26,Q11881,1844-06-26,1821-03-03

Time inversion found:Q241966,P26,Q183245,1864-02-20,1864-01-01

Time inversion found:Q260378,P26,Q6129886,1965-01-01,1876-01-01

Time inversion found:Q266715,P26,Q712793,1862-01-01,1807-11-16

Time inversion found:Q2

In [None]:
#test temporal fact query sparql
# Runs the start/end-time query for one relation and converts every result row
# into a fact dict (s, p, o, t1, t2); a missing start or end time is stored as
# the string "null".
relation="P1789"
query = '''
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    SELECT DISTINCT ?e1 ?e2 ?st ?en
    WHERE
    {{
        ?e1 p:{} ?s.
        ?s ps:{} ?e2.

        {{?s pq:P580 ?st.
        OPTIONAL {{?s pq:P582 ?en.}}}}
        UNION
        {{?s pq:P582 ?en.
        OPTIONAL {{?s pq:P580 ?st.}}}}
    }}
    '''.format(relation,relation)
# print(query)
endpoint.setQuery(query)
endpoint.setReturnFormat(JSON)
response = endpoint.query().convert()
results = response['results']['bindings']
print(results)
query_res=[]
print("%s Query complete!" % relation)
for each_result in results:
    temporal_fact={}
    # [31:] drops the first 31 characters of the IRI value.
    temporal_fact["s"]=each_result["e1"]["value"][31:]
    temporal_fact["p"]=relation
    temporal_fact["o"]=each_result["e2"]["value"][31:]

    if "st" in each_result:
        if each_result["st"]["type"]=="typed-literal":
            if each_result["st"]["datatype"]!="http://www.w3.org/2001/XMLSchema#dateTime":
                # Report the start-time binding itself; "en" may be absent here.
                print("DatatypeAlert!type:%s" % each_result["st"]["datatype"])
            temporal_fact["t1"]=each_result["st"]["value"].split("T")[0]
        else:
            continue  # start time is not a typed literal, drop the row
    else:
        temporal_fact["t1"]="null"
    if "en" in each_result:
        if each_result["en"]["type"]=="typed-literal":
            if each_result["en"]["datatype"]!="http://www.w3.org/2001/XMLSchema#dateTime":
                print("DatatypeAlert!type:%s" % each_result["en"]["datatype"])
            temporal_fact["t2"]=each_result["en"]["value"].split("T")[0]
        else:
            continue  # end time is not a typed literal, drop the row
    else:
        temporal_fact["t2"]="null"
    query_res.append(temporal_fact)

# f=open("%sres.csv" %relation,"w")
# for tf in query_res:
#     f.write("%s,%s,%s,%s,%s\n" %(tf["s"],tf["p"],tf["o"],tf["t1"],tf["t2"]))
# f.close()
# print(query_res)


In [None]:
#test time property query sparql
# Fetches the direct (wdt:) values of one relation and turns every row whose
# ?time binding is some kind of literal into a fact dict whose object, t1 and
# t2 are all that same value.
relation="P569"
# The relation id is substituted twice: once as a quoted constant in the
# SELECT list and once as the wdt: property in the pattern.
query = '''
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT  ?e1 \"{}\" ?time
        WHERE
        {{
            ?e1 wdt:{} ?time.
        }}
    '''.format(relation,relation)
# print(query)
endpoint.setQuery(query)
endpoint.setReturnFormat(JSON)
response = endpoint.query().convert()
results = response['results']['bindings']
print(results)
query_res=[]
print("%s Query complete!" % relation)
for each_result in results:
    # Substring test: accepts any binding type that contains "literal".
    if "literal" in each_result["time"]["type"]:
        temporal_fact={}
        # [31:] drops the first 31 characters of the IRI value.
        temporal_fact["s"]=each_result["e1"]["value"][31:]
        # NOTE(review): "callret-1" is presumably the name the endpoint gives
        # to the unnamed constant in the SELECT list -- confirm.
        temporal_fact["p"]=each_result["callret-1"]["value"]
        temporal_fact["o"]=each_result["time"]["value"]
        temporal_fact["t1"]=each_result["time"]["value"]
        temporal_fact["t2"]=each_result["time"]["value"]
        query_res.append(temporal_fact)
# f=open("%sres.csv" %relation,"w")
# for tf in query_res:
#     f.write("%s,%s,%s,%s,%s\n" %(tf["s"],tf["p"],tf["o"],tf["t1"],tf["t2"]))
# f.close()
# print(query_res)

In [None]:
# multi-value stat
# For every relation and every time qualifier (P580, P582, P585) print how many
# result rows belong to statements that carry more than one value for that
# qualifier, and how many such statements there are.
relation_list=optional_relation_list#["P26","P54","P108","P286","P166"]
property_list=["P580","P582","P585"]
for relation in relation_list:
    print(relation,end="")
    for time_property in property_list:
        query = '''
                PREFIX wd: <http://www.wikidata.org/entity/>
                PREFIX wdt: <http://www.wikidata.org/prop/direct/>
                SELECT ?e1 ?e2 ?s COUNT(?e2)
                WHERE
                {{
                    ?e1 p:{} ?s.
                    ?s ps:{} ?e2.
                    ?s a <http://wikiba.se/ontology#Statement>.
                    ?s pq:{} ?timevalue.
                }}
                GROUP BY ?e1 ?e2 ?s 
                HAVING (COUNT(?e2)>1)
            '''.format(relation,relation,time_property)
        # print(query)
        endpoint.setQuery(query)
        endpoint.setReturnFormat(JSON)
        response = endpoint.query().convert()
        results = response['results']['bindings']
        # print(results[0])
        s_cnt=len(results)
        # NOTE(review): 'callret-3' is presumably the endpoint's name for the
        # unnamed COUNT column -- confirm.
        cnt=sum(int(each_data['callret-3']['value']) for each_data in results)
        print("\t%d(%d statements)" %(cnt,s_cnt),end="")
    print("")

In [9]:
# prefix limit check
# For every relation compare the number of distinct subjects typed as
# wikibase Item with the number of all distinct subjects, and report how many
# subjects typed as wikibase Property account for the difference.
def _count_distinct_subjects(relation, class_iri=None):
    """Return the number of distinct ?e1 that have a p:<relation> statement.

    When `class_iri` is given, only subjects with `?e1 a <class_iri>` are counted.
    """
    class_triple = "?e1 a <%s>." % class_iri if class_iri else ""
    query = '''
            PREFIX wd: <http://www.wikidata.org/entity/>
            PREFIX wdt: <http://www.wikidata.org/prop/direct/>
            SELECT COUNT DISTINCT ?e1
            WHERE
            {{
                {}
                ?s a <http://wikiba.se/ontology#Statement>.
                ?e1 p:{} ?s.
            }}
        '''.format(class_triple, relation)
    # print(query)
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    bindings = endpoint.query().convert()['results']['bindings']
    # NOTE(review): 'callret-0' is presumably the endpoint's name for the
    # unnamed COUNT column -- confirm.
    return int(bindings[0]['callret-0']['value'])


relation_list=optional_relation_list
for relation in relation_list:
    prefix_limit_cnt = _count_distinct_subjects(relation, "http://wikiba.se/ontology#Item")
    p_prefix_limit_cnt = _count_distinct_subjects(relation, "http://wikiba.se/ontology#Property")
    all_cnt = _count_distinct_subjects(relation)
    if prefix_limit_cnt==all_cnt:
        print("%s\tEQUAL\t%d" %(relation,all_cnt))
    else:
        # The last column tells whether items plus properties add up to all subjects.
        print("%s\tINEQUAL\t%d\t%d\t%d\t%s" %(relation,prefix_limit_cnt,p_prefix_limit_cnt,all_cnt,prefix_limit_cnt+p_prefix_limit_cnt==all_cnt))

P6	EQUAL	19421
P17	INEQUAL	10407109	2013	10409122	True
P19	EQUAL	2117834
P20	EQUAL	810813
P26	EQUAL	105536
P27	EQUAL	2901998
P30	EQUAL	46550
P35	EQUAL	707
P36	EQUAL	78797
P37	EQUAL	5471
P38	EQUAL	1346
P39	INEQUAL	428767	1	428768	True
P47	EQUAL	113695
P50	INEQUAL	3500999	1	3501000	True
P53	EQUAL	31470
P54	EQUAL	358050
P57	EQUAL	232178
P58	EQUAL	113358
P69	EQUAL	789376
P85	EQUAL	633
P97	EQUAL	35313
P102	EQUAL	298893
P108	EQUAL	382068
P119	EQUAL	136288
P122	EQUAL	1226
P127	INEQUAL	163318	5	163323	True
P131	INEQUAL	7439458	2	7439460	True
P150	EQUAL	76063
P159	EQUAL	220290
P161	EQUAL	180664
P166	EQUAL	391292
P170	INEQUAL	469240	3	469243	True
P175	EQUAL	304935
P180	INEQUAL	91325	1	91326	True
P190	EQUAL	15641
P197	EQUAL	53084
P241	EQUAL	65925
P276	INEQUAL	989741	2	989743	True
P286	EQUAL	7463
P411	EQUAL	8481
P457	EQUAL	403
P463	EQUAL	176045
P512	EQUAL	67940
P521	EQUAL	54
P530	EQUAL	259
P551	EQUAL	67662
P610	EQUAL	4076
P612	EQUAL	258
P647	EQUAL	11205
P669	EQUAL	68933
P708	EQUAL	55882
P710	EQUAL

In [9]:
# check cnt>=100w
# For every optional relation, print the number of distinct rows matched by the
# start/end/point-in-time pattern below.
# NOTE(review): "100w" presumably means 1,000,000 rows -- confirm.
relation_list=optional_relation_list
for relation in relation_list:
    # The three UNION branches require P580, P582 or P585 respectively and
    # treat the other two qualifiers as optional.
    query = '''
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT COUNT DISTINCT ?e1 ?e2 ?st ?en ?time ?s
        WHERE
        {{
            ?e1 p:{} ?s.
            ?s ps:{} ?e2.
            ?s a <http://wikiba.se/ontology#Statement>.
            ?e1 a <http://wikiba.se/ontology#Item>.

            {{
                ?s pq:P580 ?st.
                OPTIONAL {{?s pq:P582 ?en.}}
                OPTIONAL {{?s pq:P585 ?time.}}
            }}
            UNION
            {{
                ?s pq:P582 ?en.
                OPTIONAL {{?s pq:P580 ?st.}}
                OPTIONAL {{?s pq:P585 ?time.}}
            }}
            UNION
            {{
                ?s pq:P585 ?time.
                OPTIONAL {{?s pq:P580 ?st.}}
                OPTIONAL{{?s pq:P582 ?en.}}
            }}
        }}
        '''.format(relation,relation)
    # print(query)
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()
    results=response['results']['bindings']
    # The count is read from the 'callret-0' field of the first result row.
    print("%s\t%s"%(relation,results[0]['callret-0']['value']))

P6	11568
P17	12350
P19	12
P20	7
P26	33714
P27	30871
P30	6
P35	741
P36	990
P37	69
P38	273
P39	222423
P47	1051
P50	59
P53	17
P54	920828
P57	90
P58	52
P69	51260
P85	46
P97	2207
P102	12791
P108	201110
P119	4092
P122	76
P127	32331
P131	70124
P150	11059
P159	1839
P161	407
P166	196132
P170	77
P175	351
P180	80
P190	6401
P197	1173
P241	2073
P276	4870
P286	2930
P411	479
P457	29
P463	27504
P512	10376
P521	10
P530	509
P551	21143
P610	6
P612	90
P647	155
P669	157
P708	286
P710	2590
P725	18
P726	60
P750	41
P802	69
P803	279
P859	186
P1038	59
P1066	230
P1075	876
P1344	2255
P1366	1269
P1399	833
P1411	34572
P1433	308
P1435	366834
P1448	24575
P1596	113
P1640	50
P1652	58
P1789	10
P1891	989
P2238	8
P2291	438
P2567	12
P2568	5110
P2632	543
P2652	8
P2828	7
P2838	46
P2962	14898
P3085	8
P3148	4
P3300	67
P3342	61
P3448	4
P3919	51
P4002	6751
P4675	0
P5030	0
P5460	5
P5817	1
P6087	118
P6364	72
P6872	0
P7047	0
P7152	0
P7779	0
P7936	0
P8047	0
P8138	0
P8413	0
P8764	0
P8791	0
P8839	0
P8852	0
P8938	0


In [10]:
# check cnt>=100w
# For P569 and P570, print the number of distinct rows matched by the pattern
# below, in which the start and end qualifiers are both optional.
# NOTE(review): "100w" presumably means 1,000,000 rows -- confirm.
relation_list=["P569","P570"]
for relation in relation_list:
    query = '''
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT COUNT DISTINCT ?e1 ?e2 ?st ?en ?s
        WHERE
        {{
            ?e1 p:{} ?s.
            ?s ps:{} ?e2.

            OPTIONAL {{?s pq:P580 ?st.}}
            OPTIONAL {{?s pq:P582 ?en.}}
        }}
        '''.format(relation,relation)
    # print(query)
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)
    response = endpoint.query().convert()
    results=response['results']['bindings']
    # The count is read from the 'callret-0' field of the first result row.
    print("%s\t%s"%(relation,results[0]['callret-0']['value']))

P569	3483802
P570	1726827
