In [43]:
# SPARQL query preparation
from SPARQLWrapper import SPARQLWrapper, JSON
# HTTP SPARQL endpoint handle. The cells visible in this notebook send their
# queries through ODBC (query_with_odbc) and do not reference `endpoint`.
endpoint = SPARQLWrapper("http://114.212.81.217:8890/sparql/")
# IRI prefix removed from entity IRIs so that only the bare id (e.g. "Q727") is stored.
wd_prefix="http://www.wikidata.org/entity/"
# Result rows containing a value whose string form starts with this prefix are
# counted as "unknown value" rows and skipped by get_all_item_with_relations_list.
node_prefix="nodeID://"

In [1]:
import pyodbc
def initialize_odbc_connection():
    """(Re)create the module-wide Virtuoso ODBC connection ``odbc_conn``.

    The connection string is read from the ``VIRTUOSO_ODBC_CONNSTR``
    environment variable when it is set, so credentials do not have to live in
    the notebook; otherwise the original hard-coded string is used.

    A previously opened connection is closed (best effort) before it is
    replaced, so repeated reconnects triggered by ``query_with_odbc`` do not
    pile up open connections.
    """
    global odbc_conn
    import os  # function-scope import keeps this cell self-contained

    # Best-effort cleanup of the connection that is about to be replaced.
    previous_conn = globals().get("odbc_conn")
    if previous_conn is not None:
        try:
            previous_conn.close()
        except Exception:
            # The old connection is usually already broken when we get here;
            # failing to close it must not prevent the reconnect.
            pass

    connection_string = os.environ.get(
        "VIRTUOSO_ODBC_CONNSTR",
        'DRIVER=/usr/local/lib/virtodbc.so;Host=114.212.81.217:1111;UID=dba;PWD=dba',
    )
    odbc_conn = pyodbc.connect(connection_string)
    # Use UTF-8 for data read from and written to the driver.
    odbc_conn.setdecoding(pyodbc.SQL_CHAR, encoding='utf8')
    odbc_conn.setdecoding(pyodbc.SQL_WCHAR, encoding='utf8')
    odbc_conn.setencoding(encoding='utf8')
    print('Wikidata Virtuoso ODBC connected')

def query_with_odbc(query):
    """Execute ``query`` on the shared ODBC connection and return every row.

    On any exception the error and the failing query are printed, the
    connection is rebuilt with ``initialize_odbc_connection()`` and an empty
    list is returned instead of raising.
    """
    try:
        with odbc_conn.cursor() as cur:
            cur.execute(query)
            fetched = cur.fetchall()
    except Exception as exc:
        print(exc)
        print(f"Query Execution Failed:{query}")
        # Reconnect so that the next call starts from a fresh connection.
        initialize_odbc_connection()
        return []
    else:
        return fetched

# Open the shared connection once so query_with_odbc() can use the global odbc_conn.
initialize_odbc_connection()

Wikidata Virtuoso ODBC connected


In [37]:
def convert_time2value(raw_time_str):
    """Convert a raw date/time string into a compact ``[-]YYYYMMDD`` string.

    Parameters
    ----------
    raw_time_str : str or None
        Text such as ``"2001-05-03T00:00:00Z"``, ``"-0500-01-01"`` or
        ``"1950"``. ``None`` and the strings ``"null"`` / ``"None"`` mean
        "no value".

    Returns
    -------
    str
        ``"null"`` when there is no value, otherwise the year as given,
        followed by a two-digit month and a two-digit day, prefixed with
        ``"-"`` for a negative (BC) year.

    Raises
    ------
    ValueError
        If the year, month or day part is not an integer.
    """
    if raw_time_str is None or raw_time_str=="null" or raw_time_str=="None":
        return "null"
    bc=""
    res=raw_time_str
    if res.startswith("-"):
        bc="-"
        res=res[1:]
    colon_pos=res.find(':')
    if colon_pos!=-1:
        # Drop the time of day: cut two characters before the first colon
        # (the hour digits), then remove the separator left at the end.
        res=res[:(colon_pos-2)].strip().strip('T').strip('Z')
    date_parts=res.split('-')
    # Missing month and/or day default to "01".
    while len(date_parts)<3:
        date_parts.append("01")

    year=date_parts[0]
    if int(year)==0:
        # Year zero carries no sign.
        bc=""
    month=int(date_parts[1])
    day=int(date_parts[2])
    # Out-of-range month/day values are replaced by 1.
    if month<1 or month>12:
        month=1
    if day<1 or day>31:
        day=1

    # Zero-pad month and day so the result keeps a fixed-width MMDD tail even
    # for inputs like "2001-5-3".
    return "%s%s%02d%02d" % (bc,year,month,day)

def get_all_item_with_relations_list(r_dict,r_type="temporal fact",result_path=""):
    """Query every relation in ``r_dict`` and dump its facts to ``<relation>res.tsv``.

    Parameters
    ----------
    r_dict : dict
        Maps a property id (e.g. ``"P26"``) to its time type, ``"point"`` or
        ``"period"``.
    r_type : str
        ``"temporal fact"``: keep statements that carry at least one of the
        qualifiers P580 (start), P582 (end) or P585 (point in time).
        ``"time property"``: the statement value itself is the time; it is
        written as object, t1 and t2.
    result_path : str
        Directory that receives the TSV files. An empty string means the
        current working directory. ``open`` fails if the directory is missing.

    Returns
    -------
    list of dict
        One statistics dict per relation with the keys ``r``, ``cnt``,
        ``st_cnt``, ``en_cnt``, ``st_en_pair_ent``, ``point_cnt``, ``dup_cnt``
        and ``unk_cnt``.

    Raises
    ------
    ValueError
        For an unsupported ``r_type``, or (in "temporal fact" mode) a time type
        other than ``"point"`` / ``"period"``.

    Each output line is ``subject<TAB>relation<TAB>object<TAB>t1<TAB>t2``.
    """
    import os  # function-scope import: only needed to build the output path

    relation_stat=[]
    for relation in r_dict.keys():
        time_type=r_dict[relation]

        # variables for statistics for each relation
        cnt=0
        st_cnt=0
        en_cnt=0
        point_cnt=0
        st_en_pair_cnt=0
        dup_cnt=0
        unk_cnt=0

        # different query for different relation types
        if r_type=="temporal fact":
            if time_type not in ("point","period"):
                # Fail before running the query: rows of an unknown time type
                # would otherwise end up without t1/t2 and break the TSV dump.
                raise ValueError("unsupported time type %r for relation %s" % (time_type,relation))
            query = '''
                SPARQL
                PREFIX wd: <http://www.wikidata.org/entity/>
                PREFIX wdt: <http://www.wikidata.org/prop/direct/>
                SELECT DISTINCT ?s ?e1 ?e2 ?st ?en ?time
                WHERE
                {{
                    ?e1 p:{} ?s.
                    ?s ps:{} ?e2.
                    ?s a <http://wikiba.se/ontology#Statement>.
                    ?e1 a <http://wikiba.se/ontology#Item>.

                    {{
                        ?s pq:P580 ?st.
                        OPTIONAL {{?s pq:P582 ?en.}}
                        OPTIONAL {{?s pq:P585 ?time.}}
                    }}
                    UNION
                    {{
                        ?s pq:P582 ?en.
                        OPTIONAL {{?s pq:P580 ?st.}}
                        OPTIONAL {{?s pq:P585 ?time.}}
                    }}
                    UNION
                    {{
                        ?s pq:P585 ?time.
                        OPTIONAL {{?s pq:P580 ?st.}}
                        OPTIONAL{{?s pq:P582 ?en.}}
                    }}
                }}
                '''.format(relation,relation)
        elif r_type=="time property":
            query = '''
                SPARQL
                PREFIX wd: <http://www.wikidata.org/entity/>
                PREFIX wdt: <http://www.wikidata.org/prop/direct/>
                SELECT DISTINCT ?s ?e1 ?e2 ?st ?en ?time
                WHERE
                {{
                    ?e1 p:{} ?s.
                    ?s ps:{} ?e2.

                    OPTIONAL {{?s pq:P580 ?st.}}
                    OPTIONAL {{?s pq:P582 ?en.}}
                    OPTIONAL {{?s pq:P585 ?time.}}
                }}
                '''.format(relation,relation)
        else:
            raise ValueError("unsupported r_type: %r" % (r_type,))

        results=query_with_odbc(query)
        print("%s Query complete! count:%d" % (relation,len(results)))
        # Sort on the first column (?s, the statement node) so that rows of the
        # same statement are adjacent for the duplicate check below.
        results.sort(key=lambda ele: ele[0])

        # process query results; row layout is (?s, ?e1, ?e2, ?st, ?en, ?time)
        query_res=[]
        last_index=len(results)-1
        for i,each_result in enumerate(results):
            # unknown value check: skip rows containing a nodeID:// value
            if any(str(ele).startswith(node_prefix) for ele in each_result):
                unk_cnt+=1
                continue

            # dup check: a statement that appears in more than one row is
            # dropped entirely for "period" relations in "temporal fact" mode
            equal2last=i>0 and each_result[0]==results[i-1][0]
            equal2next=i<last_index and each_result[0]==results[i+1][0]
            if (equal2last or equal2next) and time_type=="period" and r_type=="temporal fact":
                dup_cnt+=1
                continue

            # str() keeps the prefix test safe for non-string row values
            subject=str(each_result[1])
            if not subject.startswith(wd_prefix):
                print("S NOT ENTITY Alert!%s" %(str(each_result)))
                continue

            st_time,en_time,point_time=each_result[3],each_result[4],each_result[5]
            temporal_fact={"s":subject[len(wd_prefix):],"p":relation}

            if r_type=="time property":
                # The statement value is the time itself.
                value_text=str(each_result[2])
                temporal_fact["o"]=value_text
                temporal_fact["t1"]=temporal_fact["t2"]=convert_time2value(value_text)
            else:
                if st_time is None and en_time is None and point_time is None:
                    print("ALL TIME NULL Alert!%s" %(str(each_result)))
                    continue
                obj=each_result[2]
                if isinstance(obj,str) and obj.startswith(wd_prefix):
                    obj=obj[len(wd_prefix):]
                temporal_fact["o"]=obj
                # "point" relations prefer P585 and fall back to start/end;
                # "period" relations prefer start/end and fall back to P585.
                if time_type=="point":
                    use_point_time=point_time is not None
                else:
                    use_point_time=st_time is None and en_time is None
                if use_point_time:
                    temporal_fact["t1"]=temporal_fact["t2"]=convert_time2value(str(point_time))
                else:
                    temporal_fact["t1"]=convert_time2value(str(st_time))
                    temporal_fact["t2"]=convert_time2value(str(en_time))

            if st_time is not None:
                st_cnt+=1
            if en_time is not None:
                en_cnt+=1
            if st_time is not None and en_time is not None:
                st_en_pair_cnt+=1
            if point_time is not None:
                point_cnt+=1
            cnt+=1
            query_res.append(temporal_fact)

        relation_stat.append({
            "r":relation,
            "cnt":cnt,
            "st_cnt":st_cnt,
            "en_cnt":en_cnt,
            "st_en_pair_ent":st_en_pair_cnt,  # key spelling kept: callers read it
            "point_cnt":point_cnt,
            "dup_cnt":dup_cnt,
            "unk_cnt":unk_cnt,
        })

        # os.path.join keeps an empty result_path relative to the working
        # directory instead of producing "/<relation>res.tsv".
        out_path=os.path.join(result_path,"%sres.tsv" % relation)
        with open(out_path,"w") as f:
            for tf in query_res:
                f.write("%s\t%s\t%s\t%s\t%s\n" %(tf["s"],tf["p"],tf["o"],tf["t1"],tf["t2"]))
    return relation_stat

In [None]:
# Smoke test on a single relation; output goes to raw/test/.
# NOTE(review): the commented-out call below passes a list, but the function
# iterates r_dict.keys() and reads r_dict[relation], so it needs a dict such as
# {"P26": "period"}.
# get_all_item_with_relations_list(["P26"],"temporal fact","raw/test/")
get_all_item_with_relations_list({"P569":"point"},"time property","raw/test/")

In [39]:
# import optional relation info
# property_type.tsv: the first line is skipped as a header; every other line
# holds "<relation>\t<time type>" in its first two tab-separated columns.
optional_relation_dict={}
with open("property_type.tsv") as tr_file:
    next(tr_file,None)  # skip the header line (no error on an empty file)
    for line in tr_file:
        line_info=line.strip().split('\t')
        if len(line_info)<2:
            # blank or incomplete line: nothing to record
            continue
        relation=line_info[0]
        time_type=line_info[1]
        optional_relation_dict[relation]=time_type
print(optional_relation_dict)

{'P6': 'period', 'P17': 'period', 'P19': 'point', 'P20': 'point', 'P26': 'period', 'P27': 'period', 'P30': 'period', 'P35': 'period', 'P36': 'period', 'P37': 'period', 'P38': 'period', 'P39': 'period', 'P47': 'period', 'P50': 'period', 'P53': 'period', 'P54': 'period', 'P57': 'period', 'P58': 'period', 'P69': 'period', 'P85': 'period', 'P97': 'period', 'P102': 'period', 'P108': 'period', 'P119': 'period', 'P122': 'period', 'P127': 'period', 'P131': 'period', 'P150': 'period', 'P159': 'period', 'P161': 'period', 'P166': 'point', 'P170': 'point', 'P175': 'point', 'P180': 'period', 'P190': 'period', 'P197': 'period', 'P241': 'period', 'P276': 'period', 'P286': 'period', 'P411': 'point', 'P457': 'point', 'P463': 'period', 'P512': 'point', 'P521': 'period', 'P530': 'period', 'P551': 'point', 'P610': 'period', 'P612': 'period', 'P647': 'point', 'P669': 'period', 'P708': 'period', 'P710': 'point', 'P725': 'period', 'P726': 'period', 'P750': 'period', 'P802': 'period', 'P803': 'period', 'P859'

In [41]:
# required relations
# Both dicts map a Wikidata property id to the time type ('period' or 'point')
# expected by get_all_item_with_relations_list.
# Extracted in "temporal fact" mode (phase 1).
common_relation_dict={'P26':'period', 'P108':'period', 'P54':'period', 'P286':'period'}
# Extracted in "time property" mode (phase 2).
timeobj_relation_dict={'P569':'point', 'P570':'point'}


In [28]:
# phase 1 for temporal relations in required relations (all period)
stat_info=get_all_item_with_relations_list(common_relation_dict,"temporal fact","raw/required/common/")
print("Phase 1 done.")

# Write one statistics row per relation; `with` closes the file even if a row
# fails to format.
statfilename="PHASE1_RELATION_STAT.txt"
with open(statfilename,"w") as f:
    f.write("?relation\t?count\t?st_count\t?en_count\t?st_en_pairs_count\t?point_count\t?dup_count\n")
    for this_r_stat in stat_info:
        f.write("%s\t%d\t%d\t%d\t%d\t%d\t%d\n" %(this_r_stat["r"],this_r_stat["cnt"],this_r_stat["st_cnt"],this_r_stat["en_cnt"],this_r_stat["st_en_pair_ent"],this_r_stat["point_cnt"],this_r_stat["dup_cnt"]))

P26 Query complete! count:33857
P108 Query complete! count:201115
P54 Query complete! count:920902
P286 Query complete! count:2930
Phase 1 done.


In [44]:
# phase 2 for time property in required relations
stat_info=get_all_item_with_relations_list(timeobj_relation_dict,"time property","raw/required/time_prop")
print("Phase 2 done.")

# Write one statistics row per relation; `with` closes the file even if a row
# fails to format.
statfilename="PHASE2_RELATION_STAT.txt"
with open(statfilename,"w") as f:
    f.write("?relation\t?count\t?st_count\t?en_count\t?st_en_pairs_count\t?point_count\t?dup_count\n")
    for this_r_stat in stat_info:
        f.write("%s\t%d\t%d\t%d\t%d\t%d\t%d\n" %(this_r_stat["r"],this_r_stat["cnt"],this_r_stat["st_cnt"],this_r_stat["en_cnt"],this_r_stat["st_en_pair_ent"],this_r_stat["point_cnt"],this_r_stat["dup_cnt"]))

P569 Query complete! count:3483802
P570 Query complete! count:1726827
Phase 2 done.


In [45]:
# phase 3 for all optional relations 
stat_info=get_all_item_with_relations_list(optional_relation_dict,"temporal fact","raw/optional/common")
print("Phase 3 done.")

# Write one statistics row per relation (this phase does not report the
# duplicate count); `with` closes the file even if a row fails to format.
statfilename="PHASE3_RELATION_STAT.txt"
with open(statfilename,"w") as f:
    f.write("?relation\t?count\t?st_count\t?en_count\t?st_en_pairs_count\t?point_count\n")
    for this_r_stat in stat_info:
        f.write("%s\t%d\t%d\t%d\t%d\t%d\n" %(this_r_stat["r"],this_r_stat["cnt"],this_r_stat["st_cnt"],this_r_stat["en_cnt"],this_r_stat["st_en_pair_ent"],this_r_stat["point_cnt"]))

P6 Query complete! count:11571
P17 Query complete! count:12367
P19 Query complete! count:12
P20 Query complete! count:7
P26 Query complete! count:33857
P27 Query complete! count:30873
P30 Query complete! count:6
P35 Query complete! count:813
P36 Query complete! count:990
P37 Query complete! count:69
P38 Query complete! count:273
P39 Query complete! count:222472
P47 Query complete! count:1051
P50 Query complete! count:59
P53 Query complete! count:17
P54 Query complete! count:920902
P57 Query complete! count:90
P58 Query complete! count:52
P69 Query complete! count:51271
P85 Query complete! count:46
P97 Query complete! count:2217
P102 Query complete! count:12803
P108 Query complete! count:201115
P119 Query complete! count:4092
P122 Query complete! count:76
P127 Query complete! count:32333
P131 Query complete! count:70146
P150 Query complete! count:11059
P159 Query complete! count:1839
P161 Query complete! count:407
P166 Query complete! count:196132
P170 Query complete! count:77
P175 Quer

In [46]:
# merge files into dataset
import os
def convert_time2value(raw_time_str):
    """Strip every "-" and any leading/trailing "T" from a time string.

    ``"null"`` is returned unchanged. A leading "-" (negative year) is kept as
    a sign in front of the result. A result that still ends with "Z" is
    reported on stdout but returned anyway.

    NOTE: this definition replaces the function of the same name defined in an
    earlier cell of this notebook; after this cell runs, the extraction
    function picks up this version unless the earlier cell is run again.
    """
    if raw_time_str=="null":
        return "null"
    sign="-" if raw_time_str.startswith("-") else ""
    compact=sign+raw_time_str.replace("-","").strip("T")
    if compact.endswith("Z"):
        print("time format error:"+compact)
    return compact
    
# Merge every "*res.tsv" file of the listed directories into one dataset file.
# Exact duplicate lines within one input file are dropped; rows whose start
# sorts after their end are reported ("Time inversion") but still written.
dir_list=["raw/optional/common/","raw/required/time_prop/","raw/redundant/"]
r_list=[]
dataset_name="all_relations_with_redundant_wikidata"
dataset_version="alpha-1.3"
# `with` closes the merged file and each input file even if a row fails.
with open(dataset_name+"_"+dataset_version+".tsv","w") as merged_f:
    for each_dir in dir_list:
        for each_obj in os.listdir(each_dir):
            if not each_obj.endswith("res.tsv"):
                continue
            dup_cnt=0
            duplicate_chk_set=set()
            # os.path.join also copes with a directory given without trailing "/"
            source_path=os.path.join(each_dir,each_obj)
            with open(source_path,"r") as read_f:
                for eachline in read_f:
                    line_info=eachline.strip().split("\t")
                    if eachline in duplicate_chk_set:
                        dup_cnt+=1
                        # only the first ten duplicates per file are reported
                        if dup_cnt<=10:
                            print("Duplicate line found:%s" % eachline)
                        continue
                    if len(line_info)<5:
                        # blank or truncated row: skip it instead of failing
                        # on the column access below
                        print("Malformed line skipped:%s" % eachline)
                        continue
                    if line_info[3]!="null" and line_info[4]!="null":
                        st_bc=line_info[3].startswith("-")
                        st_t=line_info[3]
                        en_bc=line_info[4].startswith("-")
                        en_t=line_info[4]
                        # Values are compared as strings; for two negative (BC)
                        # values the lexical order is reversed.
                        if ((not (st_bc and en_bc)) and st_t>en_t) or (st_bc and en_bc and st_t<en_t):
                            print("Time inversion found:%s" %eachline)
                    duplicate_chk_set.add(eachline)
                    merged_f.write("%s\t%s\t%s\t%s\t%s\n" %(line_info[0],line_info[1],line_info[2],convert_time2value(line_info[3]),convert_time2value(line_info[4])))
            print(source_path)
            print(len(duplicate_chk_set))


Time inversion found:Q103572	P17	Q723118	18050101	18020101

Time inversion found:Q103572	P17	Q223936	18150101	18050101

Time inversion found:Q1143949	P17	Q34266	18220721	17961031

Time inversion found:Q1540322	P17	Q15180	19221230	19200302

Time inversion found:Q156593	P17	Q36	19181111	19180916

Time inversion found:Q2621007	P17	Q49683	13000101	12600101

Time inversion found:Q262472	P17	Q655621	17110430	15700101

Time inversion found:Q316841	P17	Q459780	18780713	18780712

Time inversion found:Q4051948	P17	Q7318	19420620	19420113

Time inversion found:Q405351	P17	Q219	18780712	18780303

Time inversion found:Q40811	P17	Q15180	19921229	19911226

Time inversion found:Q4150091	P17	Q15180	19320101	19310101

Time inversion found:Q423946	P17	Q36	19891229	19891228

Time inversion found:Q490058	P17	Q142	19010209	18140101

Time inversion found:Q727	P17	Q596214	19450508	19400519

raw/optional/common/P17res.tsv
12235
raw/optional/common/P2568res.tsv
5110
raw/optional/common/P725res.tsv
18
Time inver

In [None]:
#test temporal fact query sparql
relation="P1789"
query = '''
    SPARQL
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    SELECT DISTINCT ?e1 ?e2 ?st ?en
    WHERE
    {{
        ?e1 p:{} ?s.
        ?s ps:{} ?e2.

        {{?s pq:P580 ?st.
        OPTIONAL {{?s pq:P582 ?en.}}}}
        UNION
        {{?s pq:P582 ?en.
        OPTIONAL {{?s pq:P580 ?st.}}}}
    }}
    '''.format(relation,relation)
# print(query)
results=query_with_odbc(query)

print(results)
query_res=[]
print("%s Query complete!" % relation)

# NOTE(review): query_with_odbc returns the rows of cursor.fetchall(), which
# other cells of this notebook index by position (e.g. results[0][0]). The
# dict-style access below matches SPARQL JSON result bindings instead --
# confirm which result source this cell is meant to consume.
XSD_DATETIME="http://www.w3.org/2001/XMLSchema#dateTime"

def _binding_date(each_result,var_name):
    """Return the date part of the binding ``var_name`` of one result.

    Returns "null" when the variable is unbound, None when it is bound to
    something other than a typed literal (the caller skips such results), and
    otherwise the text before the first "T". A datatype other than
    xsd:dateTime is reported on stdout.
    """
    if var_name not in each_result:
        return "null"
    binding=each_result[var_name]
    if binding["type"]!="typed-literal":
        return None
    if binding["datatype"]!=XSD_DATETIME:
        # Report the datatype of the binding being inspected.
        print("DatatypeAlert!type:%s" % binding["datatype"])
    return binding["value"].split("T")[0]

for each_result in results:
    temporal_fact={}
    temporal_fact["s"]=each_result["e1"]["value"][31:]
    temporal_fact["p"]=relation
    temporal_fact["o"]=each_result["e2"]["value"][31:]

    start_date=_binding_date(each_result,"st")
    if start_date is None:
        continue
    end_date=_binding_date(each_result,"en")
    if end_date is None:
        continue
    temporal_fact["t1"]=start_date
    temporal_fact["t2"]=end_date
    query_res.append(temporal_fact)

# f=open("%sres.csv" %relation,"w")
# for tf in query_res:
#     f.write("%s,%s,%s,%s,%s\n" %(tf["s"],tf["p"],tf["o"],tf["t1"],tf["t2"]))
# f.close()
# print(query_res)


In [None]:
#test time property query sparql
relation="P569"
query = '''
        SPARQL
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT  ?e1 \"{}\" ?time
        WHERE
        {{
            ?e1 wdt:{} ?time.
        }}
    '''.format(relation,relation)
# print(query)
results=query_with_odbc(query)
print(results)
query_res=[]
print("%s Query complete!" % relation)
# NOTE(review): the dict-style access below matches SPARQL JSON result
# bindings, while query_with_odbc returns the rows of cursor.fetchall() --
# confirm which result source this cell is meant to consume.
for each_result in results:
    # keep only results whose ?time binding is some kind of literal
    if "literal" not in each_result["time"]["type"]:
        continue
    temporal_fact={
        "s":each_result["e1"]["value"][31:],
        "p":each_result["callret-1"]["value"],
    }
    # the time value is used as object, start and end alike
    temporal_fact["o"]=temporal_fact["t1"]=temporal_fact["t2"]=each_result["time"]["value"]
    query_res.append(temporal_fact)
# f=open("%sres.csv" %relation,"w")
# for tf in query_res:
#     f.write("%s,%s,%s,%s,%s\n" %(tf["s"],tf["p"],tf["o"],tf["t1"],tf["t2"]))
# f.close()
# print(query_res)

In [None]:
# multi-value stat
# For every relation and each time qualifier (P580/P582/P585), print the summed
# per-statement counts and the number of statements whose count exceeds one.
relation_list=list(optional_relation_dict.keys())#["P26","P54","P108","P286","P166"]
property_list=["P580","P582","P585"]
for relation in relation_list:
    print(relation,end="")
    for time_property in property_list:
        query = '''
                SPARQL
                PREFIX wd: <http://www.wikidata.org/entity/>
                PREFIX wdt: <http://www.wikidata.org/prop/direct/>
                SELECT ?e1 ?e2 ?s COUNT(?e2)
                WHERE
                {{
                    ?e1 p:{} ?s.
                    ?s ps:{} ?e2.
                    ?s a <http://wikiba.se/ontology#Statement>.
                    ?s pq:{} ?timevalue.
                }}
                GROUP BY ?e1 ?e2 ?s 
                HAVING (COUNT(?e2)>1)
            '''.format(relation,relation,time_property)
        # print(query)
        results=query_with_odbc(query)

        # print(results[0])
        s_cnt=len(results)
        # ODBC rows are positional: column 3 is the COUNT(?e2) value.
        cnt=sum(int(each_data[3]) for each_data in results)
        print("\t%d(%d statements)" %(cnt,s_cnt),end="")
    print("")

In [None]:
# prefix limit check
# For every relation, compare the number of distinct subjects typed as Item
# with the number of all distinct subjects; when they differ, also show the
# number of subjects typed as Property.
def _first_count(rows):
    """Return the single COUNT value of an ODBC result set, or None if it is empty.

    query_with_odbc returns an empty list when the query failed.
    """
    if not rows:
        return None
    return int(rows[0][0])

relation_list=optional_relation_dict.keys()
for relation in relation_list:
    query = '''
            SPARQL
            PREFIX wd: <http://www.wikidata.org/entity/>
            PREFIX wdt: <http://www.wikidata.org/prop/direct/>
            SELECT COUNT DISTINCT ?e1
            WHERE
            {{
                ?e1 a <http://wikiba.se/ontology#Item>.
                ?s a <http://wikiba.se/ontology#Statement>.
                ?e1 p:{} ?s.
            }}
        '''.format(relation)
    # print(query)
    prefix_limit_results=query_with_odbc(query)
    prefix_limit_cnt = _first_count(prefix_limit_results)
    # print(prefix_limit_cnt)

    query = '''
            SPARQL
            PREFIX wd: <http://www.wikidata.org/entity/>
            PREFIX wdt: <http://www.wikidata.org/prop/direct/>
            SELECT COUNT DISTINCT ?e1
            WHERE
            {{
                ?e1 a <http://wikiba.se/ontology#Property>.
                ?s a <http://wikiba.se/ontology#Statement>.
                ?e1 p:{} ?s.
            }}
        '''.format(relation)
    # print(query)
    p_prefix_limit_results=query_with_odbc(query)
    p_prefix_limit_cnt = _first_count(p_prefix_limit_results)

    query = '''
            SPARQL
            PREFIX wd: <http://www.wikidata.org/entity/>
            PREFIX wdt: <http://www.wikidata.org/prop/direct/>
            SELECT COUNT DISTINCT ?e1
            WHERE
            {{
                ?s a <http://wikiba.se/ontology#Statement>.
                ?e1 p:{} ?s.
            }}
        '''.format(relation)
    # print(query)
    all_results=query_with_odbc(query)
    all_cnt = _first_count(all_results)
    # print(all_cnt)

    if None in (prefix_limit_cnt,p_prefix_limit_cnt,all_cnt):
        # at least one of the three queries failed; nothing to compare
        print("%s\tQUERY FAILED" % relation)
        continue
    if prefix_limit_cnt==all_cnt:
        print("%s\tEQUAL\t%d" %(relation,all_cnt))
    else:
        print("%s\tINEQUAL\t%d\t%d\t%d\t%s" %(relation,prefix_limit_cnt,p_prefix_limit_cnt,all_cnt,prefix_limit_cnt+p_prefix_limit_cnt==all_cnt))

In [None]:
# check cnt>=100w
# Print, for every optional relation, how many distinct rows the
# "temporal fact" extraction query would return.
relation_list=optional_relation_dict.keys()
for relation in relation_list:
    query = '''
        SPARQL
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT COUNT DISTINCT ?e1 ?e2 ?st ?en ?time ?s
        WHERE
        {{
            ?e1 p:{} ?s.
            ?s ps:{} ?e2.
            ?s a <http://wikiba.se/ontology#Statement>.
            ?e1 a <http://wikiba.se/ontology#Item>.

            {{
                ?s pq:P580 ?st.
                OPTIONAL {{?s pq:P582 ?en.}}
                OPTIONAL {{?s pq:P585 ?time.}}
            }}
            UNION
            {{
                ?s pq:P582 ?en.
                OPTIONAL {{?s pq:P580 ?st.}}
                OPTIONAL {{?s pq:P585 ?time.}}
            }}
            UNION
            {{
                ?s pq:P585 ?time.
                OPTIONAL {{?s pq:P580 ?st.}}
                OPTIONAL {{?s pq:P582 ?en.}}
            }}
        }}
        '''.format(relation,relation)
    # print(query)
    # Assignment, not comparison: the result of this query must be stored.
    results=query_with_odbc(query)
    if not results:
        # query_with_odbc returns [] when the query failed
        print("%s\tQUERY FAILED" % relation)
        continue
    # ODBC rows are positional: the count is the first column of the first row.
    print("%s\t%s"%(relation,results[0][0]))

In [None]:
# check cnt>=100w
# Print, for the two time properties, how many distinct rows the query returns.
relation_list=["P569","P570"]
for relation in relation_list:
    query = '''
        SPARQL
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT COUNT DISTINCT ?e1 ?e2 ?st ?en ?s
        WHERE
        {{
            ?e1 p:{} ?s.
            ?s ps:{} ?e2.

            OPTIONAL {{?s pq:P580 ?st.}}
            OPTIONAL {{?s pq:P582 ?en.}}
        }}
        '''.format(relation,relation)
    # print(query)
    # Assignment, not comparison: the result of this query must be stored.
    results=query_with_odbc(query)
    if not results:
        # query_with_odbc returns [] when the query failed
        print("%s\tQUERY FAILED" % relation)
        continue
    # ODBC rows are positional: the count is the first column of the first row.
    print("%s\t%s"%(relation,results[0][0]))

SELECT ?typ ?typLabel
WHERE
{
  wd:P1435 (wdt:P1647|wdt:P279|wdt:P31)* ?typ.
  ?typ wdt:P31 wd:Q107649491.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}

In [None]:
# For each property class, list its properties and, per property, print the
# total number of (item, value) pairs and the ten most frequent
# (subject type, object type) combinations.
p_type={"Q22964231":"Wikidata property for human relationships","Q18608871":"Wikidata property for items about people","Q57955292":"Wikidata property for items about people or organisations"}
for each_p_type in p_type.keys():
    print("P_TYPE:"+p_type[each_p_type])
    query = '''
        SPARQL
        SELECT ?typ ?label
        WHERE
        {{
            ?typ wdt:P31 wd:{}.
            ?typ rdfs:label ?label.filter(lang(?label)='en')
        }}
        '''.format(each_p_type)
    # Separate names for the three result sets: the outer list is being
    # iterated while the inner queries run.
    property_rows=query_with_odbc(query)
    for each_p in property_rows:
        p_suffix=each_p[0].replace("http://www.wikidata.org/entity/","")
        query='''
        SPARQL
        SELECT COUNT DISTINCT ?e1 ?e2
        WHERE
        {{
            ?e1 a <http://wikiba.se/ontology#Item>.
            ?e1 wdt:{} ?e2.
        }}
        '''.format(p_suffix)
        count_rows=query_with_odbc(query)

        if count_rows:
            print("P:%s(%s) total count:%d" %(each_p[1],p_suffix,int(count_rows[0][0])))
        else:
            # query_with_odbc returns [] when the query failed
            print("P:%s(%s) total count unavailable (query failed)" %(each_p[1],p_suffix))

        query='''
        SPARQL
        SELECT DISTINCT ?e1typ ?e1Label ?e2typ ?e2Label COUNT(?e1typ) COUNT(?e2typ)
        WHERE
        {{
        ?e1 a <http://wikiba.se/ontology#Item>.
        ?e1 wdt:{} ?e2.
        ?e1 wdt:P31 ?e1typ.
        ?e1typ rdfs:label ?e1Label.filter(lang(?e1Label)='en').
        ?e2 wdt:P31 ?e2typ.
        ?e2typ rdfs:label ?e2Label.filter(lang(?e2Label)='en').
        }}
        GROUP BY ?e1typ ?e1Label ?e2typ ?e2Label
        ORDER BY DESC(COUNT(?e1typ)) 
        LIMIT 10
        '''.format(p_suffix)
        type_rows=query_with_odbc(query)
        for each_typ_comb in type_rows:
            print(each_typ_comb)
        print("-"*30)
    print("="*30)

In [5]:
# Print, for every optional relation, the property classes it is an instance
# of (restricted to classes that are instances of Q107649491) with their
# English labels.
PTYPE_QUERY_TEMPLATE="""
    SPARQL
    SELECT ?ptyp ?ptyplabel
    WHERE
    {{
        wd:{} wdt:P31 ?ptyp.
        ?ptyp wdt:P31 wd:Q107649491.
        ?ptyp rdfs:label ?ptyplabel.filter(lang(?ptyplabel)='en').
    }}
    """
for relation in optional_relation_dict:
    query=PTYPE_QUERY_TEMPLATE.format(relation)
    results=query_with_odbc(query)
    for each_result in results:
        print(each_result)



In [33]:
# Dump every property of the listed property classes that is neither an
# optional relation nor a time property as "<subject>\t<relation>\t<object>\tnull\tnull"
# rows into raw/redundant/<relation>res.tsv.
p_type={"Q22964231":"Wikidata property for human relationships","Q18608871":"Wikidata property for items about people","Q57955292":"Wikidata property for items about people or organisations"}
file_path="raw/redundant"
for each_p_type in p_type.keys():
    print("P_TYPE:"+p_type[each_p_type])
    query = '''
        SPARQL
        SELECT ?typ ?label
        WHERE
        {{
            ?typ wdt:P31 wd:{}.
            ?typ rdfs:label ?label.filter(lang(?label)='en')
        }}
        '''.format(each_p_type)
    results_p_group=query_with_odbc(query)
    # print(results)
    for each_p in results_p_group:
        relation=each_p[0].replace("http://www.wikidata.org/entity/","")
        if relation in optional_relation_dict or relation in timeobj_relation_dict:
            # already covered by the temporal extraction phases
            continue
        query = '''
                    SPARQL
                    PREFIX wd: <http://www.wikidata.org/entity/>
                    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
                    SELECT DISTINCT  ?e1 ?e2 
                    WHERE
                    {{
                        ?e1 wdt:{} ?e2.
                        ?e1 a <http://wikiba.se/ontology#Item>.
                    }}
                    '''.format(relation)
        # Run the query before opening the file so that an existing result file
        # is not truncated while the query is still running.
        results=query_with_odbc(query)
        print("%s query complete!" %relation)
        with open("%s/%sres.tsv" %(file_path,relation),"w") as f:
            for each_result in results:
                # str() keeps .replace safe for values that are not strings
                f.write("%s\t%s\t%s\tnull\tnull\n" %(str(each_result[0]).replace("http://www.wikidata.org/entity/",""),relation,str(each_result[1]).replace("http://www.wikidata.org/entity/","")))

P_TYPE:Wikidata property for human relationships
P25 query complete!
P1290 query complete!
P22 query complete!
P40 query complete!
P3373 query complete!
P1971 query complete!
P451 query complete!
P_TYPE:Wikidata property for items about people
P140 query complete!
P136 query complete!
P358 query complete!
P569 query complete!
P570 query complete!
P856 query complete!
P4193 query complete!
P1775 query complete!
P135 query complete!
P450 query complete!
P2298 query complete!
P1853 query complete!
P607 query complete!
P3716 query complete!
P109 query complete!
P2831 query complete!
P937 query complete!
P1416 query complete!
P553 query complete!
P1000 query complete!
P2021 query complete!
P470 query complete!
P1412 query complete!
P106 query complete!
P184 query complete!
P5054 query complete!
P641 query complete!
P4292 query complete!
P1308 query complete!
P1393 query complete!
P2097 query complete!
P21 query complete!
P2389 query complete!
P4353 query complete!
P531 query complete!
P598 

In [4]:
# Count the distinct (statement, subject, object) combinations of P463 whose
# subject is typed as Item.
# NOTE: no .format() is applied to this string, so the doubled braces are sent
# to the server verbatim as a nested group pattern.
query="""
                SPARQL
                SELECT COUNT DISTINCT ?s ?e1 ?e2
                WHERE
                {{
                    ?e1 p:P463 ?s.
                    ?s ps:P463 ?e2.
                    ?s a <http://wikiba.se/ontology#Statement>.
                    ?e1 a <http://wikiba.se/ontology#Item>.

                }}
                """
results=query_with_odbc(query)
# Rows come back as tuples; the count is the only column of the only row.
print(results)

[(246456, )]
