In [1]:
import os
import pickle

import numpy as np
import pandas as pd
import requests
import wikipedia
from rosette.api import API, DocumentParameters, RosetteException

In [None]:
def get_id(message, timeout=10):
    """Look up the Wikidata entity id that best matches a search string.

    Sends a ``wbsearchentities`` request to the Wikidata API and returns the
    id of the first search hit.

    Args:
        message: Free-text label to search for (searched in English).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``id`` field of the first search result, or ``-1`` when the
        response contains no usable first result.

    Raises:
        requests.RequestException: If the HTTP request itself fails or
            times out (the request is intentionally outside the ``try``).
    """
    API_ENDPOINT = "https://www.wikidata.org/w/api.php"
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'language': 'en',
        'search': message
    }
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(API_ENDPOINT, params=params, timeout=timeout)
    try:
        return response.json()['search'][0]['id']
    except (KeyError, IndexError, TypeError, ValueError):
        # No hits, an API error payload, or a body that is not JSON: keep the
        # best-effort -1 sentinel that callers compare against.
        return -1

In [None]:
def Analyse(message, alt_url='https://api.rosette.com/rest/v1/', user_key=None, max_chars=20000):
    """Extract relationships from the Wikipedia article titled ``message``.

    The article text is sent to the Rosette relationships endpoint. Every
    returned relationship becomes one record; when the relationship's second
    argument resolves to the same Wikidata id as ``message``, an additional
    inverse record (predicate suffixed with ``^-1``, subject and object
    swapped) is emitted just before it.

    Args:
        message: Wikipedia page title of the entity to analyse.
        alt_url: Base URL of the Rosette service.
        user_key: Rosette API key. Defaults to the ``ROSETTE_API_KEY``
            environment variable so the secret is not stored in the notebook.
        max_chars: Number of leading characters of the article that are
            submitted for analysis.

    Returns:
        A ``(records, message_id)`` tuple. ``records`` is a list of dicts with
        the keys ``Relationship``, ``Subject``, ``Object`` and ``Confidence``
        (``-1`` when the service reports no confidence); it is empty when the
        Rosette call fails. ``message_id`` is whatever ``get_id(message)``
        returned.

    Raises:
        ValueError: If no API key is passed and the environment variable is
            not set.
    """
    if user_key is None:
        user_key = os.environ.get("ROSETTE_API_KEY")
    if not user_key:
        raise ValueError(
            "Rosette API key missing: pass user_key=... or set the "
            "ROSETTE_API_KEY environment variable"
        )
    api = API(user_key=user_key, service_url=alt_url)

    params = DocumentParameters()
    params["content"] = wikipedia.page(message).content[:max_chars]

    message_id = get_id(message)
    # get_id performs a network request; remember answers so a name that
    # appears in many relationships is only looked up once.
    id_cache = {message: message_id}

    def _cached_id(label):
        if label not in id_cache:
            id_cache[label] = get_id(label)
        return id_cache[label]

    rel = []
    try:
        result = api.relationships(params)
    except RosetteException as exception:
        print(exception)
        # Keep the documented tuple shape so callers can still unpack it.
        return rel, message_id

    for r in result['relationships']:
        confidence = r["confidence"] if "confidence" in r else -1
        # -1 is get_id's "not found" sentinel: when the subject itself could
        # not be resolved, two failed lookups must not count as a match.
        if message_id != -1 and _cached_id(r['arg2']) == message_id:
            rel.append({'Relationship': r['predicate'] + '^-1', 'Subject': r['arg2'],
                        'Object': r['arg1'], 'Confidence': confidence})
        rel.append({'Relationship': r['predicate'], 'Subject': r['arg1'],
                    'Object': r['arg2'], 'Confidence': confidence})
    return rel, message_id

In [None]:
# Wikipedia page title to analyse; "Andrew Ng" is kept as an alternative sample.
sample_message = "Steve Jobs" #"Andrew Ng"
# Analyse returns the extracted relationship records and the id looked up for the sample.
result, message_id = Analyse(sample_message)

In [None]:
# Tabulate the relationship records with a fixed column order.
df = pd.DataFrame(result, columns=['Subject','Relationship','Object','Confidence'])
# print (df)

In [None]:
# Display the relationship table.
df

In [None]:
# Resolve every distinct subject once: get_id performs a network request, so
# looking up each row separately would repeat identical requests.
subject_ids = {subject: get_id(subject) for subject in df['Subject'].unique()}
is_main = df['Subject'].map(subject_ids) == message_id

# Rows whose subject is the analysed entity vs. all remaining rows. Both
# halves come from the same mask, so they are exact complements, and both
# are copies so later column assignments do not write into a slice of df.
main_df = df[is_main].copy()
other_df = df[~is_main].copy()

In [None]:
####################### NEW ####################
# Step 1: order by Object, drop duplicate rows, and gather the Object and
# Confidence values of every (Subject, Relationship) pair into lists.
e1Grp = (
    df.sort_values('Object', ascending=True)
      .drop_duplicates()
      .groupby(['Subject', 'Relationship'])
      .agg(lambda x: list(x))
      .reset_index()
)
# Step 2: unroll the lists again into one row per (Object, Confidence) pair.
rows = [
    [subject, relationship, e2, e3]
    for subject, relationship, objects, confidences in zip(
        e1Grp['Subject'], e1Grp['Relationship'], e1Grp['Object'], e1Grp['Confidence'])
    for e2, e3 in zip(objects, confidences)
]
# Step 3: rebuild the frame, indexed by (Subject, Relationship).
e1Grp = pd.DataFrame(rows, columns=e1Grp.columns).set_index(['Subject','Relationship'])

# e1Grp

In [None]:
# Random integer score in [0, 100) for every row. A seeded generator makes the
# column reproducible across kernel restarts, and assign() builds a new frame
# instead of writing into what may be a slice of df (SettingWithCopyWarning).
main_df = main_df.assign(
    RScore=np.random.default_rng(42).integers(0, 100, main_df.shape[0])
)

In [None]:
# Display main_df.
main_df

In [None]:
# Reorder the columns so that 'Confidence' comes last.
leading_columns = [column for column in main_df.columns if column != 'Confidence']
main_df = main_df[leading_columns + ['Confidence']]

In [None]:
# Print main_df as HTML table markup (plain text output, not a rendered table).
print(main_df.to_html())

In [None]:
# Collapse main_df to one row per (Subject, Relationship): rows are ordered by
# Object and de-duplicated first, then the remaining columns are gathered
# into lists.
main_df = (
    main_df
    .sort_values('Object', ascending=True)
    .drop_duplicates()
    .groupby(['Subject', 'Relationship'])
    .agg(lambda values: list(values))
)
# Number of objects collected for each (Subject, Relationship) pair.
main_df['Count'] = main_df['Object'].apply(len)
# print(main_df)
main_df

In [None]:
# Collapse other_df to one row per (Subject, Relationship): rows are ordered by
# Object and de-duplicated first, then the remaining columns are gathered
# into lists.
other_df = (
    other_df
    .sort_values('Object', ascending=True)
    .drop_duplicates()
    .groupby(['Subject', 'Relationship'])
    .agg(lambda values: list(values))
)
# Number of objects collected for each (Subject, Relationship) pair.
other_df['Count'] = other_df['Object'].apply(len)
# print(other_df)
other_df

# Pseudo Ground Truth from Wikidata

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint queried for the ground-truth lookups.
endpoint_url = "https://query.wikidata.org/sparql"

# Example of a fully rendered query (the templates in query_dict have the
# same shape, with the entity id replaced by a %s placeholder):
# query = """SELECT ?item ?itemLabel WHERE {
#   ?item wdt:P112 wd:Q19837.
#   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
# }"""


def get_results(query, value, endpoint_url="https://query.wikidata.org/sparql"):
    """Run a SPARQL query template against an endpoint and return the result.

    Args:
        query: Query template containing a ``%`` placeholder.
        value: Value substituted into the template with the ``%`` operator.
        endpoint_url: SPARQL endpoint to send the query to.

    Returns:
        The query response converted by SPARQLWrapper using the JSON return
        format.
    """
    client = SPARQLWrapper(endpoint_url)
    rendered_query = query % value
    client.setQuery(rendered_query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


def ground_truth(relation, subject):
    """Fetch the Wikidata labels that answer ``relation`` for ``subject``.

    Every SPARQL template registered for ``relation`` in ``query_dict`` is run
    with the Wikidata id of ``subject``, and the ``itemLabel`` values of all
    returned bindings are collected.

    Args:
        relation: Key into ``query_dict``.
        subject: Name passed to ``get_id`` to obtain the entity id.

    Returns:
        List of label strings. Empty when the relation has no templates, the
        subject cannot be resolved, or a query fails; a diagnostic line is
        printed in each of those cases.
    """
    gt = []
    queries = query_dict.get(relation)
    if queries is None:
        print(relation, subject, "- no Wikidata query defined for this relation")
        return gt
    try:
        # The id does not depend on the query, so resolve it once.
        subject_id = get_id(subject)
        if subject_id == -1:
            # -1 is get_id's "not found" sentinel; querying wd:-1 is pointless.
            print(relation, subject, "- subject not found on Wikidata")
            return gt
        results = [get_results(query, subject_id) for query in queries]
        for result in results:
            for r in result["results"]["bindings"]:
                gt.append(r['itemLabel']['value'])
    except Exception as exc:
        # Best effort: report the failure and fall through with what we have.
        # Exception (not a bare except) lets KeyboardInterrupt stop the cell.
        print(relation, subject, exc)
    return gt

In [None]:
# Maps a relationship predicate (including the '^-1' inverse variants) to the
# list of SPARQL query templates whose results form its ground truth.
# Each template selects ?item / ?itemLabel and contains a single %s
# placeholder that is filled with a Wikidata entity id; whether the
# placeholder is on the subject or the object side of the triple encodes the
# direction of the relation. A predicate with several templates (e.g. the
# ones annotated CEO / Chairperson or Father / Mother / Relative) gets the
# union of their results.
# NOTE(review): the wdt:P... property ids are assumed to match the predicate
# named by each key -- verify against Wikidata when adding entries.
query_dict = {'Organization Founded By^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P112 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization Founded By':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P112 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                        ],
              'Organization Headquarters':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P159 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                          ],
              'Organization Subsidiary Of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P355 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                              ],
              'Organization Subsidiary Of':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P355 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization top employees':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P169 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""", # CEO
                                            """SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P488 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""" # Chairperson
                                            ],
              'Person Employee or Member of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P108 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                                ],
              'Person Employee or Member of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P108 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                            ],
              'Person Place of Birth':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P19 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                      ],
              'Person Current and Past Location of Residence':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P551 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                                              ],
              'Person Parents':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P22 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Father
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P25 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Mother
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P1038 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""" #Relative (Adopted Parents?)
                                # Shall we include stepparents??
                               ],
              'Person Parents^-1':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P40 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                  ],
              'Person Siblings':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P3373 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                ],
              'Person Spouse':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P26 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                              ],
              'Citizen of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P27 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                           ],
              'Educated at':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P69 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                            ]
             }

In [None]:
# Sanity check: fetch the Wikidata ground truth for one relation directly.
# get_results takes (query, value, endpoint_url); passing the endpoint first
# would use the URL as the query template and fail during % formatting.
steve_jobs_id = get_id("Steve Jobs")
results = [get_results(query, steve_jobs_id, endpoint_url) for query in query_dict['Educated at']]

# Collect the label of every binding returned by every query.
gt = []
for result in results:
    for r in result["results"]["bindings"]:
        gt.append(r['itemLabel']['value'])
print (len(gt))
gt

In [None]:
# Relationship labels of the grouped table. Read from main_df directly,
# because `temp` is only created in the next cell and a fresh
# "Restart & Run All" would otherwise stop here with a NameError.
main_df.reset_index()['Relationship']

In [None]:
# Working copy of main_df with its index moved back into ordinary columns.
temp = main_df.reset_index()
temp

In [None]:
def _ground_truth_for_row(row):
    """Return the ground-truth labels for one row's Relationship and Subject."""
    return ground_truth(row['Relationship'], row['Subject'])


# One list of ground-truth labels per row, followed by the size of that list.
temp['Ground Truth'] = temp.apply(_ground_truth_for_row, axis=1)
temp['Count_GT'] = temp['Ground Truth'].apply(len)

In [None]:
# Show temp indexed by (Subject, Relationship); the result is only displayed,
# temp itself is left unchanged.
temp.set_index(['Subject','Relationship'])

In [None]:
# Display main_df.
main_df

In [None]:
# Module-level dict that f1 and f2 read and modify.
a = {'33':33}

In [None]:
def f1():
    """Set key '33' of the module-level dict ``a`` to 33, then print the dict.

    The output is the marker line "B" followed by the dict.
    """
    # The dict is mutated in place and the name is never rebound, so the
    # lookup resolves to the module-level ``a`` without a ``global`` statement.
    a['33'] = 33
    print("B\n", a)

In [None]:
def f2():
    """Reload the module-level dict ``a`` from ``data.p``, extend it, save it.

    Steps: load ``a`` from ``data.p`` (skipped when the file does not exist
    yet), set key '1222', print the dict after an "A" marker, call ``f1``,
    write the dict back to ``data.p`` and print it again after a "C" marker.
    """
    global a
    try:
        # SECURITY: pickle.load can execute arbitrary code while loading;
        # only use a data.p file that this notebook wrote itself.
        with open('data.p', 'rb') as fp:
            a = pickle.load(fp)
    except FileNotFoundError:
        # First run: nothing has been saved yet, keep the in-memory dict.
        pass
    a['1222'] = 1222
    print ("A\n",a)
    f1()
    with open('data.p', 'wb') as fp:
        pickle.dump(a, fp, protocol=pickle.HIGHEST_PROTOCOL)
    print("C\n",a)

In [None]:
def f(batch_size=1, num_workers=1, shuffle=None):
    """Print batch_size, shuffle and num_workers, one per line, in that order."""
    for setting in (batch_size, shuffle, num_workers):
        print(setting)

In [None]:
# Keyword arguments that are unpacked into f() in the next cell.
params = dict(
    batch_size=64,
    shuffle=True,
    num_workers=6,
)

In [None]:
# Unpack the params dict into f's keyword arguments.
f(**params)

In [None]:
# Relative directory path used by the following cells.
root = "./dataset/train/"

In [None]:
# The path without its final character (here the trailing slash).
root[:-1]

In [None]:
# List the entries of the root directory.
os.listdir(root)

In [None]:
import os