In [14]:
from rosette.api import API, DocumentParameters, RosetteException
import pandas as pd
import wikipedia
import requests
from utils import *
import numpy as np

In [2]:
def get_id(message):
    API_ENDPOINT = "https://www.wikidata.org/w/api.php"
    query = message
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'language': 'en',
        'search': query
    }
    r = requests.get(API_ENDPOINT, params = params)
    try:
        return (r.json()['search'][0]['id'])
    except Exception:
        return -1

In [3]:
def Analyse(message, alt_url='https://api.rosette.com/rest/v1/'):
    """ Run the example """
    # Create an API instance
    api = API(user_key="89350904c7392a44f0f9019563be727a", service_url=alt_url)

    # Set selected API options.
    # For more information on the functionality of these
    # and other available options, see Rosette Features & Functions
    # https://developer.rosette.com/features-and-functions#morphological-analysis-introduction

    # api.set_option('modelType','perceptron') #Valid for Chinese and Japanese only
    params = DocumentParameters()
    relationships_text_data = wikipedia.page(message).content[:20000]
    params["content"] = relationships_text_data
    rel = []
    gt = []
    message_id = get_id(message)
    try:
        RESULT = api.relationships(params)
        #print(RESULT)
        for r in RESULT['relationships']:
#             gt += add_ground_truth(r['predicate'], r['arg1'], r['arg2'])
            confidence = -1
            if "confidence" in r:
                confidence = r["confidence"]
            if get_id(r['arg2']) == message_id:
                rel.append({'Relationship':r['predicate']+'^-1', 'Subject':r['arg2'], 'Object':r['arg1'], 'Confidence': confidence})
            rel.append({'Relationship':r['predicate'],'Subject':r['arg1'],'Object':r['arg2'], 'Confidence': confidence})
        return rel, message_id
    except RosetteException as exception:
        print(exception)

In [4]:
sample_message = "Steve Jobs" #"Andrew Ng"
result, message_id = Analyse(sample_message)

In [5]:
df = pd.DataFrame(result, columns=['Subject','Relationship','Object','Confidence'])
# print (df)

In [68]:
df

Unnamed: 0,Subject,Relationship,Object,Confidence
0,Pixar,Organization Founded By,NeXT,-1.0
1,Apple Computer,Organization Founded By,Steve Wozniak,-1.0
2,Steve Jobs,Person Place of Birth,"San Francisco, California",-1.0
3,Apple Computer,Organization Founded By,Steve Wozniak,-1.0
4,Steve Jobs,Organization Founded By^-1,Apple Computer,-1.0
5,Apple Computer,Organization Founded By,Steve Jobs,-1.0
6,Steve Jobs,Educated at,Apple Computer,0.887634
7,Steve Jobs,Person Employee or Member of,NeXT,-1.0
8,Steve Jobs,Person Employee or Member of,Apple Computer,-1.0
9,Abdulfattah Jandali,Person Current and Past Location of Residence,Homs,-1.0


In [6]:
main_df = df[df['Subject'].apply(lambda row: get_id(row)) == message_id]
other_df = df[~df.isin(main_df).all(1)]

In [72]:
####################### NEW ####################
e1Grp = df.sort_values('Object', ascending=True).drop_duplicates().groupby(['Subject','Relationship']).agg(lambda x: list(x)).reset_index()
rows = []
_ = e1Grp.apply(lambda row: [rows.append([row['Subject'],row['Relationship'], e2, e3]) for e2,e3 in zip(row.Object, row.Confidence)], axis=1)
e1Grp = pd.DataFrame(rows, columns=e1Grp.columns).set_index(['Subject','Relationship'])

# e1Grp

In [79]:
main_df['RScore'] = np.random.randint(0, 100, main_df.shape[0])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [81]:
main_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Object,Count,Confidence,RScore
Subject,Relationship,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Steve Jobs,Educated at,"[Al, Apple Computer, Reed College, Reed College]",4,"[0.97393692, 0.8876341, 0.94075662, 0.94349325]",17
Steve Jobs,Organization Founded By^-1,[Apple Computer],1,[-1.0],15
Steve Jobs,Person Current and Past Location of Residence,"[India, San Francisco Bay Area, United States]",3,"[-1.0, -1.0, -1.0]",84
Steve Jobs,Person Employee or Member of,"[Apple Computer, Atari, NeXT]",3,"[-1.0, -1.0, -1.0]",57
Steve Jobs,Person Parents,[Clara Jobs],1,[-1.0],63
Steve Jobs,Person Place of Birth,"[San Francisco, California]",1,[-1.0],68
Steve Jobs,Person Siblings,"[Patricia, Paul Reinhold Jobs]",2,"[-1.0, -1.0]",83
Steve Jobs,Person Spouse,[Laurene],1,[-1.0],22


In [75]:
main_df = main_df[[c for c in main_df if c not in ['Confidence']] + ['Confidence']]

In [9]:
print(main_df.to_html())

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th></th>
      <th>Object</th>
      <th>Confidence</th>
      <th>Count</th>
    </tr>
    <tr>
      <th>Subject</th>
      <th>Relationship</th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th rowspan="8" valign="top">Steve Jobs</th>
      <th>Educated at</th>
      <td>[Al, Apple Computer, Reed College, Reed College]</td>
      <td>[0.97393692, 0.8876341, 0.94075662, 0.94349325]</td>
      <td>4</td>
    </tr>
    <tr>
      <th>Organization Founded By^-1</th>
      <td>[Apple Computer]</td>
      <td>[-1.0]</td>
      <td>1</td>
    </tr>
    <tr>
      <th>Person Current and Past Location of Residence</th>
      <td>[India, San Francisco Bay Area, United States]</td>
      <td>[-1.0, -1.0, -1.0]</td>
      <td>3</td>
    </tr>
    <tr>
      <th>Person Employee or Member of</th>
      <td>[Apple Computer, Atari, NeXT]</td>
   

In [7]:
main_df = main_df.sort_values('Object', ascending=True).drop_duplicates().groupby(['Subject','Relationship']).agg(lambda x: list(x))
main_df['Count'] = main_df['Object'].apply(lambda x: len(x))
# print(main_df)
main_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Object,Confidence,Count
Subject,Relationship,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Steve Jobs,Educated at,"[Al, Apple Computer, Reed College, Reed College]","[0.97393692, 0.8876341, 0.94075662, 0.94349325]",4
Steve Jobs,Organization Founded By^-1,[Apple Computer],[-1.0],1
Steve Jobs,Person Current and Past Location of Residence,"[India, San Francisco Bay Area, United States]","[-1.0, -1.0, -1.0]",3
Steve Jobs,Person Employee or Member of,"[Apple Computer, Atari, NeXT]","[-1.0, -1.0, -1.0]",3
Steve Jobs,Person Parents,[Clara Jobs],[-1.0],1
Steve Jobs,Person Place of Birth,"[San Francisco, California]",[-1.0],1
Steve Jobs,Person Siblings,"[Patricia, Paul Reinhold Jobs]","[-1.0, -1.0]",2
Steve Jobs,Person Spouse,[Laurene],[-1.0],1


In [36]:
other_df = other_df.sort_values('Object', ascending=True).drop_duplicates().groupby(['Subject','Relationship']).agg(lambda x: list(x))
other_df['Count'] = other_df['Object'].apply(lambda x: len(x))
# print(other_df)
other_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Confidence,Object,Count
Subject,Relationship,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abdulfattah Jandali,Person Current and Past Location of Residence,[-1.0],[Homs],1
Apple Computer,Organization Founded By,"[-1.0, -1.0]","[Steve Jobs, Steve Wozniak]",2
Atari,Organization Founded By,[-1.0],[Nolan Bushnell],1
Chrisann Brennan,Educated at,"[0.96420491, 0.92716759, 0.95626444, 0.9485969...","[Al, Homestead, Homestead, Reed College, Stanf...",5
Chrisann Brennan,Person Current and Past Location of Residence,[-1.0],[India],1
Clara Hagopian,Citizen of,[0.87220949],[Armenian],1
Clara Hagopian,Person Current and Past Location of Residence,"[-1.0, -1.0]","[San Francisco, California, Sunset District]",2
Daniel Kottke,Person Current and Past Location of Residence,[-1.0],[United States],1
Ernest Hemingway,Citizen of,"[0.92048609, 0.89305419]","[English, Yosemite]",2
Ernest Hemingway,Educated at,"[0.95877552, 0.94242692, 0.95963675, 0.9589715]","[Homestead, Homestead, Stanford University, St...",4


MultiIndex(levels=[['Steve Jobs'], ['Educated at', 'Organization Founded By^-1', 'Person Current and Past Location of Residence', 'Person Employee or Member of', 'Person Parents', 'Person Place of Birth', 'Person Siblings', 'Person Spouse']],
           labels=[[0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5, 6, 7]],
           names=['Subject', 'Relationship'])

# Pseudo Ground Truth from Wikidata

In [241]:
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"

# query = """SELECT ?item ?itemLabel WHERE {
#   ?item wdt:P112 wd:Q19837.
#   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
# }"""


def get_results(query, value, endpoint_url="https://query.wikidata.org/sparql"):
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query%value)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def ground_truth(relation, subject):
    results = []
    gt = []
    try:
        results = [get_results(query, get_id(subject)) for query in query_dict[relation]]
        for result in results:
            for r in result["results"]["bindings"]:
                gt.append(r['itemLabel']['value'])
    except:
        print (relation, subject)
#         gt = ['Unknown']
    return gt

In [242]:
query_dict = {'Organization Founded By^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P112 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization Founded By':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P112 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                        ],
              'Organization Headquarters':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P159 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                          ],
              'Organization Subsidiary Of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P355 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                              ],
              'Organization Subsidiary Of':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P355 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization top employees':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P169 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""", # CEO
                                            """SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P488 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""" # Chairperson
                                            ],
              'Person Employee or Member of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P108 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                                ],
              'Person Employee or Member of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P108 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                            ],
              'Person Place of Birth':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P19 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                      ],
              'Person Current and Past Location of Residence':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P551 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                                              ],
              'Person Parents':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P22 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Father
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P25 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Mother
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P1038 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""" #Relative (Adopted Parents?)
                                # Shall we include stepparents??
                               ],
              'Person Parents^-1':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P40 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                  ],
              'Person Siblings':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P3373 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                ],
              'Person Spouse':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P26 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                              ],
              'Citizen of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P27 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                           ],
              'Educated at':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P69 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                            ]
             }

In [190]:
results = []
results = [get_results(endpoint_url, query, get_id("Steve Jobs")) for query in query_dict['Educated at']]

gt = []
for result in results:
    for r in result["results"]["bindings"]:
        gt.append(r['itemLabel']['value'])
print (len(gt))
gt

2


['Reed College', 'Homestead High School']

In [196]:
temp['Relationship']

0                                      Educated at
1                       Organization Founded By^-1
2    Person Current and Past Location of Residence
3                     Person Employee or Member of
4                                   Person Parents
5                            Person Place of Birth
6                                  Person Siblings
7                                    Person Spouse
Name: Relationship, dtype: object

In [247]:
temp = main_df.reset_index()
temp

Unnamed: 0,Subject,Relationship,Object2,Count
0,Steve Jobs,Educated at,"[Al, Apple Computer, Reed College]",3
1,Steve Jobs,Organization Founded By^-1,[Apple Computer],1
2,Steve Jobs,Person Current and Past Location of Residence,"[India, San Francisco Bay Area, United States]",3
3,Steve Jobs,Person Employee or Member of,"[Apple Computer, Atari, NeXT]",3
4,Steve Jobs,Person Parents,[Clara Jobs],1
5,Steve Jobs,Person Place of Birth,"[San Francisco, California]",1
6,Steve Jobs,Person Siblings,"[Patricia, Paul Reinhold Jobs]",2
7,Steve Jobs,Person Spouse,[Laurene],1


In [248]:
temp['Ground Truth'] = temp.apply(lambda row: ground_truth(row['Relationship'], row['Subject']), axis=1)
temp['Count_GT'] = temp['Ground Truth'].apply(lambda x: len(x))

In [249]:
temp.set_index(['Subject','Relationship'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Object2,Count,Ground Truth,Count_GT
Subject,Relationship,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Steve Jobs,Educated at,"[Al, Apple Computer, Reed College]",3,"[Reed College, Homestead High School]",2
Steve Jobs,Organization Founded By^-1,[Apple Computer],1,"[Apple, NeXT Computer, Inc.]",2
Steve Jobs,Person Current and Past Location of Residence,"[India, San Francisco Bay Area, United States]",3,[Mountain View],1
Steve Jobs,Person Employee or Member of,"[Apple Computer, Atari, NeXT]",3,"[Apple, Pixar, NeXT Computer, Inc.]",3
Steve Jobs,Person Parents,[Clara Jobs],1,"[John Abdulfattah Jandali, Joanne Schieble, Pa...",4
Steve Jobs,Person Place of Birth,"[San Francisco, California]",1,[San Francisco],1
Steve Jobs,Person Siblings,"[Patricia, Paul Reinhold Jobs]",2,[Mona Simpson],1
Steve Jobs,Person Spouse,[Laurene],1,[Laurene Powell Jobs],1


In [250]:
main_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Object2,Count
Subject,Relationship,Unnamed: 2_level_1,Unnamed: 3_level_1
Steve Jobs,Educated at,"[Al, Apple Computer, Reed College]",3
Steve Jobs,Organization Founded By^-1,[Apple Computer],1
Steve Jobs,Person Current and Past Location of Residence,"[India, San Francisco Bay Area, United States]",3
Steve Jobs,Person Employee or Member of,"[Apple Computer, Atari, NeXT]",3
Steve Jobs,Person Parents,[Clara Jobs],1
Steve Jobs,Person Place of Birth,"[San Francisco, California]",1
Steve Jobs,Person Siblings,"[Patricia, Paul Reinhold Jobs]",2
Steve Jobs,Person Spouse,[Laurene],1


In [38]:
a = {'33':33}

In [29]:
def f1():
    global a
    a['33'] = 33
    print ("B\n",a)

In [33]:
def f2():
    global a
    with open('data.p', 'rb') as fp:
        a = pickle.load(fp)
    a['1222'] = 1222
    print ("A\n",a)
    f1()
    with open('data.p', 'wb') as fp:
        pickle.dump(a, fp, protocol=pickle.HIGHEST_PROTOCOL)
    print("C\n",a)

In [7]:
def f(batch_size=1,num_workers=1,shuffle=None,):
    print(batch_size)
    print(shuffle)
    print(num_workers)

In [8]:
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 6}

In [9]:
f(**params)

64
True
6


In [10]:
root = "./dataset/train/"

In [12]:
root[:-1]

'./dataset/train'

In [15]:
os.listdir(root)

FileNotFoundError: [WinError 3] The system cannot find the path specified: './dataset/train/'

In [14]:
import os