In [9]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

import wptools
import wikipedia
import re
import numpy as np

from prefect import task, Flow, Parameter
from prefect.executors import LocalDaskExecutor
import nest_asyncio
nest_asyncio.apply()

#### web scrape company name, policy, link, story

In [2]:
url = 'https://buildremote.co/companies/companies-going-remote-permanently/'
user_agent = 'Mozilla/5.0 (iPad; U; CPU OS 3_2_1 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Mobile/7B405'

raw = requests.get(url, headers={'User-Agent': user_agent})
content_bs4 = BeautifulSoup(raw.content)

In [3]:
table_content = content_bs4.find_all('td', class_ = 'wptb-cell')
company_table = [i.get_text() for i in table_content]
company_table = company_table[4:]

In [5]:
link_content = table_content[7:]

link_list = []
for i, j in enumerate(link_content):
    if i%4 == 0:
        try:
            a_link = j.find(href= True)['href']
        except:
            a_link = ''
        
        link_list.append(a_link)

In [6]:
name = []
policy = []
story = []

for i, j in enumerate(company_table):
    if i%4 ==0:
        name.append(j.strip())
    elif i%4 == 1:
        policy.append(j)
    elif i%4 ==3:
        story.append(j)

In [7]:
output = pd.DataFrame()
output['name'] = name
output['policy'] = policy
output['link'] = link_list
output['story'] = story

#### fetch employee size

In [8]:
#### use wptools to filter not found + wikipedia.search

def get_wiki_disambiguation(clean_name):
    get_data = wptools.page(clean_name).get()
    if get_data is not None:
        if 'what' in get_data.data.keys():
            if get_data.data['what'] == 'Wikimedia disambiguation page':
                links = get_data.data['links']
                for i in links:
                    check_list = ['name', 'logo', 'type', 'traded_as', 'foundation', 'founded', 'released', 'num_employees', 'num_staff']
                    so = wptools.page(i).get_parse()
                    if 'infobox' in so.data.keys():
                        infobox = so.data['infobox']
                        if infobox is not None:
                            intersection = [value for value in check_list if value in infobox.keys()]
                            if len(intersection) >= 3:
                                break
                        else:
                            infobox = {'error': 'Wikipedia page Not Found'}
                    else:
                            infobox = {'error': 'Wikipedia page Not Found'}

            else:
                so = wptools.page(clean_name).get_parse()
                infobox = so.data['infobox']
        else:
            so = wptools.page(clean_name).get_parse()
            infobox = so.data['infobox']
    
    else:
        infobox = infobox = {'error': 'Wikipedia page Not Found'}

    if infobox is not None:        
        infobox['clean_name'] = clean_name
    else:
        infobox = {'error': 'Wikipedia page Not Found', 'clean_name': clean_name}
    
    return infobox


def get_wiki_suggest(clean_name):
    suggest = wikipedia.search(clean_name)
    if len(suggest)>0:
        suggest_name = suggest[0]
    else:
        suggest_name = clean_name
    return suggest_name



def get_wiki_improve(clean_name):
    try:
        infobox = get_wiki_disambiguation(clean_name)
        return infobox
    except:
        try:
            suggest_name = get_wiki_suggest(clean_name)
            infobox = get_wiki_disambiguation(suggest_name)
            return infobox

        except Exception as exc:
            return {'error': exc, 'clean_name': clean_name}

## employee size
def find_size(df, columnname):
    if 'num_employees' in columnname and df['num_employees'] is not np.nan:
        size = df['num_employees']
        employee_size = size
    elif 'num_staff' in columnname and df['num_staff'] is not np.nan:
        size = df['num_staff']
        employee_size = size
    else:
        employee_size = np.nan
    return employee_size

def process_size(size):
    try:
        size = str(size).replace(',', '')

        match = re.findall(r'\d+', size)
        res = list(map(int, match))
        return res[0]
    except:
        return np.nan

In [12]:
#utility function for getting flow results
def show_result(flow, run):
    last_task = list(flow.terminal_tasks())[0]
    print(list(flow.terminal_tasks()))
    print(last_task)
    return run.result[last_task].result

def show_dataframe_task(output):
    return pd.DataFrame(output)

def merge_dataframe(df1, df2):
    return pd.concat([df1, df2],axis = 1)

In [13]:
new_company =  output.rename(columns = {'name':'refined_name'})

with Flow('clean name and get wiki improve1', executor=LocalDaskExecutor()) as wiki_flow_improve1:
    refined_name = Parameter('refined_name')
    result1 = task(get_wiki_improve).map(refined_name)
    task(show_dataframe_task)(result1)

### run on batches to get wiki result

batch_num = int(len(new_company)/600)
for i in range(batch_num):
    globals()['batch_%s' % str(i+1)] = new_company[i*600:(i+1)*600].reset_index(drop=True)
globals()['batch_%s' % str(batch_num+1)] = new_company[(batch_num)*600:].reset_index(drop=True)


for i in range(1, batch_num+2):
    current_batch = globals()['batch_'+str(i)]
    namelist = current_batch[current_batch['refined_name'].notna()]['refined_name'].tolist()
    output = wiki_flow_improve1.run(refined_name = namelist)
    globals()['wiki_res_%s' % str(i)] = show_result(wiki_flow_improve1, output)


res_list = []
for i in range(1, batch_num+2):
    res_list.append(globals()['wiki_res_%s' % str(i)])

wiki_result = pd.concat(res_list).reset_index(drop=True)

[2022-01-20 10:07:32-0500] INFO - prefect.FlowRunner | Beginning Flow run for 'clean name and get wiki improve1'
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'refined_name': Starting task run...
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'refined_name': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve': Starting task run...
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve': Finished task run for task with final state: 'Mapped'
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[1]': Starting task run...
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[0]': Starting task run...
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[10]': Starting task run...
[2022-01-20 10:07:33-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[11]': Starting task run...


en.wikipedia.org (query) Twitter
en.wikipedia.org (query) Nationwide
en.wikipedia.org (query) Slack
en.wikipedia.org (query) Quora
en.wikipedia.org (parse) 223400
en.wikipedia.org (parse) 1322074
en.wikipedia.org (parse) 26749224
en.wikipedia.org (query) Twitter (&plcontinue=9988187|0|Lytro)
www.wikidata.org (wikidata) Q1904950
www.wikidata.org (wikidata) Q1216768
www.wikidata.org (wikidata) Q51711
www.wikidata.org (labels) Q4167410|P31|P1889|Q37555651|P373
www.wikidata.org (labels) Q4167410|P31
en.wikipedia.org (query) Twitter (&plcontinue=9988187|0|Web_appli...
www.wikidata.org (labels) Q9027|P4411|P31|P2003|P646|Q9035|P3553|...
en.wikipedia.org (restbase) /page/summary/Slack
en.wikipedia.org (restbase) /page/summary/Nationwide
Slack (en) data
{
  assessments: <dict(1)> Disambiguation
  claims: <dict(3)> P31, P1889, P373
  description: Topics referred to by the same term
  disambiguation: 25
  exhtml: <p><b>Slack</b> may refer to:</p>
  exrest: Slack may refer to:
  extext: **Slack**

[2022-01-20 10:07:36-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[11]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:36-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[12]': Starting task run...


George Slack (en) data
{
  pageid: 46797209
  parsetree: <str(1928)> <root>'''George W. Slack''' (April 2, 187...
  requests: <list(1)> parse
  title: George Slack
  wikibase: Q19975228
  wikidata_url: https://www.wikidata.org/wiki/Q19975228
  wikitext: <str(1495)> '''George W. Slack''' (April 2, 1874 &ndas...
}
en.wikipedia.org (parse) Resource slack
Quora (en) data
{
  infobox: <dict(20)> name, logo, collapsible, collapsetext, scree...
  iwlinks: <list(2)> https://commons.wikimedia.org/wiki/Category:Q...
  pageid: 26749224
  parsetree: <str(73307)> <root><template><title>Short description...
  requests: <list(1)> parse
  title: Quora
  wikibase: Q51711
  wikidata_url: https://www.wikidata.org/wiki/Q51711
  wikitext: <str(55939)> {{Short description|Question-and-answer p...
}
en.wikipedia.org (query) Fujitsu
Resource slack (en) data
{
  pageid: 42417759
  parsetree: <str(8135)> <root>'''Resource slack''', in the [[busi...
  requests: <list(1)> parse
  title: Resource slack
  wikibase:

[2022-01-20 10:07:37-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[0]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:37-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[13]': Starting task run...


www.wikidata.org (labels) P452|P4742|P989|P1482|P31|P646|Q62|P227...
Nationwide (album) (en) data
{
  image: <list(1)> {'kind': 'parse-cover', 'file': 'File:Surgery -...
  infobox: <dict(14)> name, type, artist, cover, released, recorde...
  pageid: 38891895
  parsetree: <str(5571)> <root><template><title>Infobox album</tit...
  requests: <list(2)> parse, imageinfo
  title: Nationwide (album)
  wikibase: Q17061918
  wikidata_url: https://www.wikidata.org/wiki/Q17061918
  wikitext: <str(2450)> {{Infobox album| name       = Nationwide| ...
}
en.wikipedia.org (parse) 58222
Shanon Slack (en) data
{
  infobox: <dict(20)> name, other_names, image_size, birth_date, b...
  pageid: 40317678
  parsetree: <str(16103)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Shanon Slack
  wikibase: Q15987725
  wikidata_url: https://www.wikidata.org/wiki/Q15987725
  wikitext: <str(9573)> {{short description|American mixed martial...
}
en.wikipedia.org (query) Novartis
en.wi

[2022-01-20 10:07:38-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[10]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:38-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[14]': Starting task run...


Slack (software) (en) data
{
  infobox: <dict(14)> name, title, logo, author, developer, releas...
  pageid: 41202764
  parsetree: <str(47578)> <root><template><title>Short description...
  requests: <list(1)> parse
  title: Slack (software)
  wikibase: Q17130715
  wikidata_url: https://www.wikidata.org/wiki/Q17130715
  wikitext: <str(39910)> {{Short description|Messaging software ap...
}
www.wikidata.org (labels) P3362|P856|P2002|P910|P268|P154|Q116679...
en.wikipedia.org (query) Siemens
www.wikidata.org (labels) Q1815128|P1448|P8687|Q5507497|P6298|P11...
en.wikipedia.org (parse) 168632
www.wikidata.org (labels) P2226|Q3552585|P2013|Q30338508|P166|P67...
Twitter (en) data
{
  aliases: <list(2)> twttr, twitter.com
  assessments: <dict(10)> California, Companies, Brands, Internet ...
en.wikipedia.org (restbase) /page/summary/Fujitsu  claims: <dict(110)> P277, P31, P373, P856, P910, P137, P227, P24...
www.wikidata.org (wikidata) Q81230

  description: American social networking service
 

[2022-01-20 10:07:39-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[1]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:39-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[15]': Starting task run...


Twitter (en) data
{
  infobox: <dict(30)> name, logo, logo_size, screenshot, caption, ...
  iwlinks: <list(9)> https://commons.wikimedia.org/wiki/Category:T...
  pageid: 9988187
  parsetree: <str(414233)> <root><template><title>Short descriptio...
  requests: <list(1)> parse
  title: Twitter
  wikibase: Q918
  wikidata_url: https://www.wikidata.org/wiki/Q918
  wikitext: <str(342238)> {{Short description|American social netw...
}
en.wikipedia.org (query) Zillow
en.wikipedia.org (imageinfo) File:Shiodome City Center 2012.JPG
www.wikidata.org (labels) Q29123421|P373|Q29123426|Q4830453|P3347...


[2022-01-20 10:07:40-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[12]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:40-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[16]': Starting task run...


Fujitsu (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Shiodome ...
  infobox: <dict(24)> name, native_name, native_name_lang, romaniz...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:F...
  pageid: 58222
  parsetree: <str(62460)> <root><template><title>short description...
  requests: <list(2)> parse, imageinfo
  title: Fujitsu
  wikibase: Q186394
  wikidata_url: https://www.wikidata.org/wiki/Q186394
  wikitext: <str(49613)> {{short description|Japanese multinationa...
}
en.wikipedia.org (parse) 4035097
www.wikidata.org (labels) P2003|P2427|P1365|Q7389502|Q17353641|Q1...
en.wikipedia.org (query) Basecamp
en.wikipedia.org (restbase) /page/summary/Novartis
www.wikidata.org (wikidata) Q8071921
en.wikipedia.org (imageinfo) File:Novartis K25 Basel.jpg|File:Ind...
en.wikipedia.org (parse) 20341
www.wikidata.org (labels) P2003|P31|P2427|P646|Q82059|P1454|P3242...Novartis (en) data

{
  aliases: <list(1)> Novartis International AG
  assessments: <dict(

[2022-01-20 10:07:41-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[13]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:41-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[17]': Starting task run...


www.wikidata.org (labels) P7033|Q820236|P646|P227|P3964|P2924|Q81...
Novartis (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Novartis ...
  infobox: <dict(24)> name, logo, logo_size, image, image_size, im...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:N...
  pageid: 159284
  parsetree: <str(154160)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Novartis
  wikibase: Q507154
  wikidata_url: https://www.wikidata.org/wiki/Q507154
  wikitext: <str(130211)> {{short description|Swiss multinational ...
}
en.wikipedia.org (query) Atlassian
en.wikipedia.org (restbase) /page/summary/Zillow
Zillow (en) data
{
  assessments: <dict(4)> United States, Companies, Websites, Home ...
  claims: <dict(25)> P856, P414, P31, P159, P17, P169, P571, P1454...
  description: American real estate website
  exhtml: <str(441)> <p><b>Zillow Group, Inc.</b>, or simply <b>Zi...
  exrest: <str(420)> Zillow Group, Inc., or simply Zi

[2022-01-20 10:07:41-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[15]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:41-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[18]': Starting task run...


www.wikidata.org (labels) P8785|P156|P5429|P7388|Q22857
Zillow (en) data
{
  infobox: <dict(17)> name, logo, logo_size, type, traded_as, foun...
  pageid: 4035097
  parsetree: <str(42280)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Zillow
  wikibase: Q8071921
  wikidata_url: https://www.wikidata.org/wiki/Q8071921
  wikitext: <str(32917)> {{short description|American real estate ...
}
www.wikidata.org (wikidata) Q757307
en.wikipedia.org (query) REI
Note: Wikidata item Q36908 missing 'instance of' (P31)
en.wikipedia.org (restbase) /page/summary/Mountaineering
en.wikipedia.org (parse) 548273
www.wikidata.org (labels) P31|P2427|P646|Q62|Q82059|P3553|P1454|P...
en.wikipedia.org (imageinfo) File:Summitting Island Peak.jpg|File...
en.wikipedia.org (restbase) /page/summary/Siemens
Mountaineering (en) data
{
  aliases: <list(2)> mountain climbing, alpinism
  assessments: <dict(3)> Mountains, Climbing, Backpacking
  claims: <dict(45)> P508, P373, P279, P910,

[2022-01-20 10:07:42-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[16]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:42-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[19]': Starting task run...


www.wikidata.org (labels) P2003|P31|P646|P2847|P1454|P856|P2002|P...
Siemens (en) data
{
  aliases: <list(5)> Siemens bestaat in Nederland sinds 1879, Siem...
  assessments: <dict(5)> Germany, Energy, Companies, Brands, Trains
  claims: <dict(94)> P214, P373, P31, P154, P112, P856, P910, P646...
  description: German multinational conglomerate company
  exhtml: <str(186)> <p><b>Siemens AG</b> is a German multinationa...
  exrest: <str(172)> Siemens AG is a German multinational conglome...
  extext: <str(897)> **Siemens AG** (German pronunciation: [ˈziːmə...
  extract: <str(1181)> <p class="mw-empty-elt"></p><p><b>Siemens A...
  image: <list(6)> {'kind': 'query-pageimage', 'file': 'File:The W...
  infobox: <dict(24)> name, logo, image, image_caption, type, trad...
  iwlinks: <list(10)> https://commons.wikimedia.org/wiki/Category:...
  label: Siemens
  labels: <dict(193)> Q7510906, Q107156523, Q1440421, P3362, P2403...
  length: 104,820
  links: <list(425)> 2004 Summer Olympic Games, 200

[2022-01-20 10:07:42-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[14]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:42-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[2]': Starting task run...


Siemens (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:The Wings...
  infobox: <dict(24)> name, logo, image, image_caption, type, trad...
  iwlinks: <list(10)> https://commons.wikimedia.org/wiki/Category:...
  pageid: 168632
  parsetree: <str(125510)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Siemens
  wikibase: Q81230
  wikidata_url: https://www.wikidata.org/wiki/Q81230
  wikitext: <str(104044)> {{short description|German multinational...
}
en.wikipedia.org (query) Square
www.wikidata.org (wikidata) Q5926631
www.wikidata.org (labels) P740|P3347|P112|Q6832945|P452|P463|Q679...
www.wikidata.org (labels) P2003|P31|P646|P3553|Q13677|P1454|P856|...en.wikipedia.org (restbase) /page/summary/REI

en.wikipedia.org (parse) 659939
REI (en) data
{
  aliases: <list(3)> R.E.I., REI, Recreational Equipment
  assessments: <dict(5)> United States, Companies, Cooperatives, B...
  claims: <dict(26)> P31, P856, P17, P159, P373, P571, 

[2022-01-20 10:07:43-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[18]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:43-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[20]': Starting task run...
[2022-01-20 10:07:43-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[17]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:43-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[21]': Starting task run...


en.wikipedia.org (query) Schroders
www.wikidata.org (labels) P2959|P7033|P31|Q815741|Q7333618|P646|P...
Atlassian (en) data
{
  infobox: <dict(15)> name, logo, type, traded_as, industry, found...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:A...
  pageid: 26569739
  parsetree: <str(33474)> <root><template><title>Cleanup</title><p...
  requests: <list(1)> parse
  title: Atlassian
  wikibase: Q757307
  wikidata_url: https://www.wikidata.org/wiki/Q757307
  wikitext: <str(26330)> {{Cleanup|date=August 2020|reason= Diffic...
}
en.wikipedia.org (query) Gett
en.wikipedia.org (parse) 1919438
en.wikipedia.org (restbase) /page/summary/HubSpot
HubSpot (en) data
{
  aliases: <list(1)> HubSpot Inc
  assessments: <dict(3)> United States, Companies, Marketing & Adv...
  claims: <dict(26)> P373, P31, P856, P159, P571, P1454, P154, P34...
  description: American marketing software company
  exhtml: <str(197)> <p><b>HubSpot</b> is an American developer an...
  exrest: <str(183)> HubS

[2022-01-20 10:07:44-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[19]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:44-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[22]': Starting task run...


en.wikipedia.org (parse) 36323895
www.wikidata.org (wikidata) Q1142797
HubSpot (en) data
{
  infobox: <dict(16)> name, logo, type, traded_as, founders, key_p...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:H...
  pageid: 22479089
  parsetree: <str(30932)> <root><template><title>short description...
  requests: <list(1)> parse
  title: HubSpot
  wikibase: Q5926631
  wikidata_url: https://www.wikidata.org/wiki/Q5926631
  wikitext: <str(22691)> {{short description|American marketing so...
}
en.wikipedia.org (query) Linklaters
www.wikidata.org (labels) P31|P646|P227|Q5225895|P361|P1454|P3642...
www.wikidata.org (labels) P279|Q209|P2812|P508|Q268132|P1036|P134...
www.wikidata.org (wikidata) Q5553771
en.wikipedia.org (parse) 809606
www.wikidata.org (labels) P577|P31|P856|Q620615|P306|P571|Q109792...
en.wikipedia.org (restbase) /page/summary/Square
en.wikipedia.org (imageinfo) File:SquareDefinition.svg|File:Regul...
www.wikidata.org (wikidata) Q1317942
www.wikidata.org (la

[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[2]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[23]': Starting task run...


Square (en) data
{
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:S...
  pageid: 659939
  parsetree: <str(22403)> <root><template><title>Short description...
  requests: <list(1)> parse
  title: Square
  wikibase: Q164
  wikidata_url: https://www.wikidata.org/wiki/Q164
  wikitext: <str(17226)> {{Short description|Regular quadrilateral...
}
en.wikipedia.org (imageinfo) File:Site of Loriners Hall - 1 Londo...
en.wikipedia.org (query) JPMorgan Chase
en.wikipedia.org (restbase) /page/summary/Gett
en.wikipedia.org (imageinfo) File:HaBarzel street in Tel-Aviv.jpg
Schroders (en) data
{
  aliases: <list(1)> Schroders plc
  assessments: <dict(4)> Companies, London, Organizations, Finance...
  claims: <dict(34)> P31, P646, P227, P1320, P214, P414, P159, P24...
  description: British asset management company
  exhtml: <str(342)> <p><b>Schroders plc</b> is a British multinat...
  exrest: <str(328)> Schroders plc is a British multinational asse...
  extext: <str(588)> **Schroders 

[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[21]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[20]': Finished task run for task with final state: 'Success'


en.wikipedia.org (imageinfo) File:Site of Loriners Hall - 1 Londo...
Linklaters (en) data
{
  aliases: <list(1)> magic circle law firm
  assessments: <dict(3)> Law, Business, London
  claims: <dict(17)> P646, P159, P154, P856, P571, P31, P1454, P26...
  exhtml: <str(251)> <p><b>Linklaters LLP</b> is a multinational l...
  exrest: <str(237)> Linklaters LLP is a multinational law firm he...
  extext: <str(832)> **Linklaters LLP** is a multinational law fir...
  extract: <str(890)> <p class="mw-empty-elt"></p><p><b>Linklaters...
  infobox: <dict(12)> name, logo, headquarters, num_offices, num_l...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:L...
  label: Linklaters
  labels: <dict(22)> P31, P646, P361, P1454, P856, P2002, P154, P5...
  length: 14,620
  links: <list(222)> 2 Hare Court, 39 Essex Chambers, 3PB, 4 King'...
  modified: <dict(2)> page, wikidata
  pageid: 809606
  parsetree: <str(18501)> <root><template><title>Use dmy dates</ti...
  random: Africa Peacekeepi

[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[24]': Starting task run...
[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[25]': Starting task run...
[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[22]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:45-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[26]': Starting task run...


en.wikipedia.org (query) U.S. Xpress
en.wikipedia.org (query) Okta
Linklaters (en) data
{
  infobox: <dict(12)> name, logo, headquarters, num_offices, num_l...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:L...
  pageid: 809606
  parsetree: <str(18501)> <root><template><title>Use dmy dates</ti...
  requests: <list(1)> parse
  title: Linklaters
  wikibase: Q1317942
  wikidata_url: https://www.wikidata.org/wiki/Q1317942
  wikitext: <str(14531)> {{Use dmy dates|date=January 2017}}{{Use ...
}
en.wikipedia.org (query) Pinterest
en.wikipedia.org (parse) 7805702www.wikidata.org (wikidata) Q192314

en.wikipedia.org (parse) 32826316
www.wikidata.org (wikidata) Q674173
www.wikidata.org (labels) Q6813152|P3362|P2403|P856|P2002|P4103|P...
www.wikidata.org (labels) P31|P646|P111|Q830457|P6366|P3417|Q47574|P18en.wikipedia.org (query) HireRight

www.wikidata.org (wikidata) Q255381
en.wikipedia.org (parse) 12765023
www.wikidata.org (labels) Q9035|Q1412|Q922399|Q1075|P856|P2002|Q3...

[2022-01-20 10:07:47-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[25]': Finished task run for task with final state: 'Success'


www.wikidata.org (labels) P31|P646|P571|Q4830453|P856|Q49219|P159
Okta (en) data
{
  pageid: 7805702
  parsetree: <str(4443)> <root><template><title>short description<...
  requests: <list(1)> parse
  title: Okta
  wikibase: Q674173
  wikidata_url: https://www.wikidata.org/wiki/Q674173
  wikitext: <str(3881)> {{short description| A unit of measurement...
}
www.wikidata.org (labels) Q16185302|P2226|P2013|Q4970379|P8687|Q1...


[2022-01-20 10:07:47-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[27]': Starting task run...


en.wikipedia.org (query) RPG Enterprises
en.wikipedia.org (restbase) /page/summary/HireRight
HireRight (en) data
{
  claims: <dict(5)> P31, P856, P571, P159, P646
  exhtml: <str(136)> <p><b>HireRight</b> is a global family of bac...
  exrest: <str(122)> HireRight is a global family of background sc...
  extext: <str(125)> **HireRight** is a global family of backgroun...
  extract: <str(136)> <p><b>HireRight</b> is a global family of ba...
  infobox: <dict(10)> name, logo, type, traded_as, foundation, loc...
  label: HireRight
  labels: <dict(7)> P31, P646, P571, Q4830453, P856, Q49219, P159
  length: 12,345
  links: <list(31)> Alexandra Kelly, Altegrity Risk International,...
  modified: <dict(2)> page, wikidata
  pageid: 12765023
  parsetree: <str(15455)> <root><template><title>Infobox company</...
  random: Suchowola, Busko County
  redirects: <list(1)> {'pageid': 50707403, 'ns': 0, 'title': 'Pow...
  requests: <list(5)> query, parse, wikidata, labels, restbase
  title: HireRight
  u

[2022-01-20 10:07:47-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[24]': Finished task run for task with final state: 'Success'


en.wikipedia.org (parse) 17505265
www.wikidata.org (labels) Q30640353|P3861|Q9056|Q47265|Q4022396|Q...
HireRight (en) data
{
  infobox: <dict(10)> name, logo, type, traded_as, foundation, loc...
  pageid: 12765023
  parsetree: <str(15455)> <root><template><title>Infobox company</...
  requests: <list(1)> parse
  title: HireRight
  wikibase: Q7236674
  wikidata_url: https://www.wikidata.org/wiki/Q7236674
  wikitext: <str(12269)> {{Infobox company| name = HireRight| logo...
}


[2022-01-20 10:07:47-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[28]': Starting task run...


en.wikipedia.org (query) Verizon
www.wikidata.org (wikidata) Q7277463
www.wikidata.org (labels) Q778575|P31|P646|P856|P2002|P910|P154|Q...
www.wikidata.org (labels) Q12184641|Q180816|Q76313605|Q1814021
en.wikipedia.org (query) Verizon Communications (&plcontinue=1861...
en.wikipedia.org (restbase) /page/summary/JPMorgan_Chase
en.wikipedia.org (query) Verizon Communications (&plcontinue=1861...
en.wikipedia.org (imageinfo) File:383 Madison Ave Bear Stearns C ...
en.wikipedia.org (restbase) /page/summary/RPG_Group
JPMorgan Chase (en) data
{
  aliases: <list(5)> Bank of the Manhattan Company, JPMorgan Chase...
  assessments: <dict(4)> New York City, Companies, Brands, Finance...
  claims: <dict(61)> P1278, P373, P31, P155, P17, P159, P910, P856...
  description: American investment bank
  exhtml: <str(438)> <p><b>JPMorgan Chase &amp; Co.</b> is an Amer...
  exrest: <str(420)> JPMorgan Chase & Co. is an American multinati...
  extext: <str(1862)> **JPMorgan Chase & Co.** is an American mul

[2022-01-20 10:07:48-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[27]': Finished task run for task with final state: 'Success'


www.wikidata.org (labels) P112|P1687|Q202833|Q25167
RPG Group (en) data
{
  infobox: <dict(19)> name, logo, logo_size, type, foundation, fou...
  pageid: 17505265
  parsetree: <str(25925)> <root><template><title>Distinguish</titl...
  requests: <list(1)> parse
  title: RPG Group
  wikibase: Q7277463
  wikidata_url: https://www.wikidata.org/wiki/Q7277463
  wikitext: <str(18346)> {{Distinguish|RPSG Group}}{{short descrip...
}


[2022-01-20 10:07:48-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[29]': Starting task run...


en.wikipedia.org (query) Brex
en.wikipedia.org (restbase) /page/summary/Pinterest
en.wikipedia.org (imageinfo) File:383 Madison Ave Bear Stearns C ...
www.wikidata.org (wikidata) Q467752
Pinterest (en) data
{
  aliases: <list(1)> pinterest.com
  assessments: <dict(4)> California, Internet culture, Websites, Apps
  claims: <dict(53)> P407, P31, P373, P856, P646, P159, P154, P17,...
  description: American photo sharing and saving website
  exhtml: <str(436)> <p><b>Pinterest</b> is an image sharing and s...
  exrest: <str(415)> Pinterest is an image sharing and social medi...
  extext: <str(442)> **Pinterest** is an image sharing and social ...
  extract: <str(459)> <p><b>Pinterest</b> is an image sharing and ...
  image: <list(2)> {'kind': 'restbase-original', 'width': 421, 'he...
  infobox: <dict(20)> name, type, company_type, traded_as, logo, s...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:P...
  label: Pinterest
  labels: <dict(104)> Q9035, Q1412, Q922399, Q1075

[2022-01-20 10:07:48-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[23]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:48-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[3]': Starting task run...


JPMorgan Chase (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:383 Madis...
  infobox: <dict(33)> name, logo, logo_size, image, type, traded_a...
  iwlinks: <list(2)> https://commons.wikimedia.org/wiki/Category:J...
  pageid: 231001
  parsetree: <str(180956)> <root><template><title>Short descriptio...
  requests: <list(2)> parse, imageinfo
  title: JPMorgan Chase
  wikibase: Q192314
  wikidata_url: https://www.wikidata.org/wiki/Q192314
  wikitext: <str(153017)> {{Short description|American investment ...
}
www.wikidata.org (labels) P8501|Q25245117|P3362|P2403|P856|P2002|...
en.wikipedia.org (query) Coinbase
Pinterest (en) data
{
  infobox: <dict(20)> name, type, company_type, traded_as, logo, s...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:P...
  pageid: 32826316
  parsetree: <str(71058)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Pinterest
  wikibase: Q255381
  wikidata_url: https://www.wikidata.org/wiki/

[2022-01-20 10:07:49-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[26]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:49-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[30]': Starting task run...


en.wikipedia.org (query) Deutsche Bank
www.wikidata.org (wikidata) Q60741065
en.wikipedia.org (parse) 39596725
www.wikidata.org (labels) P2003|P31|Q62|P2002|P856|P154|P571|P239...
en.wikipedia.org (query) Deutsche Bank (&plcontinue=523937|0|Raym...
www.wikidata.org (wikidata) Q16972754
en.wikipedia.org (parse) 523937
www.wikidata.org (labels) P3153|P3479|P17|Q109626132|P4776|P6782|...
www.wikidata.org (labels) P2003|P31|P646|Q62|Q82059|Q3511068|P145...
www.wikidata.org (wikidata) Q66048
en.wikipedia.org (restbase) /page/summary/Brex
Brex (en) data
{
  assessments: <dict(2)> Companies, Articles for creation
  claims: <dict(14)> P31, P571, P159, P1951, P2002, P2003, P2397, ...
  exhtml: <str(458)> <p><b>Brex Inc</b> is an American financial s...
  exrest: <str(444)> Brex Inc is an American financial service and...
  extext: <str(443)> **Brex Inc** is an American financial service...
  extract: <str(458)> <p><b>Brex Inc</b> is an American financial ...
  label: Brex
  labels: <dict(24)> P

[2022-01-20 10:07:50-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[29]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:50-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[31]': Starting task run...


en.wikipedia.org (query) Indeed
www.wikidata.org (labels) P5531|P6366|P2581|P3797|Q7400269|P2137|...
www.wikidata.org (labels) P159|P2139|Q7865722|Q131723|P2013|P2295...
en.wikipedia.org (parse) 3743305
www.wikidata.org (wikidata) Q1045367
www.wikidata.org (labels) Q1913493|P1365|Q568041|Q1794|Q155718|P6...
en.wikipedia.org (restbase) /page/summary/Verizon_Communications
en.wikipedia.org (imageinfo) File:Verizon Building (8156005279).jpg
en.wikipedia.org (restbase) /page/summary/Coinbase
www.wikidata.org (labels) P31|P646|P856|P2002|Q16559|Q580148|P154...
Coinbase (en) data
{
  aliases: <list(3)> Coinbase, Inc., Coinbase Global, Coinbase Glo...
  assessments: <dict(4)> California, Companies, Websites, Cryptocu...
  claims: <dict(46)> P31, P1951, P159, P856, P571, P452, P3417, P1...
  description: American digital asset exchange company
  exhtml: <str(380)> <p><b>Coinbase Global, Inc.</b>, branded <b>C...
  exrest: <str(359)> Coinbase Global, Inc., branded Coinbase, is a...
  extext: <s

[2022-01-20 10:07:51-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[3]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:51-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[32]': Starting task run...


Coinbase (en) data
{
  infobox: <dict(17)> name, type, traded_as, logo, foundation, loc...
  iwlinks: <list(2)> https://commons.wikimedia.org/wiki/Category:B...
  pageid: 39596725
  parsetree: <str(70382)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Coinbase
  wikibase: Q16972754
  wikidata_url: https://www.wikidata.org/wiki/Q16972754
  wikitext: <str(60259)> {{short description|American digital asse...
}
en.wikipedia.org (query) Coca Cola India
en.wikipedia.org (imageinfo) File:Verizon Building (8156005279).jpg


[2022-01-20 10:07:51-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[28]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:51-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[33]': Starting task run...


en.wikipedia.org (restbase) /page/summary/Indeed
Verizon Communications (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Verizon B...
  infobox: <dict(33)> name, logo, logo_caption, image, image_size,...
  iwlinks: <list(2)> https://commons.wikimedia.org/wiki/Category:T...
  pageid: 18619278
  parsetree: <str(160261)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Verizon Communications
  wikibase: Q467752
  wikidata_url: https://www.wikidata.org/wiki/Q467752
  wikitext: <str(135462)> {{short description|American telecommuni...
}
en.wikipedia.org (query) Microsoft
Indeed (en) data
{
  aliases: <list(2)> Indeed.com, Indeed Jobs
  assessments: <dict(2)> United States, Websites
  claims: <dict(15)> P646, P31, P571, P856, P3417, P127, P2002, P1...
  description: <str(68)> American worldwide employment-related sea...
  exhtml: <str(519)> <p><b>Indeed</b> is an American worldwide emp...
  exrest: <str(505)> Indeed is an American

[2022-01-20 10:07:51-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[31]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:51-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[34]': Starting task run...


www.wikidata.org (labels) Q1289365|Q29584334|P1830|P414|P7534|P31...
Indeed (en) data
{en.wikipedia.org (query) Coca-Cola Indiaen.wikipedia.org (query) Microsoft (&plcontinue=19001|0|Microsoft_Word)

  infobox: <dict(13)> name, commercial, registration, type, logo, ...

  pageid: 3743305
  parsetree: <str(10192)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Indeed
  wikibase: Q1045367
  wikidata_url: https://www.wikidata.org/wiki/Q1045367
  wikitext: <str(7382)> {{short description|American worldwide emp...
}
en.wikipedia.org (query) Dropbox
en.wikipedia.org (parse) 52527698
en.wikipedia.org (parse) 19477138
en.wikipedia.org (parse) 19001
www.wikidata.org (wikidata) Q28173816
www.wikidata.org (labels) P1889|Q162222|P373|Q107159|P127|Q107150...
www.wikidata.org (wikidata) Q142539
www.wikidata.org (labels) P31|Q668|Q23556|P1454|P17|P571|Q1187656...
www.wikidata.org (wikidata) Q2283
en.wikipedia.org (restbase) /page/summary/Deutsche_Bank
www.wikidata.or

[2022-01-20 10:07:53-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[32]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:53-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[35]': Starting task run...


Coca-Cola India (en) data
{
  infobox: <dict(11)> name, logo, type, area_served, industry, pro...
  pageid: 52527698
  parsetree: <str(12263)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Coca-Cola India
  wikibase: Q28173816
  wikidata_url: https://www.wikidata.org/wiki/Q28173816
  wikitext: <str(8742)> {{short description|Coca-Cola India}}{{Inf...
}
en.wikipedia.org (query) Deloitte
en.wikipedia.org (imageinfo) File:Deutsche Bank Taunusanlage.jpg
www.wikidata.org (labels) Q2024390|Q15589464|Q483318|P1661|Q388|Q...


[2022-01-20 10:07:53-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[30]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:53-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[36]': Starting task run...


Deutsche Bank (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Deutsche ...
  infobox: <dict(21)> name, logo, logo_size, image, image_caption,...
  iwlinks: <list(5)> https://commons.wikimedia.org/wiki/Category:D...
  pageid: 523937
  parsetree: <str(147033)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Deutsche Bank
  wikibase: Q66048
  wikidata_url: https://www.wikidata.org/wiki/Q66048
  wikitext: <str(122298)> {{short description|German banking and f...
}
en.wikipedia.org (parse) 586015
en.wikipedia.org (query) Reddit
www.wikidata.org (labels) Q308913|Q863756|Q55173|Q211098|P7014|Q2...
en.wikipedia.org (restbase) /page/summary/Dropbox
www.wikidata.org (wikidata) Q491748
en.wikipedia.org (imageinfo) File:Dropbox (7463062672).jpg
en.wikipedia.org (parse) 3829005
Dropbox (en) data
{
  assessments: <dict(4)> California, Companies, Computing, Internet
  claims: <dict(42)> P178, P31, P306, P373, P856, P348, P646, P277...
  

[2022-01-20 10:07:54-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[34]': Finished task run for task with final state: 'Success'


www.wikidata.org (labels) P2652|Q20947086|P7827|P2403|P2002|Q1819...
www.wikidata.org (wikidata) Q1136
Dropbox (en) data
{
  infobox: <dict(11)> name, logo, developer, released, ver layout,...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:D...
  pageid: 19477138
  parsetree: <str(91764)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Dropbox
  wikibase: Q142539
  wikidata_url: https://www.wikidata.org/wiki/Q142539
  wikitext: <str(74091)> {{short description|Cloud storage and fil...
}


[2022-01-20 10:07:54-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[37]': Starting task run...


en.wikipedia.org (query) LogMeIn
www.wikidata.org (labels) P2652|P6760|Q3972943|P856|P2002|Q322039...
en.wikipedia.org (parse) 16993467
www.wikidata.org (labels) Q18046528|P4264|P159|P2139|Q2581028|Q14...
www.wikidata.org (wikidata) Q823303
www.wikidata.org (labels) Q3903488|P3222|P3365|Q1156115|Q765389|Q...
www.wikidata.org (labels) P2003|P31|P2427|P646|Q82059|P1365|P1454...
en.wikipedia.org (restbase) /page/summary/Deloitte
Deloitte (en) data
{
  aliases: <list(3)> Deloitte Touche Tohmatsu, DTT International, ...
  assessments: <dict(5)> Business, New York City, Companies, Londo...
  claims: <dict(45)> P154, P373, P31, P112, P856, P910, P159, P646...
  description: Multinational professional services network
  exhtml: <str(401)> <p><b>Deloitte Touche Tohmatsu Limited </b>, ...
  exrest: <str(380)> Deloitte Touche Tohmatsu Limited , commonly r...
  extext: <str(2148)> **Deloitte Touche Tohmatsu Limited** (), com...
  extract: <str(2261)> <p class="mw-empty-elt"></p><p><b>Deloitte ...


[2022-01-20 10:07:55-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[35]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:55-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[38]': Starting task run...


Deloitte (en) data
{
  infobox: <dict(14)> name, logo, trade_name, type, founder, area_...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:D...
  pageid: 586015
  parsetree: <str(82245)> <root><template><title>Short description...
  requests: <list(1)> parse
  title: Deloitte
  wikibase: Q491748
  wikidata_url: https://www.wikidata.org/wiki/Q491748
  wikitext: <str(67583)> {{Short description|Multinational profess...
}
en.wikipedia.org (query) Capital One
en.wikipedia.org (restbase) /page/summary/LogMeIn
en.wikipedia.org (parse) 1168152
LogMeIn (en) data
{
  aliases: <list(1)> Log Me In
  assessments: <dict(1)> Software
  claims: <dict(22)> P31, P414, P646, P159, P571, P856, P154, P740...
  description: Software company
  exhtml: <str(298)> <p><b>LogMeIn, Inc.</b> is a provider of soft...
  exrest: <str(284)> LogMeIn, Inc. is a provider of software as a ...
  extext: <str(822)> **LogMeIn, Inc.** is a provider of software a...
  extract: <str(870)> <p><b>LogMeIn, Inc.</

[2022-01-20 10:07:56-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[37]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:56-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[39]': Starting task run...


www.wikidata.org (wikidata) Q1034654
www.wikidata.org (labels) Q2894122|Q94781916|Q5287259|P349|P6366|...
www.wikidata.org (labels) P9618|P127|Q16848217|Q63840441|P6009|P6...
LogMeIn (en) data
{
  infobox: <dict(12)> name, logo, type, industry, founded, hq_loca...
  pageid: 16993467
  parsetree: <str(20047)> <root><template><title>short description...
  requests: <list(1)> parse
  title: LogMeIn
  wikibase: Q823303
  wikidata_url: https://www.wikidata.org/wiki/Q823303
  wikitext: <str(15204)> {{short description|Software company}}{{I...
}
en.wikipedia.org (query) State Farm
www.wikidata.org (labels) P2003|P31|Q7055374|P646|P2427|Q13677|Q7...
en.wikipedia.org (restbase) /page/summary/Reddit
en.wikipedia.org (query) State Farm (&plcontinue=810859|0|Sompo_J...
en.wikipedia.org (imageinfo) File:ISS-46 Scott Kelly's first ever...
en.wikipedia.org (parse) 810859
www.wikidata.org (labels) P1320|P4264|Q364|Q3966|Q4922077|P166|P2...
Reddit (en) data
{
  aliases: <list(3)> Reddit Inc., redd.it, 

[2022-01-20 10:07:57-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[36]': Finished task run for task with final state: 'Success'


Reddit (en) dataen.wikipedia.org (restbase) /page/summary/Microsoft

{
  infobox: <dict(25)> name, logo, logo_alt, collapsible, collapset...
  iwlinks: <list(4)> https://commons.wikimedia.org/wiki/Category:R...
  pageid: 3829005
  parsetree: <str(224175)> <root><template><title>Short descriptio...
  requests: <list(1)> parse
  title: Reddit
  wikibase: Q1136
  wikidata_url: https://www.wikidata.org/wiki/Q1136
  wikitext: <str(184571)> {{Short description|Social news aggregat...
}
en.wikipedia.org (restbase) /page/summary/Capital_One


[2022-01-20 10:07:57-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[4]': Starting task run...


en.wikipedia.org (query) Facebook (Meta)
en.wikipedia.org (imageinfo) File:building92microsoft.jpg|File:Bu...
en.wikipedia.org (imageinfo) File:CapitalOneHQ body.jpg|File:Capi...
Microsoft (en) data
{
  aliases: <list(5)> MS, MSFT, Microsoft Corp., Micro-Soft, Micros...
  assessments: <dict(10)> United States, Wikipedia 1.0, Software, ...
  claims: <dict(152)> P1448, P1297, P1278, P1151, P946, P112, P17,...
  description: American multinational technology corporation
  exhtml: <str(820)> <p><b>Microsoft Corporation</b> is an America...
  exrest: <str(806)> Microsoft Corporation is an American multinat...
  extext: <str(3083)> **Microsoft Corporation** is an American mul...
  extract: <str(3201)> <p class="mw-empty-elt"></p><p><b>Microsoft...
  image: <list(8)> {'kind': 'query-pageimage', 'file': 'File:Build...
  infobox: <dict(30)> name, logo, logo_size, logo_alt, logo_captio...
  iwlinks: <list(4)> https://commons.wikimedia.org/wiki/Category:M...
  label: Microsoft
  labels: <dict(345

[2022-01-20 10:07:58-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[38]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:58-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[40]': Starting task run...
[2022-01-20 10:07:58-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[33]': Finished task run for task with final state: 'Success'


Capital One (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:CapitalOn...
  infobox: <dict(22)> name, logo, image, image_caption, areas_serv...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:C...
  pageid: 1168152
  parsetree: <str(66205)> <root><template><title>short description...
  requests: <list(2)> parse, imageinfo
  title: Capital One
  wikibase: Q1034654
  wikidata_url: https://www.wikidata.org/wiki/Q1034654
  wikitext: <str(53760)> {{short description|Bank holding company ...
}
Microsoft (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:building9...
  infobox: <dict(30)> name, logo, logo_size, logo_alt, logo_captio...
  iwlinks: <list(4)> https://commons.wikimedia.org/wiki/Category:M...
en.wikipedia.org (restbase) /page/summary/State_Farm
  pageid: 19001
  parsetree: <str(196131)> <root><template><title>Short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Microsoft
  wikibase: Q2283
  wikidata_url: https:/

[2022-01-20 10:07:59-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[41]': Starting task run...


State Farm (en) data
{
  aliases: <list(2)> State Farm Mutual Automobile Insurance Compan...
  assessments: <dict(3)> Illinois, Companies, Cooperatives
  claims: <dict(33)> P31, P112, P646, P159, P910, P373, P571, P17,...
  description: American insurance company
  exhtml: <str(157)> <p><b>State Farm Insurance</b> is a large gro...
  exrest: <str(143)> State Farm Insurance is a large group of insu...
  extext: <str(146)> **State Farm Insurance** is a large group of ...
  extract: <str(185)> <p class="mw-empty-elt"></p><p><b>State Farm...
  infobox: <dict(21)> name, logo, type, foundation, founder, locat...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:S...
  label: State Farm Insurance
  labels: <dict(59)> P3267, P31, P2427, P646, Q30030987, Q81822823...
  length: 33,793
  links: <list(627)> ACCO Brands, AGCO, AIA Group, AM General, ARB...
  modified: <dict(2)> page, wikidata
  pageid: 810859
  parsetree: <str(41664)> <root><template><title>short description...
  ran

[2022-01-20 10:07:59-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[39]': Finished task run for task with final state: 'Success'
[2022-01-20 10:07:59-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[42]': Starting task run...


www.wikidata.org (wikidata) Q380
en.wikipedia.org (parse) 41067
State Farm (en) data
{
  infobox: <dict(21)> name, logo, type, foundation, founder, locat...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:S...
  pageid: 810859
  parsetree: <str(41664)> <root><template><title>short description...
  requests: <list(1)> parse
  title: State Farm
  wikibase: Q2007336
  wikidata_url: https://www.wikidata.org/wiki/Q2007336
  wikitext: <str(33417)> {{short description|American insurance co...
}
en.wikipedia.org (query) Square Enix (&plcontinue=210868|0|Strate...
en.wikipedia.org (query) Lincoln Financial Group
www.wikidata.org (wikidata) Q258098
www.wikidata.org (labels) Q56276799|Q20947086|Q370321|P6378|P3362...
en.wikipedia.org (parse) 1591724
en.wikipedia.org (parse) 210868
www.wikidata.org (labels) P460|Q4167410|P31|P1889|Q64995210|P373|...
www.wikidata.org (wikidata) Q1825763
en.wikipedia.org (restbase) /page/summary/Drift
www.wikidata.org (wikidata) Q207784
Drift (en) d

[2022-01-20 10:08:01-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[42]': Finished task run for task with final state: 'Success'


Drift (2013 Australian film) (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Drift (20...
  infobox: <dict(17)> name, image, caption, director, producer, sc...
  pageid: 39446209
  parsetree: <str(19527)> <root><template><title>advert</title><pa...
  requests: <list(2)> parse, imageinfo
  title: Drift (2013 Australian film)
  wikibase: Q3415674
  wikidata_url: https://www.wikidata.org/wiki/Q3415674
  wikitext: <str(15347)> {{advert|date=May 2014}}{{Use dmy dates|d...
}
en.wikipedia.org (parse) Drift (2013 Belgian film)
Lincoln National Corporation (en) data
{
  infobox: <dict(14)> name, logo, type, traded_as, industry, found...
  pageid: 1591724
  parsetree: <str(17640)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Lincoln National Corporation
  wikibase: Q1825763
  wikidata_url: https://www.wikidata.org/wiki/Q1825763
  wikitext: <str(13740)> {{short description|American insurance an...
}


[2022-01-20 10:08:01-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[43]': Starting task run...


en.wikipedia.org (query) Cimpress
en.wikipedia.org (parse) 45661682
en.wikipedia.org (restbase) /page/summary/Square_Enix
en.wikipedia.org (imageinfo) File:Square Enix HQ (Shinjuku Eastsi...
www.wikidata.org (wikidata) Q22121865
www.wikidata.org (labels) Q61058375|P2137|P949|P373|Q37033|Q10539...
www.wikidata.org (labels) P31|P2427|P646|Q82059|P1454|Q204956|P85...
Square Enix (en) data
{
  aliases: <list(5)> Square Enix Holdings Co., Ltd., SQUARE ENIX C...
  assessments: <dict(4)> Japan, Companies, Video games, Square Enix
  claims: <dict(59)> P159, P373, P31, P910, P966, P646, P452, P856...
  description: Japanese video game company
  exhtml: <str(428)> <p><b>Square Enix Holdings Co., Ltd.</b> is a...
  exrest: <str(393)> Square Enix Holdings Co., Ltd. is a Japanese ...
  extext: <str(1960)> **Square Enix Holdings Co., Ltd.** is a Japa...
  extract: <str(2097)> <p class="mw-empty-elt"></p><p><b>Square En...
  image: <list(8)> {'kind': 'query-pageimage', 'file': 'File:Squar...
  infobo

[2022-01-20 10:08:02-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[40]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:02-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[44]': Starting task run...


Square Enix (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Square En...
  infobox: <dict(29)> name, native_name, native_name_lang, romaniz...
  pageid: 210868
  parsetree: <str(169753)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Square Enix
  wikibase: Q207784
  wikidata_url: https://www.wikidata.org/wiki/Q207784
  wikitext: <str(138854)> {{short description|Japanese video game ...
}
Meta Platforms (en) data
{
  aliases: <list(6)> Facebook, Inc., Facebook, TheFacebook, Inc., ...
  assessments: <dict(6)> California, Business, Sociology, Companie...
  claims: <dict(98)> P112, P214, P414, P31, P227, P244, P646, P159...
  description: American multinational technology corporation
  exhtml: <str(584)> <p><b>Meta Platforms, Inc.</b>, trading as <b...
  exrest: <str(556)> Meta Platforms, Inc., trading as Meta and for...
  extext: <str(966)> **Meta Platforms, Inc.** , trading as **Meta*...
  extract: <str(1033)> <p class="mw

[2022-01-20 10:08:02-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[43]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:02-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[45]': Starting task run...
[2022-01-20 10:08:02-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[4]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:02-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[46]': Starting task run...


Meta Platforms (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Facebook ...
  infobox: <dict(33)> name, former_name, trade_name, logo, image, ...
  pageid: 62420226
  parsetree: <str(86574)> <root><template><title>Short description...
  requests: <list(2)> parse, imageinfo
  title: Meta Platforms
  wikibase: Q380
  wikidata_url: https://www.wikidata.org/wiki/Q380
  wikitext: <str(68387)> {{Short description|American multinationa...
}
en.wikipedia.org (query) SoFi
en.wikipedia.org (query) Aquent
www.wikidata.org (wikidata) Q941127
en.wikipedia.org (parse) 41086429
en.wikipedia.org (parse) 50550953
www.wikidata.org (labels) Q2025721|P856|P2002|P3875|P910|P154|Q54...
en.wikipedia.org (imageinfo) File:Drift 2015 poster.jpg
Drift (2015 film) (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Drift 201...
  infobox: <dict(14)> name, image, director, producer, writer, sta...
  iwlinks: <list(1)> https://de.wikipedia.org/wiki/Driften_(Film)
  pageid: 4904

[2022-01-20 10:08:04-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[46]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:04-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[47]': Starting task run...


en.wikipedia.org (imageinfo) File:Emily Osment 2010.jpg
Aquent (en) data
{
  infobox: <dict(9)> name, type, key_people, industry, services, o...
  pageid: 50550953
  parsetree: <str(10626)> <root><template><title>Infobox company</...
  requests: <list(1)> parse
  title: Aquent
  wikibase: Q4782861
  wikidata_url: https://www.wikidata.org/wiki/Q4782861
  wikitext: <str(7601)> {{Infobox company| name = Aquent| logo = |...
}
en.wikipedia.org (restbase) /page/summary/SoFi
en.wikipedia.org (query) Capita
SoFi (en) data
{
  aliases: <list(3)> Social Finance (aka SoFi), SoFi Technologies,...
  assessments: <dict(1)> Companies
  claims: <dict(25)> P856, P159, P31, P17, P452, P1454, P571, P154...
  description: American financial services company
  exhtml: <str(293)> <p><b>SoFi Technologies, Inc.</b> is an Ameri...
  exrest: <str(279)> SoFi Technologies, Inc. is an American online...
  extext: <str(364)> **SoFi Technologies, Inc.** (now the parent c...
  extract: <str(383)> <p><b>SoFi Technolog

[2022-01-20 10:08:04-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[45]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:04-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[48]': Starting task run...


en.wikipedia.org (parse) 2018145
en.wikipedia.org (restbase) /page/summary/Salesforce
en.wikipedia.org (imageinfo) File:Erra Drift Album Cover Art.jpg
SoFi (en) data
{
  infobox: <dict(15)> name, logo, type, traded_as, key_people, ind...
  pageid: 41086429
  parsetree: <str(34797)> <root><template><title>short description...
  requests: <list(1)> parse
  title: SoFi
  wikibase: Q17020720
  wikidata_url: https://www.wikidata.org/wiki/Q17020720
  wikitext: <str(27622)> {{short description|American financial se...
}
en.wikipedia.org (imageinfo) File:Salesforce.com logo.svg|File:Sa...
en.wikipedia.org (query) British Airways


[2022-01-20 10:08:04-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[41]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:04-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[49]': Starting task run...


www.wikidata.org (wikidata) Q606013
Drift (Erra album) (en) data
{
  image: <list(1)> {'kind': 'parse-cover', 'file': 'File:Erra Drif...
  infobox: <dict(14)> name, type, artist, cover, released, genre, ...
  pageid: 51454476
  parsetree: <str(7049)> <root><template><title>Infobox album</tit...
  requests: <list(2)> parse, imageinfo
  title: Drift (Erra album)
  wikibase: Q28451943
  wikidata_url: https://www.wikidata.org/wiki/Q28451943
  wikitext: <str(3437)> {{Infobox album| name       = Drift| type ...
}
en.wikipedia.org (query) HSBC
Salesforce (en) data
{
  aliases: <list(4)> Salesforce, Salesforce.com, Inc., CRM, SFDC
  assessments: <dict(7)> United States, California, Companies, Bra...
  claims: <dict(54)> P373, P31, P646, P414, P910, P214, P159, P946...
  description: American software company
  exhtml: <str(304)> <p><b>Salesforce</b> is an American cloud-bas...
  exrest: <str(290)> Salesforce is an American cloud-based softwar...
  extext: <str(291)> **Salesforce** is an Americ

[2022-01-20 10:08:05-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[44]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:05-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[5]': Starting task run...


en.wikipedia.org (parse) 322572
Salesforce (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Salesforc...
  infobox: <dict(21)> name, logo, image, image_caption, trade_name...
  iwlinks: <list(2)> https://commons.wikimedia.org/wiki/Category:C...
  pageid: 2420207
  parsetree: <str(103277)> <root><template><title>Short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Salesforce
  wikibase: Q941127
  wikidata_url: https://www.wikidata.org/wiki/Q941127
  wikitext: <str(84020)> {{Short description|American software com...
}
en.wikipedia.org (query) Shopify
en.wikipedia.org (restbase) /page/summary/Capitawww.wikidata.org (wikidata) Q8766

Capita (en) data
{
  aliases: <list(3)> Capita plc, Capita Group, Capita (United Kingdom)
  assessments: <dict(2)> Companies, London
  claims: <dict(24)> P31, P646, P159, P414, P214, P946, P571, P856...
  exhtml: <str(166)> <p><b>Capita plc</b>, commonly known as <b>Ca...
  exrest: <str(145)> Capita plc, commonly known as Ca

[2022-01-20 10:08:05-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[47]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:05-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[50]': Starting task run...


Capita (en) data
{
  infobox: <dict(11)> name, logo, type, traded_as, foundation, loc...
  pageid: 2018145
  parsetree: <str(51472)> <root><template><title>For</title><part>...
  requests: <list(1)> parse
  title: Capita
  wikibase: Q606013
  wikidata_url: https://www.wikidata.org/wiki/Q606013
  wikitext: <str(43132)> {{For|the Latin phrase|Per capita}}{{Use ...
}
en.wikipedia.org (query) United States Air Force
www.wikidata.org (wikidata) Q7501150
www.wikidata.org (labels) P3362|P2403|P856|P2002|P4103|P910|P154|...
www.wikidata.org (labels) P2572|Q484847|P31|P2003|P646|P3553|Q136...
en.wikipedia.org (query) United States Air Force (&plcontinue=320...
www.wikidata.org (labels) Q1137051|P244|P571|Q54933328|P2670|Q120...
en.wikipedia.org (query) United States Air Force (&plcontinue=320...
www.wikidata.org (labels) P2226|Q1565913|P2013|Q466496|P1448|P868...
en.wikipedia.org (parse) 32090
www.wikidata.org (labels) Q16|P8687
www.wikidata.org (wikidata) Q11223
www.wikidata.org (labels) Q8840

[2022-01-20 10:08:07-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[5]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:07-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[51]': Starting task run...


Shopify (en) data
{
  infobox: <dict(18)> name, logo, logo_size, type, traded_as, indu...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:S...
  pageid: 26126491
  parsetree: <str(51762)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Shopify
  wikibase: Q7501150
  wikidata_url: https://www.wikidata.org/wiki/Q7501150
  wikitext: <str(39227)> {{short description|Canadian e-commerce c...
}
en.wikipedia.org (query) United States Navy
en.wikipedia.org (restbase) /page/summary/HSBC
www.wikidata.org (labels) Q5830|P373|Q768914|Q947947|P229|P127|Q3...
en.wikipedia.org (query) United States Navy (&plcontinue=20518076...
en.wikipedia.org (imageinfo) File:Canary Wharf HSBC 1.JPG
www.wikidata.org (labels) Q922661|Q192443|P8814|Q685211|Q7010680|...
HSBC (en) data
{
  aliases: <list(5)> HSBC Holdings plc, Hong Kong and Shanghai Ban...
  assessments: <dict(6)> Singapore, China, Hong Kong, Companies, U...
  claims: <dict(63)> P571, P112, P17, P31, P3

[2022-01-20 10:08:08-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[49]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:08-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[52]': Starting task run...


HSBC (en) datawww.wikidata.org (labels) Q19838999|Q459282|P6782|Q6851963|Q46979...
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Canary Wh...
  infobox: <dict(22)> name, logo, image, image_caption, type, trad...
  iwlinks: <list(3)> https://commons.wikimedia.org/wiki/Category:H...
  pageid: 322572
  parsetree: <str(151366)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: HSBC
  wikibase: Q190464
  wikidata_url: https://www.wikidata.org/wiki/Q190464
  wikitext: <str(123236)> {{short description|British multinationa...
}

en.wikipedia.org (parse) 20518076
en.wikipedia.org (query) Stellantis (Fiat Chrysler)
British Airways (en) data
{
  aliases: <list(3)> BA, British Airways plc, BAW
  assessments: <dict(7)> Companies, Brands, Aviation, London, Unit...
  claims: <dict(84)> P946, P31, P113, P121, P229, P230, P114, P159...
  description: Flag carrier of the United Kingdom
  exhtml: <str(171)> <p><b>British Airways</b> (<b>BA</b>) is th

[2022-01-20 10:08:08-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[48]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:08-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[53]': Starting task run...


www.wikidata.org (wikidata) Q11220
British Airways (en) data
{
  infobox: <dict(19)> airline, IATA, ICAO, callsign, aoc, hubs, fr...
  iwlinks: <list(3)> https://commons.wikimedia.org/wiki/Category:B...
  pageid: 3970
  parsetree: <str(157841)> <root><template><title>short descriptio...
  requests: <list(1)> parse
  title: British Airways
  wikibase: Q8766
  wikidata_url: https://www.wikidata.org/wiki/Q8766
  wikitext: <str(133896)> {{short description|Flag carrier of the ...
}
en.wikipedia.org (query) Amgen
en.wikipedia.org (query) Stellantis
www.wikidata.org (labels) P121|Q3498988|P1019|Q174534|Q182027|Q81...
www.wikidata.org (labels) Q11208|Q6683|P856|P2002|P3221|P910|P154...
en.wikipedia.org (parse) 932897
en.wikipedia.org (query) Stellantis (&plcontinue=64563392|0|Marqu...
www.wikidata.org (wikidata) Q470517
en.wikipedia.org (parse) 64563392
www.wikidata.org (labels) Q507443|P2003|P31|P2427|P646|Q82059|P36...
www.wikidata.org (labels) P8687|Q5569170|P361|Q22695377|Q8676|Q56...
www

[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[53]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[54]': Starting task run...
[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[52]': Finished task run for task with final state: 'Success'


Amgen (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Amgenhead...
  infobox: <dict(22)> name, former_names, logo, logo_size, image, ...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:Amgen
  pageid: 932897
  parsetree: <str(37864)> <root><template><title>Short description...
  requests: <list(2)> parse, imageinfo
  title: Amgen
  wikibase: Q470517
  wikidata_url: https://www.wikidata.org/wiki/Q470517
  wikitext: <str(29578)> {{Short description|American multinationa...
}
en.wikipedia.org (query) Splunk
Stellantis (en) data
{
  infobox: <dict(17)> name, logo, type, traded_as, industry, prede...
  iwlinks: <list(8)> https://commons.wikimedia.org/wiki/Category:A...
  pageid: 64563392
  parsetree: <str(33175)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Stellantis
  wikibase: Q97439162
  wikidata_url: https://www.wikidata.org/wiki/Q97439162
  wikitext: <str(24628)> {{short description|Multinational automot...
}
en

[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[55]': Starting task run...
[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[50]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[51]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[56]': Starting task run...
[2022-01-20 10:08:11-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[57]': Starting task run...


United States Air Force (en) data
{
  image: <list(0)> 
  infobox: <dict(45)> unit_name, image, start_date, country, type,...
  iwlinks: <list(7)> https://commons.wikimedia.org/wiki/United_Sta...
  pageid: 32090
  parsetree: <str(146821)> <root><template><title>Short descriptio...
  requests: <list(1)> parse
  title: United States Air Force
  wikibase: Q11223
  wikidata_url: https://www.wikidata.org/wiki/Q11223
  wikitext: <str(122917)> {{Short description|Air service branch o...
}
en.wikipedia.org (query) Instacart
en.wikipedia.org (parse) 19135734
United States Navy (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Emblem of...
  infobox: <dict(42)> unit_name, image, caption, country, type, ro...
  iwlinks: <list(2)> https://commons.wikimedia.org/wiki/Category:U...
  pageid: 20518076
  parsetree: <str(165850)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: United States Navy
  wikibase: Q11220
  wikidata_url: https://www.

[2022-01-20 10:08:13-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[54]': Finished task run for task with final state: 'Success'


Splunk (en) data
{
  infobox: <dict(18)> name, logo, logo_alt, type, traded_as, found...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:C...
  pageid: 19135734
  parsetree: <str(46440)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Splunk
  wikibase: Q1835753
  wikidata_url: https://www.wikidata.org/wiki/Q1835753
  wikitext: <str(35383)> {{short description|American technology c...
}
www.wikidata.org (labels) P1978|P1036|P1343|P1245|P462|P646|P1582...


[2022-01-20 10:08:14-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[55]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:14-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[56]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:14-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[58]': Starting task run...
[2022-01-20 10:08:14-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[59]': Starting task run...


Snowflake (en) dataInstacart (en) data
{
  infobox: <dict(14)> name, logo, type, location, founder, key_peo...
  pageid: 46634143
  parsetree: <str(45436)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Instacart
  wikibase: Q22909236
  wikidata_url: https://www.wikidata.org/wiki/Q22909236
  wikitext: <str(37318)> {{short description|Internet-based grocer...
}

{
  iwlinks: <list(4)> https://commons.wikimedia.org/wiki/Category:S...
  pageid: 23607823
  parsetree: <str(35904)> <root><template><title>Short description...
  requests: <list(1)> parse
  title: Snowflake
  wikibase: Q550147
  wikidata_url: https://www.wikidata.org/wiki/Q550147
  wikitext: <str(28023)> {{Short description|Single ice crystal or...
}
en.wikipedia.org (query) Synchrony Financial


[2022-01-20 10:08:14-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[6]': Starting task run...


en.wikipedia.org (query) Asda
en.wikipedia.org (query) Box
en.wikipedia.org (parse) 42193518
en.wikipedia.org (parse) 413522
en.wikipedia.org (parse) 167937
Note: Wikidata item Q89 missing 'instance of' (P31)
en.wikipedia.org (restbase) /page/summary/Apple
www.wikidata.org (wikidata) Q18387139
en.wikipedia.org (imageinfo) File:Honeycrisp.jpg|File:Assorted Re...
www.wikidata.org (wikidata) Q188075
www.wikidata.org (wikidata) Q297410
www.wikidata.org (labels) P2003|P31|P646|Q13677|P361|P1454|Q16138...
www.wikidata.org (labels) P186|P646|Q389782|P3553|Q262959|P910|P2...
www.wikidata.org (labels) P2003|P31|P646|P1454|P856|P2002|Q480409...
Note: Wikidata item Q188075 missing 'instance of' (P31)
en.wikipedia.org (restbase) /page/summary/Box
Apple (en) data
{
  assessments: <dict(3)> Plants, Wikipedia 1.0, Food and drink
  claims: <dict(60)> P1582, P1245, P373, P508, P18, P910, P349, P6...
  description: Fruit and tree
  exhtml: <str(535)> <p>An <b>apple</b> is an edible fruit produce...
  ex

[2022-01-20 10:08:15-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[57]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:15-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[60]': Starting task run...
[2022-01-20 10:08:15-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[6]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:15-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[61]': Starting task run...
[2022-01-20 10:08:15-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[58]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:15-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[62]': Starting task run...


Apple (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Pink lady...
  infobox: <dict(9)> name, image, image_caption, image2, image2_ca...
  iwlinks: <list(6)> https://commons.wikimedia.org/wiki/Category:A...
  pageid: 18978754
  parsetree: <str(100281)> <root><template><title>Short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Apple
  wikibase: Q89
  wikidata_url: https://www.wikidata.org/wiki/Q89
  wikitext: <str(79211)> {{Short description|Fruit and tree}}{{Abo...
}
en.wikipedia.org (imageinfo) File:Asda House, Leeds (19th July 20...
Box (en) data
{
  iwlinks: <list(4)> https://commons.wikimedia.org/wiki/Category:B...
  pageid: 413522
  parsetree: <str(9762)> <root><template><title>other uses</title>...
  requests: <list(1)> parse
  title: Box
  wikibase: Q188075
  wikidata_url: https://www.wikidata.org/wiki/Q188075
  wikitext: <str(8112)> {{other uses|Box (disambiguation)|The Box ...
}
en.wikipedia.org (query) Nedbank
Synchrony Financial (en) data

[2022-01-20 10:08:16-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[59]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:16-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[63]': Starting task run...


www.wikidata.org (labels) P2003|P31|Q100146248|P646|P856|P2002|Q8...
en.wikipedia.org (parse) 970755
Asda (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Asda Hous...
  infobox: <dict(21)> name, logo, image, image_caption, trade_name...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:Asda
  pageid: 167937
  parsetree: <str(94096)> <root><template><title>short description...
  requests: <list(2)> parse, imageinfo
  title: Asda
  wikibase: Q297410
  wikidata_url: https://www.wikidata.org/wiki/Q297410
  wikitext: <str(77651)> {{short description|British supermarket c...
}
en.wikipedia.org (query) Cisco
www.wikidata.org (labels) P2003|P31|P646|P3553|P1454|P856|P2002|Q...
en.wikipedia.org (query) Cisco Systems (&plcontinue=51746|0|Willi...
www.wikidata.org (wikidata) Q213660
en.wikipedia.org (parse) 51746
en.wikipedia.org (restbase) /page/summary/Nedbank
www.wikidata.org (labels) P577|Q9035|P856|P2002|Q3220391|P3875|P9...
en.wikipedia.org (imageinfo) Fi

[2022-01-20 10:08:17-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[60]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:17-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[64]': Starting task run...


Nedbank (en) data
{
  infobox: <dict(21)> name, logo, logo_size, trading_name, type, t...
  pageid: 2521673
  parsetree: <str(11191)> <root><template><title>Use dmy dates</ti...
  requests: <list(1)> parse
  title: Nedbank
  wikibase: Q2751701
  wikidata_url: https://www.wikidata.org/wiki/Q2751701
  wikitext: <str(8147)> {{Use dmy dates|date= June 2018}}{{More ci...
}
en.wikipedia.org (query) Alphabet (Google)
www.wikidata.org (labels) Q20947086|Q5064182|Q1769702|Q2745629|Q3...
en.wikipedia.org (restbase) /page/summary/Pratt_&_Whitney
en.wikipedia.org (query) Alphabet Inc. (&plcontinue=47489893|0|Go...
en.wikipedia.org (imageinfo) File:PrattAndWhitneyHeadquarters.JPG
www.wikidata.org (labels) P1320|P691|P2184|P4264|P2013|Q7913|P868...
Pratt & Whitney (en) data
{
  aliases: <list(3)> P&W, PW, Pratt and Whitney Company
  assessments: <dict(3)> Companies, Connecticut, Aviation
  claims: <dict(36)> P31, P373, P127, P856, P646, P159, P910, P214...
  description: Aircraft engine manufacturer

[2022-01-20 10:08:18-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[61]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:18-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[65]': Starting task run...


Pratt & Whitney (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:PrattAndW...
  infobox: <dict(18)> name, logo, image, image_caption, type, foun...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:P...
  pageid: 51519
  parsetree: <str(38456)> <root><template><title>Short description...
  requests: <list(2)> parse, imageinfo
  title: Pratt & Whitney
  wikibase: Q173150
  wikidata_url: https://www.wikidata.org/wiki/Q173150
  wikitext: <str(31521)> {{Short description|Aircraft engine manuf...
}
en.wikipedia.org (query) PwC
www.wikidata.org (labels) P3362|P2403|P856|P910|P1451|P154|Q11962...
www.wikidata.org (labels) P5161|P2347|Q34057|P373|Q150|P127|Q1684...
en.wikipedia.org (parse) 148172
en.wikipedia.org (restbase) /page/summary/LinkedIn
www.wikidata.org (labels) P10206|P3987|P1482|P31|P646|Q62|Q82059|...
en.wikipedia.org (imageinfo) File:Linkedin screenshot.png
www.wikidata.org (wikidata) Q488048
www.wikidata.org (labels) P3222|Q55513046|P138|Q92747

[2022-01-20 10:08:19-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[62]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:19-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[66]': Starting task run...


LinkedIn (en) data
{
  infobox: <dict(26)> logo, logo_size, screenshot, caption, founde...
  iwlinks: <list(4)> https://commons.wikimedia.org/wiki/Category:L...
  pageid: 970755
  parsetree: <str(140253)> <root><template><title>Short descriptio...
  requests: <list(1)> parse
  title: LinkedIn
  wikibase: Q213660
  wikidata_url: https://www.wikidata.org/wiki/Q213660
  wikitext: <str(113316)> {{Short description|Professional network...
}
en.wikipedia.org (query) Amazon
en.wikipedia.org (parse) 29621629
Cisco Systems (en) data
{
  aliases: <list(3)> Cisco Systems, Inc., Cisco, CISCO
  assessments: <dict(7)> California, Companies, Brands, Computing,...
  claims: <dict(75)> P1056, P373, P31, P138, P910, P856, P414, P64...
  description: American multinational technology company
  exhtml: <str(705)> <p><b>Cisco Systems, Inc.</b> (Cisco) is an A...
  exrest: <str(691)> Cisco Systems, Inc. (Cisco) is an American mu...
  extext: <str(1777)> **Cisco Systems, Inc.** (Cisco) is an Americ...
  extr

[2022-01-20 10:08:20-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[63]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:20-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[67]': Starting task run...


www.wikidata.org (labels) Q4167410|P31|Q1173793|P460
Cisco Systems (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Cisco Sys...
  infobox: <dict(23)> name, logo, image, image_size, image_caption...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:Cisco
  pageid: 51746
  parsetree: <str(117747)> <root><template><title>Short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Cisco Systems
  wikibase: Q173395
  wikidata_url: https://www.wikidata.org/wiki/Q173395
  wikitext: <str(93644)> {{Short description|American multinationa...
}
en.wikipedia.org (restbase) /page/summary/PricewaterhouseCoopers
en.wikipedia.org (query) Arena Pharmaceuticals
en.wikipedia.org (imageinfo) File:Madrid - CTBA, Torre PwC y Torr...
en.wikipedia.org (imageinfo) File:Googleplex HQ (cropped).jpg


[2022-01-20 10:08:21-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[64]': Finished task run for task with final state: 'Success'


en.wikipedia.org (restbase) /page/summary/Amazon
en.wikipedia.org (parse) 21470125
Amazon (en) data
{
  assessments: <dict(1)> Disambiguation
  claims: <dict(2)> P31, P460
  description: Topics referred to by the same term
  disambiguation: 67
  exhtml: <str(297)> <p><b>Amazon</b> most often refers to:</p><ul...
  exrest: <str(238)> Amazon most often refers to:Amazons, a tribe ...
  extext: <str(303)> **Amazon** most often refers to:  * Amazons, ...
  extract: <str(354)> <p><b>Amazon</b> most often refers to:</p><u...
  iwlinks: <list(2)> https://en.wiktionary.org/wiki/Amazon, https:...
  label: Amazon
  labels: <dict(4)> Q4167410, P31, Q1173793, P460
  length: 5,163
  links: <list(67)> 1042 Amazone, Amason (disambiguation), Amazon ...
  modified: <dict(2)> page, wikidata
  pageid: 29621629
  parsetree: <str(6089)> <root><template><title>wiktionary</title>...
  random: Minister for Housing, Local Government and Heritage
  redirects: <list(9)> {'pageid': 1344759, 'ns': 0, 'title': 'Amaz

[2022-01-20 10:08:21-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[68]': Starting task run...


PricewaterhouseCoopers (en) data
{
  aliases: <list(3)> PwC, PwC network, pwc.com
  assessments: <dict(4)> Business, Companies, United Kingdom, Taxation
  claims: <dict(43)> P373, P31, P159, P452, P856, P910, P646, P571...
  description: Multinational professional services network
  exhtml: <str(314)> <p><b>PricewaterhouseCoopers</b> is a multina...
  exrest: <str(293)> PricewaterhouseCoopers is a multinational pro...
  extext: <str(1422)> **PricewaterhouseCoopers** is a multinationa...
  extract: <str(1491)> <p><b>PricewaterhouseCoopers</b> is a multi...
  image: <list(5)> {'kind': 'query-pageimage', 'file': 'File:Price...
  infobox: <dict(13)> name, logo, trade_name, type, industry, foun...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:P...
  label: PricewaterhouseCoopers
  labels: <dict(70)> Q2807, P31, Q181487, P646, P1365, P2003, P355...
  length: 123,330
  links: <list(309)> 89th Academy Awards, Academy Award for Best P...
  modified: <dict(2)> page, wikidata
 

[2022-01-20 10:08:21-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[65]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:21-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[69]': Starting task run...


PricewaterhouseCoopers (en) data
{
  infobox: <dict(13)> name, logo, trade_name, type, industry, foun...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:P...
  pageid: 148172
  parsetree: <str(148077)> <root><template><title>Short descriptio...
  requests: <list(1)> parse
  title: PricewaterhouseCoopers
  wikibase: Q488048
  wikidata_url: https://www.wikidata.org/wiki/Q488048
  wikitext: <str(122692)> {{Short description|Multinational profes...
}
en.wikipedia.org (query) Muck Rack
www.wikidata.org (wikidata) Q7272267
Amason (en) data
{
  pageid: 19967244
  parsetree: <str(422)> <root>'''Amason''' may refer to:* [[Alvin ...
  requests: <list(1)> parse
  title: Amason
  wikibase: Q455435
  wikidata_url: https://www.wikidata.org/wiki/Q455435
  wikitext: <str(252)> '''Amason''' may refer to:* [[Alvin Eli Ama...
}
en.wikipedia.org (parse) Amazon (1780 ship)
www.wikidata.org (labels) Q17157326|P31|P646|Q65|P17|Q4830453|P15...
Amazon (1780 ship) (en) data
{
  infobox: <dict(3

[2022-01-20 10:08:22-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[67]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:22-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[7]': Starting task run...


en.wikipedia.org (parse) 207676
Quinn Emanuel Urquhart & Sullivan (en) data
{
  aliases: <list(1)> Quinn Emanuel Urquhart & Sullivan, LLP
  assessments: <dict(2)> California, Law
  claims: <dict(7)> P159, P571, P856, P17, P31, P646, P2002
  description: American law firm
  exhtml: <str(215)> <p><b>Quinn Emanuel Urquhart &amp; Sullivan, ...
  exrest: <str(197)> Quinn Emanuel Urquhart & Sullivan, LLP is a g...
  extext: <str(199)> **Quinn Emanuel Urquhart & Sullivan, LLP** is...
  extract: <str(215)> <p><b>Quinn Emanuel Urquhart &amp; Sullivan,...
  infobox: <dict(12)> firm_name, firm_logo, headquarters, num_offi...
  label: Quinn Emanuel Urquhart & Sullivan
  labels: <dict(12)> Q17157326, P31, P646, Q65, P17, Q4830453, P15...
  length: 18,157
  links: <list(60)> Alex Spiro, AmLaw 100 firm, Andrew H. Schapiro...
  modified: <dict(2)> page, wikidata
  pageid: 2550748
  parsetree: <str(23154)> <root><template><title>short description...
  random: Crawl Space (Breaking Bad)
  redirected: <l

[2022-01-20 10:08:22-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[68]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:22-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[70]': Starting task run...


www.wikidata.org (wikidata) Q1474197
Quinn Emanuel Urquhart & Sullivan (en) data
{
  infobox: <dict(12)> firm_name, firm_logo, headquarters, num_offi...
  pageid: 2550748
  parsetree: <str(23154)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Quinn Emanuel Urquhart & Sullivan
  wikibase: Q7272267
  wikidata_url: https://www.wikidata.org/wiki/Q7272267
  wikitext: <str(18049)> {{short description|American law firm}}{{...
}
en.wikipedia.org (query) Robinhood
en.wikipedia.org (imageinfo) File:Amazon 1997 poster.jpgen.wikipedia.org (parse) 10002787

Amazon (1997 film) (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Amazon 19...
  infobox: <dict(15)> name, image, caption, director, producer, wr...
  pageid: 20568858
  parsetree: <str(4179)> <root><template><title>short description<...
  requests: <list(2)> parse, imageinfo
  title: Amazon (1997 film)
  wikibase: Q456054
  wikidata_url: https://www.wikidata.org/wiki/Q456054
  wikitext: <

[2022-01-20 10:08:24-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[7]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:24-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[71]': Starting task run...


www.wikidata.org (labels) P1417|P3417|P646
Amazon (automobile) (en) data
{
  pageid: 853329
  parsetree: <str(1208)> <root><template><title>Use dmy dates</tit...
  requests: <list(1)> parse
  title: Amazon (automobile)
  wikibase: Q456085
  wikidata_url: https://www.wikidata.org/wiki/Q456085
  wikitext: <str(897)> {{Use dmy dates|date=January 2018}}{{Use Br...
}
en.wikipedia.org (imageinfo) File:Dsa-logo cmyk 1.png
en.wikipedia.org (parse) Amazon (brigantine)
Upwork (en) data
{
  infobox: <dict(14)> name, logo, company_type, traded_as, industr...
  pageid: 10002787
  parsetree: <str(15051)> <root><template><title>Short description...
  requests: <list(1)> parse
  title: Upwork
  wikibase: Q3344170
  wikidata_url: https://www.wikidata.org/wiki/Q3344170
  wikitext: <str(11288)> {{Short description|An American freelance...
}
en.wikipedia.org (query) Direct Line Group
Note: Wikidata item Q1474197 missing 'instance of' (P31)
en.wikipedia.org (restbase) /page/summary/Muckraker
Doncaster Shef

[2022-01-20 10:08:25-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[69]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:25-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[8]': Starting task run...


Amazon (chess) (en) data
{
  pageid: 36392754
  parsetree: <str(13763)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Amazon (chess)
  wikibase: Q647433
  wikidata_url: https://www.wikidata.org/wiki/Q647433
  wikitext: <str(6596)> {{short description|Fairy chess piece}}[[F...
}
en.wikipedia.org (parse) Amazon (color)
Muckraker (en) data
{
  iwlinks: <list(2)> https://en.wikisource.org/wiki/The_Man_with_t...
  pageid: 207676
  parsetree: <str(45647)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Muckraker
  wikibase: Q1474197
  wikidata_url: https://www.wikidata.org/wiki/Q1474197
  wikitext: <str(34488)> {{short description|Progressive Era refor...
}
en.wikipedia.org (query) VMware
George Alexander Macfarren (en) data
{
  iwlinks: <list(2)> http://www.cpdl.org/wiki/index.php/George_Ale...
  pageid: 2314058
  parsetree: <str(32344)> <root><template><title>Short description...
  requests: <list(1)> parse
  title: George

[2022-01-20 10:08:26-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[66]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:26-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[71]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:26-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[9]': Starting task run...


Amazon (company) (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Amazon Sp...
  infobox: <dict(34)> name, logo, logo_size, logo_caption, image, ...
  iwlinks: <list(3)> https://commons.wikimedia.org/wiki/Category:A...
  pageid: 90451
  parsetree: <str(223427)> <root><template><title>Short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Amazon (company)
  wikibase: Q3884
  wikidata_url: https://www.wikidata.org/wiki/Q3884
  wikitext: <str(174100)> {{Short description|American multination...
}
en.wikipedia.org (imageinfo) File:Robert Muchamore - Lisbon Book ...
Direct Line Group (en) data
{
  infobox: <dict(17)> name, logo, type, traded_as, foundation, fou...
  pageid: 36908610
  parsetree: <str(16062)> <root><template><title>short description...
  requests: <list(1)> parse
  title: Direct Line Group
  wikibase: Q5280285
  wikidata_url: https://www.wikidata.org/wiki/Q5280285
  wikitext: <str(11508)> {{short description|British insurance com...
}
en.wiki

[2022-01-20 10:08:27-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[8]': Finished task run for task with final state: 'Success'


Robin Hood (1912 film) (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Robin Hoo...
  infobox: <dict(12)> name, image, caption, director, producer, wr...
  pageid: 15110213
  parsetree: <str(5776)> <root><template><title>short description<...
  requests: <list(2)> parse, imageinfo
  title: Robin Hood (1912 film)
  wikibase: Q3939269
  wikidata_url: https://www.wikidata.org/wiki/Q3939269
  wikitext: <str(3989)> {{short description|1912 film}}{{Infobox f...
}
en.wikipedia.org (parse) Robin Hood (1922 film)
www.wikidata.org (labels) Q7014241|Q1196955|P856|P2002|P910|P1451...
VMware (en) data
{
  infobox: <dict(20)> name, logo, image_caption, type, traded_as, ...
  iwlinks: <list(1)> https://commons.wikimedia.org/wiki/Category:VMware
  pageid: 312018
  parsetree: <str(76940)> <root><template><title>short description...
  requests: <list(1)> parse
  title: VMware
  wikibase: Q11407
  wikidata_url: https://www.wikidata.org/wiki/Q11407
  wikitext: <str(58217)> {{short des

[2022-01-20 10:08:30-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[9]': Finished task run for task with final state: 'Success'


en.wikipedia.org (imageinfo) File:Robin Hood (1991 film) cover.jpg
Robin Hood (1991 British film) (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Robin Hoo...
  infobox: <dict(18)> name, image, caption, director, producer, sc...
  pageid: 6309072
  parsetree: <str(11699)> <root><template><title>for</title><part>...
  requests: <list(2)> parse, imageinfo
  title: Robin Hood (1991 British film)
  wikibase: Q689658
  wikidata_url: https://www.wikidata.org/wiki/Q689658
  wikitext: <str(8304)> {{for|the 1991 film starring Kevin Costner...
}
en.wikipedia.org (parse) Robin Hood (2006 TV series)
en.wikipedia.org (imageinfo) File:Robinhoods2titlescreen.jpg
Robin Hood (2006 TV series) (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:Robinhood...
  infobox: <dict(21)> name, image, caption, genre, creator, starri...
  iwlinks: <list(1)> https://en.wikiquote.org/wiki/Robin_Hood_(BBC)
  pageid: 4017773
  parsetree: <str(31340)> <root><template><title>Use dmy 

[2022-01-20 10:08:35-0500] INFO - prefect.TaskRunner | Task 'get_wiki_improve[70]': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:36-0500] INFO - prefect.TaskRunner | Task 'show_dataframe_task': Starting task run...
[2022-01-20 10:08:36-0500] INFO - prefect.TaskRunner | Task 'show_dataframe_task': Finished task run for task with final state: 'Success'
[2022-01-20 10:08:36-0500] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[<Task: show_dataframe_task>]
<Task: show_dataframe_task>


Robin Hood (album) (en) data
{
  infobox: <dict(8)> name, type, artist, released, genre, length, ...
  pageid: 8890924
  parsetree: <str(3899)> <root><template><title>Unreferenced</titl...
  requests: <list(1)> parse
  title: Robin Hood (album)
  wikibase: Q7352492
  wikidata_url: https://www.wikidata.org/wiki/Q7352492
  wikitext: <str(2340)> {{Unreferenced|date=January 2007}}{{Infobo...
}


In [15]:
new_company = pd.concat([new_company.reset_index(drop = True), wiki_result], axis=1)
columnname = new_company.columns
new_company['size_wiki'] = new_company.apply(lambda x: find_size(x, columnname),axis=1).apply(process_size)

In [18]:
new_company = new_company[['refined_name','policy','link','story','size_wiki']]

In [23]:
new_company.groupby('policy').count()

Unnamed: 0_level_0,refined_name,link,story,size_wiki
policy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fully Remote,3,3,3,1
Hybrid/Optional,2,2,2,2
Hybrid/Partial,6,6,6,4
Optional,13,13,13,11
Optional/Partial,1,1,1,1
Partial,17,17,17,14
Partial/Optional,2,2,2,2
Partial/Remote First,1,1,1,1
Partial,1,1,1,1
Remote First,22,22,22,11


In [21]:
# new_company.to_excel('Companies with Remote Programs JH Edit.xlsx')