## Wikidata
 
Se consulta Wikidata para obtener los autores más relevantes de la UNLP y la CIC, junto con sus ORCID, para poder cruzarlos con otros sistemas.

In [1]:
import pandas as pd

In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint that the query below is sent to.
WIKIDATA_ENDPOINT = "https://query.wikidata.org/sparql"

# Scholia-derived query (see the "# tool" / "# title" header inside the string).
# Each result row carries a researcher URI with its label and description, a
# count of works, a sitelink count and a sampled ORCID, ordered by works
# descending.
# NOTE(review): the query targets a single entity (Q784171), while the notebook
# intro mentions both UNLP and CIC -- confirm that this entity covers both.
AFFILIATED_RESEARCHERS_QUERY = """

# tool: scholia
# title: Employees and affiliated with a specified organization

PREFIX target: <http://www.wikidata.org/entity/Q784171>

SELECT
  (SAMPLE(?number_of_works_) AS ?works)
  (SAMPLE(?wikis_) AS ?wikis)
  ?researcher ?researcherLabel ?researcherDescription
  (SAMPLE(?orcid_) AS ?orcid)
WITH {
  SELECT DISTINCT ?researcher WHERE {
    ?researcher ( wdt:P108 | wdt:P463 | wdt:P1416 ) / wdt:P361* target: .
  } 
} AS %researchers
WITH {
  SELECT
    (COUNT(?work) AS ?number_of_works_) ?researcher
  WHERE {
    INCLUDE %researchers

    # No biological pathways; they skew the statistics too much 
    MINUS { ?work wdt:P31 wd:Q4915012 } 

    # This OPTIONAL query should be after the MINUS query, otherwise
    # researchers might not show if they do not have any papers.
    OPTIONAL { ?work wdt:P50 ?researcher . }
  } 
  GROUP BY ?researcher
} AS %researchers_and_number_of_works
WHERE {
  INCLUDE %researchers_and_number_of_works
  OPTIONAL { ?researcher wdt:P496 ?orcid_ . }
  OPTIONAL { ?researcher wikibase:sitelinks ?wikis_ }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en,da,de,es,fr,nl,no,ru,sv,zh" . } 
}
GROUP BY ?researcher ?researcherLabel ?researcherDescription 
ORDER BY DESC(?works)


"""

sparql = SPARQLWrapper(WIKIDATA_ENDPOINT)
sparql.setQuery(AFFILIATED_RESEARCHERS_QUERY)
sparql.setReturnFormat(JSON)

# Network call: the outcome depends on the live endpoint, so counts can change
# between runs of the notebook.
response = sparql.query()
# Later cells read the converted payload as results['results']['bindings'].
results = response.convert()

In [3]:
# Variables selected by the SPARQL query, in the column order wanted for the
# resulting table.
AUTHOR_COLUMNS = (
    'researcherLabel',
    'researcher',
    'works',
    'researcherDescription',
    'wikis',
    'orcid',
)

# Column-oriented accumulator: one list per column, all kept at equal length.
authors = {column: [] for column in AUTHOR_COLUMNS}

for binding in results['results']['bindings']:
    for column in AUTHOR_COLUMNS:
        # A binding only contains the variables that were bound for that row
        # (e.g. the OPTIONAL ORCID may be missing), so absent keys are padded
        # with None to keep every column aligned. Values are stored exactly as
        # found under 'value', with no type conversion.
        authors[column].append(binding[column]['value'] if column in binding else None)



In [4]:
# Turn the per-column lists into a table; each dict key becomes a column.
df = pd.DataFrame(authors)
# Bare trailing expression so the notebook renders the frame as cell output.
df


Unnamed: 0,researcherLabel,researcher,works,researcherDescription,wikis,orcid
0,María Teresa Dova,http://www.wikidata.org/entity/Q61718357,1151,Argentine physicist,4,0000-0001-6113-0878
1,Fernando Monticelli,http://www.wikidata.org/entity/Q61724322,879,Argentine physicist,0,0000-0002-6974-1443
2,Martín Fernando Tripiana,http://www.wikidata.org/entity/Q61739947,825,Argentine physicist,0,
3,Francisco Alonso,http://www.wikidata.org/entity/Q61820692,699,Argentine physicist,0,0000-0001-9431-8156
4,Hernan Wahlberg,http://www.wikidata.org/entity/Q61761507,632,Argentine physicist,0,0000-0003-0616-7330
...,...,...,...,...,...,...
1257,Edgardo R. Montaldi,http://www.wikidata.org/entity/Q114228427,0,Argentine plant physiologist,0,
1258,Catalina A. Rotunno,http://www.wikidata.org/entity/Q114766658,0,Argentine chemist,0,
1259,Juan Alberto Schnack,http://www.wikidata.org/entity/Q115012042,0,Argentine ecologist,0,
1260,Juan José Gagliardino,http://www.wikidata.org/entity/Q115269612,0,Argentine diabetologist,0,


Podemos ver que existen 617 autores con ORCID

In [None]:
# Boolean mask: True for rows whose ORCID is present (not None/NaN).
has_orcid = df['orcid'].notna()
# Display only the researchers that have an ORCID.
df.loc[has_orcid]

Unnamed: 0,researcherLabel,researcher,works,researcherDescription,wikis,orcid
0,María Teresa Dova,http://www.wikidata.org/entity/Q61718357,1149,Argentine physicist,4,0000-0001-6113-0878
1,Fernando Monticelli,http://www.wikidata.org/entity/Q61724322,877,Argentine physicist,0,0000-0002-6974-1443
3,Francisco Alonso,http://www.wikidata.org/entity/Q61820692,697,Argentine physicist,0,0000-0001-9431-8156
4,Hernan Wahlberg,http://www.wikidata.org/entity/Q61761507,630,Argentine physicist,0,0000-0003-0616-7330
5,María Josefina Alconada Verzini,http://www.wikidata.org/entity/Q61958783,616,Argentine physicist,0,0000-0003-2212-7830
...,...,...,...,...,...,...
1208,John Pérez-Calderón,http://www.wikidata.org/entity/Q87994731,0,researcher,0,0000-0003-2311-5305
1220,Pablo Ernesto Pérez,http://www.wikidata.org/entity/Q78212406,0,Argentine economist,0,0000-0002-2661-8584
1240,Inés Kessler,http://www.wikidata.org/entity/Q112425500,0,,0,0000-0003-4048-7418
1242,Mónica G. Pené,http://www.wikidata.org/entity/Q112426001,0,,0,0000-0002-0257-7634
