In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON

Examples: https://rdflib.github.io/sparqlwrapper/

In [51]:
# List every DBpedia resource whose dbp:tradedAs property points at the S&P 500 index.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
# The two index IRIs stay in full <...> form: the "&" they contain would have to be
# escaped inside a dbr: prefixed name. DISTINCT keeps a company that is linked to both
# IRIs from being returned (and counted) twice.
sparql.setQuery("""
    PREFIX dbp: <http://dbpedia.org/property/>
    SELECT DISTINCT ?company
    WHERE { ?company dbp:tradedAs ?index
    FILTER (?index IN (<http://dbpedia.org/resource/S&P_500>, <http://dbpedia.org/resource/S&P_500_Index> ) ) }
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
# Each binding maps a selected variable name to a dict whose "value" holds the bound term.
bindings = results["results"]["bindings"]
print(f"{len(bindings)} Results:")
for result in bindings:
    print(result["company"]["value"])

379 Results:
http://dbpedia.org/resource/Texas_Instruments
http://dbpedia.org/resource/Xerox
http://dbpedia.org/resource/Robert_Half_International
http://dbpedia.org/resource/CenturyLink
http://dbpedia.org/resource/Dr_Pepper_Snapple_Group
http://dbpedia.org/resource/Stericycle
http://dbpedia.org/resource/Dentsply_Sirona
http://dbpedia.org/resource/Diamond_Offshore_Drilling
http://dbpedia.org/resource/Assurant
http://dbpedia.org/resource/D._R._Horton
http://dbpedia.org/resource/Edison_International
http://dbpedia.org/resource/Illinois_Tool_Works
http://dbpedia.org/resource/Microchip_Technology
http://dbpedia.org/resource/ONEOK
http://dbpedia.org/resource/SCANA
http://dbpedia.org/resource/TSYS
http://dbpedia.org/resource/United_Rentals
http://dbpedia.org/resource/WEC_Energy_Group
http://dbpedia.org/resource/XL_Catlin
http://dbpedia.org/resource/Cognizant
http://dbpedia.org/resource/Newfield_Exploration
http://dbpedia.org/resource/Akamai_Technologies
http://dbpedia.org/resource/Ball_Corpo

### DBpedia Spotlight

In [2]:
import spotlight

In [73]:
# Entity-link a news snippet about the SAP / Qualtrics deal -> recognizes Qualtrics and SAP SE
news_text = 'The tech industry’s newest billionaires are a pair of brothers who started a software company in their parents’ basement in Utah. Now Ryan and Jared Smith are selling Qualtrics International Inc. to European giant SAP SE for $8 billion — and they’ll get to keep running the business.'
annotations = spotlight.annotate(
    'https://api.dbpedia-spotlight.org/en/annotate',
    news_text,
    confidence=0.5,
    support=0,
)
# Keep only the annotations whose comma-separated 'types' string mentions 'Company'.
[entity for entity in annotations if 'Company' in entity['types']]

[{'URI': 'http://dbpedia.org/resource/Qualtrics',
  'support': 16,
  'types': 'Wikidata:Q43229,Wikidata:Q24229398,DUL:SocialPerson,DUL:Agent,Schema:Organization,DBpedia:Organisation,DBpedia:Company,DBpedia:Agent',
  'surfaceForm': 'Qualtrics',
  'offset': 167,
  'similarityScore': 1.0,
  'percentageOfSecondRank': 0.0},
 {'URI': 'http://dbpedia.org/resource/SAP_SE',
  'support': 1908,
  'types': 'Wikidata:Q43229,Wikidata:Q24229398,DUL:SocialPerson,DUL:Agent,Schema:Organization,DBpedia:Organisation,DBpedia:Company,DBpedia:Agent',
  'surfaceForm': 'SAP SE',
  'offset': 214,
  'similarityScore': 1.0,
  'percentageOfSecondRank': 0.0}]

### Elasticsearch

Guide: https://www.elastic.co/guide/en/elasticsearch/reference/master/search-aggregations-bucket-significanttext-aggregation.html

Docs: https://elasticsearch-py.readthedocs.io/en/master/api.html

In [10]:
from datetime import datetime
from elasticsearch import Elasticsearch

# Name of the index that the Elasticsearch cells in this notebook query.
index = 'bloomberg_reuters'
# Address of the Elasticsearch node; despite the name it is a full URL (scheme, host, port).
IP_ADDRESS = 'http://172.16.64.23:9200'
es = Elasticsearch(hosts=[IP_ADDRESS])

In [5]:
 # List the aliases of every index on the cluster ("*" matches all index names).
 # The pattern is passed by keyword so the call also works with elasticsearch-py
 # releases whose API methods accept keyword arguments only.
 es.indices.get_alias(index="*")

{'new_guardian': {'aliases': {}},
 'ss': {'aliases': {}},
 '.monitoring-es-6-2018.11.20': {'aliases': {}},
 'wpi': {'aliases': {}},
 'new_dailymail': {'aliases': {}},
 'new_telegraph': {'aliases': {}},
 'shakespeare': {'aliases': {}},
 'new_independent': {'aliases': {}},
 'bloomberg_reuters': {'aliases': {}},
 'semantic-scholar': {'aliases': {}},
 '.monitoring-kibana-6-2018.11.20': {'aliases': {}},
 '.monitoring-es-6-2018.11.21': {'aliases': {}},
 '.monitoring-kibana-6-2018.11.21': {'aliases': {}},
 'new_rssfeedbbc': {'aliases': {}},
 'quagga_enron': {'aliases': {}},
 '.kibana_1': {'aliases': {'.kibana': {}}}}

In [11]:
# Number of documents in the index. `index` is passed by keyword because the first
# positional parameter of count() is not `index` in every elasticsearch-py release.
es.count(index=index)

{'count': 554914,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}

In [29]:
# Find the terms that are unusually frequent in articles matching "volkswagen",
# compared with the index as a whole (significant_text aggregation).
res = es.search(index=index, body={
    "query": {"match" : {"article": "volkswagen"}},
    "size": 0,  # return no hit documents, only the hit count and the aggregation
    "aggregations" : {
       "my_sample" : {
           # The sampler limits the nested aggregation to the top-scoring
           # documents, at most shard_size of them per shard.
           "sampler" : {
               "shard_size" : 5000
           },
           "aggregations": {
               "keywords" : {
                   "significant_text" : { "field" : "article" }
               }
           }
       }
   }
})
# hits.total is a plain integer on Elasticsearch 6.x, but an object of the form
# {"value": ..., "relation": ...} from 7.0 on; accept both shapes.
total_hits = res['hits']['total']
if isinstance(total_hits, dict):
    total_hits = total_hits['value']
print(f"{total_hits} documents found")
for bucket in res['aggregations']['my_sample']['keywords']['buckets']:
    print(f"{bucket['key']} - Count={bucket['doc_count']}; Score={bucket['score']}; Bg_count={bucket['bg_count']}")

4046 documents found
volkswagen - Count=4046; Score=136.15126050420167; Bg_count=4046
vow - Count=1809; Score=45.94792416756137; Bg_count=2391
vw - Count=1118; Score=35.691342459519795; Bg_count=1178
audi - Count=1123; Score=27.482010773572004; Bg_count=1540
wolfsburg - Count=575; Score=16.412570251634154; Bg_count=677
porsche - Count=757; Score=16.302891677934706; Bg_count=1178
motoren - Count=903; Score=15.680570757416096; Bg_count=1738
werke - Count=902; Score=15.500860141336506; Bg_count=1754
bmw - Count=979; Score=14.406013583744956; Bg_count=2218
carmaker - Count=1694; Score=14.333345124795336; Bg_count=6594
