### Import packages, instantiate models

In [15]:
import wikipedia as wp
from SPARQLWrapper import SPARQLWrapper, JSON
from transformers import pipeline, set_seed

In [16]:
# Instantiate a question-answering pipeline. The checkpoint is named explicitly
# (it is the one the library selected when no model was supplied) so the
# notebook keeps using the same model even if the library default changes.
QA = pipeline('question-answering', model='distilbert-base-cased-distilled-squad')

No model was supplied, defaulted to distilbert-base-cased-distilled-squad (https://huggingface.co/distilbert-base-cased-distilled-squad)


In [17]:
# Instantiate a summarization pipeline. The checkpoint is named explicitly
# (it is the one the library selected when no model was supplied) so the
# notebook keeps using the same model even if the library default changes.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 (https://huggingface.co/sshleifer/distilbart-cnn-12-6)


In [18]:
# define a function to run SPARQL queries (against DBpedia unless told otherwise)
def run_query(query, endpoint="http://dbpedia.org/sparql"):
    """Run a SPARQL query and return the converted JSON response.

    Args:
        query: SPARQL query text to send.
        endpoint: URL of the SPARQL endpoint. Defaults to the DBpedia endpoint,
            so existing single-argument calls behave as before.

    Returns:
        The value of ``sparql.query().convert()`` with the return format set
        to JSON.
    """
    sparql = SPARQLWrapper(endpoint)
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    return sparql.query().convert()

### Clean entities, pull abstract from DBpedia

In [19]:
# Hard-coded stand-ins for values that would normally be extracted from an
# input text.
# NOTE(review): `property` shadows the Python builtin of the same name; the
# name is kept here because other cells may refer to it.
entity, property, value = "George W. Bush", 'Was', 'The president'

In [20]:
# Resolve the entity name to a Wikipedia page title through the Wikipedia
# search; the title with spaces replaced by underscores is what the SPARQL
# query below uses as the DBpedia resource name.
query = entity
search_hits = wp.search(query, results = 1)
if not search_hits:
    # Fail with a descriptive message instead of a bare "list index out of range".
    raise IndexError(f"Wikipedia search returned no page for entity {entity!r}")
result = search_hits[0]
wiki_title = result.replace(' ', '_')
wiki_title

'George_W._Bush'

In [21]:
# Name of the property to fetch for the entity: its abstract.
prop = "abstract"

In [22]:
# Build the SPARQL query that fetches the English abstract from DBpedia.
# - `dbo:` is declared explicitly so the query does not depend on prefixes the
#   endpoint happens to predefine.
# - The resource is written as a full IRI rather than a `dbpedia:` prefixed
#   name, because page titles can contain characters (parentheses, commas,
#   apostrophes) that are not allowed unescaped in a prefixed local name.
query = (
    'PREFIX dbo: <http://dbpedia.org/ontology/> \n'
    'SELECT ?abstract \n'
    'WHERE { \n'
    f'  <http://dbpedia.org/resource/{wiki_title}> dbo:{prop} ?abstract . \n'
    "  FILTER ( LANG ( ?abstract ) = 'en' ) \n"
    '}'
)

In [23]:
# Run the SPARQL query, take the abstract text from the first result row,
# and preview its first 100 characters.
result_dict = dict(run_query(query))
first_binding = result_dict['results']['bindings'][0]
abstract = first_binding['abstract']['value']
abstract[:100]

'George Walker Bush (born July 6, 1946) is an American politician who served as the 43rd president of'

### Testing with full abstract

In [24]:
# "What" question, answered against the full abstract.
QA(
    context=abstract,
    question='What was george bush the president of?',
)['answer']

'United States'

In [25]:
# "When" question, answered against the full abstract.
QA(
    context=abstract,
    question='When was george bush the president of the united states?',
)['answer']

'2001 to 2009'

In [26]:
# Yes/no question, answered against the full abstract. The recorded answer is
# a fragment of the context ('43rd'), not a yes or a no.
QA(
    context=abstract,
    question='Was George bush the president of the united states in 2008?',
)['answer']

'43rd'

In [27]:
# "Who" question, answered against the full abstract.
QA(
    context=abstract,
    question="Who was george w. bush?",
)['answer']

'41st president'

### Testing with summarized abstract

In [28]:
# Condense the abstract with the summarization pipeline; the questions in the
# following cells are asked against this shorter text.
summarizer_output = summarizer(abstract)
summary = summarizer_output[0]['summary_text']
summary

' George Walker Bush served as 43rd president of the United States from 2001 to 2009 . He was elected governor of Texas in 1994 and re-elected president in 2004 . Bush is the son of 41st president George H. W. Bush . He is a member of the Republican Party, Bush family .'

In [29]:
# "What" question, answered against the summarized abstract.
QA(
    context=summary,
    question='What was george bush the president of?',
)['answer']

'United States'

In [30]:
# "When" question, answered against the summarized abstract.
QA(
    context=summary,
    question='When was george bush the president of the united states?',
)['answer']

'2001'

In [32]:
# Yes/no question, answered against the summarized abstract. The question text
# now matches the one asked against the full abstract (the original was
# missing the space in "states in"), so the two runs are directly comparable.
QA(question='Was George bush the president of the united states in 2008?',
         context = summary)['answer']

'43rd'

In [33]:
# "Who" question, answered against the summarized abstract.
QA(
    context=summary,
    question="Who was george w. bush?",
)['answer']

'41st president'

In [34]:
# Experimenting with GPT-2 text generation. The checkpoint is named explicitly
# (it is the one the library selected when no model was supplied) so the
# notebook keeps using the same model even if the library default changes.
generator = pipeline('text-generation', model='gpt2')
# Seed the random number generators so sampled text is repeatable.
set_seed(42)

No model was supplied, defaulted to gpt2 (https://huggingface.co/gpt2)


In [36]:
# Re-seed immediately before sampling so that re-running this cell reproduces
# the same continuation, instead of depending on how much random state earlier
# cells (or earlier runs of this cell) have already consumed.
set_seed(42)
generator("Was george bush the president of the united states in 2008?", max_length=100)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Was george bush the president of the united states in 2008? A decade ago, this was a question that you\'d have to have had to ask yourself, "Does it sound kind of boring? You\'re always being asked questions like, \'what is going on in Iraq?\' It would be hard to write something short about it if that was your primary focus, but obviously Obama was very close to the president who was in office."\n\nIs it possible that this is a rhetorical argument to win'}]

ChatGPT:  
    No, George Bush was not the president in 2008. According to the text you provided, George Walker Bush served as the 43rd president of the United States from 2001 to 2009. So, his term as president ended in 2009, and he was succeeded by President Barack Obama, who took office on January 20, 2009. Therefore, George Bush was not the president in 2008; he was the president from 2001 to 2009.

ChatGPT can answer yes/no questions, but it does not reason correctly about periods of time: it correctly identifies George Bush as president, but even with the sample text it cannot tell that 2008 falls between 2001 and 2009.

DistilBERT cannot answer yes/no questions: it returns a span of text taken from the context (e.g. '43rd') rather than a yes or a no.