In [1]:
# NEWSAPI_KEY = ''

In [2]:
PAGESIZE=20
NUM_QUESTIONS = 3

In [3]:
from newspaper import Article
import requests
import json

def get_news(api, pagesize=20):
    url = "https://newsapi.org/v2/everything?"

    # Specify the query and number of returns
    parameters = {
        'q': 'finance OR economics OR bitcoin', # query phrase
        'pageSize': pagesize,  # maximum is 100
        'apiKey': api, # your own API key
    }

    # Make the request
    response = requests.get(url, params=parameters)

    # Convert the response to JSON format
    data = response.json()

    news_dict = {}
    news_list = []
    for article in data["articles"]:
        # Use newspaper3k to extract text
        news_article = Article(article["url"])
        news_article.download()
        news_article.parse()
        news_dict[article["url"]] = news_article.text
        news_list.append(news_article.text)
        
    return news_list, news_dict

In [5]:
news_list, news_dict = get_news(NEWSAPI_KEY, PAGESIZE)

In [6]:
urls = list(news_dict.keys())
idx = 0
print(f'news from {urls[idx]}')
print('--------------------------------------------')
print(news_dict[urls[idx]])

news from https://lifehacker.com/what-to-do-when-a-contractor-ghosts-you-mid-project-1850674402
--------------------------------------------
When you’re ready to tackle a big home renovation or repair, finding a good contractor is a challenge . Getting one to even give you the time of day can be harder than you might think, i n part because of a shortage of skilled tradespeople, and in part because of the economics of construction—your dinky little half bath reno simply may not be profitable enough for an established, busy contractor. This might lead you to hire someone who is a bit less established, which in turn can lead to the nightmare scenario in which they ghost you after starting the work, leaving you with a mess.



If you skipped some of your due diligence in your rush to hire someone, it’s too late to go back and do that now. But there are steps you can take when a contractor vanishes.

Read your contract and s tart a paper trail

First things first: Read your contract. Unles

In [7]:
# generate questions from context
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = "cuda" if torch.cuda.is_available else "cpu"

tokenizer = AutoTokenizer.from_pretrained("voidful/bart-eqg-question-generator")
model = AutoModelForSeq2SeqLM.from_pretrained("voidful/bart-eqg-question-generator").to(device)

In [8]:
tokens = tokenizer(news_list, padding=True, truncation=True, return_tensors="pt").to(device)

In [9]:
outputs = model.generate(tokens["input_ids"], max_length=50, num_return_sequences=NUM_QUESTIONS, temperature=0.9)

In [10]:
outputs.shape

torch.Size([300, 28])

In [11]:
decoded_outputs = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

In [12]:
questions = [
    "What is the main event or issue described in the article?",
    "Who are the primary individuals, groups, or entities involved in the story?",
    "Where does the event or issue take place, and how is the location relevant?",
    "When did the events described occur? Is there a timeline of key occurrences?",
    "Why is this event or issue significant? What are its implications or consequences?",
    "How does this event or issue relate to other recent events or broader trends?",
    "What sources are cited, and how credible are they?",
    "What are the differing viewpoints or opinions presented, if any?",
    "Are there any apparent biases in the article? If so, what might be the underlying motivations?",
    "What is the tone of the article? Is it purely informative, or is there an emotional aspect?",
    "What unanswered questions or ambiguities remain after reading the article?",
    "How might this article connect to your own knowledge or interests?",
    "What can be inferred about the future from this article? Are there any predictions or warnings?",
    "What can be inferred about the future from this article? Are there any predictions or warnings?",
    "Are there any supporting visuals such as images, charts, or graphs? What information do they convey?",
    "What are the key quotations or statements that encapsulate the article's main ideas?"
]

In [13]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# model_name = "deepset/roberta-base-squad2"
model_name = "deepset/roberta-large-squad2"

# a) Get predictions
nlp = pipeline('question-answering', model=model_name, tokenizer=model_name, device=0)

In [14]:
print(len(news_list))

100


In [16]:
news_list[0]



In [15]:
QA_inputs = []
for i in range (PAGESIZE*NUM_QUESTIONS):
    QA_inputs.append(
        {
            "question":decoded_outputs[i],
            "context":news_list[i//NUM_QUESTIONS]
        }
    )

In [21]:
QA_inputs_gpt = []
for news in news_list:
    for q in questions:
        QA_inputs_gpt.append(
            {
                "question":q,
                "context":news
            }
        )

In [18]:
res = nlp(QA_inputs)

In [19]:
print('question:', decoded_outputs[0])
print()
print('context:', news_list[0])
print()
print('answer:', res[0]['answer'])

question: what should we do if we find a new contractor is unsatisfactory?

context: When you’re ready to tackle a big home renovation or repair, finding a good contractor is a challenge . Getting one to even give you the time of day can be harder than you might think, i n part because of a shortage of skilled tradespeople, and in part because of the economics of construction—your dinky little half bath reno simply may not be profitable enough for an established, busy contractor. This might lead you to hire someone who is a bit less established, which in turn can lead to the nightmare scenario in which they ghost you after starting the work, leaving you with a mess.



If you skipped some of your due diligence in your rush to hire someone, it’s too late to go back and do that now. But there are steps you can take when a contractor vanishes.

Read your contract and s tart a paper trail

First things first: Read your contract. Unless you hired someone under the table and off the books, y

In [22]:
res_gpt = nlp(QA_inputs_gpt)

In [23]:
print('question:', questions[0])
print()
print('context:', news_list[0])
print()
print('answer:', res_gpt[0]['answer'])

question: What is the main event or issue described in the article?

context: When you’re ready to tackle a big home renovation or repair, finding a good contractor is a challenge . Getting one to even give you the time of day can be harder than you might think, i n part because of a shortage of skilled tradespeople, and in part because of the economics of construction—your dinky little half bath reno simply may not be profitable enough for an established, busy contractor. This might lead you to hire someone who is a bit less established, which in turn can lead to the nightmare scenario in which they ghost you after starting the work, leaving you with a mess.



If you skipped some of your due diligence in your rush to hire someone, it’s too late to go back and do that now. But there are steps you can take when a contractor vanishes.

Read your contract and s tart a paper trail

First things first: Read your contract. Unless you hired someone under the table and off the books, you have

In [28]:
print(len(questions))

16


In [31]:
data = {
    "instruction":[],
    "output":[]
}

for i in range (PAGESIZE*NUM_QUESTIONS):
    data["instruction"].append(decoded_outputs[i])
    data["output"].append(res[i]["answer"])

In [34]:
len_q = len(questions)
for i in range(len(res_gpt)):
    data['instruction'].append(questions[i%len_q])
    data['output'].append(res_gpt[i])

In [35]:
len(data['output'])

1900

In [33]:
questions[:2]

['What is the main event or issue described in the article?',
 'Who are the primary individuals, groups, or entities involved in the story?']