In [None]:
# !pip install -Uqq fastbook
# import fastbook
# fastbook.setup_book()

import urllib.request
from pathlib import Path

import ipywidgets
import pandas as pd
import requests
from bs4 import BeautifulSoup
from IPython.display import HTML
from ipywidgets import interact

# Kept last, as in the original, so any names the star import provides
# shadow the explicit imports above in exactly the same way as before.
from fastai.text.all import *


In [None]:
# Remote location of the exported classifier and the local file it is cached in.
MODEL_URL = "https://www.dropbox.com/s/wg7gaa9gz8lmfrm/classifier_2020-11-11_1807.pkl?dl=1"
MODEL_PATH = Path("classifier_2020-11-11_1807.pkl")

# Skip the transfer when the file is already present, so re-running the
# notebook does not download the model again.
if not MODEL_PATH.exists():
  # Download under a temporary name and rename afterwards: an interrupted
  # transfer then never leaves a truncated file under the final name.
  partial_path = MODEL_PATH.with_name(MODEL_PATH.name + ".part")
  urllib.request.urlretrieve(MODEL_URL, partial_path)
  partial_path.replace(MODEL_PATH)

In [4]:
# Load the exported learner from the file written by the download cell.
# NOTE(review): a .pkl is a pickle, and unpickling can execute arbitrary code.
# This file is fetched from a remote URL, so only run this against a source you trust.
classifier = load_learner("classifier_2020-11-11_1807.pkl")

In [5]:
def get_articles(query: str, country: str, timeout: float = 30.0):
  """Fetch up to 250 English-language articles from the last day via the GDELT DOC API.

  Args:
    query: Search expression placed at the start of the GDELT `query` parameter.
    country: Value for the `sourcecountry:` filter; the special value 'world'
      disables the country filter.
    timeout: Seconds to wait for the server before `requests` gives up.
      Without a timeout a stalled connection would block the notebook forever.

  Returns:
    The `requests.Response` for the request (the body is requested in RSS format).

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If the server does not respond within `timeout` seconds.
  """
  url = "https://api.gdeltproject.org/api/v2/doc/doc"
  # Assemble the query from its parts so the 'world' case does not leave a
  # stray double space where the country filter would have been.
  terms = [query]
  if country != 'world':
    terms.append('sourcecountry:{}'.format(country))
  terms.append('sourcelang:english')
  payload = {
      'query': ' '.join(terms),
      'mode': "ArtList",
      'format': 'RSS',
      'maxrecords': 250,
      'timespan': '1d'
  }
  r = requests.get(url, params=payload, timeout=timeout)
  # Fail loudly on HTTP errors instead of handing an error page to the parser.
  r.raise_for_status()
  return r

def parse_articles(r: requests.Response):
  """Turn an RSS response into a DataFrame of article headlines and links.

  Args:
    r: Response whose `.text` is parsed as XML; each `<item>` element is
      treated as one article.

  Returns:
    A DataFrame with the columns 'headline' and 'url', one row per `<item>`
    that has both a `<title>` and a `<link>`. The two columns are present
    even when no items are found, so callers can rely on them.
  """
  soup = BeautifulSoup(r.text, 'xml')
  data = [
      {'headline': item.title.text, 'url': item.link.text}
      for item in soup.find_all('item')
      # Skip incomplete items: accessing `.text` on a missing child would raise.
      if item.title is not None and item.link is not None
  ]
  # Explicit columns keep the schema stable for an empty feed.
  return pd.DataFrame(data, columns=['headline', 'url'])

def output_prediction_table(query: str, country: str):
  """Fetch articles, score their headlines with `classifier`, and render a linked table.

  Args:
    query: Search expression forwarded to `get_articles`.
    country: Country filter forwarded to `get_articles` ('world' for none).

  Returns:
    An `IPython.display.HTML` object. Normally it holds a table of
    (score, headline link) rows sorted by descending score, limited to rows
    whose rounded percentage score is above 0. When no articles come back it
    holds a short "no articles" message instead.
  """
  # Function-scope import keeps this block self-contained; aliased so it cannot
  # clash with any name provided by the notebook's star import.
  from html import escape as html_escape

  r = get_articles(query, country)
  df = parse_articles(r)
  if df.empty:
    # Nothing to score; also avoids grouping on a column that may not exist.
    return HTML('<p>No articles found.</p>')

  # De-duplicate on headline, keeping the first URL seen for each. The string
  # 'first' names the pandas aggregation explicitly; the original bare name
  # `first` is not defined in the visible code (presumably supplied by the star import).
  df = df.groupby('headline', as_index=False).agg('first')
  headlines = df.headline.to_list()
  test_dl = classifier.dls.test_dl(headlines)
  preds = classifier.get_preds(dl=test_dl)[0]
  # Index 1 of each prediction row is used as the score, then shown as a whole percentage.
  df['score'] = [float(p[1]) for p in preds]
  df['score'] = (df['score'] * 100).round().astype(int)
  df = df.sort_values('score', ascending=False)
  # .copy() so the column assignment below does not write into a slice view.
  df = df[df.score > 0].copy()

  def make_href(url: str, headline: str) -> str:
    # Headlines and URLs come from an external feed: escape both so that any
    # markup they contain is displayed as text rather than interpreted.
    return '<a href="{}">{}</a>'.format(
        html_escape(url, quote=True), html_escape(headline)
    )

  # Built with a comprehension rather than DataFrame.apply(axis=1), which does
  # not return a Series when the frame is empty after filtering.
  df['headline'] = [make_href(u, h) for u, h in zip(df.url, df.headline)]

  # escape=False emits the anchors as-is (their contents are already escaped
  # above), replacing the former blanket un-escaping of the rendered table.
  # max_colwidth=None stops pandas from truncating long cells, which would
  # otherwise cut the anchor tags short.
  with pd.option_context('display.max_colwidth', None):
    html_string = df[['score', 'headline']].to_html(index=False, escape=False)
  return HTML(html_string)

In [None]:
# Each update triggers an HTTP request plus model inference, so use text boxes
# with continuous_update=False: they fire only when the user presses Enter or
# leaves the field, not on every keystroke. The trailing `;` suppresses the
# repr of interact's return value.
interact(
    output_prediction_table,
    query=ipywidgets.Text(
        value="(covid OR coronavirus OR virus OR pandemic) (rules OR restrictions OR shutdown OR measures OR lockdown)",
        description="query",
        continuous_update=False,
    ),
    country=ipywidgets.Text(
        value="world",
        description="country",
        continuous_update=False,
    ),
);