## Load News
Load news from News-API. Remember to set the environment variable `NEWSAPI_KEY` first; you can get a key from https://newsapi.org/.

In [3]:
from wi_news.algorithms.get_news import get_news


# Query the news helper for the 'Hong Kong' search term and keep only the
# 'articles' entry of what it returns.
news_response = get_news('Hong Kong')
articles = news_response['articles']

# Preview the first ten articles as the cell output.
articles[:10]

[{'source': {'id': 'bbc-news', 'name': 'BBC News'},
  'author': 'https://www.facebook.com/bbcnews',
  'title': 'Hong Kong passes tough security law',
  'description': 'Authorities say Article 23 is necessary for stability, but critics fear it will further erode civil liberties.',
  'url': 'https://www.bbc.co.uk/news/world-asia-china-68594448',
  'urlToImage': 'https://ichef.bbci.co.uk/news/1024/branded_news/0E83/production/_115651730_breaking-promo-976.png',
  'publishedAt': '2024-03-19T11:03:03Z',
  'content': 'Hong Kong has passed a tough security law which authorities say is necessary for stability, but which critics fear will further erode civil liberties.\r\nArticle 23 targets new offences like external i… [+3973 chars]'},
 {'source': {'id': 'bbc-news', 'name': 'BBC News'},
  'author': 'https://www.facebook.com/bbcnews',
  'title': "Hong Kong’s new law is 'final nail in coffin' - critics",
  'description': "Beijing and Hong Kong defend the controversial law, saying it is necessary

## Vectorization

In [9]:
from wi_news.algorithms.vectorization import preprocess


# Run every loaded article through `preprocess`, keeping the original order.
data = list(map(preprocess, articles))

# Show the preprocessed form of the first article as the cell output.
data[0]

(array([ 5.31070195e-02,  9.89044532e-02,  5.41169122e-02,  9.94805396e-02,
        -3.79125308e-03,  3.90521111e-03,  3.05063967e-02, -1.59596913e-02,
        -7.08721131e-02, -5.73532237e-03,  9.19580385e-02, -1.17650367e-01,
        -1.08660422e-02, -1.06587689e-02,  1.12925395e-02, -1.91974249e-02,
         1.24648947e-03, -3.38339284e-02, -4.63185199e-02, -7.69830942e-02,
        -3.78220379e-02, -3.17372009e-03, -8.09480157e-03,  4.74415310e-02,
        -4.04129028e-02,  2.33878996e-02,  3.65331699e-03,  1.56141107e-03,
         4.64712568e-02, -2.98814327e-02, -6.48129955e-02,  4.97444570e-02,
         5.72109297e-02,  6.95987418e-02,  7.18266936e-03,  3.18246856e-02,
        -1.70804951e-02, -1.46926576e-02,  6.72563612e-02,  1.43014621e-02,
         5.12931608e-02, -5.05573489e-02,  7.29314983e-02, -5.71735725e-02,
         5.12935445e-02,  1.68376435e-02,  3.42354476e-02,  6.42575789e-03,
        -3.87048237e-02,  6.44187257e-02, -3.45614180e-02,  5.07570021e-02,
        -5.4

## Hashtags Extraction

Extract hashtags via TF-IDF.

In [8]:
from wi_news.algorithms.hashtags_extraction import extract_hashtags


# Ask for five hashtags per preprocessed article.
hashtags = extract_hashtags(
    data,
    n_hashtags=5,
)

# Preview the hashtags of the first ten articles as the cell output.
hashtags[:10]

array([['article_23', 'law', 'hong_kong', 'world', 'fashion'],
       ['hong_kong', 'city', 'law', 'world', 'filmart'],
       ['company', 'last_year', 'world', 'chinese', 'fashion'],
       ['apple', 'day', 'one', 'china', 'world'],
       ['government', 'hong_kong', 'article_23', 'national_security',
        'city'],
       ['legislature', 'people', 'hong_kong', 'kong', 'hong'],
       ['chinese', 'government', 'world', 'fashion', 'exhibition'],
       ['world', 'this_week', 'china', 'fashion', 'exhibition'],
       ['content', 'hong_kong', 'world', 'chinese', 'fashion'],
       ['world', 'hong_kong', 'chinese', 'fashion', 'exhibition']],
      dtype='<U21')