In [24]:
import pandas as pd
import requests
from keys import API_KEY # my api key is stored in a separate file

In [103]:
# get all articles about music from the last 30 days
url = ('https://newsapi.org/v2/everything?'
       'q=music&'
       'from=2023-04-09&'
       'sortBy=popularity&'
       'apiKey=' + API_KEY)
response = requests.get(url)
json_response = response.json()
articles = json_response['articles']

In [104]:
# convert to dataframe
df = pd.DataFrame(articles)
df.head()

In [106]:
# convert source column to just the name of the source
if 'source' in df.columns:
    df['source'] = df['source'].apply(lambda x: x['name'])
df.head()

In [113]:
# clean the title column
import re

def clean(title):
    return re.sub(r'[^a-zA-Z0-9 ]', '', title)

df['title'] = df['title'].apply(clean)
df.head()

In [114]:
from sklearn.feature_extraction.text import TfidfVectorizer

# create tfidf vectorized matrix
vectMatrix = TfidfVectorizer(ngram_range=(1, 2))
tfidf = vectMatrix.fit_transform(df['title'])

In [117]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title):
    title = clean(title)
    title_vect = vectMatrix.transform([title])
    similarity = cosine_similarity(title_vect, tfidf).flatten()
    top5 = np.argsort(similarity)[-5:][::-1]
    results = df.iloc[top5]
    return results

In [116]:
# test it out and display results
import ipywidgets as widgets
from IPython.display import display

article_input = widgets.Text(
    value='Youtube music',
    description='Article:',
    disabled=False
)

article_list = widgets.Output()

def on_type(change):
    article_list.clear_output()
    with article_list:
        title = change['new']
        if len(title) > 5:
            display(search(title))

article_input.observe(on_type, names='value')

display(article_input, article_list)

In [111]:
# trying the second api
import eventregistry as er
evr = er.EventRegistry(apiKey='f931c431-041e-4b72-8619-a16e97fa86d0')
q = er.QueryArticlesIter(
    keywords='politics',
    keywordsLoc='title',
    ignoreKeywords='coronavirus'
)
articles = []
for article in q.execQuery(evr, sortBy='date', maxItems=100):
    articles.append(article)

In [112]:
articles

In [101]:
# convert to dataframe
df = pd.DataFrame(articles)
df.head()

Unnamed: 0,uri,lang,isDuplicate,date,time,dateTime,dateTimePub,dataType,sim,url,title,body,source,authors,image,eventUri,sentiment,wgt,relevance
0,7536641921,eng,True,2023-05-10,01:34:00,2023-05-10T01:34:00Z,2023-05-10T00:01:00Z,news,0.0,https://www.devdiscourse.com/article/politics/...,Treasury debt advisers warn of 'seismic' impac...,Wall Street executives who have advised the U....,"{'uri': 'devdiscourse.com', 'dataType': 'news'...",[],https://www.devdiscourse.com/remote.axd?https:...,,-0.003922,421378440,1
1,7536605774,eng,False,2023-05-10,01:34:00,2023-05-10T01:34:00Z,2023-05-09T23:13:00Z,news,0.0,https://100percentfedup.com/northwestern-unive...,Northwestern University Student Government Pul...,Northwestern University's student government h...,"{'uri': '100percentfedup.com', 'dataType': 'ne...","[{'uri': 'amber_crawford@100percentfedup.com',...",https://100percentfedup.com/wp-content/uploads...,,-0.294118,421378440,1
2,7536594447,eng,False,2023-05-10,01:15:00,2023-05-10T01:15:00Z,2023-05-09T23:05:00Z,news,0.0,https://www.haaretz.com/opinion/editorial/2023...,Editorial | Gaza assassinations were all about...,"A week ago, administrative detainee Khader Adn...","{'uri': 'haaretz.com', 'dataType': 'news', 'ti...","[{'uri': 'haaretz_editorial@haaretz.com', 'nam...",https://img.haarets.co.il/bs/00000188-006b-dc7...,,-0.466667,421377300,1
3,7535165867,eng,False,2023-05-10,01:12:00,2023-05-10T01:12:00Z,2023-05-09T01:40:00Z,news,0.0,https://www.chinadaily.com.cn/a/202305/09/WS64...,US politics no more normal: Washington Post,NEW YORK -- There's a powerful case that at th...,"{'uri': 'chinadaily.com.cn', 'dataType': 'news...",[],http://img2.chinadaily.com.cn/images/202305/09...,,-0.2,421377120,1
4,7536617569,eng,False,2023-05-10,01:06:00,2023-05-10T01:06:00Z,2023-05-09T23:38:00Z,news,0.0,https://nampa.org/index.php?model=categories&f...,US-politics-economy-debt-budget-Biden-diplomacy,URGENT Biden says may call off Asia trip if no...,"{'uri': 'nampa.org', 'dataType': 'news', 'titl...",[],,,-0.145098,421376760,1


In [102]:
if 'source' in df.columns:
    df['source'] = df['source'].apply(lambda x: x['uri'])


if 'authors' in df.columns:
    df['author'] = df['authors'].apply(lambda x: x[0]['name'] if len(x) == 1 else None)
    df['author_link'] = df['authors'].apply(lambda x: x[0]['uri'] if len(x) == 1 else None)
    df.drop(columns=['authors'], inplace=True)
# clean the title column
df['title'] = df['title'].apply(clean)
df.head()

Unnamed: 0,uri,lang,isDuplicate,date,time,dateTime,dateTimePub,dataType,sim,url,title,body,source,image,eventUri,sentiment,wgt,relevance,author,author_link
0,7536641921,eng,True,2023-05-10,01:34:00,2023-05-10T01:34:00Z,2023-05-10T00:01:00Z,news,0.0,https://www.devdiscourse.com/article/politics/...,Treasury debt advisers warn of seismic impact ...,Wall Street executives who have advised the U....,devdiscourse.com,https://www.devdiscourse.com/remote.axd?https:...,,-0.003922,421378440,1,,
1,7536605774,eng,False,2023-05-10,01:34:00,2023-05-10T01:34:00Z,2023-05-09T23:13:00Z,news,0.0,https://100percentfedup.com/northwestern-unive...,Northwestern University Student Government Pul...,Northwestern University's student government h...,100percentfedup.com,https://100percentfedup.com/wp-content/uploads...,,-0.294118,421378440,1,Amber Crawford,amber_crawford@100percentfedup.com
2,7536594447,eng,False,2023-05-10,01:15:00,2023-05-10T01:15:00Z,2023-05-09T23:05:00Z,news,0.0,https://www.haaretz.com/opinion/editorial/2023...,Editorial Gaza assassinations were all about ...,"A week ago, administrative detainee Khader Adn...",haaretz.com,https://img.haarets.co.il/bs/00000188-006b-dc7...,,-0.466667,421377300,1,Haaretz Editorial,haaretz_editorial@haaretz.com
3,7535165867,eng,False,2023-05-10,01:12:00,2023-05-10T01:12:00Z,2023-05-09T01:40:00Z,news,0.0,https://www.chinadaily.com.cn/a/202305/09/WS64...,US politics no more normal Washington Post,NEW YORK -- There's a powerful case that at th...,chinadaily.com.cn,http://img2.chinadaily.com.cn/images/202305/09...,,-0.2,421377120,1,,
4,7536617569,eng,False,2023-05-10,01:06:00,2023-05-10T01:06:00Z,2023-05-09T23:38:00Z,news,0.0,https://nampa.org/index.php?model=categories&f...,USpoliticseconomydebtbudgetBidendiplomacy,URGENT Biden says may call off Asia trip if no...,nampa.org,,,-0.145098,421376760,1,,


In [100]:
#

1