In [1]:
#Import needed modules
import os
import sys

from transformers import pipeline, AutoTokenizer
from dotenv import load_dotenv
from newsapi import NewsApiClient


In [2]:
#Load Environment Variables
load_dotenv()


#Get API Key
news_api_key = os.environ.get('NewsAPIKey')

print(news_api_key)

cb6c8cf55b814822b32f5b3ca6cc9fc3


In [15]:
#Initialize News API
newsapi = NewsApiClient(api_key=news_api_key)

#Get News Headlines
top_headlines = newsapi.get_top_headlines(language='en', country='us')

#Get News Articles
articles = top_headlines['articles']

#Get Article Titles
titles = [article['title'] for article in articles]

#Get Article Descriptions
descriptions = [article['description'] for article in articles]

#Get Article URLs
urls = [article['url'] for article in articles]

#Get Article Content
content = [article['content'] for article in articles]

#Get Article Sources
sources = [article['source']['name'] for article in articles]

#Get Article Published Dates
published_dates = [article['publishedAt'] for article in articles]

#Get Article Authors
authors = [article['author'] for article in articles]

#Get Article Images
images = [article['urlToImage'] for article in articles]

#Get Article Sentiments
nlp = pipeline('sentiment-analysis')
sentiments = [nlp(article['title'])[0] for article in articles]

#Get Article Keywords
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
keywords = [tokenizer.tokenize(article['title']) for article in articles]

#Print Article Titles as a formatted list with numbers and content
print("Articles:")
for i in range(len(titles)):
    print(f"{i+1}. {titles[i]}")
    print(f"Content: {content[i]}")
    print(f"Sentiment: {sentiments[i]}")
    print(f"Keywords: {keywords[i]}")
    print(f"Description: {descriptions[i]}")
    print(f"URL: {urls[i]}")
    print(f"Source: {sources[i]}")
    print(f"Published Date: {published_dates[i]}")
    print(f"Author: {authors[i]}")
    print(f"Image: {images[i]}")
    print("\n")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Articles:
1. Mayorkas impeachment trial delayed until next week - The Washington Post
Content: House Speaker Mike Johnson (R-La.) delayed until Monday the impeachment trial of the first sitting Cabinet member in history, responding to the concern of some conservatives worried about launching a… [+9680 chars]
Sentiment: {'label': 'NEGATIVE', 'score': 0.9400644898414612}
Keywords: ['mayor', '##kas', 'imp', '##ea', '##chment', 'trial', 'delayed', 'until', 'next', 'week', '-', 'the', 'washington', 'post']
Description: Conservative senators balked at starting an impeachment trial on the day the upper chamber planned to leave town.
URL: https://www.washingtonpost.com/nation/2024/04/09/mayorkas-impeachment-senate-trial-immigration-republicans/
Source: The Washington Post
Published Date: 2024-04-10T00:34:43Z
Author: Jacqueline Alemany, Leigh Ann Caldwell, Liz Goodwin
Image: https://www.washingtonpost.com/wp-apps/imrs.php?src=https://arc-anglerfish-washpost-prod-washpost.s3.amazonaws.com/public

In [21]:
#Get Japanese News Headlines
top_headlines = newsapi.get_top_headlines( country='jp')

#Get Spanish News Articles
articles = top_headlines['articles']

#Get Spanish Article Titles
titles = [article['title'] for article in articles]

#Get Spanish Article Descriptions
descriptions = [article['description'] for article in articles]

#Get Spanish Article URLs
urls = [article['url'] for article in articles]

#Get Spanish Article Content
content = [article['content'] for article in articles]

#Get Spanish Article Sources
sources = [article['source']['name'] for article in articles]

#Get Spanish Article Published Dates
published_dates = [article['publishedAt'] for article in articles]

#Get Spanish Article Authors
authors = [article['author'] for article in articles]

#Get Spanish Article Images
images = [article['urlToImage'] for article in articles]

#Get Spanish Article Sentiments
nlp = pipeline('sentiment-analysis')
sentiments = [nlp(article['title'])[0] for article in articles]

#Get Spanish Article Keywords
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
keywords = [tokenizer.tokenize(article['title']) for article in articles]

#Print Spanish Article Titles as a formatted list with numbers and content
print("Japanese Articles:")
for i in range(len(titles)):
    print(f"{i+1}. {titles[i]}")
    print(f"Content: {content[i]}")
    print(f"Sentiment: {sentiments[i]}")
    print(f"Keywords: {keywords[i]}")
    print(f"Description: {descriptions[i]}")
    print(f"URL: {urls[i]}")
    print(f"Source: {sources[i]}")
    print(f"Published Date: {published_dates[i]}")
    print(f"Author: {authors[i]}")
    print(f"Image: {images[i]}")
    print("\n")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Spanish Articles:
1. なでしこジャパン、ブラジルにPK戦で敗戦…アメリカ遠征は勝利なしで終了 - Goal.com
Content: None
Sentiment: {'label': 'NEGATIVE', 'score': 0.9071683287620544}
Keywords: ['な', '##て', '##し', '##こ', '##シ', '##ャ', '##ハ', '##ン', '、', 'フ', '##ラ', '##シ', '##ル', '##に', '##p', '##k', '戦', 'て', '[UNK]', '戦', '…', 'ア', '##メ', '##リ', '##カ', '[UNK]', '[UNK]', 'は', '勝', '[UNK]', 'な', '##し', '##て', '[UNK]', '[UNK]', '-', 'goal', '.', 'com']
Description: None
URL: https://www.sportingnews.com/ca/soccer
Source: Sporting News
Published Date: 2024-04-09T22:17:00Z
Author: None
Image: None


