# Parsing financial news headlines from finviz and News API

In [None]:
import pendulum

In [None]:
# Ticker symbol whose headlines are collected below.
s = "AAPL"

# Query window, expressed as the half-open interval [start_date, end_date):
# it begins at pendulum's "yesterday" in UTC and covers exactly one day.
date = pendulum.yesterday(tz="UTC")
start_date = date  # inclusive lower bound
end_date = date.add(days=1)  # exclusive upper bound

## finviz

In [None]:
import finviz
import numpy as np
import pandas as pd

In [None]:
# Fetch the finviz headlines for the symbol and normalise them into the
# common schema: Symbol, Datetime, Title, Description, Source, Author, URL.
news_fv = finviz.get_news(s)

# Naming the columns in the constructor (rather than assigning `.columns`
# afterwards) keeps this cell working when finviz returns no rows: an empty
# list then yields an empty four-column frame instead of a length-mismatch
# ValueError.
news_fv_df = pd.DataFrame(news_fv, columns=["Datetime", "Title", "URL", "Source"])

# The raw timestamps are treated as US/Eastern wall-clock time and converted
# to UTC so they can be compared with the UTC query window.
news_fv_df["Datetime"] = (
    pd.to_datetime(news_fv_df["Datetime"])
    .dt.tz_localize("US/Eastern")
    .dt.tz_convert("UTC")
)

# This cell has no description or author to offer, so both columns are filled
# with NA placeholders purely to complete the common schema.
news_fv_df = news_fv_df.assign(Symbol=s, Description=pd.NA, Author=pd.NA)

# Keep only rows inside [start_date, end_date), put the columns in schema
# order, and sort chronologically with a fresh 0..n-1 index.
in_window = (news_fv_df["Datetime"] >= start_date) & (news_fv_df["Datetime"] < end_date)
news_fv_df = (
    news_fv_df.loc[
        in_window,
        ["Symbol", "Datetime", "Title", "Description", "Source", "Author", "URL"],
    ]
    .sort_values(by="Datetime", ascending=True)
    .reset_index(drop=True)
)

In [None]:
# Preview the earliest finviz headlines (the frame is sorted ascending by Datetime).
news_fv_df.head()

In [None]:
# Preview the latest finviz headlines (the frame is sorted ascending by Datetime).
news_fv_df.tail()

In [None]:
# Inspect the column dtypes of the finviz frame (e.g. to confirm Datetime is tz-aware UTC).
news_fv_df.dtypes

## News API

In [None]:
import os

from newsapi import NewsApiClient
from dotenv import load_dotenv

In [None]:
# Load environment variables via python-dotenv, then build the News API
# client from the NEWS_API_TOKEN variable (None if it is not set).
load_dotenv()
api = NewsApiClient(api_key=os.getenv("NEWS_API_TOKEN"))

In [None]:
# end_date is exclusive, but (per the original note) the API includes the
# `to` date in its response, so step back one day before formatting it.
last_day = end_date - pendulum.duration(days=1)
news_api = api.get_everything(
    q=s,
    from_param=start_date.to_date_string(),
    to=last_day.to_date_string(),
    language="en",
)

In [None]:
# Flatten the News API response into one tuple per article, ordered to match
# the finviz frame: Symbol, Datetime, Title, Description, Source, Author, URL.
news_api_list = [
    (
        s,
        article["publishedAt"],
        article["title"],
        article["description"],
        article["source"]["name"],
        article["author"],
        article["url"],
    )
    for article in news_api["articles"]
]
news_api_df = pd.DataFrame(
    news_api_list,
    columns=["Symbol", "Datetime", "Title", "Description", "Source", "Author", "URL"],
)

# utc=True pins the column to a tz-aware UTC dtype even when the response
# holds no articles; without it an empty column parses to a naive datetime
# dtype and no longer matches the finviz frame's Datetime column.
news_api_df["Datetime"] = pd.to_datetime(news_api_df["Datetime"], utc=True)

# Sort chronologically and renumber the rows 0..n-1.
news_api_df = news_api_df.sort_values(by="Datetime", ascending=True).reset_index(drop=True)

In [None]:
# Preview the earliest News API headlines (the frame is sorted ascending by Datetime).
news_api_df.head()

In [None]:
# Preview the latest News API headlines (the frame is sorted ascending by Datetime).
news_api_df.tail()

In [None]:
# Element-wise dtype comparison between the two frames; every entry should be
# True if both sources were normalised to the same column types.
news_api_df.dtypes == news_fv_df.dtypes