In [1]:
import pandas as pd
import numpy as np

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

pd.set_option('display.max_rows', 10000)

In [2]:
from datetime import datetime
from functools import reduce

def news_dataframe(org):
    org_news = pd.read_json("../data/raw/news/{}_historical.json".format(org))
    headline_arr = []
    date_arr = []
    summary_arr = []
    
    for month in range(org_news["headline"].count()):
        for headline in org_news["headline"][month]:
            headline_arr.append(headline)
        for date in org_news["date"][month]:
            date_arr.append(date)
        if "summary" in org_news:
            for summary in org_news["summary"][month]:
                summary_arr.append(summary)
    if summary_arr:
        org_df = pd.DataFrame({"Time": date_arr, "Headline": headline_arr, "Summary": summary_arr})
    else:
        org_df = pd.DataFrame({"Time": date_arr, "Headline": headline_arr,})
    org_df["Time"] = org_df["Time"].transform(lambda time : datetime.utcfromtimestamp(time).strftime("%Y-%m-%d %H:%M:%S"))
    org_df.sort_values(by=["Time"], inplace=True)
    org_df = org_df.reset_index(drop=True)
    return org_df

def news_merge(news):
    if len(news) == 0:
        return
    elif len(news) == 1:
        return news[0]
    else:
        merged_news = reduce(lambda left, right : pd.merge(left, right, how="outer", on=["Time", "Headline"]), news)     
        merged_news.sort_values(by=["Time"], inplace=True)
        merged_news = merged_news.reset_index(drop=True)
        return merged_news

def generate_sentiment_score(news):
    sid = SentimentIntensityAnalyzer()
    score = []
    for headline, summary in zip(news.Headline, news.Summary):
        if pd.isna(summary):
            text = headline
            score.append(sid.polarity_scores(text)["compound"])
        else:
            text = headline + summary
            score.append(sid.polarity_scores(text)["compound"])
    news_score = pd.DataFrame({"News_Sentiment": score})
    news["News_Sentiment"] = news_score
    return news
        
reuters = news_dataframe("reuters")
daily_fx = news_dataframe("dailyfx")
forex_live = news_dataframe("forexlive")
news = news_merge([reuters, daily_fx, forex_live])
news = generate_sentiment_score(news)

In [3]:
news

Unnamed: 0,Time,Headline,Summary,News_Sentiment
0,2018-01-02 02:23:00,Australian Dollar Steady Despite China PMI Bea...,,0.0000
1,2018-01-02 04:10:00,ASX 200 Technical Analyis: Escape Velocity Fro...,,0.1779
2,2018-01-02 06:47:00,"Asian Stocks Mostly Higher, US Dollar Mired Ag...",,0.0000
3,2018-01-02 09:03:00,Will The Bank of England Offer a Bitcoin-Style...,,0.0000
4,2018-01-02 09:55:00,GBP Falls as UK Manufacturing Misses Lofty Exp...,,-0.2263
...,...,...,...,...
23953,2020-12-31 13:24:00,Rand Dollar Forecast: USD/ZAR Dances Sideways ...,,0.0000
23954,2020-12-31 16:00:00,US Dollar 1Q 2021 Forecast: Safe Haven Status ...,,0.6705
23955,2020-12-31 17:23:00,EUR/GBP IG Client Sentiment: Our data shows tr...,,0.0000
23956,2020-12-31 18:00:00,Euro Technical Forecast 1Q 2021: EUR/USD Pullb...,,0.0000


In [4]:
def currency_sentiment(currencies_dict):
    country_df = pd.DataFrame()
    for currency in currencies_dict:
        for entity in currencies_dict[currency]["positive"]:
            news_lower = news["Headline"].transform(lambda headline : headline.lower())
            currency_df = news[news_lower.str.contains(entity)]
            currency_df = currency_df[{"Time", "News_Sentiment"}]
            currency_df = currency_df.rename(columns={"News_Sentiment": currency.upper()})
            if country_df.empty:
                country_df = currency_df
            elif not currency.upper() in country_df.columns:
                country_df = country_df.merge(currency_df, how="outer", on="Time")
            else:
                country_df = country_df.merge(currency_df, how="outer", on=["Time", currency.upper()])
            print(currency, currency_df["Time"].count())
        for entity in currencies_dict[currency]["negative"]:
            news_lower = news["Headline"].transform(lambda headline : headline.lower())
            currency_df = news[news_lower.str.contains(entity)]
            currency_df = currency_df[{"Time", "News_Sentiment"}]
            if not currency_df["News_Sentiment"].empty:
                currency_df["News_Sentiment"] = currency_df["News_Sentiment"].transform(lambda score : -score)
            currency_df = currency_df.rename(columns={"News_Sentiment": currency.upper()})
            if country_df.empty:
                country_df = currency_df
            elif not currency.upper() in country_df.columns:
                country_df = country_df.merge(currency_df, how="outer", on="Time")
            else:
                country_df = country_df.merge(currency_df, how="outer", on=["Time", currency.upper()])
            print(currency, currency_df["Time"].count())
                
    time_frame = pd.date_range(start="2018-01-01 22:00:00", freq="1T", end="2020-12-31 21:59:00")
    time_frame = pd.DataFrame(time_frame, columns=["Time"])
    time_frame["Time"] = time_frame["Time"].dt.strftime("%Y-%m-%d %H:%M")
    
    country_df = time_frame.merge(country_df, how="outer", on="Time")
    country_df = country_df.fillna(0)
    
    return country_df

currencies = {
              "usd": {"positive": ["usd/", "u.s.", "greenback", "buck", "barnie", "america", "united states"], "negative": ["/usd", "cable"]},
              "aud": {"positive": ["aud/", "gold", "aussie", "australia"], "negative": ["/aud"]}, 
              "gbp": {"positive": ["gbp/", "sterling", "pound", "u.k.", "united kingdom", "cable", "guppy"], "negative": ["/gbp"]},
              "nzd": {"positive": ["nzd/", "gold", "kiwi", "new zealand"], "negative": ["/nzd"]},
              "cad": {"positive": ["cad/", "oil", "loonie", "canada"], "negative": ["/cad"]},
              "chf": {"positive": ["chf/", "swiss"], "negative": ["/chf"]},
              "jpy": {"positive": ["jpy/", "asian", "japan"], "negative": ["/jpy", "guppy"]},
              "eur": {"positive": ["eur/", "fiber", "euro"], "negative": ["/eur"]}
             }
currency_sentiment = currency_sentiment(currencies)

usd 2121
usd 2097
usd 6
usd 9
usd 16
usd 8
usd 0
usd 4127
usd 80
aud 1225
aud 2490
aud 232
aud 768
aud 66
gbp 1280
gbp 1204
gbp 1010
gbp 245
gbp 0
gbp 80
gbp 1
gbp 268
nzd 519
nzd 2490
nzd 76
nzd 203
nzd 131
cad 80
cad 1913
cad 97
cad 130
cad 986
chf 10
chf 101
chf 194
jpy 0
jpy 289
jpy 494
jpy 1234
jpy 1
eur 2184
eur 0
eur 1810
eur 0


In [9]:
np.count_nonzero(currency_sentiment["EUR"])

2299

In [6]:
currency_sentiment.to_csv("../data/processed/news/news_sentiment.csv")

In [7]:
count = 0
for headline in news["Headline"]:
    if "" in headline.lower():
        count += 1
print(count)

23958
