In [65]:
from datetime import timedelta
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

def fetch_news():
    """Scrape the high-impact stories listed on the ForexFactory news page.

    For every story tile flagged as high impact, the linked article page is
    downloaded and the text of its story container is stored alongside the
    tile's time label and title.

    Returns:
        pandas.DataFrame: columns ``Time``, ``Title``, ``Impact``, ``Content``
        and ``TimeDelta`` (``Time`` passed through ``convert_to_timedelta``),
        sorted by ``TimeDelta`` in descending order. The columns are present
        even when no story is found.

    Raises:
        requests.HTTPError: if the listing page answers with an error status.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0'}

    base_url = 'https://www.forexfactory.com/'
    listing_url = urljoin(base_url, 'news/')

    news_high = []

    # The context manager closes the pooled connections once scraping is done.
    with requests.Session() as session:
        r = session.get(listing_url, timeout=30, headers=headers)
        # Fail loudly instead of silently parsing an error page into an empty table.
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')

        # High impacts
        for news_item in soup.select('.flexposts__item.flexposts__story.flexposts__story--high'):
            time_elem = news_item.select_one('.flexposts__nowrap.flexposts__time')
            title_elem = news_item.select_one('.flexposts__story-title')

            # First anchor inside the tile that carries an href attribute.
            link = news_item.find('a', {'href': True})

            content = ''
            if link is not None:
                # urljoin copes with root-relative ('/news/...') and absolute
                # hrefs alike, avoiding the doubled slash of plain concatenation.
                url_news = urljoin(base_url, str(link['href']))
                n = session.get(url_news, timeout=30, headers=headers)
                soup_news = BeautifulSoup(n.content, 'html.parser')
                story_elem = soup_news.select_one('.flexBox.noflex.news__story')
                # An article page without the story container yields empty
                # content rather than an AttributeError on None.
                if story_elem is not None:
                    content = story_elem.text

            # Append all the information as a dictionary to news_high
            news_high.append({
                'Time': time_elem.text if time_elem is not None else '',
                'Title': title_elem.text if title_elem is not None else '',
                'Impact': 'high',
                'Content': content,
            })

    # Explicit columns keep the frame usable when no story was scraped.
    df = pd.DataFrame(news_high, columns=['Time', 'Title', 'Impact', 'Content'])

    df['TimeDelta'] = df['Time'].apply(convert_to_timedelta)

    df = df.sort_values(by='TimeDelta', ascending=False)

    return df

def fetch_news_calendar(day='sep15.2023'):
    """Scrape the high-impact events of one day from the ForexFactory calendar.

    Args:
        day: value sent as the ``day`` query parameter of the calendar page.
            The default reproduces the previously hard-coded URL.

    Returns:
        pandas.DataFrame: one row per high-impact calendar row, with the
        columns ``Time``, ``Title``, ``Currency``, ``Impact``, ``Actual``,
        ``Previous`` and ``Forecast``. Cell values are the raw cell text
        (``''`` when a cell is missing). The columns are present even when
        no row is found.

    Raises:
        requests.HTTPError: if the calendar page answers with an error status.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0'}
    u = 'https://www.forexfactory.com/calendar'

    # The context manager closes the pooled connection after the download.
    with requests.Session() as session:
        r = session.get(u, params={'day': day}, timeout=30, headers=headers)
        # Fail loudly instead of parsing an error page into an empty table.
        r.raise_for_status()

    soup = BeautifulSoup(r.content, 'html.parser')

    def cell_text(row, suffix):
        """Return the text of the row's ``calendar__<suffix>`` cell, or ''."""
        cell = row.select_one('.calendar__cell.calendar__' + suffix)
        return cell.text if cell is not None else ''

    events_cal = []

    for events in soup.select('.calendar__row'):
        impact_elem = events.select_one('.calendar__cell.calendar__impact')
        impact_span = impact_elem.find('span') if impact_elem is not None else None
        impact_classes = (impact_span.get('class') if impact_span is not None else None) or []
        # Membership test rather than exact list equality, so an extra or
        # reordered CSS class on the icon does not hide a high-impact event.
        impact = 'high' if 'icon--ff-impact-red' in impact_classes else ''

        events_cal.append({'Time': cell_text(events, 'time'),
                           'Title': cell_text(events, 'event'),
                           'Currency': cell_text(events, 'currency'),
                           'Impact': impact,
                           'Actual': cell_text(events, 'actual'),
                           'Previous': cell_text(events, 'previous'),
                           'Forecast': cell_text(events, 'forecast')})

    # Explicit columns keep the filter below valid when no row was scraped.
    df = pd.DataFrame(events_cal, columns=['Time', 'Title', 'Currency', 'Impact',
                                           'Actual', 'Previous', 'Forecast'])
    df = df[df['Impact'] == 'high']

    return df

def convert_to_timedelta(s):
    """Convert a relative-age label such as ``'2 hr ago'`` into a timedelta.

    The first whitespace-separated token is read as an integer amount and the
    second as its unit. Units are matched case-insensitively and a plural
    ``s`` is ignored.

    Args:
        s: label text, e.g. ``'45 min ago'`` or ``'3 days ago'``.

    Returns:
        datetime.timedelta for a recognised label, otherwise ``None``
        (non-string input, fewer than two tokens, non-integer amount or
        unknown unit).
    """
    unit_keywords = {
        'sec': 'seconds', 'second': 'seconds',
        'min': 'minutes', 'minute': 'minutes',
        'hr': 'hours', 'hour': 'hours',
        'day': 'days',
        'wk': 'weeks', 'week': 'weeks',
    }

    if not isinstance(s, str):
        return None

    parts = s.split()
    if len(parts) < 2:
        return None

    try:
        num = int(parts[0])
    except ValueError:
        return None

    unit = parts[1].lower().rstrip('.')
    # Accept both the singular and the plural spelling ('hr' / 'hrs').
    keyword = unit_keywords.get(unit) or unit_keywords.get(unit.rstrip('s'))
    if keyword is None:
        return None

    return timedelta(**{keyword: num})

In [66]:
# Currency codes of interest.
# NOTE(review): nothing visible in this notebook reads SYMBOLS — confirm it is still needed.
SYMBOLS = ['USD', 'EUR', 'GBP']


# Multipliers for the unit suffix that may trail a calendar figure.
# '%' does not rescale the number; the letters are thousand/million/billion/trillion.
_UNIT_MULTIPLIERS = {'%': 1.0, 'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12}

# Events for which a reading above forecast produces a negative signal.
_INVERTED_EVENTS = ('Unemployment Claims', 'Unemployment Rate')

# Supported pairs mapped to (base currency, quote currency).
_PAIR_CURRENCIES = {'EURUSD': ('EUR', 'USD'), 'GBPUSD': ('GBP', 'USD')}


def _parse_calendar_value(text):
    """Parse a calendar figure such as '0.3%', '215K' or '1.9'; None if not numeric."""
    cleaned = str(text).strip().replace(',', '')
    if not cleaned:
        return None

    multiplier = 1.0
    suffix = cleaned[-1].upper()
    if suffix in _UNIT_MULTIPLIERS:
        # Only a recognised suffix is removed, so an unsuffixed figure keeps
        # its last digit ('1.9' stays 1.9 instead of becoming 1.0).
        multiplier = _UNIT_MULTIPLIERS[suffix]
        cleaned = cleaned[:-1].strip()

    try:
        return float(cleaned) * multiplier
    except ValueError:
        return None


def NewsSignal(symbol, calendar=None):
    """Derive a trading signal per high-impact calendar event for a pair.

    Each event of the pair's two currencies gets ``1`` when the actual figure
    beats the forecast, ``-1`` when it misses and ``0`` otherwise. The sign is
    flipped for the unemployment events listed in ``_INVERTED_EVENTS`` and
    flipped again when the event belongs to the quote currency rather than
    the base currency. Events whose actual or forecast figure is missing or
    not numeric get ``0``.

    Args:
        symbol: ``'EURUSD'`` or ``'GBPUSD'``.
        calendar: optional DataFrame with at least the columns ``Title``,
            ``Currency``, ``Actual`` and ``Forecast``. When omitted the
            calendar is obtained from ``fetch_news_calendar()``. The frame
            passed in is not modified.

    Returns:
        pandas.DataFrame: the rows for the pair's currencies with an added
        ``signal`` column, or ``None`` for an unsupported symbol.
    """
    pair = _PAIR_CURRENCIES.get(symbol)
    if pair is None:
        # Checked before any download so an unsupported symbol costs nothing.
        return None
    currency_chosen, quote_currency = pair

    if calendar is None:
        calendar = fetch_news_calendar()

    # Re-number first, filter second: the result keeps each event's position
    # within the full calendar as its index.
    cal = calendar.reset_index(drop=True)
    currencies = cal['Currency'].astype(str).str.strip()
    keep = currencies.isin([currency_chosen, quote_currency])
    # Copy so adding the column below never writes into a view of the input.
    cal = cal[keep].copy()
    currencies = currencies[keep]

    signal = []
    for title, currency, actual_text, forecast_text in zip(
            cal['Title'], currencies, cal['Actual'], cal['Forecast']):
        actual = _parse_calendar_value(actual_text)
        forecast = _parse_calendar_value(forecast_text)

        if actual is None or forecast is None:
            signal.append(0)
            continue

        # 1 when actual > forecast, -1 when actual < forecast, 0 when equal.
        curr_signal = (actual > forecast) - (actual < forecast)

        if str(title).strip() in _INVERTED_EVENTS:
            curr_signal = -curr_signal

        if currency != currency_chosen:
            curr_signal = -curr_signal

        signal.append(curr_signal)

    cal['signal'] = signal

    return cal

In [67]:
# Calendar events and their signals for the EUR/USD pair.
df = NewsSignal('EURUSD')

In [68]:
# Display the signal table produced by NewsSignal('EURUSD').
df

Unnamed: 0,Time,Title,Currency,Impact,Actual,Previous,Forecast,signal
1,,Empire State Manufacturing Index,USD,high,1.9,-19.0,-9.9,-1
2,4:00pm,Prelim UoM Consumer Sentiment,USD,high,67.7,69.5,69.0,1


In [215]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch


def SentimentAnalysis(df):
    """Score news articles with the ProsusAI/finbert sentiment classifier.

    Args:
        df: DataFrame with a ``Content`` column (text fed to the model) and a
            ``Title`` column (copied to the result as ``Headline``).

    Returns:
        pandas.DataFrame: one row per input row with the columns
        ``Headline``, ``Positive``, ``Negative``, ``Neutral`` (softmax
        probabilities) and ``Content``. An empty input yields an empty frame
        with the same columns and does not load the model.
    """
    columns = ["Headline", "Positive", "Negative", "Neutral", "Content"]

    financial_news = list(df['Content'])
    headlines_list = list(df['Title'])

    # Nothing to score: skip the model download and the tokenizer call.
    if not financial_news:
        return pd.DataFrame(columns=columns)

    tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finbert')
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    model.eval()

    inputs = tokenizer(financial_news, padding=True, truncation=True, return_tensors='pt')

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    predictions = torch.nn.functional.softmax(outputs.logits, dim=1)

    # Read the class order from the model config; fall back to the
    # positive/negative/neutral order previously hard-coded here.
    label_index = {str(name).lower(): int(idx)
                   for idx, name in model.config.id2label.items()}
    positive = predictions[:, label_index.get('positive', 0)].tolist()
    negative = predictions[:, label_index.get('negative', 1)].tolist()
    neutral = predictions[:, label_index.get('neutral', 2)].tolist()

    table = {"Headline": headlines_list,
             "Positive": positive,
             "Negative": negative,
             "Neutral": neutral,
             # Previously absent from the table, which left the column all NaN.
             "Content": financial_news}

    return pd.DataFrame(table, columns=columns)

In [216]:
# df_news was not assigned anywhere in the visible notebook, so this cell only
# worked with leftover kernel state; build it here so Restart & Run All succeeds.
df_news = fetch_news()
df = SentimentAnalysis(df_news)

In [214]:
# Display the sentiment table.
# NOTE(review): this cell's execution count (214) is lower than that of the
# cells above (215, 216), so the output shown beneath may come from an earlier
# run — re-execute after Restart & Run All to confirm.
df

Unnamed: 0,Headline,Positive,Negative,Neutral,Content
0,Monetary policy decisions,0.171657,0.086211,0.742132,Monetary policy decisions From ecb.euro...
1,ECB’s Lagarde: Economy Likely to Re...,0.087447,0.838456,0.074097,ECB’s Lagarde: Economy Likely to Remain S...
2,ECB’s Lagarde: ECB Didn’t Discuss D...,0.055684,0.076623,0.867693,ECB’s Lagarde: ECB Didn’t Discuss Duratio...
3,ECB’s Lagarde: Some Governors Would...,0.121846,0.069325,0.808829,ECB’s Lagarde: Some Governors Would Have ...
4,ECB’s Lagarde: Economy Likely to Re...,0.087447,0.838456,0.074097,ECB’s Lagarde: Economy Likely to Remain S...
5,August retail sales pick up steam a...,0.910577,0.065059,0.024364,August retail sales pick up steam as cons...
