In [1]:
import pandas as pd
pd.options.display.max_colwidth = 150
import numpy as np
import regex as re
from transformers import PegasusTokenizer, TFPegasusForConditionalGeneration, BertTokenizer, TFBertModel, AutoTokenizer,AutoModelForSequenceClassification, BertForSequenceClassification,pipeline
import pytextrank
import spacy
#!python -m spacy download en_core_web_trf

# Warnings
import warnings
warnings.simplefilter(action='ignore')
from transformers import logging as hf_logging
hf_logging.set_verbosity_error()

In [2]:
# Load the cleaned article corpus and preview the first three rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV was
# written with its index included — consider index_col=0 here; confirm against the writer.
df = pd.read_csv('data/articles_clean.csv')
df.head(3)

Unnamed: 0,date,ticker,link,articles
0,2023-03-29,aapl,https://finance.yahoo.com//news/binance-investors-withdraw-enforcement-action-us-regulators-113316406.html,"Investors have pulled out $2bn (£1.62bn) from Binance over the past week, according to crypto analytics firm Nansen. ""The pace of withdrawals is h..."
1,2023-03-29,aapl,https://finance.yahoo.com//news/apple-inc-nasdaq-aapl-intrinsic-110123495.html,"Using the 2 Stage Free Cash Flow to Equity, Apple fair value estimate is US$120 Apple is estimated to be 31% overvalued based on current share pri..."
2,2023-03-29,aapl,https://finance.yahoo.com//news/apple-pay-later-affirm-klarna-091700811.html,"Apple is finally ready to hand out loans to its users, nine months since first teasing the service. Apple Pay Later, which Apple introduced in a l..."


### spaCy (TextRank extractive summary)

In [3]:
# Shared spaCy pipeline, built lazily on first use so the transformer model is
# loaded once for the whole DataFrame instead of once per article.
_SPACY_NLP = None


def _get_spacy_nlp():
    """Return the cached `en_core_web_trf` pipeline with the `textrank` component added."""
    global _SPACY_NLP
    if _SPACY_NLP is None:
        nlp = spacy.load('en_core_web_trf')
        # The 'textrank' component is provided by pytextrank (imported at the top of the notebook).
        nlp.add_pipe('textrank')
        _SPACY_NLP = nlp
    return _SPACY_NLP


def apply_spacy(text, limit_sentences=8):
    """Build an extractive summary of `text` with spaCy + TextRank.

    Parameters
    ----------
    text : str
        Article body to summarise.
    limit_sentences : int, default 8
        Value forwarded to TextRank's `summary(limit_sentences=...)`.

    Returns
    -------
    str
        The sentences returned by TextRank, joined by single spaces.
    """
    doc = _get_spacy_nlp()(text)
    top_sentences = doc._.textrank.summary(limit_sentences=limit_sentences)
    return ' '.join(str(sentence) for sentence in top_sentences)

# Summarise every article with apply_spacy and checkpoint the result to disk.
df['spacy_summary'] = df['articles'].apply(apply_spacy)
df.to_csv('data/articles_summary.csv', index=False)

In [2]:
# Reload the checkpoint written after the spaCy step, so the summarisation above
# does not have to be re-run to continue from here.
df = pd.read_csv('data/articles_summary.csv')

### Pegasus one-line summary

In [3]:
# Shared Pegasus tokenizer/model pairs keyed by checkpoint name, filled on first
# use so the checkpoint is loaded once rather than once per summarised row.
_PEGASUS_CACHE = {}


def _load_pegasus(model_name):
    """Return the cached `(tokenizer, model)` pair for `model_name`, loading it if needed."""
    if model_name not in _PEGASUS_CACHE:
        _PEGASUS_CACHE[model_name] = (
            PegasusTokenizer.from_pretrained(model_name),
            TFPegasusForConditionalGeneration.from_pretrained(model_name),
        )
    return _PEGASUS_CACHE[model_name]


def pegasus_summary(text):
    """Generate a short abstractive summary of `text` with the financial Pegasus model.

    Parameters
    ----------
    text : str
        Text to summarise (the notebook passes the spaCy extractive summary).

    Returns
    -------
    str or float
        The decoded summary, or `np.nan` when tokenisation/generation raises
        (e.g. the input has too many tokens), so the row can be dropped later.
    """
    model_name = "human-centered-summarization/financial-summarization-pegasus"
    tokenizer, model = _load_pegasus(model_name)

    try:
        input_ids = tokenizer(text, return_tensors='tf').input_ids
        output = model.generate(
            input_ids,
            max_length=35,
            num_beams=7,
            early_stopping=True,
        )
        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception:
        # Dropping articles with too many tokens (best effort: mark the row as missing).
        # `Exception` rather than a bare `except` so Ctrl-C still interrupts a long run.
        return np.nan

# Generate a one-line Pegasus summary from each spaCy summary.
df['pegasus_one_line'] = df.spacy_summary.apply(pegasus_summary)

# pegasus_summary returns NaN for rows it could not summarise; drop only those rows.
# A bare dropna() would also discard rows with a missing value in any unrelated column.
df = df.dropna(subset=['pegasus_one_line'])
df.to_csv('data/articles_short_summary.csv',index=False)

In [4]:
# Reload the checkpoint written after the Pegasus step and display it.
df = pd.read_csv('data/articles_short_summary.csv')
df

Unnamed: 0,date,ticker,link,articles,spacy_summary,pegasus_one_line
0,2023-03-29,aapl,https://finance.yahoo.com//news/binance-investors-withdraw-enforcement-action-us-regulators-113316406.html,"Investors have pulled out $2bn (£1.62bn) from Binance over the past week, according to crypto analytics firm Nansen. ""The pace of withdrawals is h...","According to digital asset data provider Kaiko, the level of market dominance that Binance holds over the crypto industry surpasses that of Apple ...",Nansen overtakes Samsung as largest crypto data provider. Publicly disclosed unregistered cryptocurrency assets seen increasing.
1,2023-03-29,aapl,https://finance.yahoo.com//news/apple-inc-nasdaq-aapl-intrinsic-110123495.html,"Using the 2 Stage Free Cash Flow to Equity, Apple fair value estimate is US$120 Apple is estimated to be 31% overvalued based on current share pri...","We assume companies with shrinking free cash flow will slow their rate of shrinkage, and that companies with growing free cash flow will see their...",Apple has a 2 stage free cash flow value of US$1.9t.
2,2023-03-29,aapl,https://finance.yahoo.com//news/apple-pay-later-affirm-klarna-091700811.html,"Apple is finally ready to hand out loans to its users, nine months since first teasing the service. Apple Pay Later, which Apple introduced in a l...","The so-called Buy Now Pay Later (BNPL) service allows users to “easily track, manage, and repay their Apple Pay Later loans in one convenient loca...","Users can apply for loans of up to $1,000. Apple Pay Later service is available in the U.S."
3,2023-03-28,aapl,https://finance.yahoo.com//news/apple-gangnam-welcome-first-customers-230000658.html,"Apple celebrates the dynamic Gangnam District with a special Today at Apple session featuring K-pop group NewJeans SEOUL, South Korea, March 28, 2...","Apple, the Apple logo, Today at Apple, iPhone, Apple Pay, GarageBand, Mac, Apple Music, Apple TV, and Apple Store are trademarks of Apple. Apple’s...","Gangnam District in Seoul, South Korea to host Apple TV, Apple Music Store."
4,2023-03-28,aapl,https://finance.yahoo.com//news/apple-starts-roll-pay-later-214437026.html,"(Bloomberg) -- Apple Inc. began rolling out its first “buy now, pay later”-style service after a lengthy delay, entering a field currently dominat...","The iPhone maker also rolled out Apple Music Classical, a new streaming service that works with Apple Music. (Bloomberg) -- Apple Inc. began rolli...","Users will be able to borrow up to $1,000 via the app. Music streaming service adds new features with Apple Music"
...,...,...,...,...,...,...
1059,2023-03-18,GC=F,https://finance.yahoo.com//news/president-petro-t-afford-ban-150000305.html,A contentious move by Colombia’s first leftist President Gustavo Petro to end hydrocarbon exploration in the strife-torn Latin American country ha...,"Oil spills and other incidents became common place, especially as leftist guerillas stepped up attacks on industry infrastructure, notably pipelin...",Colombia’s dependence on crude oil generates significant community dissent.
1060,2023-03-17,GC=F,https://finance.yahoo.com//news/10-best-copper-stocks-buy-214215168.html,"In this article, we discuss 10 best copper stocks to buy for 2023. If you want to see more stocks in this selection, check out 5 Best Copper Stock...","Referring to a shortage of resources and a significant requirement for copper in the renewable energy sector, Barratt told CNBC on March 2: “Our ...",CEO says ‘something to have to have in your portfolio’. Taseko’s primary focus is on copper concentrate production in Qba state
1061,2023-03-17,GC=F,https://finance.yahoo.com//news/global-markets-global-equities-fall-205500518.html,"(Updates prices throughout) * Wall Street, European stock indexes fall * First Republic Bank gets $30 bln injection, worries linger * Fed data sho...","Fed data on Thursday showed banks sought record amounts of emergency liquidity in recent days, which helped undo months of central bank effort to ...",Silicon Valley Bank lost confidence last Friday. First Republic gets $30 billion in emergency deposits
1062,2023-03-17,GC=F,https://finance.yahoo.com//news/global-markets-global-equities-fall-185128647.html,"(New throughout with updated prices, comment) * Wall Street, European stock indexes fall * First Republic Bank gets $30 bln injection, worries lin...","Fed data on Thursday showed banks sought record amounts of emergency liquidity in recent days, which helped undo months of central bank effort to ...",First Republic Bank gets $30 billion in deposits from Wall Street. U.S. consumer sentiment falls for first time in four months


# Applying FinBERT to get sentiment labels for each article

In [15]:
# Shared FinBERT sentiment pipeline, built lazily on first use so the model and
# tokenizer are loaded once instead of once per row.
_FINBERT_PIPELINE = None


def _get_finbert_pipeline():
    """Return the cached `sentiment-analysis` pipeline backed by `yiyanghkust/finbert-tone`."""
    global _FINBERT_PIPELINE
    if _FINBERT_PIPELINE is None:
        finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
        tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
        _FINBERT_PIPELINE = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)
    return _FINBERT_PIPELINE


def sentiment(short_sum):
    """Classify the tone of `short_sum` with FinBERT.

    Parameters
    ----------
    short_sum : str
        Short text to classify (the notebook passes the Pegasus one-line summary).

    Returns
    -------
    object
        Whatever the pipeline returns for the input, or the string `'ERROR'`
        when the pipeline call raises.
    """
    classifier = _get_finbert_pipeline()

    try:
        return classifier(short_sum)
    except Exception:
        # Keep the batch going on a bad row; `Exception` (not a bare `except`) so
        # Ctrl-C still interrupts a long run.
        return 'ERROR'

# Run sentiment() on every one-line summary; at this point the column holds the raw
# return value of sentiment() (pipeline output or 'ERROR'), not yet a numeric score.
df['sentiment'] = df.pegasus_one_line.apply(sentiment)

def label(sentiment):
    """Map a raw classifier result to a numeric sentiment score.

    Parameters
    ----------
    sentiment : object
        Expected to be a non-empty list whose first element is a dict with a
        'label' key; anything else (e.g. the 'ERROR' string) is unscored.

    Returns
    -------
    int or None
        -1 / 1 / 0 when the first label contains 'Negative' / 'Positive' /
        'Neutral' respectively, otherwise None.
    """
    # Guard clause: only non-empty lists carry a prediction.
    if not isinstance(sentiment, list) or len(sentiment) == 0:
        return None

    predicted = sentiment[0]['label']
    # Checked in this order; the first keyword found in the label wins.
    for keyword, score in (('Negative', -1), ('Positive', 1), ('Neutral', 0)):
        if keyword in predicted:
            return score
    return None
        
# Replace the raw classifier output with its numeric score and save the final dataset.
df['sentiment'] = df['sentiment'].apply(label)
df.to_csv('data/final_data.csv', index=False)

In [17]:
# Display the frame with the numeric sentiment column.
df

Unnamed: 0,date,ticker,link,articles,spacy_summary,pegasus_one_line,sentiment
0,2023-03-29,aapl,https://finance.yahoo.com//news/binance-investors-withdraw-enforcement-action-us-regulators-113316406.html,"Investors have pulled out $2bn (£1.62bn) from Binance over the past week, according to crypto analytics firm Nansen. ""The pace of withdrawals is h...","According to digital asset data provider Kaiko, the level of market dominance that Binance holds over the crypto industry surpasses that of Apple ...",Nansen overtakes Samsung as largest crypto data provider. Publicly disclosed unregistered cryptocurrency assets seen increasing.,1
1,2023-03-29,aapl,https://finance.yahoo.com//news/apple-inc-nasdaq-aapl-intrinsic-110123495.html,"Using the 2 Stage Free Cash Flow to Equity, Apple fair value estimate is US$120 Apple is estimated to be 31% overvalued based on current share pri...","We assume companies with shrinking free cash flow will slow their rate of shrinkage, and that companies with growing free cash flow will see their...",Apple has a 2 stage free cash flow value of US$1.9t.,0
2,2023-03-29,aapl,https://finance.yahoo.com//news/apple-pay-later-affirm-klarna-091700811.html,"Apple is finally ready to hand out loans to its users, nine months since first teasing the service. Apple Pay Later, which Apple introduced in a l...","The so-called Buy Now Pay Later (BNPL) service allows users to “easily track, manage, and repay their Apple Pay Later loans in one convenient loca...","Users can apply for loans of up to $1,000. Apple Pay Later service is available in the U.S.",0
3,2023-03-28,aapl,https://finance.yahoo.com//news/apple-gangnam-welcome-first-customers-230000658.html,"Apple celebrates the dynamic Gangnam District with a special Today at Apple session featuring K-pop group NewJeans SEOUL, South Korea, March 28, 2...","Apple, the Apple logo, Today at Apple, iPhone, Apple Pay, GarageBand, Mac, Apple Music, Apple TV, and Apple Store are trademarks of Apple. Apple’s...","Gangnam District in Seoul, South Korea to host Apple TV, Apple Music Store.",0
4,2023-03-28,aapl,https://finance.yahoo.com//news/apple-starts-roll-pay-later-214437026.html,"(Bloomberg) -- Apple Inc. began rolling out its first “buy now, pay later”-style service after a lengthy delay, entering a field currently dominat...","The iPhone maker also rolled out Apple Music Classical, a new streaming service that works with Apple Music. (Bloomberg) -- Apple Inc. began rolli...","Users will be able to borrow up to $1,000 via the app. Music streaming service adds new features with Apple Music",1
...,...,...,...,...,...,...,...
1059,2023-03-18,GC=F,https://finance.yahoo.com//news/president-petro-t-afford-ban-150000305.html,A contentious move by Colombia’s first leftist President Gustavo Petro to end hydrocarbon exploration in the strife-torn Latin American country ha...,"Oil spills and other incidents became common place, especially as leftist guerillas stepped up attacks on industry infrastructure, notably pipelin...",Colombia’s dependence on crude oil generates significant community dissent.,-1
1060,2023-03-17,GC=F,https://finance.yahoo.com//news/10-best-copper-stocks-buy-214215168.html,"In this article, we discuss 10 best copper stocks to buy for 2023. If you want to see more stocks in this selection, check out 5 Best Copper Stock...","Referring to a shortage of resources and a significant requirement for copper in the renewable energy sector, Barratt told CNBC on March 2: “Our ...",CEO says ‘something to have to have in your portfolio’. Taseko’s primary focus is on copper concentrate production in Qba state,0
1061,2023-03-17,GC=F,https://finance.yahoo.com//news/global-markets-global-equities-fall-205500518.html,"(Updates prices throughout) * Wall Street, European stock indexes fall * First Republic Bank gets $30 bln injection, worries linger * Fed data sho...","Fed data on Thursday showed banks sought record amounts of emergency liquidity in recent days, which helped undo months of central bank effort to ...",Silicon Valley Bank lost confidence last Friday. First Republic gets $30 billion in emergency deposits,-1
1062,2023-03-17,GC=F,https://finance.yahoo.com//news/global-markets-global-equities-fall-185128647.html,"(New throughout with updated prices, comment) * Wall Street, European stock indexes fall * First Republic Bank gets $30 bln injection, worries lin...","Fed data on Thursday showed banks sought record amounts of emergency liquidity in recent days, which helped undo months of central bank effort to ...",First Republic Bank gets $30 billion in deposits from Wall Street. U.S. consumer sentiment falls for first time in four months,-1


In [19]:
# Persist the labelled dataset.
# NOTE(review): the same frame is already written to this path right after the labels
# are computed; this second write looks redundant — confirm before removing.
df.to_csv('data/final_data.csv',index=False)