## An example of using the utilities library in order to create a training dataset for the final classifier.

In [8]:
# The Hugging Face transformers library. Neccessary to use FinBERT

# EXECUTE ONCE, IN ORDER TO INSTALL transformers LIBRARY
#!pip install transformers
!pip install yfinance
!pip install pandas_datareader

Collecting pandas_datareader
  Using cached pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
Installing collected packages: pandas-datareader
Successfully installed pandas-datareader-0.10.0


In [10]:
from pathlib import Path

import pandas as pd
# Our library
from utilities import financial_dataset, read_rph, merge_fin_news, sentim_analyzer

In [27]:
# Load the headlines for the AAPL ticker with the project helper `read_rph`
# (its log line reports that they are read from raw_partner_headlines.csv).
apple_headlines = read_rph('AAPL')
# Bare last expression: renders the headlines dataframe as the cell output.
apple_headlines

Read 32 headlines from raw_partner_headlines.csv, regarding AAPL stock


Unnamed: 0,headline,date,stock
4067,American Pie,2020-06-02,AAPL
4068,Tech Giants Dare Antitrust Deal Watchdogs,2020-06-02,AAPL
4069,MoneyGram Shares Jump 50% As Western Union Rep...,2020-06-02,AAPL
4070,All Eyes on Market Volatility,2020-06-01,AAPL
4071,Warren Buffett's Berkshire Hathaway Turns Up S...,2020-06-01,AAPL
4072,COVID 2.0?,2020-06-01,AAPL
4073,Apple CEO Writes To Employees About George Flo...,2020-05-31,AAPL
4074,First the Deflationary Deluge of Assets Crashi...,2020-05-31,AAPL
4075,"Market Briefing For Monday, June 1",2020-05-31,AAPL
4076,Five Monster Stock Market Predictions For The ...,2020-05-31,AAPL


In [14]:
# Build the AAPL price dataframe with the project helper `financial_dataset`.
# Its printed summary reports the number of positive / negative / no changes, and
# the displayed frame carries a Price_change column holding -1, 0 or 1.
# NOTE(review): 0.25 is presumably the threshold used to label a price change — confirm in utilities.
# NOTE(review): the first displayed row (2009-12-31) has 0 in its `date` column instead of a date — verify in utilities.
aapl_fin = financial_dataset('AAPL', 0.25)
# Bare last expression: renders the price dataframe as the cell output.
aapl_fin

[*********************100%***********************]  1 of 1 completed
AAPL dataframe dimensions  (2770, 6)
Positive changes : 1248
Negative changes : 1066
No changes : 456


Unnamed: 0_level_0,Open,Close,Volume,Price_change,date
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-12-31,7.611786,7.526071,352410800,0,0
2010-01-04,7.622500,7.643214,493729600,1,2010-01-04
2010-01-05,7.664286,7.656429,601904800,0,2010-01-05
2010-01-06,7.656429,7.534643,552160000,-1,2010-01-06
2010-01-07,7.562500,7.520714,477131200,0,2010-01-07
...,...,...,...,...,...
2020-12-24,131.320007,131.970001,54930100,1,2020-12-24
2020-12-28,133.990005,136.690002,124486200,1,2020-12-28
2020-12-29,138.050003,134.869995,121047300,-1,2020-12-29
2020-12-30,135.580002,133.720001,96452100,-1,2020-12-30


### Headlines published during the weekend are lost when the financial and news dataframes are merged, because the price data has no rows for non-trading days.

In [15]:
# Combine the price data with the headlines using the project helper `merge_fin_news`.
# In the displayed result there is one row per headline, and each row repeats the
# Open / Close / Volume / Price_change values of that headline's date.
merged_apple = merge_fin_news(aapl_fin, apple_headlines)
# Bare last expression: renders the merged dataframe as the cell output.
merged_apple

Unnamed: 0,date,stock,Open,Close,Volume,headline,Price_change
0,2020-05-27,AAPL,79.035004,79.527496,112945200,The Topsy-Turvy S&P 500 Earnings Picture,1
1,2020-05-28,AAPL,79.192497,79.5625,133560800,The Momentum Trade Driving Stocks Higher May B...,0
2,2020-05-28,AAPL,79.192497,79.5625,133560800,Tweedy Browne's —…—… Annual Letter to Sharehol...,0
3,2020-05-28,AAPL,79.192497,79.5625,133560800,Kroger: Capital Preservation and More at a Goo...,0
4,2020-05-28,AAPL,79.192497,79.5625,133560800,"Tredje AP-fonden Buys Microsoft Corp, Amazon.c...",0
5,2020-05-28,AAPL,79.192497,79.5625,133560800,"The Zacks Analyst Blog Highlights: Apple, Exxo...",0
6,2020-05-28,AAPL,79.192497,79.5625,133560800,Growth Stocks for 2020: Trading Tech Stocks + ...,0
7,2020-05-28,AAPL,79.192497,79.5625,133560800,Stocks Are Struggling To Post Gains On May 28 ...,0
8,2020-05-28,AAPL,79.192497,79.5625,133560800,Swiss National Bank Ready To Buy Much More Tec...,0
9,2020-05-29,AAPL,79.8125,79.485001,153532400,DXC Technology (DXC) Q4 Earnings Top Estimates...,0


#### Initialize transformer model for sentiment analysis

In [25]:
# Requires the `transformers` package to be installed in the kernel environment.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Name the Hugging Face Hub checkpoint once so the tokenizer and the
# classification model are always loaded from the same repository.
FINBERT_CHECKPOINT = "ProsusAI/finbert"

# Tokenizer first, then the sequence-classification model, both from FinBERT.
tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)



### Conduct the sentiment analysis (transformer inference) on the merged dataframe and get the sentiment scores. (Feature Engineering)

In [26]:
# Pass the merged dataframe together with the FinBERT tokenizer and model to the
# project helper `sentim_analyzer`. The displayed result contains the merged
# columns plus three score columns: Positive, Negative and Neutral.
# NOTE(review): presumably one model inference per headline row — confirm in utilities.
train_apple_df = sentim_analyzer(merged_apple, tokenizer, model)
# Bare last expression: renders the training dataframe as the cell output.
train_apple_df

Unnamed: 0,date,stock,Open,Close,Volume,headline,Positive,Negative,Neutral,Price_change
0,2020-05-27,AAPL,79.035004,79.527496,112945200,The Topsy-Turvy S&P 500 Earnings Picture,0.093772,0.389471,0.516757,1
1,2020-05-28,AAPL,79.192497,79.5625,133560800,The Momentum Trade Driving Stocks Higher May B...,0.45419,0.31093,0.23488,0
2,2020-05-28,AAPL,79.192497,79.5625,133560800,Tweedy Browne's —…—… Annual Letter to Sharehol...,0.124023,0.033853,0.842124,0
3,2020-05-28,AAPL,79.192497,79.5625,133560800,Kroger: Capital Preservation and More at a Goo...,0.784677,0.00824,0.207083,0
4,2020-05-28,AAPL,79.192497,79.5625,133560800,"Tredje AP-fonden Buys Microsoft Corp, Amazon.c...",0.088424,0.011014,0.900561,0
5,2020-05-28,AAPL,79.192497,79.5625,133560800,"The Zacks Analyst Blog Highlights: Apple, Exxo...",0.028047,0.029387,0.942566,0
6,2020-05-28,AAPL,79.192497,79.5625,133560800,Growth Stocks for 2020: Trading Tech Stocks + ...,0.040885,0.019898,0.939216,0
7,2020-05-28,AAPL,79.192497,79.5625,133560800,Stocks Are Struggling To Post Gains On May 28 ...,0.012209,0.970809,0.016982,0
8,2020-05-28,AAPL,79.192497,79.5625,133560800,Swiss National Bank Ready To Buy Much More Tec...,0.671759,0.180035,0.148206,0
9,2020-05-29,AAPL,79.8125,79.485001,153532400,DXC Technology (DXC) Q4 Earnings Top Estimates...,0.045347,0.035089,0.919563,0


In [23]:
# Persist the engineered training set for the final classifier.
# `to_csv` does not create missing directories, so make sure the target folder
# exists first; otherwise this cell raises an OSError on a fresh checkout.
train_csv_path = Path('Financial_News/train_apple.csv')
train_csv_path.parent.mkdir(parents=True, exist_ok=True)
# pandas writes the dataframe index as the first, unnamed column (default index=True).
train_apple_df.to_csv(train_csv_path)

In [28]:
# Distinct dates present in the training set, in order of first appearance.
pd.unique(train_apple_df['date'])

array(['2020-05-27', '2020-05-28', '2020-05-29', '2020-06-01',
       '2020-06-02'], dtype=object)