In [1]:
import news # Module for getting news feeds, source code in news.py
import pandas as pd

from transformers import pipeline
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [17]:
# One-time setup: download the VADER lexicon required by SentimentIntensityAnalyzer
# import nltk
# nltk.download('vader_lexicon')

## Load News Feeds

In [18]:
# Fetch the news feed for the search term 'Apple' through the local `news`
# module (Google News RSS); the resulting object is reused by the cells below.
news_feeds = news.GoogleNews('Apple')

Data successfully loaded.


In [19]:
# Number of articles found in the fetched feed
len(news_feeds.feeds)

108

In [20]:
# Keep only the first 10 news titles as a small sample for comparing the
# sentiment models below
titles = news_feeds.get_titles()[:10]
titles

["Apple is changing the charging port with iPhone 15, and it won't be like the last time - 9to5Mac",
 'Why Apple Is Moving Your Health Information to the iPad - CNET',
 'Apple reportedly cut its Vision Pro headset first year production goals by more than 50% because it’s so complex for manufacturers to make - Fortune',
 'Apple Leak Details All-New iPhone 15, iPhone 15 Pro Design Changes - Forbes',
 'Apple will permanently delete photos in July — how to keep yours safe - New York Post ',
 "Apple Is Now Worth $3 Trillion. How Its Stock Could Rise Another 30%. - Barron's",
 'Apple Says Latest 13-Inch MacBook Air Now Supports Bluetooth 5.3 - MacRumors',
 "Intel prevented Apple from making a 15-inch MacBook Air: “It just did not say 'Air' to us” - 9to5Mac",
 'Best July 4th Apple deals: 15-inch M2 MacBook Air, AirPods, more - 9to5Mac',
 'Apple to ask US Supreme Court to undo App Store order in Epic Games case - Reuters.com']

## NLTK Vader model

In [21]:
# Run NLTK VADER sentiment analysis scores on news titles.
# Build the analyzer once and reuse it: constructing it inside the loop
# re-creates it for every title without changing the scores.
vader_analyzer = SentimentIntensityAnalyzer()

for title in titles:
    print(title)

    # polarity_scores returns a dict with:
    #   neg      - negative score (0 ~ 1)
    #   neu      - neutral score (0 ~ 1)
    #   pos      - positive score (0 ~ 1)
    #   compound - overall compound score (-1 ~ 1)
    print(vader_analyzer.polarity_scores(title))

Apple is changing the charging port with iPhone 15, and it won't be like the last time - 9to5Mac
{'neg': 0.11, 'neu': 0.89, 'pos': 0.0, 'compound': -0.2755}
Why Apple Is Moving Your Health Information to the iPad - CNET
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Apple reportedly cut its Vision Pro headset first year production goals by more than 50% because it’s so complex for manufacturers to make - Fortune
{'neg': 0.08, 'neu': 0.843, 'pos': 0.077, 'compound': -0.0258}
Apple Leak Details All-New iPhone 15, iPhone 15 Pro Design Changes - Forbes
{'neg': 0.179, 'neu': 0.821, 'pos': 0.0, 'compound': -0.34}
Apple will permanently delete photos in July — how to keep yours safe - New York Post 
{'neg': 0.0, 'neu': 0.828, 'pos': 0.172, 'compound': 0.4404}
Apple Is Now Worth $3 Trillion. How Its Stock Could Rise Another 30%. - Barron's
{'neg': 0.0, 'neu': 0.872, 'pos': 0.128, 'compound': 0.2263}
Apple Says Latest 13-Inch MacBook Air Now Supports Bluetooth 5.3 - MacRumors
{'neg': 0.0

The results do not look useful. Look for another model that is specialized in news analysis.

## Huggingface (Transformers) models

In [22]:
# Score the sample titles with the bertweet sentiment model
# (Hugging Face transformers pipeline)
sentiment_pipeline = pipeline(
    'sentiment-analysis',
    model='finiteautomata/bertweet-base-sentiment-analysis',
)
pipe_result = sentiment_pipeline(titles)

# Print each title followed by its prediction dict on the next line
for title, result in zip(titles, pipe_result):
    print(title, result, sep='\n')

emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


Apple is changing the charging port with iPhone 15, and it won't be like the last time - 9to5Mac
{'label': 'NEU', 'score': 0.6830855011940002}
Why Apple Is Moving Your Health Information to the iPad - CNET
{'label': 'NEU', 'score': 0.7866937518119812}
Apple reportedly cut its Vision Pro headset first year production goals by more than 50% because it’s so complex for manufacturers to make - Fortune
{'label': 'NEG', 'score': 0.8146339654922485}
Apple Leak Details All-New iPhone 15, iPhone 15 Pro Design Changes - Forbes
{'label': 'NEU', 'score': 0.9010047912597656}
Apple will permanently delete photos in July — how to keep yours safe - New York Post 
{'label': 'NEU', 'score': 0.8521539568901062}
Apple Is Now Worth $3 Trillion. How Its Stock Could Rise Another 30%. - Barron's
{'label': 'NEU', 'score': 0.5104926824569702}
Apple Says Latest 13-Inch MacBook Air Now Supports Bluetooth 5.3 - MacRumors
{'label': 'POS', 'score': 0.6306926608085632}
Intel prevented Apple from making a 15-inch MacB

In [23]:
# Score the same sample titles with the distilroberta model fine-tuned on
# financial news (Hugging Face transformers pipeline)
sentiment_pipeline = pipeline(
    'sentiment-analysis',
    model='mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis',
)
pipe_result = sentiment_pipeline(titles)

# Print each title followed by its prediction dict on the next line
for title, result in zip(titles, pipe_result):
    print(title, result, sep='\n')

Apple is changing the charging port with iPhone 15, and it won't be like the last time - 9to5Mac
{'label': 'neutral', 'score': 0.9998282194137573}
Why Apple Is Moving Your Health Information to the iPad - CNET
{'label': 'neutral', 'score': 0.9998749494552612}
Apple reportedly cut its Vision Pro headset first year production goals by more than 50% because it’s so complex for manufacturers to make - Fortune
{'label': 'negative', 'score': 0.9980727434158325}
Apple Leak Details All-New iPhone 15, iPhone 15 Pro Design Changes - Forbes
{'label': 'neutral', 'score': 0.999840497970581}
Apple will permanently delete photos in July — how to keep yours safe - New York Post 
{'label': 'neutral', 'score': 0.9998084902763367}
Apple Is Now Worth $3 Trillion. How Its Stock Could Rise Another 30%. - Barron's
{'label': 'positive', 'score': 0.9995013475418091}
Apple Says Latest 13-Inch MacBook Air Now Supports Bluetooth 5.3 - MacRumors
{'label': 'neutral', 'score': 0.9972571730613708}
Intel prevented App

The financial-news-specific model seems more useful.

## Use labeled data to test models

In [61]:
# Labeled dataset used to check the model against known answers.
# source : https://huggingface.co/datasets/zeroshot/twitter-financial-news-sentiment
# 'label' : {
#     0 : 'bearish',
#     1 : 'bullish',
#     2 : 'neutral'
# }
# NOTE(review): the path is relative, so the parquet file must be present in
# the notebook's working directory — confirm before running.
financialNews_df = pd.read_parquet('csv-train.parquet')
financialNews_df.head()

Unnamed: 0,text,label
0,$BYND - JPMorgan reels in expectations on Beyo...,0
1,$CCL $RCL - Nomura points to bookings weakness...,0
2,"$CX - Cemex cut at Credit Suisse, J.P. Morgan ...",0
3,$ESS: BTIG Research cuts to Neutral https://t....,0
4,$FNKO - Funko slides after Piper Jaffray PT cu...,0


In [70]:
# Check column dtypes and non-null counts of the labeled data
financialNews_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9543 entries, 0 to 9542
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    9543 non-null   object
 1   label   9543 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 149.2+ KB


In [63]:
# Look at one text sample (index label 0) to see what the raw entries contain
financialNews_df['text'][0]

'$BYND - JPMorgan reels in expectations on Beyond Meat https://t.co/bd0xbFGjkT'

In [64]:
# Function for cleaning urls from text data
def remove_urls(strings: pd.Series) -> pd.Series:
    """Cut each string off at its first URL.

    Everything from the first ``http://`` or ``https://`` onward is dropped,
    so any text that follows a URL is removed as well. Strings without a URL
    are returned unchanged, and missing values become empty strings.

    Parameters
    ----------
    strings : pd.Series
        Series of text values.

    Returns
    -------
    pd.Series
        Series with the same index holding the text that precedes the first
        URL of each entry.
    """
    # A multi-character pattern is treated as a regular expression by
    # Series.str.split, so 'https?://' matches both URL schemes.
    parts = strings.str.split(r'https?://', n=1)
    # Missing inputs stay NaN after the split; normalise them to ''.
    return parts.str[0].fillna('')

In [65]:
# Strip URLs from the text column (the column is overwritten in place),
# then show the first news title after modification
financialNews_df['text'] = remove_urls(financialNews_df['text'])
financialNews_df['text'][0]

'$BYND - JPMorgan reels in expectations on Beyond Meat '

In [None]:
# Evaluate the financial-news sentiment model on the labeled twitter
# financial news texts; the pipeline is given a plain Python list of strings
modelTest_pipeline = pipeline(
    'sentiment-analysis',
    model='mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis',
)
modelTest_result = modelTest_pipeline(financialNews_df['text'].tolist())
modelTest_result[:10]

In [73]:
# Turn the list of prediction dicts into a frame; the predicted 'label' is
# renamed so it does not clash with the ground-truth 'label' column
modelTest_df = (
    pd.DataFrame(modelTest_result)
    .rename({'label': 'test_label'}, axis='columns')
)

# Index-aligned join: predictions are attached to the rows they were made for
joined_df = financialNews_df.join(modelTest_df)
joined_df.head()

Unnamed: 0,text,label,test_label,score
0,$BYND - JPMorgan reels in expectations on Beyo...,0,positive,0.999046
1,$CCL $RCL - Nomura points to bookings weakness...,0,negative,0.998623
2,"$CX - Cemex cut at Credit Suisse, J.P. Morgan ...",0,negative,0.996079
3,$ESS: BTIG Research cuts to Neutral,0,negative,0.966363
4,$FNKO - Funko slides after Piper Jaffray PT cut,0,negative,0.864107


In [74]:
# Change test label to integer label, using the dataset's encoding
# (0: bearish/negative, 1: bullish/positive, 2: neutral) so predictions can be
# compared directly with the ground-truth 'label' column
joined_df['test_label_int'] = joined_df['test_label'].map({'negative': 0, 'positive': 1, 'neutral': 2})
joined_df.head()

Unnamed: 0,text,label,test_label,score,test_label_int
0,$BYND - JPMorgan reels in expectations on Beyo...,0,positive,0.999046,1
1,$CCL $RCL - Nomura points to bookings weakness...,0,negative,0.998623,0
2,"$CX - Cemex cut at Credit Suisse, J.P. Morgan ...",0,negative,0.996079,0
3,$ESS: BTIG Research cuts to Neutral,0,negative,0.966363,0
4,$FNKO - Funko slides after Piper Jaffray PT cut,0,negative,0.864107,0


In [77]:
# Model accuracy: fraction of rows where the predicted integer label equals
# the ground-truth label. The mean of the boolean match Series is exactly that
# fraction, computed vectorised instead of summing element by element.
(joined_df['label'] == joined_df['test_label_int']).mean()

0.744419993712669

## Create new data and pipeline, then store in dataframe

In [2]:
# Get a fresh news feed for Apple through the local `news` module; its titles
# are scored in the cells below
AAPL_feeds = news.GoogleNews('Apple')

Data loaded successfully.
106 articles found.


In [4]:
# Get all news titles from the feed and preview the first five
AAPL_titles = AAPL_feeds.get_titles()
AAPL_titles[:5]

['Giant telecom company that once almost bought Apple is teetering on the brink of failure - Fortune',
 'Apple iPhone from 2007 sells for $190,000 at auction - BBC',
 'Forget The MacBook Pro, Apple Has Something Much Better - Forbes',
 "iOS 17 Proves Apple Doesn't Need a Foldable Phone... Yet - CNET",
 'Apple has a juicy $40 billion opportunity ahead of it - MarketWatch']

In [5]:
# Use roberta-financial-news sentiment model (judged the more useful one in
# the comparison above) to score every Apple headline
sentiment_pipeline = pipeline('sentiment-analysis', model='mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis')
pipe_result = sentiment_pipeline(AAPL_titles)

In [7]:
# Combine pipeline results with news titles.
# pipe_result was produced from AAPL_titles, one dict per title in the same
# order, so pairing them with zip attaches each headline to its own prediction
# without manual index bookkeeping.
for result, title in zip(pipe_result, AAPL_titles):
    result['title'] = title

pipe_result[:10]

[{'label': 'negative',
  'score': 0.9832817912101746,
  'title': 'Giant telecom company that once almost bought Apple is teetering on the brink of failure - Fortune'},
 {'label': 'neutral',
  'score': 0.9998540878295898,
  'title': 'Apple iPhone from 2007 sells for $190,000 at auction - BBC'},
 {'label': 'positive',
  'score': 0.9862778782844543,
  'title': 'Forget The MacBook Pro, Apple Has Something Much Better - Forbes'},
 {'label': 'neutral',
  'score': 0.9993267059326172,
  'title': "iOS 17 Proves Apple Doesn't Need a Foldable Phone... Yet - CNET"},
 {'label': 'positive',
  'score': 0.9611907601356506,
  'title': 'Apple has a juicy $40 billion opportunity ahead of it - MarketWatch'},
 {'label': 'positive',
  'score': 0.8608031272888184,
  'title': "India will account for 20% of Apple's user growth over the next five years, Morgan Stanley estimates - CNBC"},
 {'label': 'positive',
  'score': 0.72763592004776,
  'title': "Apple's Stock Gets a Price Target Bump. Pay Attention to Reve

In [8]:
# Build a dataframe from the list of result dicts: one row per headline with
# its label, score and title
sentiment_df = pd.DataFrame(pipe_result)
sentiment_df.head(10)

Unnamed: 0,label,score,title
0,negative,0.983282,Giant telecom company that once almost bought ...
1,neutral,0.999854,"Apple iPhone from 2007 sells for $190,000 at a..."
2,positive,0.986278,"Forget The MacBook Pro, Apple Has Something Mu..."
3,neutral,0.999327,iOS 17 Proves Apple Doesn't Need a Foldable Ph...
4,positive,0.961191,Apple has a juicy $40 billion opportunity ahea...
5,positive,0.860803,India will account for 20% of Apple's user gro...
6,positive,0.727636,Apple's Stock Gets a Price Target Bump. Pay At...
7,neutral,0.999067,The Youngest-Ever Harvard Law Graduate Earned ...
8,neutral,0.963769,Apple can delay App Store changes to file Supr...
9,neutral,0.99872,New Apple Exclusive Reveals iPhone 15 Design S...


In [11]:
# Create new column that holds the label as an integer
# (negative -> 0, positive -> 1, neutral -> 2)
sentiment_df['label_int'] = sentiment_df['label'].map({'negative': 0, 'positive': 1, 'neutral': 2})
sentiment_df.head()

Unnamed: 0,label,score,title,label_int
0,negative,0.983282,Giant telecom company that once almost bought ...,0
1,neutral,0.999854,"Apple iPhone from 2007 sells for $190,000 at a...",2
2,positive,0.986278,"Forget The MacBook Pro, Apple Has Something Mu...",1
3,neutral,0.999327,iOS 17 Proves Apple Doesn't Need a Foldable Ph...,2
4,positive,0.961191,Apple has a juicy $40 billion opportunity ahea...,1


In [25]:
# Calculate percentage of each label
def percentages(df: pd.DataFrame) -> None:
    """Print how many rows carry each sentiment label and their share.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a 'label_int' column encoded as
        0 = negative, 1 = positive, 2 = neutral.

    Returns
    -------
    None
        The summary is printed, not returned.
    """
    n = df.shape[0]

    def count_and_pct(code: int) -> tuple:
        # Vectorised count of matching rows. An empty frame reports 0.0%
        # instead of raising ZeroDivisionError.
        count = int((df['label_int'] == code).sum())
        pct = round(100 * (count / n), 2) if n else 0.0
        return count, pct

    n_negative, negative_pct = count_and_pct(0)
    n_positive, positive_pct = count_and_pct(1)
    n_neutral, neutral_pct = count_and_pct(2)

    print(f'Positive : {n_positive} ({positive_pct}%)\nNeutral : {n_neutral} ({neutral_pct}%)\nNegative : {n_negative} ({negative_pct}%)\nOf total {n} articles')

# Print the sentiment breakdown of the scored Apple headlines
percentages(sentiment_df)

Positive : 19 (17.92%)
Neutral : 73 (68.87%)
Negative : 14 (13.21%)
Of total 106 articles


In [9]:
# fine-tune a pretrained model