In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline
import pandas as pd
import torch


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = "yiyanghkust/finbert-tone"

tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cpu


In [None]:
data = {
    'date': [
        '2023-11-01', '2023-11-01', '2023-11-02',
        '2023-11-03', '2023-11-03', '2023-11-04'
    ],
    'headline': [
        'Amazon reports strong Q3 profits amid rising demand',
        'AWS growth slows down, raising investor concerns',
        'New product line expected to boost revenue next quarter',
        'CEO addresses market slowdown in earnings call',
        'Analysts optimistic about Amazon’s holiday sales',
        'Labor strike could impact warehouse operations'
    ]
}

data2 = {
    'date': ['2018-01-31', '2018-02-01', '2018-02-02'],
    'headline': [
        'Amazon starts strong in Q1 with bullish forecast',
        'Market sees slight dip in Amazon growth expectations',
        'Analysts predict Amazon will rebound in coming months'
    ]
}


df1 = pd.DataFrame(data)
df2 = pd.DataFrame(data2)

news_df = pd.concat([df1, df2], ignore_index=True)

news_df['date'] = pd.to_datetime(news_df['date'])

news_df.head()


Unnamed: 0,date,headline
0,2023-11-01,Amazon reports strong Q3 profits amid rising d...
1,2023-11-01,"AWS growth slows down, raising investor concerns"
2,2023-11-02,New product line expected to boost revenue nex...
3,2023-11-03,CEO addresses market slowdown in earnings call
4,2023-11-03,Analysts optimistic about Amazon’s holiday sales


In [None]:
# Running FinBERT 
news_df['sentiment'] = news_df['headline'].apply(lambda x: nlp(x)[0]['label'])
sentiment_map = {'positive': 1, 'neutral': 0, 'negative': -1}
news_df['sentiment_score'] = news_df['sentiment'].str.lower().map(sentiment_map)


In [28]:
daily_sentiment = news_df.groupby('date')['sentiment_score'].mean().reset_index()
daily_sentiment.columns = ['Date', 'SentimentScore']
daily_sentiment.head()


Unnamed: 0,Date,SentimentScore
0,2018-01-31,1.0
1,2018-02-01,-1.0
2,2018-02-02,1.0
3,2023-11-01,0.0
4,2023-11-02,1.0


In [29]:
import pandas as pd

amzn_path = os.path.join('..', 'data', 'processed', 'amzn_features.csv')
df = pd.read_csv(amzn_path)
print(df.columns)


Index(['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'Return', 'LogReturn',
       'MA20', 'Volatility20'],
      dtype='object')


In [None]:
amzn_path = os.path.join('..', 'data', 'processed', 'amzn_features.csv')
amzn = pd.read_csv(amzn_path, parse_dates=['Date'])


merged_df = pd.merge(amzn, daily_sentiment, how='left', on='Date')

merged_df['SentimentScore'].fillna(0, inplace=True)

merged_df.to_csv('../data/processed/amzn_with_sentiment.csv', index=False)

merged_df.head()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df['SentimentScore'].fillna(0, inplace=True)


Unnamed: 0,Date,Close,High,Low,Open,Volume,Return,LogReturn,MA20,Volatility20,SentimentScore
0,NaT,AMZN,AMZN,AMZN,AMZN,AMZN,,,,,0.0
1,2018-01-31,72.54450225830078,73.62899780273438,72.50199890136719,72.56500244140625,128494000,0.00909,0.009049,65.75055,0.009725,1.0
2,2018-02-01,69.5,72.99400329589844,69.25700378417969,72.25,182276000,-0.041967,-0.042873,66.21505,0.015262,-1.0
3,2018-02-02,71.49749755859375,74.9000015258789,70.69999694824219,73.86949920654297,222514000,0.028741,0.028336,66.76595,0.015957,1.0
4,2018-02-05,69.5,72.9489974975586,66.03600311279297,70.13099670410156,229900000,-0.027938,-0.028336,67.1681,0.017811,0.0
