In [27]:
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader
import torch

# Load Finbert model and tokenizer
model = BertForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone", num_labels=3)
tokenizer = BertTokenizer.from_pretrained("yiyanghkust/finbert-tone")

# Function: preprocess text and convert to tensor
def preprocess_text(text, max_length=102):
    inputs = tokenizer(text, padding=True, truncation=True, max_length = max_length, return_tensors="pt")
    return inputs

# Function: get sentiment label
def get_sentiment_label(logits):
    probabilities = torch.softmax(logits, dim=1)
    sentiment = torch.argmax(probabilities, dim=1).item()
    return sentiment

# Load data from datafile for analysis
data = pd.read_csv("../Prepared Data/Stock News/MSFT/MSFT_stock_news_2024-03-25_163333.csv")

# Extract headlines only for sentiment analysis in this case 
headlines = data["headline"]

# Empty list to store sentiment labels
sentiments = []

# Set device for model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Set the model to evaluation mode
model.eval()

# Process each headline
for headline in headlines:
    # Preprocess the text
    inputs = preprocess_text(headline)
    inputs = {key: inputs[key].to(device) for key in inputs}

    # Get model outputs
    with torch.no_grad():
        outputs = model(**inputs)

    # Get sentiment label
    sentiment = get_sentiment_label(outputs.logits)
    sentiments.append(sentiment)

# Add sentiment labels to the DataFrame
data["sentiment"] = sentiments

# Map sentiment labels to their respective categories (0: Negative, 1: Neutral, 2: Positive)
sentiment_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
data["sentiment"] = data["sentiment"].map(sentiment_map)

# Print the DataFrame with sentiment labels
print(data)

                                             headline  \
0   Magnificent Seven Stocks To Buy And Watch: Nvi...   
1    Jim Cramer Says You Should Avoid These 11 Stocks   
2   These Stocks Are Moving the Most Today: Boeing...   
3   Nvidia Has a Secret Weapon to Keep Growing Lon...   
4   Intel, AMD Fall on Report China to Limit Use o...   
..                                                ...   
95  Jim Cramer Thinks AI Revolution Can Boost Thes...   
96  3 Reasons to Buy This "Magnificent Seven" Stoc...   
97  Can This Beaten-Down Stock Skyrocket More Than...   
98             Top 15 Countries to Migrate from India   
99  Apple Faces Legal Protest From Meta, Microsoft...   

                                                 link               date  \
0   https://finance.yahoo.com/m/4205eaa9-f620-3a0b...      Today 12:23PM   
1   https://finance.yahoo.com/news/jim-cramer-says...            11:45AM   
2   https://finance.yahoo.com/m/ef62b59f-2890-3f7a...            11:17AM   
3   https:/

In [28]:
#Calculate average sentiment 
average_sentiment = sum(sentiments) / len(sentiments)

# Map the average sentiment label
average_sentiment_label = sentiment_map[int(average_sentiment)]

print("\nAverage sentiment for headlines:")
print("Average Sentiment Label:", average_sentiment_label)
print("Average Sentiment Value:", average_sentiment)


Average sentiment for headlines:
Average Sentiment Label: Negative
Average Sentiment Value: 0.46


In [33]:
#EXAMPLE: Price change of  MSFT stock 24-03-25
# Load stock price data
stock_price_data = pd.read_csv('../Prepared Data/MSFT/MSFT_stock_data_2024-01-01_to_2024-04-17.csv')

# Specify the row index you want to extract
row_index = 57

# Extract the 'open' and 'close' values for the specified row
open_value = stock_price_data.at[row_index, 'Open']
close_value = stock_price_data.at[row_index, 'Close']

# Store the open and close values in 'change_in_value'
change_in_value = (close_value - open_value)

print("Daily change in stock value:", change_in_value)


Daily change in stock value: -2.3800048828125


In [42]:
#Model relationship between Average Sentiment and Change in price
#Create DataFrame with both values
sentiment_change_data = pd.DataFrame({'Date': ['2024-03-25'], #date of sentiment and price data
                                      'Average_Sentiment': [average_sentiment], #Average sentiment value of headlines from date
                                      'Change_in_Value': [change_in_value]}) #Change in price from date

#merge with stock price data
merged_data = pd.merge(stock_price_data, sentiment_change_data, on='Date', how='inner')

print(merged_data.head())

         Date       Open        High         Low       Close   Adj Close  \
0  2024-03-25  425.23999  427.410004  421.609985  422.859985  422.859985   

     Volume  Average_Sentiment  Change_in_Value  
0  18060500               0.46        -2.380005  
