In [2]:
from google.colab import drive
import pandas as pd

# Mount Google Drive at /content/drive/ so the CSV paths under
# /content/drive/MyDrive/ used below can be read.
drive.mount('/content/drive/')

# Locations of the two input CSVs on the mounted Google Drive.
file_path1 = '/content/drive/MyDrive/MRP/stock_news.csv'
file_path2 = '/content/drive/MyDrive/MRP/stock_price.csv'

# Load both datasets and preview the first rows of each.
# Errors are reported rather than re-raised; if loading fails, `news_df` /
# `price_df` stay undefined and later cells that use them will fail.
try:
  news_df = pd.read_csv(file_path1)
  print("CSV file successfully read:")
  print(news_df.head())

  price_df = pd.read_csv(file_path2)
  print("CSV file successfully read:")
  print(price_df.head())

except FileNotFoundError as e:
  # The exception must be bound to `e` here: the message below interpolates
  # it, and an unbound name would raise NameError inside the handler.
  print(f"Error: File not found: {e}")
except Exception as e:
  print(f"An error occurred: {e}")


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
CSV file successfully read:
         date stock_symbol                                   combined_summary
0  2011-01-03           AA  Aluminum producer Alcoa Inc. ( AA ) on Monday ...
1  2011-01-03          BHP  The downside of medium-sized vessels (as compa...
2  2011-01-03          BTU  When translated into dollars and cents, Califo...
3  2011-01-03          CLF  Also charging out of the gates in 2011 are com...
4  2011-01-03          CLX  Cleaning products maker The Clorox Company ( C...
CSV file successfully read:
         date       open       high        low      close  adj close  \
0  2011-01-03  29.728184  30.143061  29.620888  29.957081  27.344431   
1  2011-01-04  30.035765  30.114449  29.456366  29.678112  27.089796   
2  2011-01-05  29.513592  29.849785  29.327610  29.613733  27.031036   
3  2011-01-06  29.592276  29.928469  29.477825  29.670958

In [3]:
!pip install transformers tqdm --quiet


In [4]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from tqdm import tqdm

In [5]:
# Identifier of the pretrained checkpoint passed to both loaders below.
model_name = "yiyanghkust/finbert-tone"
# Load the tokenizer and the sequence-classification model for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [6]:
# Build the "sentiment-analysis" pipeline around the model and tokenizer
# created earlier in this notebook.
# truncation / max_length / padding are supplied once here; presumably they
# are forwarded to the tokenizer on every call — confirm against the
# transformers pipeline documentation.
finbert = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    max_length=512,
    padding=True
)


Device set to use cuda:0


In [7]:
def get_sentiment(texts, batch_size=None):
    """Classify texts with the module-level ``finbert`` pipeline.

    Args:
        texts: List of strings to classify.
        batch_size: Number of texts the pipeline groups per model forward
            pass. Defaults to ``len(texts)`` so the whole list is sent
            through as a single batch.

    Returns:
        A list of ``(label, score)`` tuples, one per result returned by the
        pipeline. An empty input list yields an empty list.
    """
    if isinstance(texts, list) and not texts:
        return []
    # Pass batch_size explicitly: when it is omitted the pipeline falls back
    # to its default (1 per the transformers docs) and runs the list one text
    # at a time, which is what triggers the "using the pipelines sequentially
    # on GPU" warning and makes the full run very slow.
    results = finbert(texts, batch_size=batch_size or len(texts))
    return [(r['label'], r['score']) for r in results]

# Accumulates one (label, score) tuple per row of news_df, in row order.
sentiments = []
batch_size = 32
summaries = news_df['combined_summary'].tolist()

for i in tqdm(range(0, len(summaries), batch_size), desc="Running FinBERT"):
    batch = summaries[i:i+batch_size]
    try:
        sentiments.extend(get_sentiment(batch))
    except Exception as e:
        print(f"Error at batch {i}: {e}")
        # Retry the failed batch one item at a time so a single bad record
        # does not throw away the predictions for the rest of the batch.
        for text in batch:
            try:
                sentiments.append(get_sentiment([text])[0])
            except Exception:
                # Placeholder keeps `sentiments` aligned with news_df rows.
                # NOTE(review): it is indistinguishable from a real
                # prediction except for the 0.0 score.
                sentiments.append(('neutral', 0.0))  # fallback if failed

# The later column assignment relies on exactly one result per input row.
assert len(sentiments) == len(summaries), "sentiment results are misaligned with news_df rows"

Running FinBERT:   0%|          | 10/36666 [00:04<4:07:50,  2.46it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Running FinBERT: 100%|██████████| 36666/36666 [5:12:39<00:00,  1.95it/s]


In [8]:
# Turn the (label, score) tuples into a two-column frame that shares
# news_df's index, then copy each column onto news_df.
sentiment_frame = pd.DataFrame(
    sentiments,
    columns=['sentiment_label', 'sentiment_score'],
    index=news_df.index,
)
for column_name in sentiment_frame.columns:
    news_df[column_name] = sentiment_frame[column_name]


In [9]:
# Write the news frame, including the sentiment columns, back to Drive
# without the index column, then report completion.
output_path = '/content/drive/MyDrive/MRP/stock_news_with_sentiment.csv'
news_df.to_csv(output_path, index=False)
print("Sentiment analysis complete. File saved as stock_news_with_sentiment.csv")

Sentiment analysis complete. File saved as stock_news_with_sentiment.csv
