In [4]:
import os
import sys
import pandas as pd

# Resolve the project root as the parent of the current working directory and
# register it on sys.path (once) so that project packages can be imported.
ROOT_DIR = os.path.abspath(os.pardir)
if sys.path.count(ROOT_DIR) == 0:
    sys.path.append(ROOT_DIR)

# Directory layout under the project root.
DATA_DIR = os.path.join(ROOT_DIR, "data")
PROCESSED_DIR = os.path.join(ROOT_DIR, "data", "processed")

# Input (news aligned with prices) and output (same table plus sentiment columns).
ALIGNED_CSV = os.path.join(PROCESSED_DIR, "news_price_aligned.csv")
OUTPUT_CSV = os.path.join(PROCESSED_DIR, "news_price_aligned_with_sentiment.csv")

from scripts.sentiment_utils import add_textblob_sentiment


In [5]:
# Load the price-aligned news table, parsing the "Date" column on read.
aligned = pd.read_csv(ALIGNED_CSV, parse_dates=["Date"])

# Quick look at the first rows and the available column names.
# NOTE(review): the recorded column list includes "Unnamed: 0", which looks like
# an index column written by an upstream to_csv call — confirm whether it is needed.
print("Aligned data sample (before sentiment):")
display(aligned.head(5))
print(list(aligned.columns))


Aligned data sample (before sentiment):


Unnamed: 0.1,Date,stock,Unnamed: 0,headline,url,publisher,ClosePrice
0,2020-06-10,AAPL,7120,Tech Stocks And FAANGS Strong Again To Start D...,https://www.benzinga.com/government/20/06/1622...,JJ Kinahan,85.566071
1,2020-06-10,AAPL,7121,10 Biggest Price Target Changes For Wednesday,https://www.benzinga.com/analyst-ratings/price...,Lisa Levin,85.566071
2,2020-06-10,AAPL,7122,"Benzinga Pro's Top 5 Stocks To Watch For Wed.,...",https://www.benzinga.com/short-sellers/20/06/1...,Benzinga Newsdesk,85.566071
3,2020-06-10,AAPL,7123,"Deutsche Bank Maintains Buy on Apple, Raises P...",https://www.benzinga.com/news/20/06/16219873/d...,Benzinga Newsdesk,85.566071
4,2020-06-10,AAPL,7124,Apple To Let Users Trade In Their Mac Computer...,https://www.benzinga.com/news/20/06/16218697/a...,Neer Varshney,85.566071


['Date', 'stock', 'Unnamed: 0', 'headline', 'url', 'publisher', 'ClosePrice']


In [6]:
# Column that holds the text to score; change it if your data uses another name.
TEXT_COL = "headline"

# Run the project's TextBlob helper over the aligned frame. The preview below
# expects the result to carry polarity, subjectivity and label columns.
aligned_with_sent = add_textblob_sentiment(aligned, text_col=TEXT_COL)

preview_columns = [
    TEXT_COL,
    "sentiment_polarity",
    "sentiment_subjectivity",
    "sentiment_label",
]

print("Aligned data sample (after sentiment):")
display(aligned_with_sent[preview_columns].head())


Aligned data sample (after sentiment):


Unnamed: 0,headline,sentiment_polarity,sentiment_subjectivity,sentiment_label
0,Tech Stocks And FAANGS Strong Again To Start D...,0.433333,0.733333,positive
1,10 Biggest Price Target Changes For Wednesday,0.0,0.0,neutral
2,"Benzinga Pro's Top 5 Stocks To Watch For Wed.,...",0.5,0.5,positive
3,"Deutsche Bank Maintains Buy on Apple, Raises P...",0.0,0.0,neutral
4,Apple To Let Users Trade In Their Mac Computer...,0.0,0.0,neutral


In [7]:
# Share of rows per sentiment label (fractions rather than raw counts).
aligned_with_sent.loc[:, "sentiment_label"].value_counts(normalize=True)


sentiment_label
neutral     0.663136
positive    0.254662
negative    0.082202
Name: proportion, dtype: float64

In [8]:
# Persist the sentiment-enriched frame; create the target directory if needed
# and omit the DataFrame index from the written file.
os.makedirs(PROCESSED_DIR, exist_ok=True)
aligned_with_sent.to_csv(OUTPUT_CSV, index=False)

# Report the location relative to the project root so the saved notebook output
# does not embed a machine-specific absolute path (user name, local folders).
print(f"Saved sentiment-enriched aligned data to:\n{os.path.relpath(OUTPUT_CSV, ROOT_DIR)}")


Saved sentiment-enriched aligned data to:
c:\Users\filimon.hailemariam\Downloads\Price Movement -Week1\Predicting-Price-Moves-with-News-Sentiment\data\processed\news_price_aligned_with_sentiment.csv
