
# News Sentiment Analysis and Topic Modeling Notebook

This notebook demonstrates how to use the `NewsNLPAnalyzer` class for:
- Loading news data from every CSV file in `../data/raw` (expected columns: `headline`, `url`, `publisher`, `date`, `stock_ticker`)
- Cleaning and preprocessing text
- Conducting sentiment analysis using TextBlob and VADER
- Performing topic modeling (LDA) to identify main topics in headlines
- Visualizing sentiment scores and topic keywords interactively with Plotly


In [None]:
# 1: Setup
import sys
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

# Make the project's code importable from the notebook's working directory.
#
# `from scripts.sentiment_analysis import ...` looks for a top-level package
# named `scripts`, so the directory that CONTAINS `scripts/` (one level above
# the current working directory) has to be on sys.path. Putting only
# `scripts/` itself on the path would expose `sentiment_analysis`, but not
# `scripts.sentiment_analysis`.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Still registered as before, so bare imports of modules inside scripts/ keep working.
scripts_path = os.path.join(project_root, 'scripts')
for extra_path in (project_root, scripts_path):
    if extra_path not in sys.path:
        sys.path.append(extra_path)
from scripts.sentiment_analysis import NewsNLPAnalyzer

In [None]:
# 2: Load News Data
import glob

# Directory holding the raw CSV inputs, resolved relative to the current
# working directory.
data_dir = os.path.abspath(os.path.join('..', 'data', 'raw'))
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the load order (and therefore the key order of `dataframes`) is the same on
# every run.
csv_files = sorted(glob.glob(os.path.join(data_dir, '*.csv')))

if not csv_files:
    # Name the directory that was searched so a wrong working directory is
    # easy to spot.
    print(f"No CSV files found in {data_dir}")

# Maps each file's base name (without the .csv extension) to its DataFrame.
dataframes = {}
for file in csv_files:
    name = os.path.splitext(os.path.basename(file))[0]
    try:
        df = pd.read_csv(file)
    except Exception as e:
        # Best-effort loading: report the failure and continue with the
        # remaining files instead of aborting the whole cell.
        print(f"❌ Failed to load {name}: {e}")
    else:
        dataframes[name] = df
        print(f"✅ Loaded {name}: {df.shape}")

In [None]:
# 3: Interactive Sentiment Analysis for Any News Dataset
from ipywidgets import Dropdown, interact

def analyze_news_sentiment(selected_name, text_column='headline'):
    """Run the NewsNLPAnalyzer pipeline on one loaded dataset and export a PDF.

    Looks up ``selected_name`` in the module-level ``dataframes`` dict, builds
    a ``NewsNLPAnalyzer`` over it, then calls, in order:
    ``calculate_sentiments``, ``extract_keywords(top_n=20)``,
    ``plot_sentiment_comparison``, ``plot_vader_distribution``,
    ``plot_topic_keywords`` and ``save_summary_pdf``.

    Args:
        selected_name: Key of the dataset in ``dataframes``.
        text_column: Name of the DataFrame column containing the text to
            analyze.

    Returns:
        None. When ``text_column`` is not a column of the selected dataset, a
        message listing the available columns is printed and nothing else is
        done.

    Raises:
        KeyError: If ``selected_name`` is not a key of ``dataframes``.
    """
    df = dataframes[selected_name]

    # `text_column` is typed into a free-text widget, so it may be a partial
    # or mistyped name. Stop here rather than handing an invalid column to the
    # analyzer and writing a PDF for it.
    if text_column not in df.columns:
        print(
            f"Column '{text_column}' not found in {selected_name}. "
            f"Available columns: {list(df.columns)}"
        )
        return

    # Computed once so the saved path and the confirmation message cannot drift apart.
    pdf_path = f"sentiment_summary_{selected_name}.pdf"

    print(f"\n--- Sentiment Analysis for {selected_name} ---")
    analyzer = NewsNLPAnalyzer(data=df, text_column=text_column)
    analyzer.calculate_sentiments()
    analyzer.extract_keywords(top_n=20)
    analyzer.plot_sentiment_comparison()
    analyzer.plot_vader_distribution()
    analyzer.plot_topic_keywords()
    analyzer.save_summary_pdf(pdf_path=pdf_path)
    print(f"PDF summary saved as {pdf_path}")

# Wire the analysis function to interactive controls, but only when at least
# one dataset was loaded; otherwise there is nothing to choose from.
if not dataframes:
    print("No dataframes loaded.")
else:
    # Change 'headline' below if your news text lives in a different column.
    interact(
        analyze_news_sentiment,
        selected_name=Dropdown(description='Dataset:', options=list(dataframes)),
        text_column='headline',
    )