<a href="https://colab.research.google.com/github/Shanmugapriya-Karthikumar/Sentiment_Analyzer/blob/main/Sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages for this notebook. The cells shown here import
# streamlit, pandas, numpy, prophet, requests, plotly, textblob, scipy and pyngrok;
# the remaining packages on this line are not imported by any cell visible here.
!pip install requests feedparser pandas python-dotenv google-generativeai tqdm matplotlib prophet scipy textblob schedule streamlit numpy plotly pyngrok

Collecting feedparser
  Downloading feedparser-6.0.12-py3-none-any.whl.metadata (2.7 kB)
Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... done
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading feedparser-6.0.12-py3-none-any.whl (81 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 81.5/81.5 kB 6.9 MB/s eta 0:00:00
Downloading schedule-1.2.2-py3-none-any.whl (12 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m115.7 MB/s[0m eta [36m0:00:00[0m


In [None]:
%%writefile app.py
import html
import os
import re
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
from prophet import Prophet
from scipy import stats
from textblob import TextBlob

# Dashboard stylesheet. It defines the classes used by the HTML snippets rendered
# throughout the app: main-header, sub-header, metric-card (+ title/value),
# alert-box, success-box and placeholder-text.
_DASHBOARD_CSS = """
    <style>
    .main-header {
        font-size: 2.5rem;
        color: #1E3A8A;
        text-align: center;
        margin-bottom: 1.5rem;
        font-weight: bold;
    }
    .sub-header {
        font-size: 1.8rem;
        color: #3B82F6;
        margin-top: 1rem;
        margin-bottom: 1rem;
    }
    .metric-card {
        background-color: #F8FAFC;
        padding: 1.2rem;
        border-radius: 8px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        text-align: center;
        margin: 0.5rem;
        border-left: 4px solid #3B82F6;
    }
    .metric-title {
        font-size: 1.1rem;
        color: #4B5563;
        margin-bottom: 0.5rem;
    }
    .metric-value {
        font-size: 1.5rem;
        color: #1E3A8A;
        font-weight: bold;
    }
    .alert-box {
        background-color: #FEF2F2;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #EF4444;
        margin: 0.5rem 0;
    }
    .success-box {
        background-color: #ECFDF5;
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #10B981;
        margin: 0.5rem 0;
    }
    .placeholder-text {
        color: #6B7280;
        text-align: center;
        font-style: italic;
    }
    </style>
"""

# Inject the stylesheet into the page as raw HTML.
st.markdown(_DASHBOARD_CSS, unsafe_allow_html=True)

# Alert thresholds:
#   sentiment_drop - a sentiment score below this value raises a "sentiment drop" alert
#   surge_zscore   - an absolute z-score above this value raises a "surge anomaly" alert
THRESHOLDS = dict(sentiment_drop=-0.1, surge_zscore=1.0)

# Offline fallback data set: the same 20-point, deliberately zig-zagging sentiment
# series repeated for each of three topics, spread evenly over 2025-09-28..2025-10-07.
_SAMPLE_TOPICS = ("AI", "Cloud Computing", "Cybersecurity")
_SAMPLE_POINTS = 20
_SAMPLE_SCORES = [
    0.2, -0.1, 0.3, -0.05, 0.15, -0.2, 0.25, 0.0, -0.15, 0.1,
    0.3, -0.2, 0.1, -0.1, 0.2, -0.05, 0.25, -0.1, 0.15, 0.0
]
_SAMPLE_TIMESTAMPS = pd.date_range(
    start="2025-09-28", end="2025-10-07", periods=_SAMPLE_POINTS
).tolist()

SAMPLE_DATA = pd.DataFrame({
    "topic": [name for name in _SAMPLE_TOPICS for _ in range(_SAMPLE_POINTS)],
    "publishedAt": _SAMPLE_TIMESTAMPS * len(_SAMPLE_TOPICS),
    "sentiment_score": _SAMPLE_SCORES * len(_SAMPLE_TOPICS),
    "title": ["Sample Article"] * (_SAMPLE_POINTS * len(_SAMPLE_TOPICS)),
})

# Function Definitions
def clean_text(text):
    """Return *text* with punctuation stripped and whitespace collapsed.

    Anything that is falsy or not a ``str`` yields an empty string. Otherwise
    every character that is neither a word character nor whitespace is
    removed, and the remaining tokens are joined with single spaces.
    """
    if not text or not isinstance(text, str):
        return ""
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    tokens = without_punctuation.split()
    return ' '.join(tokens)

def run_pipeline(topics, timeout=15):
    """Fetch the last 7 days of news per topic from NewsAPI and score sentiment.

    Args:
        topics: Iterable of search terms; each one is queried separately.
        timeout: Seconds to wait for each NewsAPI response before giving up.

    Returns:
        A DataFrame with columns ``topic``, ``publishedAt``, ``sentiment_score``
        and ``title``. A copy of ``SAMPLE_DATA`` is returned instead when the
        ``NEWS_API_KEY`` environment variable is unset, when any request or
        processing step fails, or when no topic yields articles.
    """
    api_key = os.getenv("NEWS_API_KEY")
    if not api_key:
        st.markdown('<div class="alert-box">⚠️ NewsAPI key is missing. Using sample data.</div>', unsafe_allow_html=True)
        # Hand out a copy so callers that add columns never alter the shared sample frame.
        return SAMPLE_DATA.copy()

    url = "https://newsapi.org/v2/everything"
    # Send the key as a header instead of a query parameter: requests embeds the
    # full URL (query string included) in HTTP error messages, and those
    # messages are rendered in the UI below.
    headers = {"X-Api-Key": api_key}
    since = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")

    all_data = []
    for topic in topics:
        # Topics are free-form user input and end up inside raw HTML.
        safe_topic = html.escape(str(topic))
        params = {
            "q": topic,
            "pageSize": 100,
            "language": "en",
            "sortBy": "publishedAt",
            "from": since,
        }
        try:
            response = requests.get(url, params=params, headers=headers, timeout=timeout)
            response.raise_for_status()
            articles = response.json().get("articles", [])
            if not articles:
                st.markdown(f'<div class="alert-box">⚠️ No articles found for topic: {safe_topic}</div>', unsafe_allow_html=True)
                continue

            df = pd.DataFrame(articles)
            df["topic"] = topic
            df["publishedAt"] = pd.to_datetime(df["publishedAt"], errors="coerce")
            df["clean_text"] = (df["title"].fillna("") + " " + df["description"].fillna("")).apply(clean_text)
            df["sentiment_score"] = [TextBlob(text).sentiment.polarity for text in df["clean_text"]]
            all_data.append(df[["topic", "publishedAt", "sentiment_score", "title"]])
        except Exception as e:
            # Deliberate best-effort boundary: any failure falls back to sample data
            # rather than crashing the dashboard.
            st.markdown(f'<div class="alert-box">⚠️ Error fetching news for {safe_topic}: {html.escape(str(e))}. Using sample data.</div>', unsafe_allow_html=True)
            return SAMPLE_DATA.copy()

    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)
        st.markdown(f'<div class="success-box">📥 Fetched {len(combined_df)} articles</div>', unsafe_allow_html=True)
        return combined_df
    st.markdown('<div class="alert-box">⚠️ No data fetched. Using sample data.</div>', unsafe_allow_html=True)
    return SAMPLE_DATA.copy()

def forecast_sentiment(trend_data, keyword, days=7):
    """Fit a Prophet model to one keyword's sentiment series and forecast ahead.

    Args:
        trend_data: DataFrame with ``keyword``, ``date`` and ``sentiment_score`` columns.
        keyword: Value of the ``keyword`` column whose rows are modelled.
        days: Forecast horizon in days.

    Returns:
        ``(model, forecast)`` on success, where ``forecast`` is the frame
        returned by ``model.predict``. ``(None, empty DataFrame)`` when fewer
        than three data points exist or when fitting/prediction raises.
    """
    # The keyword originates from user input and is rendered inside raw HTML.
    safe_keyword = html.escape(str(keyword))
    try:
        sub_df = (
            trend_data.loc[trend_data["keyword"] == keyword, ["date", "sentiment_score"]]
            .rename(columns={"date": "ds", "sentiment_score": "y"})
        )
        sub_df["ds"] = pd.to_datetime(sub_df["ds"])
        if len(sub_df) < 3:
            st.markdown(f'<div class="alert-box">⚠️ Insufficient data for {safe_keyword}: {len(sub_df)} points (need at least 3)</div>', unsafe_allow_html=True)
            return None, pd.DataFrame()
        model = Prophet(yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True)
        model.fit(sub_df)
        # `periods` counts steps of `freq`, so an hourly frequency needs
        # days * 24 steps to cover `days` days (previously only `days` hours
        # were forecast). Lower-case "h" is the non-deprecated hourly alias.
        future = model.make_future_dataframe(periods=days * 24, freq="h")
        forecast = model.predict(future)
        return model, forecast
    except Exception as e:
        # Forecasting is optional for the dashboard; report the problem and carry on.
        st.markdown(f'<div class="alert-box">⚠️ Forecasting failed for {safe_keyword}: {html.escape(str(e))}</div>', unsafe_allow_html=True)
        return None, pd.DataFrame()

def check_alerts(trend_data, thresholds, slack_webhook=None, timeout=10):
    """Scan sentiment data for low scores and statistical outliers.

    For every keyword, each row whose ``sentiment_score`` is below
    ``thresholds["sentiment_drop"]`` produces a "Sentiment drop" alert. When a
    keyword has at least two rows, each row whose sentiment z-score (computed
    within that keyword) exceeds ``thresholds["surge_zscore"]`` in absolute
    value also produces a "Surge anomaly" alert. Only ``sentiment_score`` is
    examined; article counts are not.

    Args:
        trend_data: DataFrame with ``keyword``, ``date`` and ``sentiment_score`` columns.
        thresholds: Mapping with ``sentiment_drop`` and ``surge_zscore`` entries.
        slack_webhook: Optional Slack incoming-webhook URL; when given and alerts
            exist, they are posted as one newline-joined message.
        timeout: Seconds to wait for the Slack webhook request.

    Returns:
        List of plain-text alert messages (empty when nothing triggered).
    """
    alerts = []

    def _raise_alert(message):
        # Keep the plain text for Slack / the return value; escape only for HTML display.
        alerts.append(message)
        st.markdown(f'<div class="alert-box">{html.escape(message)}</div>', unsafe_allow_html=True)

    st.markdown('<h2 class="sub-header">🔍 Alerts</h2>', unsafe_allow_html=True)
    if trend_data.empty:
        st.markdown('<div class="alert-box">⚠️ No data to analyze for alerts</div>', unsafe_allow_html=True)
        return alerts

    for kw in trend_data["keyword"].unique():
        sub_df = trend_data[trend_data["keyword"] == kw]
        scores = sub_df["sentiment_score"].to_numpy(dtype=float)
        if len(sub_df) >= 2:
            z_scores = np.asarray(stats.zscore(scores, nan_policy="omit"), dtype=float)
        else:
            # A z-score is undefined for a single observation; NaN never trips the check.
            z_scores = np.full(len(sub_df), np.nan)

        # Pair rows with z-scores by position. The DataFrame index labels are
        # global to trend_data (and shuffled by sorting), so they must not be
        # used to index the per-keyword z-score array.
        for when, score, z in zip(sub_df["date"], scores, z_scores):
            stamp = when.strftime('%Y-%m-%d %H:%M')
            if score < thresholds["sentiment_drop"]:
                _raise_alert(f"Sentiment drop for {kw} on {stamp}: {score:.2f}")
            if abs(z) > thresholds["surge_zscore"]:
                _raise_alert(f"Surge anomaly for {kw} on {stamp}: z-score {z:.2f}")

    if alerts and slack_webhook:
        try:
            payload = {"text": "\n".join(alerts)}
            response = requests.post(slack_webhook, json=payload, timeout=timeout)
            response.raise_for_status()
            st.markdown('<div class="success-box">✅ Alerts sent to Slack</div>', unsafe_allow_html=True)
        except Exception as e:
            # Slack delivery is best-effort; the alerts are still shown and returned.
            st.markdown(f'<div class="alert-box">⚠️ Failed to send Slack alerts: {html.escape(str(e))}</div>', unsafe_allow_html=True)

    if not alerts:
        st.markdown('<div class="success-box">✅ No alerts triggered</div>', unsafe_allow_html=True)

    return alerts

def load_historical_data():
    """Return the trend history kept in Streamlit session state.

    Falls back to an empty DataFrame when no ``historical_df`` entry exists.
    """
    empty_history = pd.DataFrame()
    if "historical_df" in st.session_state:
        return st.session_state["historical_df"]
    return empty_history

def save_updated_history(combined_df):
    """Store *combined_df* in session state as ``historical_df`` and confirm on screen."""
    st.session_state["historical_df"] = combined_df
    point_count = len(combined_df)
    st.markdown(
        f'<div class="success-box">💾 Updated history with {point_count} data points</div>',
        unsafe_allow_html=True,
    )

def process_new_data(new_df):
    """Aggregate article-level sentiment into hourly per-topic averages.

    Args:
        new_df: DataFrame with ``topic``, ``publishedAt`` and
            ``sentiment_score`` columns. It is not modified.

    Returns:
        DataFrame with columns ``keyword`` (the topic), ``date`` (start of the
        hour, timezone-naive), ``sentiment_score`` (mean for that hour) and
        ``articles_count``. An empty DataFrame when *new_df* is empty.
    """
    if new_df.empty:
        st.markdown('<div class="alert-box">⚠️ No new data to process</div>', unsafe_allow_html=True)
        return pd.DataFrame()

    # utc=True gives one consistent datetime dtype even if offsets differ between
    # rows; the timezone is then dropped so values compare with naive timestamps.
    parsed = pd.to_datetime(new_df["publishedAt"], errors="coerce", utc=True).dt.tz_localize(None)
    # assign() builds a new frame, so the caller's DataFrame (possibly the shared
    # sample data) is left untouched. Rows whose timestamp failed to parse are dropped.
    dated = new_df.assign(date=parsed).dropna(subset=["date"])

    new_trend = (
        # Lower-case "h" is the non-deprecated hourly alias.
        dated.groupby(["topic", pd.Grouper(key="date", freq="h")])
        .agg(
            avg_sentiment=("sentiment_score", "mean"),
            articles_count=("sentiment_score", "count")
        )
        .reset_index()
        .rename(columns={
            "topic": "keyword",
            "avg_sentiment": "sentiment_score"
        })
    )

    return new_trend

def combine_data(historical_df, new_trend):
    """Merge stored history with freshly computed trend rows and persist the result.

    With no history, the new rows are used as-is. Otherwise both frames are
    concatenated, rows sharing the same (keyword, date) keep only the last
    occurrence, and the result is ordered by keyword then date. The merged
    frame is saved via ``save_updated_history`` and returned.
    """
    if historical_df.empty:
        merged = new_trend
    else:
        stacked = pd.concat([historical_df, new_trend], ignore_index=True)
        deduplicated = stacked.drop_duplicates(subset=["keyword", "date"], keep="last")
        merged = deduplicated.sort_values(["keyword", "date"])

    save_updated_history(merged)
    return merged

def plot_trend(keyword, data):
    """Render an interactive Plotly line chart of one keyword's sentiment history.

    *data* supplies the ``date`` and ``sentiment_score`` columns. The chart
    carries reference lines at 0 and ±0.3 plus a small box with the average
    and the last score in *data*. An alert box is shown instead when *data*
    is empty.
    """
    if data.empty:
        st.markdown(f'<div class="alert-box">⚠️ No data available for {keyword}</div>', unsafe_allow_html=True)
        return

    scores = data["sentiment_score"]

    sentiment_trace = go.Scatter(
        x=data["date"],
        y=scores,
        mode="lines+markers",
        line=dict(color="#1E3A8A", width=2),
        marker=dict(size=8, color="#3B82F6"),
        name="Sentiment",
        hovertemplate="Date: %{x}<br>Sentiment: %{y:.2f}<extra></extra>"
    )
    fig = go.Figure()
    fig.add_trace(sentiment_trace)

    # Neutral baseline followed by the positive / negative guide lines.
    reference_lines = (
        (0, "solid", "gray", 0.5),
        (0.3, "dot", "green", 0.7),
        (-0.3, "dot", "red", 0.7),
    )
    for level, dash, colour, alpha in reference_lines:
        fig.add_hline(y=level, line_dash=dash, line_color=colour, line_width=1, opacity=alpha)

    mean_score = scores.mean()
    last_score = scores.iloc[-1]
    summary = f"Average: {mean_score:.2f}<br>Latest: {last_score:.2f}"

    # Summary box pinned to the top-left corner of the plotting area.
    fig.add_annotation(
        text=summary,
        xref="paper", yref="paper", x=0.02, y=0.98,
        showarrow=False, align="left",
        bgcolor="lightblue", bordercolor="gray", borderwidth=1,
        font=dict(size=12)
    )

    grid_colour = "rgba(0,0,0,0.1)"
    layout_options = dict(
        title=f"Sentiment Trend: {keyword}",
        title_font=dict(size=20, color="#1E3A8A", family="Arial"),
        xaxis_title="Date",
        yaxis_title="Average Sentiment Score",
        xaxis=dict(tickangle=45, gridcolor=grid_colour),
        yaxis=dict(gridcolor=grid_colour),
        plot_bgcolor="white",
        showlegend=True,
        margin=dict(t=50, b=50),
        hovermode="x unified"
    )
    fig.update_layout(**layout_options)

    st.plotly_chart(fig, use_container_width=True)

def plot_forecast(keyword, trend_data, forecast_data):
    """Render one keyword's history together with its forecast using Plotly.

    Args:
        keyword: Value of the ``keyword`` column to plot.
        trend_data: DataFrame with ``keyword``, ``date`` and ``sentiment_score`` columns.
        forecast_data: Forecast frame with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns.

    Only forecast rows dated after the last historical point are drawn. An
    alert box is shown instead of a chart when there is no forecast or no
    history for the keyword.
    """
    historical_data = trend_data[trend_data["keyword"] == keyword].sort_values("date")

    # Without history there is no "latest" value and no forecast start date to anchor on.
    if forecast_data.empty or historical_data.empty:
        st.markdown(f'<div class="alert-box">⚠️ No forecast data for {html.escape(str(keyword))}</div>', unsafe_allow_html=True)
        return

    forecast_start_date = historical_data["date"].max()
    future_forecast = forecast_data[forecast_data["ds"] > forecast_start_date]

    fig = go.Figure()

    # Historical data
    fig.add_trace(go.Scatter(
        x=historical_data["date"],
        y=historical_data["sentiment_score"],
        mode="lines+markers",
        line=dict(color="#1E3A8A", width=2),
        marker=dict(size=6, color="#3B82F6"),
        name="Historical Data",
        hovertemplate="Date: %{x}<br>Sentiment: %{y:.2f}<extra></extra>"
    ))

    # Forecast data
    fig.add_trace(go.Scatter(
        x=future_forecast["ds"],
        y=future_forecast["yhat"],
        mode="lines+markers",
        line=dict(color="#EF4444", width=2),
        marker=dict(size=6, color="#F87171"),
        name="7-Day Forecast",
        hovertemplate="Date: %{x}<br>Forecast: %{y:.2f}<extra></extra>"
    ))

    # Confidence interval: upper bound left-to-right, then lower bound
    # right-to-left, so the two halves form one closed, fillable outline.
    fig.add_trace(go.Scatter(
        x=future_forecast["ds"].tolist() + future_forecast["ds"][::-1].tolist(),
        y=future_forecast["yhat_upper"].tolist() + future_forecast["yhat_lower"][::-1].tolist(),
        fill="toself",
        fillcolor="rgba(239, 68, 68, 0.15)",
        line=dict(color="rgba(0,0,0,0)"),  # fully transparent outline
        name="Confidence Interval",
        hoverinfo="skip"
    ))

    fig.add_hline(y=0, line_dash="solid", line_color="gray", line_width=1, opacity=0.6)
    fig.add_hline(y=0.3, line_dash="dot", line_color="green", line_width=1, opacity=0.5)
    fig.add_hline(y=-0.3, line_dash="dot", line_color="red", line_width=1, opacity=0.5)
    # Vertical marker where history ends and the forecast begins.
    fig.add_vline(x=forecast_start_date, line_dash="dash", line_color="gray", line_width=1, opacity=0.6)

    avg_historical = historical_data["sentiment_score"].mean()
    latest_sentiment = historical_data["sentiment_score"].iloc[-1]

    # Check emptiness BEFORE touching .iloc: the previous chained conditional
    # indexed the series first and raised IndexError on an empty forecast window.
    if future_forecast.empty:
        forecast_avg = 0
        forecast_trend = "N/A"
    else:
        predicted = future_forecast["yhat"]
        forecast_avg = predicted.mean()
        forecast_trend = "↗ Improving" if predicted.iloc[-1] > predicted.iloc[0] else "↘ Declining"

    stats_text = f"Historical Avg: {avg_historical:.2f}<br>Latest: {latest_sentiment:.2f}<br>Forecast Avg: {forecast_avg:.2f}<br>Trend: {forecast_trend}"

    fig.add_annotation(
        xref="paper", yref="paper", x=0.02, y=0.98,
        text=stats_text, showarrow=False, align="left",
        bgcolor="white", bordercolor="gray", borderwidth=1,
        font=dict(size=12)
    )

    fig.update_layout(
        title=f"Sentiment Forecast: {keyword}",
        title_font=dict(size=20, color="#1E3A8A", family="Arial"),
        xaxis_title="Date",
        yaxis_title="Sentiment Score",
        xaxis=dict(tickangle=45, gridcolor="rgba(0,0,0,0.1)"),
        yaxis=dict(gridcolor="rgba(0,0,0,0.1)"),
        plot_bgcolor="white",
        showlegend=True,
        margin=dict(t=50, b=50),
        hovermode="x unified"
    )

    st.plotly_chart(fig, use_container_width=True)

# Streamlit App: page chrome, inputs, and the fetch -> aggregate -> alert -> plot flow.
# NOTE(review): set_page_config runs after the st.markdown call that injects the CSS
# earlier in this file; older Streamlit releases required it to be the first
# Streamlit command - confirm against the deployed version.
st.set_page_config(page_title="Real-Time Industry Insights", layout="wide", page_icon="📊")
st.markdown('<h1 class="main-header">📊 Real-Time Industry Insights</h1>', unsafe_allow_html=True)
rendered_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
st.markdown(f'<p style="text-align: center; color: #6B7280;">Last updated: {rendered_at}</p>', unsafe_allow_html=True)

# Hint text (rendered on every run, before any data is fetched)
st.markdown('<p class="placeholder-text">Enter your NewsAPI key and topics to get started.</p>', unsafe_allow_html=True)

# API key and topic inputs, side by side
key_column, topics_column = st.columns([1, 1])
with key_column:
    news_api_key = st.text_input("🔑 NewsAPI Key", type="password", help="Get a free key from newsapi.org")
    if news_api_key:
        # run_pipeline reads the key from the process environment.
        # NOTE(review): os.environ is process-wide, not per browser session - confirm this is acceptable.
        os.environ["NEWS_API_KEY"] = news_api_key
with topics_column:
    topics_input = st.text_input("🔍 Topics (comma-separated)", value="AI, Cloud Computing, Cybersecurity", help="Enter topics to analyze")
    # Split on commas and discard entries that are blank after trimming.
    topics = [piece for piece in (raw.strip() for raw in topics_input.split(",")) if piece]

# Slack Webhook (Optional)
slack_webhook = st.text_input("📢 Slack Webhook URL (Optional)", type="password", help="Enter Slack webhook for alerts")

# Fetch and process data when the button is pressed
if st.button("🚀 Fetch and Analyze Data", key="fetch_data_button"):
    if topics:
        with st.spinner("Fetching and analyzing data..."):
            new_df = run_pipeline(topics)
            if new_df.empty:
                st.markdown('<div class="alert-box">⚠️ Failed to fetch data. Please check your API key or try different topics.</div>', unsafe_allow_html=True)
            else:
                new_trend = process_new_data(new_df)
                historical_df = load_historical_data()
                trend_data = combine_data(historical_df, new_trend)

                # Headline metrics: three cards in a row
                st.markdown('<h2 class="sub-header">📈 Key Metrics</h2>', unsafe_allow_html=True)
                cols = st.columns(3)
                total_articles = len(new_df)
                if new_trend.empty:
                    avg_sentiment = 0
                    unique_keywords = 0
                else:
                    avg_sentiment = new_trend["sentiment_score"].mean()
                    unique_keywords = len(new_trend["keyword"].unique())
                metric_cards = (
                    ("Total Articles", f"{total_articles}"),
                    ("Avg. Sentiment", f"{avg_sentiment:.2f}"),
                    ("Keywords Analyzed", f"{unique_keywords}"),
                )
                for card_column, (card_title, card_value) in zip(cols, metric_cards):
                    with card_column:
                        st.markdown(f'<div class="metric-card"><div class="metric-title">{card_title}</div><div class="metric-value">{card_value}</div></div>', unsafe_allow_html=True)

                # Alerts across the combined (history + new) data
                alerts = check_alerts(trend_data, THRESHOLDS, slack_webhook)

                # One trend chart per keyword, plus a forecast chart when a model could be fitted
                for keyword in trend_data["keyword"].unique():
                    st.markdown(f'<h2 class="sub-header">📊 {keyword} Analysis</h2>', unsafe_allow_html=True)
                    keyword_data = trend_data[trend_data["keyword"] == keyword]
                    plot_trend(keyword, keyword_data)

                    model, forecast = forecast_sentiment(trend_data, keyword)
                    if model is None or forecast.empty:
                        continue
                    plot_forecast(keyword, trend_data, forecast)
    else:
        st.markdown('<div class="alert-box">⚠️ Please enter at least one topic</div>', unsafe_allow_html=True)



Writing app.py


In [None]:
# Ngrok Setup
from pyngrok import ngrok
import os

# Terminate any existing tunnels
!pkill ngrok
!pkill streamlit

# Set your ngrok auth token
ngrok.set_auth_token("33ecwoVtWHx4LX5H0WV9iltadAH_2PsiQGEjBE9V7KP6Hkp9P")  # Replace with your token

# Start Streamlit server
os.system("streamlit run app.py &")

# Create a public URL with ngrok
public_url = ngrok.connect(8501)
print("Streamlit app is live at:", public_url)

Streamlit app is live at: NgrokTunnel: "https://resistantly-spiritlike-irina.ngrok-free.dev" -> "http://localhost:8501"
