<a href="https://colab.research.google.com/github/MuskanTiwari12/Sentimental-Analysis-Project/blob/main/task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so files under /content/drive are readable and writable from this notebook.
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): the variable is called task2_folder, but the path ends in "Task1.ipynb",
# which looks like a notebook file name rather than a directory — confirm the intended location.
task2_folder = "/content/drive/MyDrive/Task1.ipynb"
!mkdir -p "{task2_folder}"  # Create folder if it doesn't exist

Mounted at /content/drive


# Market & Sentiment Analysis Engine

1>Install Required Libraries

In [None]:
# Core data handling and numerical operations
!pip install pandas numpy requests python-dotenv newsapi-python tweepy openai transformers google-generativeai

Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Downloading newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: newsapi-python
Successfully installed newsapi-python-0.2.7


2>Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import requests
from newsapi import NewsApiClient
import tweepy
from datetime import datetime
import openai             # OpenAI GPT
import google.generativeai as genai  # Google Generative AI
from transformers import pipeline     # HuggingFace models
import os

3>Load All API Keys from Colab Secret Manager

In [None]:
# `userdata` is Colab's secret store. It has to be imported here, before first use,
# otherwise this cell raises NameError on a fresh kernel (Restart & Run All).
from google.colab import userdata

# Read every credential from Colab secrets; nothing is hard-coded in the notebook.
HUGGINGFACE_API_KEY = userdata.get('HUGGINGFACE_API_KEY')
NEWSAPI_KEY = userdata.get('NEWS_API_KEY')
TWITTER_BEARER = userdata.get('TWITTER_BEARER')
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

# Configure API keys
openai.api_key = OPENAI_API_KEY
# FIXME(review): this hands the Hugging Face token to the Google Generative AI SDK.
# A Google API key is presumably required here — confirm which secret should be used.
genai.configure(api_key=HUGGINGFACE_API_KEY)

# Hugging Face pipeline (local DistilBERT SST-2 sentiment classifier)
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


4>Fetch News and Tweets

In [None]:
import pandas as pd
import numpy as np
import tweepy
from datetime import datetime, timedelta, timezone
from newsapi import NewsApiClient

# -----------------------------
# Function: Fetch News + Tweets with safe limits
# -----------------------------
def fetch_news_and_tweets(
    news_query="AI OR artificial intelligence",
    tweet_query="AI OR artificial intelligence -is:retweet lang:en",
    news_count=50,     # keep <= 50 for free plan safety
    tweet_count=50,
    news_source=None   # None = all sources
):
    """Fetch recent news articles and tweets and return them as one time-sorted frame.

    Args:
        news_query: Search expression passed to NewsAPI's "everything" endpoint.
        tweet_query: Search expression passed to Twitter's recent-search endpoint.
        news_count: Requested number of articles (capped at 50 per call).
        tweet_count: Requested number of tweets (capped at 50). Values <= 0 skip Twitter.
        news_source: Optional NewsAPI source filter; None searches all sources.

    Returns:
        DataFrame with columns ``description``, ``publishedAt`` (UTC datetimes),
        ``source`` and ``source_type`` ('news' or 'twitter'), sorted by
        ``publishedAt`` ascending. An empty frame with the same columns is
        returned when nothing could be fetched.

    Notes:
        Reads the module-level ``NEWSAPI_KEY`` and ``TWITTER_BEARER`` credentials.
        A Twitter failure never discards news that was already fetched; only a
        NewsAPI (or other unexpected) failure yields the empty frame.
    """
    result_columns = ['description', 'publishedAt', 'source', 'source_type']

    try:
        # Date filter: yesterday's and today's UTC calendar dates.
        today = datetime.now(timezone.utc)
        yesterday = today - timedelta(days=1)

        # --- Fetch News ---
        newsapi = NewsApiClient(api_key=NEWSAPI_KEY)
        articles = newsapi.get_everything(
            q=news_query,
            sources=news_source,
            language='en',
            from_param=yesterday.strftime("%Y-%m-%d"),
            to=today.strftime("%Y-%m-%d"),
            sort_by='publishedAt',
            page_size=min(news_count, 50),        # max 50 at once (safe for free tier)
            page=int(np.random.randint(1, 3))     # only page 1 or 2 → under 100 results
        )
        news_df = pd.DataFrame(
            [{
                'description': a['description'],
                'publishedAt': a['publishedAt'],
                'source': a['source']['name'],
                'source_type': 'news'
            } for a in articles['articles']],
            columns=result_columns,
        )

        # --- Fetch Tweets ---
        tweets_df = pd.DataFrame(columns=result_columns)
        if tweet_count > 0:
            try:
                client = tweepy.Client(bearer_token=TWITTER_BEARER)
                tweets = client.search_recent_tweets(
                    query=tweet_query,
                    # The endpoint only accepts 10..100; request at least 10 and
                    # trim to the caller's tweet_count below.
                    max_results=max(10, min(tweet_count, 50)),
                    tweet_fields=["created_at"]
                )
                if tweets and tweets.data:
                    tweets_df = pd.DataFrame([{
                        'description': t.text,
                        'publishedAt': t.created_at,
                        'source': 'Twitter',
                        'source_type': 'twitter'
                    } for t in tweets.data[:tweet_count]])
            except tweepy.TooManyRequests:
                print("Twitter rate limit hit! Only NewsAPI data will be displayed.")
            except tweepy.TweepyException as e:
                # Auth/network/other Twitter errors must not throw away the news results.
                print(f"Twitter fetch failed ({e}). Only NewsAPI data will be displayed.")

        # --- Combine ---
        # Skip empty frames so concat does not have to reconcile all-NA columns.
        frames = [frame for frame in (news_df, tweets_df) if not frame.empty]
        if not frames:
            return pd.DataFrame(columns=result_columns)

        combined_df = pd.concat(frames, ignore_index=True)
        # News timestamps are ISO strings and tweet timestamps are datetimes;
        # utc=True normalises both to one tz-aware dtype.
        combined_df['publishedAt'] = pd.to_datetime(combined_df['publishedAt'], utc=True)
        return combined_df.sort_values('publishedAt').reset_index(drop=True)

    except Exception as e:
        print(f"Error fetching news or tweets: {e}")
        return pd.DataFrame(columns=result_columns)

# -----------------------------
# Example usage
# -----------------------------
# Fetch with the default queries and limits; `combined_df` is reused by later cells.
combined_df = fetch_news_and_tweets()
# Preview the first ten rows of the combined frame.
print(combined_df.head(10))


Twitter rate limit hit! Only NewsAPI data will be displayed.
                                         description  \
0  Part three of the interview series covers Sams...   
1  Juluke will miss Florida's next three games ag...   
2  Last week was the 24th year anniversary of the...   
3  Microdramas have rapidly evolved from experime...   
4  Micro-drama revenues in China are set to surpa...   
5  A Florida football assistant coach has been su...   
6  Cybersecurity startup Airia LLC says it has th...   
7         Another chapter in the AI pricing playbook   
8  Florida suspended assistant coach Jabbar Juluk...   
9  Using Multi-Agent Collaboration with Evolution...   

                publishedAt             source source_type  
0 2025-09-17 01:22:47+00:00        Samsung.com        news  
1 2025-09-17 01:27:07+00:00         CBS Sports        news  
2 2025-09-17 01:29:19+00:00    Lewrockwell.com        news  
3 2025-09-17 01:30:00+00:00            Variety        news  
4 2025-09-17 01:3

In [None]:
# Colab's secret store; credentials are read from it instead of being hard-coded.
from google.colab import userdata

# NOTE(review): these four assignments repeat the ones in the earlier
# "Load All API Keys" cell; running this cell only refreshes the same variables.
HUGGINGFACE_API_KEY = userdata.get('HUGGINGFACE_API_KEY')
NEWSAPI_KEY = userdata.get('NEWS_API_KEY')
TWITTER_BEARER = userdata.get('TWITTER_BEARER')
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')


# Function for Sentiment Analysis Using OpenAI

In [None]:
from google.colab import userdata
# The notebook only does `import openai`, so the OpenAI client class must be
# imported explicitly here; without it the last line raises NameError.
from openai import OpenAI

# Read the key from Colab secrets and expose it through the environment so the
# OpenAI SDK can also pick it up implicitly.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
client = OpenAI(api_key=OPENAI_API_KEY)


In [None]:
import time
import pandas as pd
from transformers import pipeline
import openai

# Hugging Face fallback model: used whenever the OpenAI request for a row fails.
hf_sentiment_model = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# OpenAI API key.
# The previous placeholder assignment ("YOUR_OPENAI_API_KEY") overwrote the real key
# configured earlier, which made every OpenAI request fail. Take the key from the
# environment when it is set there and otherwise leave the existing setting untouched.
import os

_openai_key_from_env = os.environ.get("OPENAI_API_KEY")
if _openai_key_from_env:
    openai.api_key = _openai_key_from_env

def analyze_top_news_sentiment(combined_df, text_column="description", top_n=50, output_file="news_sentiment.csv"):
    """
    Analyze sentiment for top news using OpenAI GPT with Hugging Face fallback.

    The first ``top_n`` rows (by position, whatever the index labels are) are
    classified. Each row is sent to OpenAI first; if that request fails, the
    module-level ``hf_sentiment_model`` pipeline is used instead. After a quota
    or rate-limit error OpenAI is not contacted again for the remaining rows.

    Args:
    - combined_df: DataFrame containing news/tweets (not modified)
    - text_column: Column containing text to analyze
    - top_n: Number of top rows to analyze (clamped to 0..len(combined_df))
    - output_file: CSV file to save results

    Returns:
    - Copy of combined_df with two extra columns: 'sentiment_gpt' (label text)
      and 'sentiment_score_gpt' (1 positive, -1 negative, 0 otherwise). Rows
      beyond top_n hold NaN in both columns.
    """
    df_copy = combined_df.copy()
    # Clamp so a negative or oversized top_n can never produce a bad slice.
    top_n = max(0, min(top_n, len(df_copy)))

    sentiments = []
    scores = []

    openai_enabled = True  # Switched off after the first quota/rate-limit error

    # Positional slicing keeps this correct for frames without a default 0..n-1 index.
    for _, row in df_copy.iloc[:top_n].iterrows():
        text = row[text_column]

        if not text or pd.isna(text):
            sentiments.append("NEUTRAL")
            scores.append(0)
            continue

        sentiment_text = None
        openai_called = False

        # Try OpenAI GPT
        if openai_enabled:
            openai_called = True
            try:
                prompt = f"Classify the sentiment of the following text as POSITIVE, NEGATIVE, or NEUTRAL:\n{text}"
                response = openai.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[{"role":"user","content":prompt}]
                )
                sentiment_text = response.choices[0].message.content.strip().upper()
            except Exception as e:
                # The exception's class name is not part of str(e), so check both.
                error_msg = str(e)
                quota_hit = (
                    type(e).__name__ == "RateLimitError"
                    or "RateLimitError" in error_msg
                    or "insufficient_quota" in error_msg
                )
                if quota_hit:
                    print("⚠️ OpenAI quota or rate limit hit! Using Hugging Face fallback for further indexes...")
                    openai_enabled = False

        # Hugging Face fallback when OpenAI was skipped or failed
        if sentiment_text is None:
            try:
                hf_result = hf_sentiment_model(text[:512])[0]
                sentiment_text = hf_result['label'].upper()
            except Exception:
                sentiment_text = "NEUTRAL"

        # Map to score
        if "POSITIVE" in sentiment_text:
            sentiment_score = 1
        elif "NEGATIVE" in sentiment_text:
            sentiment_score = -1
        else:
            sentiment_score = 0

        sentiments.append(sentiment_text)
        scores.append(sentiment_score)
        if openai_called:
            time.sleep(1)  # Sleep 1 sec to reduce OpenAI rate-limit errors

    # Pad with NaN for rows that were not analyzed and assign positionally, so the
    # result does not depend on the frame's index labels.
    not_analyzed = [float("nan")] * (len(df_copy) - top_n)
    df_copy['sentiment_gpt'] = pd.Series(sentiments + not_analyzed, dtype=object).to_numpy()
    df_copy['sentiment_score_gpt'] = pd.Series(scores + not_analyzed, dtype="float64").to_numpy()

    df_copy.to_csv(output_file, index=False)
    print(f"✅ Sentiment analysis results saved to {output_file}")

    return df_copy

# -----------------------------
# Example usage
# -----------------------------
# Assume combined_df is already fetched
# Classify up to the first 50 rows; the call also writes news_sentiment.csv (the default output_file).
top50_sentiment_df = analyze_top_news_sentiment(combined_df, top_n=50)
# Preview the text next to its sentiment label and numeric score.
print(top50_sentiment_df[['description','sentiment_gpt','sentiment_score_gpt']].head(10))


Device set to use cpu


✅ Sentiment analysis results saved to news_sentiment.csv
                                         description sentiment_gpt  \
0  Part three of the interview series covers Sams...      POSITIVE   
1  Juluke will miss Florida's next three games ag...      NEGATIVE   
2  Last week was the 24th year anniversary of the...      NEGATIVE   
3  Microdramas have rapidly evolved from experime...      POSITIVE   
4  Micro-drama revenues in China are set to surpa...      POSITIVE   
5  A Florida football assistant coach has been su...      NEGATIVE   
6  Cybersecurity startup Airia LLC says it has th...      NEGATIVE   
7         Another chapter in the AI pricing playbook      NEGATIVE   
8  Florida suspended assistant coach Jabbar Juluk...      NEGATIVE   
9  Using Multi-Agent Collaboration with Evolution...      POSITIVE   

   sentiment_score_gpt  
0                  1.0  
1                 -1.0  
2                 -1.0  
3                  1.0  
4                  1.0  
5                 -1.0