<a href="https://colab.research.google.com/github/MuskanTiwari12/Sentimental-Analysis-Project/blob/main/Task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): this path ends in ".ipynb" yet the mkdir below creates it as a
# directory — confirm a folder (and not the notebook file itself) is intended.
task2_folder = "/content/drive/MyDrive/Task2.ipynb"
!mkdir -p "{task2_folder}"  # Create folder if it doesn't exist


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Market & Sentiment Analysis Engine

1>Install Required Libraries

In [46]:
# Core data handling and numerical operations
!pip install pandas       # For storing and manipulating news, tweets, and sentiment data
!pip install numpy        # For numerical operations, calculations, and array handling
!pip install requests     # For making HTTP requests to APIs (e.g., fetching news or market data)
!pip install python-dotenv # To securely load API keys from environment variables (safer than hardcoding)
# News & social media data
!pip install newsapi-python # To fetch structured news articles from NewsAPI
!pip install tweepy         # To access Twitter API and collect tweets for sentiment analysis
# Large Language Models for sentiment analysis
!pip install openai        # To analyze text sentiment using OpenAI GPT models
!pip install transformers  # To use HuggingFace models like LLaMA for sentiment or text analysis
!pip install google-generativeai  # To analyze text or news sentiment using Google Generative AI (PaLM/Bard)



2>Import Required Libraries

In [47]:
import pandas as pd
import numpy as np
import requests
from newsapi import NewsApiClient
import tweepy
from datetime import datetime
import openai             # OpenAI GPT
import google.generativeai as genai  # Google Generative AI
from transformers import pipeline     # HuggingFace models
import os
from datetime import datetime

3>Load All API Keys from Colab Secret Manager

In [48]:
from google.colab import userdata

# Read every credential from the Colab Secret Manager in a single pass.
# The names on the left line up, in order, with the secret keys on the right.
(
    HUGGINGFACE_API_KEY,  # HuggingFace models (like LLaMA)
    NEWSAPI_KEY,          # NewsAPI
    TWITTER_BEARER,       # Twitter API v2
    OPENAI_API_KEY,       # OpenAI GPT
) = (
    userdata.get(secret_name)
    for secret_name in (
        'HUGGINGFACE_API_KEY',
        'NEWS_API_KEY',
        'TWITTER_BEARER',
        'OPENAI_API_KEY',
    )
)


4>Fetch News and Tweets

In [49]:
# -----------------------------
# Function: Fetch News + Tweets with Twitter rate-limit handling
# -----------------------------
def fetch_news_and_tweets(
    news_query="AI OR artificial intelligence",
    tweet_query="AI OR artificial intelligence -is:retweet lang:en",
    news_count=50,
    tweet_count=50,
    news_source="google-news"
):
    """Fetch news articles and recent tweets and merge them into one DataFrame.

    Parameters
    ----------
    news_query : str
        Query passed to NewsAPI ``get_everything`` as ``q``.
    tweet_query : str
        Query passed to ``tweepy.Client.search_recent_tweets``.
        NOTE(review): in Twitter's query syntax AND binds tighter than OR, so
        in the default value the ``-is:retweet lang:en`` filters probably apply
        only to the right-hand side of the OR — confirm and parenthesise.
    news_count : int
        ``page_size`` for the NewsAPI request.
    tweet_count : int
        Maximum number of tweets to keep. The request itself is clamped to the
        10–100 range accepted by the recent-search endpoint.
    news_source : str
        NewsAPI source id passed as ``sources``.

    Returns
    -------
    pandas.DataFrame
        Columns ``description``, ``publishedAt`` (UTC datetimes), ``source``
        and ``source_type`` ('news' or 'twitter'), sorted by ``publishedAt``.
        If the fetch fails, an empty frame with these same columns is returned
        so downstream column selection keeps working.

    Notes
    -----
    Errors are printed rather than raised. Any Twitter failure (rate limit or
    otherwise) degrades to news-only output instead of discarding the news.
    """
    columns = ['description', 'publishedAt', 'source', 'source_type']

    try:
        # 1. Fetch News from NewsAPI
        newsapi = NewsApiClient(api_key=NEWSAPI_KEY)
        articles = newsapi.get_everything(
            q=news_query,
            sources=news_source,
            language='en',
            sort_by='publishedAt',
            page_size=news_count
        )
        news_df = pd.DataFrame(
            [{
                'description': a['description'],
                'publishedAt': a['publishedAt'],
                'source': a['source']['name'],
                'source_type': 'news'
            } for a in articles['articles']],
            columns=columns,
        )

        # 2. Fetch Tweets; a Twitter failure must not throw away the news.
        tweets_df = pd.DataFrame(columns=columns)
        try:
            client = tweepy.Client(bearer_token=TWITTER_BEARER)
            tweets = client.search_recent_tweets(
                query=tweet_query,
                # The endpoint rejects values outside 10..100.
                max_results=max(10, min(int(tweet_count), 100)),
                # created_at is not returned unless explicitly requested;
                # without this every tweet timestamp would be None.
                tweet_fields=['created_at'],
            )
            if tweets and tweets.data:
                # Trim in case the request was clamped up to the minimum of 10.
                kept = tweets.data[:max(0, int(tweet_count))]
                tweets_df = pd.DataFrame(
                    [{
                        'description': t.text,
                        'publishedAt': t.created_at,
                        'source': 'Twitter',
                        'source_type': 'twitter'
                    } for t in kept],
                    columns=columns,
                )
        except tweepy.TooManyRequests:
            print("Twitter rate limit hit! Only NewsAPI data will be displayed.")
        except tweepy.TweepyException as e:
            print(f"Twitter request failed ({e}). Only NewsAPI data will be displayed.")

        # 3. Combine News + Tweets (skip empty frames: concatenating them is
        #    deprecated in recent pandas and can disturb dtype inference).
        frames = [frame for frame in (news_df, tweets_df) if not frame.empty]
        if not frames:
            return pd.DataFrame(columns=columns)

        combined_df = pd.concat(frames, ignore_index=True)
        # utc=True parses ISO strings (news) and tz-aware datetimes (tweets)
        # into one consistent UTC datetime column.
        combined_df['publishedAt'] = pd.to_datetime(combined_df['publishedAt'], utc=True)
        return combined_df.sort_values('publishedAt').reset_index(drop=True)

    except Exception as e:
        print(f"Error fetching news or tweets: {e}")
        return pd.DataFrame(columns=columns)

# -----------------------------
# Example usage
# -----------------------------
# Run the fetch with its default queries and preview the first rows of the result.
combined_df = fetch_news_and_tweets()
combined_df.head()

Twitter rate limit hit! Only NewsAPI data will be displayed.


Unnamed: 0,description,publishedAt,source,source_type
0,Hi!I made a Chrome extension that hides conten...,2025-08-23 07:59:52+00:00,Google News,news
1,Super Web Scraper is your all-in-one solution ...,2025-09-04 07:55:51+00:00,Google News,news


# Function for Sentiment Analysis Using OpenAI

In [50]:

# Load OpenAI API key
# Reads the secret from the Colab Secret Manager and sets it as the key on the
# openai module.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
openai.api_key = OPENAI_API_KEY

# GPT Sentiment Analysis with robust error handling
def gpt_sentiment_analysis(df, text_column="description", top_n=None):
    """Label the sentiment of a text column using an OpenAI chat model.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is copied, never modified.
    text_column : str
        Name of the column holding the text to classify.
    top_n : int or None
        Number of leading rows (by position) to classify. ``None`` classifies
        every row. Rows beyond ``top_n`` get a null ``sentiment_gpt``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with an added ``sentiment_gpt`` column. Blank or missing
        text is labelled "NEUTRAL" without calling the API; rows whose API call
        failed are null. If the analysis fails as a whole (e.g. ``text_column``
        is missing) the column is entirely null and the error is printed.
    """
    valid_labels = ("POSITIVE", "NEGATIVE", "NEUTRAL")

    # Copy before the try block so the except handler below can always rely on
    # df_copy being bound.
    df_copy = df.copy()
    quota_exhausted = False

    def is_blank(text):
        # Strings: empty or whitespace-only. Everything else: missing values
        # (None, NaN, pd.NA) or any other falsy value.
        if isinstance(text, str):
            return not text.strip()
        try:
            return bool(pd.isna(text)) or not text
        except (TypeError, ValueError):
            return False

    def classify_sentiment(text):
        nonlocal quota_exhausted
        if is_blank(text):
            return "NEUTRAL"
        if quota_exhausted:
            # Quota is not going to recover during this run; skip the request.
            return None
        prompt = f"Classify the sentiment of the following text as POSITIVE, NEGATIVE, or NEUTRAL:\n{text}"
        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}]
            )
            raw = (response.choices[0].message.content or "").strip()
            # Map replies such as "Positive." or "Sentiment: POSITIVE" onto the
            # bare label when exactly one label is mentioned.
            mentioned = [label for label in valid_labels if label in raw.upper()]
            if len(mentioned) == 1:
                return mentioned[0]
            return raw or None
        except Exception as e:
            error_msg = str(e)
            out_of_quota = "insufficient_quota" in error_msg
            # Check the exception class name too: the message text of a
            # rate-limit error does not necessarily contain "RateLimitError".
            rate_limited = type(e).__name__ == "RateLimitError" or "RateLimitError" in error_msg
            if out_of_quota or rate_limited:
                print("OpenAI quota exceeded! Skipping GPT sentiment analysis.")
                quota_exhausted = quota_exhausted or out_of_quota
            else:
                print(f"Error analyzing text: {error_msg}")
            return None

    try:
        total = len(df_copy)
        limit = total if top_n is None else max(0, min(int(top_n), total))

        # Select rows by position so the function also works on frames whose
        # index is not the default 0..n-1 range.
        texts = df_copy[text_column].tolist()
        labels = [classify_sentiment(text) for text in texts[:limit]]
        df_copy['sentiment_gpt'] = labels + [None] * (total - limit)
        return df_copy

    except Exception as e:
        print(f"General error in GPT sentiment analysis: {e}")
        df_copy['sentiment_gpt'] = None
        return df_copy

# Example usage
# Classify only the first 5 rows (top_n=5), then preview each text next to its label.
df_with_sentiment = gpt_sentiment_analysis(combined_df, top_n=5)
df_with_sentiment[['description','sentiment_gpt']].head()


OpenAI quota exceeded! Skipping GPT sentiment analysis.
OpenAI quota exceeded! Skipping GPT sentiment analysis.


Unnamed: 0,description,sentiment_gpt
0,Hi!I made a Chrome extension that hides conten...,
1,Super Web Scraper is your all-in-one solution ...,
