<a href="https://colab.research.google.com/github/ROSHINITALLA26/infosys_internship/blob/main/task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install all required libraries at once
!pip install google-generativeai newsapi-python pandas

import pandas as pd
import google.generativeai as genai
from newsapi import NewsApiClient
from google.colab import userdata
import time
import requests
from bs4 import BeautifulSoup
import random

Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Downloading newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: newsapi-python
Successfully installed newsapi-python-0.2.7


In [2]:
# --- development mode ---
# set to True to run instantly with fake data and save your API quota.
# set to False to run the real analysis and use the Gemini API.
DEVELOPMENT_MODE = False

In [3]:
# securely load both API keys
try:
    news_api=userdata.get('news_api')
    gemini_api = userdata.get('gemini_api')

    # configure the Gemini client
    genai.configure(api_key=gemini_api)
    print("API keys loaded successfully.")
except Exception as e:
    print("API Key not found. Please add both keys to your Colab Secrets.")

API keys loaded successfully.


In [4]:
def fetch_news_data(api_key, search_query, num_articles=20):
    """Fetches a specified number of fresh news articles from NewsAPI."""
    print(f"\nFetching {num_articles} fresh articles for query: '{search_query}'...")
    try:
        newsapi = NewsApiClient(api_key=api_key)
        all_articles = newsapi.get_everything(q=search_query,
                                              language='en',
                                              sort_by='publishedAt', # Sort by newest
                                              page_size=num_articles)

        print(f"Successfully fetched {len(all_articles['articles'])} articles.")
        return all_articles['articles']
    except Exception as e:
        print(f"An error occurred while fetching news: {e}")
        return None

In [5]:
def fetch_news_from_rss(url, num_articles=50):
    """Fetches news headlines from a standard RSS feed."""
    print(f"\nFetching {num_articles} fresh articles from RSS feed...")
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "xml")
        articles = soup.find_all("item", limit=num_articles)

        # format the data to look like the NewsAPI data
        formatted_articles = []
        for article in articles:
            formatted_articles.append({
                'title': article.find("title").text,
                'source': {'name': 'Google News RSS'},
                'publishedAt': article.find("pubDate").text
            })
        print(f"Successfully fetched {len(formatted_articles)} articles from RSS.")
        return formatted_articles
    except Exception as e:
        print(f"An error occurred while fetching RSS feed: {e}")
        return []

In [6]:
def fetch_tweets(bearer_token, search_query, num_tweets=20):
    """Fetches recent tweets from the X (Twitter) API v2."""
    print(f"\nFetching {num_tweets} recent tweets for query: '{search_query}'...")
    search_url = "https://api.twitter.com/2/tweets/search/recent"

    # Format the request headers with your bearer token for authentication
    headers = {"Authorization": f"Bearer {bearer_token}"}

    # Set the parameters for the search
    params = {'query': search_query, 'max_results': num_tweets, 'tweet.fields': 'created_at'}

    try:
        response = requests.get(search_url, headers=headers, params=params)
        response.raise_for_status() # This will raise an error for bad responses
        tweets = response.json().get('data', [])

        # Format the data to match our other sources
        formatted_tweets = []
        for tweet in tweets:
            formatted_tweets.append({
                'title': tweet['text'],
                'source': 'Twitter',
                'publishedAt': tweet['created_at']
            })
        print(f"Successfully fetched {len(formatted_tweets)} tweets.")
        return formatted_tweets
    except Exception as e:
        print(f"An error occurred while fetching tweets: {e}")
        return []

In [7]:
def fetch_gnews_articles(api_key, search_query, num_articles=20):
    """Fetches news articles from the GNews API."""
    print(f"\nFetching {num_articles} articles from GNews for query: '{search_query}'...")
    try:
        url = f"https://gnews.io/api/v4/search?q={search_query}&lang=en&max={num_articles}&apikey={api_key}"
        response = requests.get(url)
        response.raise_for_status()
        articles = response.json().get('articles', [])

        # Format the data to match our other sources
        formatted_articles = []
        for article in articles:
            formatted_articles.append({
                'title': article['title'],
                'source': article['source']['name'],
                'publishedAt': article['publishedAt']
            })
        print(f"Successfully fetched {len(formatted_articles)} articles from GNews.")
        return formatted_articles
    except Exception as e:
        print(f"An error occurred while fetching GNews: {e}")
        return []

In [8]:
def get_sentiment_gemini(headline):
    """
    Analyzes the sentiment of a news headline using the Gemini API.
    Returns both a label ('Positive', 'Negative', 'Neutral') and a numerical score (-1.0 to 1.0).
    """
    try:
        model = genai.GenerativeModel('gemini-1.5-flash-latest')

        # --- NEW, MORE DETAILED PROMPT ---
        prompt = f"""Analyze the sentiment of the following news headline.
        Provide a sentiment label (Positive, Negative, or Neutral) and a sentiment score from -1.0 (most negative) to 1.0 (most positive).
        Respond in the following format exactly: "Label: [sentiment], Score: [score]"

        Headline: '{headline}'
        """

        response = model.generate_content(prompt)
        text_response = response.text.strip()

        # --- NEW PARSING LOGIC ---
        # Logic to extract the label and score from the response string
        if "Label:" in text_response and "Score:" in text_response:
            parts = text_response.split(',')
            sentiment_label = parts[0].replace("Label:", "").strip()
            sentiment_score_str = parts[1].replace("Score:", "").strip()
            sentiment_score = float(sentiment_score_str)
            return sentiment_label, sentiment_score
        else:
            # If the response is not in the expected format
            return "Could not determine", 0.0

    except Exception as e:
        print(f"An error occurred during analysis: {e}")
        return "Error", 0.0

In [10]:
# --- 1. Load ALL your API keys ---
try:
    NEWS_API_KEY = userdata.get('news_api')
    GEMINI_API_KEY = userdata.get('gemini_api')
    TWITTER_BEARER_TOKEN = userdata.get('bearer_token')
    GNEWS_API_KEY = userdata.get('GNEWS_API_KEY')

    genai.configure(api_key=GEMINI_API_KEY)
    print("All API keys loaded successfully.")
except Exception as e:
    print("API Key not found. Please add all four keys to your Colab Secrets.")

# --- 2. Fetch data from ALL sources ---
news_api_articles = fetch_news_data(news_api, search_query="Artificial Intelligence", num_articles=20)
rss_articles = fetch_news_from_rss(url="https://news.google.com/rss?hl=en-IN&gl=IN&ceid=IN:en", num_articles=20)
tweets = fetch_tweets(TWITTER_BEARER_TOKEN, search_query="Artificial Intelligence", num_tweets=20)
gnews_articles = fetch_gnews_articles(GNEWS_API_KEY, search_query="Artificial Intelligence", num_articles=20)

# --- 3. Combine all data into one master list ---
all_content = (news_api_articles or []) + (rss_articles or []) + (tweets or []) + (gnews_articles or [])
print(f"\nTotal content collected from all sources: {len(all_content)}")

# --- 4. Analyze the sentiment of the combined list ---
if all_content:
    results = []
    print("\nStarting real-time sentiment analysis with scoring...")

    sample_to_analyze = all_content[:25]

    for item in sample_to_analyze:
        headline = item['title']
        print(f"Analyzing: '{headline}'...")

        # --- MODIFIED: Get both label and score ---
        label, score = get_sentiment_gemini(headline)

        results.append({
            'title': headline,
            'source': item['source'],
            'published_at': item['publishedAt'],
            'sentiment_label': label, # New column for the label
            'sentiment_score': score   # New column for the score
        })
        time.sleep(4)

    # --- 5. Display and SAVE the final results to a CSV file ---
    print("\n--- Pipeline Complete: Real-time Results with Scores ---")
    results_df = pd.DataFrame(results)

    output_filename = 'final_sentiment_results_with_scores.csv'
    results_df.to_csv(output_filename, index=False, encoding='utf-8-sig')
    print(f"Results saved successfully to '{output_filename}'")

    display(results_df)

All API keys loaded successfully.

Fetching 20 fresh articles for query: 'Artificial Intelligence'...
Successfully fetched 20 articles.

Fetching 20 fresh articles from RSS feed...
Successfully fetched 20 articles from RSS.

Fetching 20 recent tweets for query: 'Artificial Intelligence'...
Successfully fetched 20 tweets.

Fetching 20 articles from GNews for query: 'Artificial Intelligence'...
An error occurred while fetching GNews: 403 Client Error: Forbidden for url: https://gnews.io/api/v4/search?q=Artificial%20Intelligence&lang=en&max=20&apikey=25945965f199bde6663d8b7eaec58566

Total content collected from all sources: 60

Starting real-time sentiment analysis with scoring...
Analyzing: 'Space, AI, And The Future Of Human Potential'...
Analyzing: '☕️ FC Breakfast: top 26 Germans in FC 26, wild video on Gigio'...
Analyzing: 'Albania’s prime minister wants to appoint an AI to his ministry'...
Analyzing: 'How AI and the Trump administration are fuelling ‘quiet quitting’ on IT sustainab

Unnamed: 0,title,source,published_at,sentiment_label,sentiment_score
0,"Space, AI, And The Future Of Human Potential","{'id': None, 'name': 'Forbes'}",2025-09-12T05:29:26Z,Positive,0.6
1,"☕️ FC Breakfast: top 26 Germans in FC 26, wild...","{'id': None, 'name': 'Onefootball.com'}",2025-09-12T05:25:00Z,Neutral,0.0
2,Albania’s prime minister wants to appoint an A...,"{'id': None, 'name': 'Biztoc.com'}",2025-09-12T05:24:55Z,Neutral,0.0
3,How AI and the Trump administration are fuelli...,"{'id': None, 'name': 'ComputerWeekly.com'}",2025-09-12T05:24:00Z,Negative,-0.6
4,Hut 8 (NASDAQ:HUT) Trading 7.4% Higher After A...,"{'id': None, 'name': 'ETF Daily News'}",2025-09-12T05:22:46Z,Positive,0.7
5,Short Interest in Themes Generative Artificial...,"{'id': None, 'name': 'ETF Daily News'}",2025-09-12T05:20:45Z,Positive,0.6
6,Analysts Set Expectations for Fluence Energy F...,"{'id': None, 'name': 'ETF Daily News'}",2025-09-12T05:19:01Z,Neutral,0.0
7,Seaport Res Ptn Lowers Earnings Estimates for ...,"{'id': None, 'name': 'ETF Daily News'}",2025-09-12T05:18:54Z,Negative,-0.6
8,Digital Realty Trust (NYSE:DLR) vs. Net Lease ...,"{'id': None, 'name': 'ETF Daily News'}",2025-09-12T05:16:48Z,Neutral,0.0
9,America’s Grid is Nearing Its Breaking Point,"{'id': None, 'name': 'Nakedcapitalism.com'}",2025-09-12T05:15:37Z,Negative,-0.8
