In [7]:
!pip install requests
!pip install beautifulsoup4




In [9]:
!pip install googletrans==4.0.0-rc1
!pip install googlesearch-python




In [11]:
!pip install nltk




In [19]:
# This cell runs before the notebook's main import cell, so import nltk here;
# otherwise a fresh-kernel "Run All" fails with NameError on `nltk`.
import nltk

# Fetch the VADER lexicon resource.
nltk.download('vader_lexicon')


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [21]:
import csv
import time
from datetime import datetime
from urllib.parse import urljoin

import googlesearch
import nltk
import requests
from bs4 import BeautifulSoup
from googletrans import Translator
from nltk.sentiment import SentimentIntensityAnalyzer

In [23]:
# Shared module-level helpers used by the functions defined in later cells:
#   sia        - sentiment analyzer queried by analyze_sentiment()
#   translator - translation client called by translate_to_english()
sia = SentimentIntensityAnalyzer()
translator = Translator()

def scrape_pib_headlines(timeout=30):
    """Scrape the PIB release listing for headlines and their links.

    Args:
        timeout: Seconds to wait for the PIB server before ``requests`` gives
            up, so an unresponsive server cannot block the notebook forever.

    Returns:
        A list of ``(headline, link)`` tuples, one for every
        ``<div class="contentheading">`` element that contains an anchor with
        a non-empty ``href``. Links are resolved to absolute URLs against the
        listing page address.
    """
    url = "https://pib.gov.in/allRel.aspx?reg=3&lang=1"
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    headlines_links = []

    for item in soup.find_all('div', class_='contentheading'):
        anchor = item.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # A heading without a usable link would otherwise raise
            # TypeError/KeyError and abort the whole scrape; skip it instead.
            continue

        headline = item.text.strip()
        # urljoin copes with root-relative ("/x.aspx") and absolute hrefs,
        # which plain string concatenation turns into malformed URLs.
        headlines_links.append((headline, urljoin(url, href)))

    return headlines_links

In [None]:
def search_related_news(headline):
    """Run a Google search for pages related to a PIB headline.

    The query is the headline followed by `` site:in`` (presumably to limit
    hits to ``.in`` domains - confirm), and five results are requested.

    Returns:
        Whatever ``googlesearch.search`` returns for that query; callers
        iterate over it to obtain result URLs.
    """
    return googlesearch.search(" ".join((headline, "site:in")), num_results=5)


In [None]:
def translate_to_english(text):
    """Translate ``text`` to English using the shared ``translator``.

    Translation is best-effort: if anything goes wrong the error is printed
    and the untranslated input is handed back unchanged.

    Returns:
        The translated string, or ``text`` itself when translation fails.
    """
    try:
        outcome = translator.translate(text, dest='en')
        english = outcome.text
    except Exception as err:
        print(f"Error translating text: {err}")
        return text
    return english

In [None]:
def analyze_sentiment(text):
    """Classify ``text`` as "Positive", "Negative" or "Neutral".

    The label is derived from the ``compound`` value reported by the shared
    ``sia`` analyzer: -0.05 or lower is negative, 0.05 or higher is positive,
    and anything strictly in between is neutral.
    """
    compound = sia.polarity_scores(text)['compound']

    if compound <= -0.05:
        return "Negative"
    if compound >= 0.05:
        return "Positive"
    return "Neutral"

In [None]:
def scrape_and_analyze_article(url, timeout=15):
    """Download an article, translate its paragraph text and score its sentiment.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so a single unresponsive site cannot stall the whole run.

    Returns:
        ``(sentiment, translated_text)`` on success. ``(None, None)`` when the
        request fails, the status code is not 200, or the page has no
        non-blank ``<p>`` text.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return None, None

        soup = BeautifulSoup(response.content, 'html.parser')
        stripped = (p.get_text(strip=True) for p in soup.find_all('p'))
        # Drop blank paragraphs before joining: otherwise several empty <p>
        # tags produce a whitespace-only string that passes the emptiness
        # check below and gets translated and scored for nothing.
        text = ' '.join(chunk for chunk in stripped if chunk)

        if not text:
            return None, None  # No text found

        # Translate the text to English if necessary
        translated_text = translate_to_english(text)
        return analyze_sentiment(translated_text), translated_text
    except Exception as e:
        # Best-effort: one bad article must not abort the surrounding loop.
        print(f"Error scraping {url}: {e}")
        return None, None

In [None]:
def analyze_news_sentiment(headlines_links):
    """Find negatively-toned related articles for positively-toned PIB headlines.

    For every ``(headline, pib_url)`` pair whose headline is classified as
    "Positive", related articles are searched, scraped and scored; each
    article classified as "Negative" yields one result row.

    Args:
        headlines_links: Iterable of ``(headline, pib_url)`` tuples.

    Returns:
        A list of rows of the form
        ``[headline, pib_url, news_url, translated_text, sentiment]``.
    """
    results = []

    for headline, pib_url in headlines_links:
        print(f"Analyzing PIB headline: {headline}")
        if analyze_sentiment(headline) != "Positive":
            continue

        try:
            # Materialise inside the try block: the search result may be lazy
            # and only raise while it is being iterated. A failed search
            # should cost one headline, not every result gathered so far.
            related_news = list(search_related_news(headline))
        except Exception as e:
            print(f"Error searching related news for {headline}: {e}")
            continue

        negative_news_found = False

        for news_url in related_news:
            print(f"Scraping related news: {news_url}")
            sentiment, translated_text = scrape_and_analyze_article(news_url)

            if sentiment == "Negative":
                # Highlight negative news for positive PIB news
                results.append([headline, pib_url, news_url, translated_text, sentiment])
                negative_news_found = True

        if not negative_news_found:
            print(f"No negative sentiment found for: {headline}")

    return results

In [None]:
def store_results_in_csv(results, filename="negative_news.csv"):
    """Write the result rows to ``filename`` as UTF-8 CSV, replacing any existing file.

    A fixed header row is written first, followed by one CSV row per entry of
    ``results``.
    """
    header = ['PIB Headline', 'PIB URL', 'Related News URL', 'Translated Text', 'Sentiment']

    with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        out.writerows(results)

In [None]:
def main(interval_seconds=86400, max_runs=None):
    """Run the scrape -> analyze -> store pipeline repeatedly.

    Args:
        interval_seconds: Pause between consecutive runs. Defaults to 86400
            (24 hours).
        max_runs: Number of runs to perform before returning. ``None`` (the
            default) loops until interrupted.

    A failure inside one run is reported and the loop carries on with the
    next cycle, so a temporary outage does not stop the schedule for good.
    ``KeyboardInterrupt`` is not caught, so the loop can still be interrupted.
    """
    completed_runs = 0

    while max_runs is None or completed_runs < max_runs:
        print("Starting the automated news sentiment analysis system...")

        try:
            # Scrape PIB headlines and links
            headlines_links = scrape_pib_headlines()

            # Analyze the news sentiment and gather results
            results = analyze_news_sentiment(headlines_links)

            # Store the results in a CSV file
            if results:
                store_results_in_csv(results)
                print(f"Stored {len(results)} negative sentiment results.")
            else:
                print("No negative sentiment results found.")
        except Exception as e:
            print(f"Run failed, will retry on the next cycle: {e}")

        completed_runs += 1
        if max_runs is not None and completed_runs >= max_runs:
            # No point sleeping after the final requested run.
            break

        # With the default interval this prints "Waiting for 24 hours ...".
        print(f"Waiting for {interval_seconds / 3600:g} hours before the next run...")
        time.sleep(interval_seconds)

# Entry point: start the pipeline only when this code runs as the main module.
# Note that main() loops with a long sleep between runs, so this call blocks.
if __name__ == "__main__":
    main()