<a href="https://colab.research.google.com/github/Lean-IQ/Lean-IQ_02/blob/main/NewsAnalyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install feedparser gdown langdetect googletrans vaderSentiment

In [None]:
# NEWS EXTRACT - HEADLINES AND LINKS BASED ON KEYWORD LIST, ALL NORMALIZED IN ENGLISH, INCLUDING SENTIMENT ANALYSIS #

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import feedparser
import csv
from datetime import datetime, timedelta
from google.colab import drive
import gdown
import os

# Mount Google Drive at /content/drive. main() reads its keyword list from a
# path under this mount point, so this must run before main() is called.
drive.mount('/content/drive')

# Initialize the VADER sentiment analyzer once at module level; main() uses
# this shared instance to score every headline title.
analyzer = SentimentIntensityAnalyzer()

def download_keywords_csv(file_path, output_path):
    """Return ``file_path`` if it exists on disk, otherwise ``None``.

    Despite the name, nothing is downloaded: the function only checks that
    the file is present. ``output_path`` is accepted but never used.

    Args:
        file_path: Path of the keywords CSV to look for.
        output_path: Unused.

    Returns:
        ``file_path`` unchanged when the path exists; ``None`` (after
        printing a notice) when it does not.
    """
    # Guard clause: report the missing file and bail out early.
    if not os.path.exists(file_path):
        print("File does not exist.")
        return None
    return file_path

def extract_titles_and_links_with_date(rss_url):
    """Parse an RSS feed and return its entries as (title, link, published) tuples.

    Entries that lack a title, link or published date are skipped, so that
    one incomplete entry does not discard the rest of the feed.

    Args:
        rss_url: URL of the RSS feed to parse.

    Returns:
        A list of ``(title, link, published)`` tuples (possibly empty), or
        ``None`` if an exception was raised while parsing. Exceptions are
        printed, not propagated.
    """
    try:
        # Parse the RSS feed
        feed = feedparser.parse(rss_url)

        # feedparser records fetch/parse problems on the result instead of
        # raising; surface them when nothing could be extracted.
        if not feed.entries and getattr(feed, 'bozo', False):
            print(f"Error: {getattr(feed, 'bozo_exception', 'feed could not be parsed')}")

        # Extract titles, links, and dates, keeping only complete entries
        titles_links_dates = []
        for entry in feed.entries:
            title = getattr(entry, 'title', None)
            link = getattr(entry, 'link', None)
            published = getattr(entry, 'published', None)
            if title is None or link is None or published is None:
                continue
            titles_links_dates.append((title, link, published))

        return titles_links_dates
    except Exception as e:
        print(f"Error: {e}")
        return None

def save_to_csv(data, csv_file_path):
    """Append the rows in ``data`` to the CSV file at ``csv_file_path``.

    The file is opened in append mode with UTF-8 encoding. Any exception is
    caught and reported with ``print`` rather than propagated.

    Args:
        data: Iterable of rows, each row an iterable of cell values.
        csv_file_path: Path of the CSV file to append to.

    Returns:
        None.
    """
    try:
        # Append mode keeps whatever rows the file already holds.
        with open(csv_file_path, mode='a', encoding='utf-8', newline='') as out_handle:
            csv.writer(out_handle).writerows(data)

        print(f"Data successfully appended to: {csv_file_path}")
    except Exception as err:
        print(f"Error appending to CSV: {err}")

def _load_keywords(keywords_file_path):
    """Return the first-column values of the keywords CSV, skipping the header and blank rows."""
    with open(keywords_file_path, 'r', newline='', encoding='utf-8') as keywords_file:
        reader = csv.reader(keywords_file)
        next(reader, None)  # Skip the header; the default avoids StopIteration on an empty file
        return [row[0] for row in reader if row and row[0].strip()]


def _parse_published(date_text):
    """Parse an RFC 2822 style feed date into an aware UTC datetime, or None if it cannot be parsed."""
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    try:
        parsed = parsedate_to_datetime(date_text)
    except (TypeError, ValueError):
        # Older Python versions raise TypeError, newer ones ValueError.
        return None
    if parsed.tzinfo is None:
        # No usable zone information in the string: treat the value as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def main():
    """Collect recent Google News headlines per keyword and write them to a CSV with sentiment scores.

    Steps:
      1. Read keywords (first column, header skipped) from the keywords CSV.
      2. Create/overwrite the output CSV and write its header row.
      3. For each keyword, query the Google News RSS search feed, keep
         entries published within the last 24 hours, score each title with
         the module-level VADER ``analyzer`` and append the rows.

    Dates are compared as timezone-aware UTC values so the 24-hour window
    does not depend on the host's local time zone. Entries whose date cannot
    be parsed are reported and skipped. Scores are written with two decimals
    and a decimal comma; dates are written as ``DD.MM.YYYY``.

    Returns:
        None. Returns early if the keywords file does not exist.
    """
    from datetime import timezone
    from urllib.parse import quote_plus

    # Specify the path to the keywords CSV file
    keywords_file_path = '/content/drive/My Drive/DATA/keyword.csv'

    # Specify the desired CSV file path in Google Drive for saving headlines
    csv_file_path = 'ENTER THE FILE PATH HERE'

    # Check if the keywords file exists
    keywords_file_path = download_keywords_csv(keywords_file_path, '/content/keyword.csv')
    if not keywords_file_path:
        return

    # Load keywords from the CSV file
    keywords = _load_keywords(keywords_file_path)

    # Write headers to the CSV file ('w' truncates any previous results)
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['Keyword', 'Title', 'Date', 'Link', 'Sentiment Polarity', 'Sentiment Compound'])

    # Loop through each keyword
    for keyword in keywords:
        # URL-encode the keyword (spaces become '+', reserved and non-ASCII
        # characters are percent-encoded) so it cannot break the query string
        keyword_for_url = quote_plus(keyword)

        # Construct the RSS feed URL
        rss_feed_url = f'https://news.google.com/rss/search?q={keyword_for_url}'

        # Call the function to extract titles, links, and dates
        result = extract_titles_and_links_with_date(rss_feed_url)

        if not result:
            print(f"Extraction failed for keyword: {keyword}")
            continue

        current_time = datetime.now(timezone.utc)

        # Prepare data for CSV with formatted date and sentiment analysis
        data_for_csv = []
        for title, link, date in result:
            published = _parse_published(date)
            if published is None:
                print(f"Skipping entry with unparseable date: {date!r}")
                continue

            # Drop entries that are older than 24 hours
            if (current_time - published).total_seconds() > 86400:
                continue

            # NOTE(review): periods in the title are replaced with commas
            # before scoring; the reason is not evident from this code.
            sentiment_scores = analyzer.polarity_scores(title.replace('.', ','))
            polarity_score = f"{sentiment_scores['pos'] - sentiment_scores['neg']:.2f}".replace('.', ',')
            compound_score = f"{sentiment_scores['compound']:.2f}".replace('.', ',')
            data_for_csv.append([keyword, title, published.strftime("%d.%m.%Y"), link, polarity_score, compound_score])

        # Save data to CSV
        save_to_csv(data_for_csv, csv_file_path)

# Run main() only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Drive/DATA/results_sentiment.csv
Data successfully appended to: /content/drive/My Dr