In [3]:
import feedparser
import pandas as pd
from urllib.parse import quote
from newspaper import Article

def get_article_text(url):
    """Fetch an article and return its extracted body text.

    Args:
        url: Address of the article to download.

    Returns:
        The text extracted by ``newspaper``'s ``Article`` (may be an empty
        string if nothing could be extracted), or ``None`` when the download
        or parse step raised.
    """
    try:
        article = Article(url, language='en')  # Specify 'en' for English
        # A single download + parse is enough: feeding the already downloaded
        # HTML back in and parsing again only re-processes the same document.
        article.download()
        article.parse()
    except Exception as e:
        # Best-effort scraping: one bad article must not stop the whole run,
        # so report the problem and signal failure to the caller with None.
        print(f"Error downloading or parsing article content: {e}")
        return None
    return article.text

def get_news(search_term, rss_feed_url, max_results=10, output_file='news_14_2_3.csv'):
    """Search an RSS feed, scrape each hit and append it to a CSV file.

    Every article whose text could be fetched is appended to ``output_file``
    immediately (so partial progress survives an interruption) and is also
    collected into the returned list.

    Args:
        search_term: Free-text query; it is URL-encoded before use.
        rss_feed_url: Base URL of the RSS search endpoint. It may or may not
            already end with ``?`` or ``&``, or contain a query string.
        max_results: Maximum number of articles to save. ``None`` means no
            limit; zero or a negative value saves nothing.
        output_file: CSV file the articles are appended to.

    Returns:
        A list of dicts with the keys ``Title``, ``Date Published``, ``Link``
        and ``Text``, one per saved article.
    """
    if max_results is not None and max_results <= 0:
        return []

    encoded_search_term = quote(search_term)

    # Pick the separator that yields a well-formed query string instead of
    # unconditionally prepending '&' (which produced '...?&q=').
    if rss_feed_url.endswith(('?', '&')):
        separator = ''
    elif '?' in rss_feed_url:
        separator = '&'
    else:
        separator = '?'
    base_url = f"{rss_feed_url}{separator}q={encoded_search_term}"

    # Fetching the RSS feed
    feed = feedparser.parse(base_url)

    saved = []
    for entry in feed.entries:
        link = getattr(entry, 'link', None)
        if link is None:
            # Nothing to download for an entry without a link.
            continue

        # Not every feed entry carries a title or a publication date; a
        # missing field must not abort the whole run.
        title = getattr(entry, 'title', None)
        date_published = getattr(entry, 'published', None)

        content = get_article_text(link)
        print("title", title)
        print("link", link)
        if content is None:
            continue

        record = {'Title': title, 'Date Published': date_published, 'Link': link, 'Text': content}

        # Save to the same CSV file in append mode, one article at a time.
        save_to_csv([record], output_file, mode='a')
        saved.append(record)

        # Count saved articles (not feed positions) so that failed downloads
        # neither use up the quota nor let the loop run past the limit.
        if max_results is not None and len(saved) >= max_results:
            break

    return saved

def save_to_csv(data, filename, mode='w'):
    """Write a list of row dicts to a CSV file.

    Args:
        data: List of dicts, one per row. If empty, nothing is written and a
            notice is printed instead.
        filename: Path of the CSV file.
        mode: File mode passed to ``DataFrame.to_csv`` (``'w'`` to overwrite,
            ``'a'`` to append).

    The header row is written when overwriting, and also when the target file
    does not exist yet or is empty, so a file built up purely through appends
    still starts with its column names.
    """
    import os

    if not data:
        print("No relevant articles found.")
        return

    # Appending to an existing, non-empty file must not repeat the header;
    # every other case starts a fresh file and therefore needs one.
    starts_fresh = not os.path.exists(filename) or os.path.getsize(filename) == 0
    write_header = mode == 'w' or starts_fresh

    df = pd.DataFrame(data)
    df.to_csv(filename, mode=mode, index=False, header=write_header)
    print(f'Data saved to {filename}')

if __name__ == "__main__":
    # Script entry point: configure the search, then run it.
    rss_feed_url = "https://news.google.com/rss/search?"  # RSS search endpoint to query
    search_term = "Iran Pakistan war"  # Query sent to the feed
    max_results = 999999  # Upper bound passed to get_news
    news_data = get_news(
        search_term=search_term,
        rss_feed_url=rss_feed_url,
        max_results=max_results,
    )


title Pakistan-Iran border tensions: A timeline - Al Jazeera English
link https://news.google.com/rss/articles/CBMiTGh0dHBzOi8vd3d3LmFsamF6ZWVyYS5jb20vbmV3cy8yMDI0LzEvMTcvYS1yZWNhcC1vZi12aW9sZW5jZS1wYWstaXJhbi1ib3JkZXLSAVBodHRwczovL3d3dy5hbGphemVlcmEuY29tL2FtcC9uZXdzLzIwMjQvMS8xNy9hLXJlY2FwLW9mLXZpb2xlbmNlLXBhay1pcmFuLWJvcmRlcg?oc=5
Data saved to news_14_2_3.csv
title Making Sense of Iran-Pakistan Cross-Border Strikes - United States Institute of Peace
link https://news.google.com/rss/articles/CBMiWWh0dHBzOi8vd3d3LnVzaXAub3JnL3B1YmxpY2F0aW9ucy8yMDI0LzAxL21ha2luZy1zZW5zZS1pcmFuLXBha2lzdGFuLWNyb3NzLWJvcmRlci1zdHJpa2Vz0gEA?oc=5
Data saved to news_14_2_3.csv
title Pakistan Iran War News Live: 'Iran should have sought assistance from Pakistan instead of taking action unilaterally' - Times of India
link https://news.google.com/rss/articles/CBMioQFodHRwczovL3RpbWVzb2ZpbmRpYS5pbmRpYXRpbWVzLmNvbS93b3JsZC9wYWtpc3Rhbi9wYWtpc3Rhbi1pcmFuLXdhci1uZXdzLWxpdmUtdXBkYXRlcy1pcmFuLXN0cmlrZXMtcGFraXN0YW4tbW