In [3]:
import requests
import pandas as pd
import logging

# Initialize logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def get_news(api_key, query="Nifty", language="en",
             country="in", start_date=None, end_date=None):
    """
    Fetches relevant news articles using the Newsdata API.

    Args:
        api_key (str): Your Newsdata API key.
        query (str, optional): The search query. Defaults to "Nifty".
        language (str, optional): The language of the articles. Defaults to "en".
        country (str, optional): The country code (ISO 3166-1 alpha-2) to filter by. Defaults to "in" (India).
        start_date (str, optional): The start date in YYYY-MM-DD format.
        end_date (str, optional): The end date in YYYY-MM-DD format.

    Returns:
        pd.DataFrame: A DataFrame containing the fetched news data (title, description, url, publishedAt),
                      or None if an error occurs.
    """

    url = "https://api.newsdata.io/v1/news"
    params = {
        "apikey": api_key,
        "q": query,
        "language": language,
        "country": country,
    }

    if start_date and end_date:
        try:
            # Validate date format (YYYY-MM-DD)
            from datetime import datetime
            datetime.strptime(start_date, "%Y-%m-%d")
            datetime.strptime(end_date, "%Y-%m-%d")
            params["from"] = start_date
            params["to"] = end_date
        except ValueError:
            logger.error("Invalid start_date or end_date format. Use YYYY-MM-DD.")
            return None

    try:
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raise exception for non-200 status codes

        data = response.json()

        if data["status"] == "ok":
            articles = data["results"]

            # Extract title, description, url, and publishedAt
            news_data = []
            for article in articles:
                title = article["title"]
                description = article["description"] if "description" in article else ""
                url = article["link"]
                publishedAt = article["pubDate"]
                news_data.append({"title": title, "description": description, "url": url, "publishedAt": publishedAt})

            return pd.DataFrame(news_data)
        else:
            logger.error(f"API error: {data['message']}")
            return None

    except requests.exceptions.RequestException as e:
        logger.error(f"An error occurred while fetching news: {e}")
        return None


def generate_news_file(api_key, start_date, end_date, output_format="csv"):
    """
    Generates a file containing news headlines for a specified date range.

    Args:
        api_key (str): Your Newsdata API key.
        start_date (str): The start date in YYYY-MM-DD format.
        end_date (str): The end date in YYYY-MM-DD format.
        output_format (str, optional): The desired output format (csv, json). Defaults to "csv".
    """

    news_data = get_news(api_key, start_date=start_date, end_date=end_date)

    if news_data is not None:
        if output_format == "csv":
            news_data.to_csv("news_data24aug.csv", index=False)
        elif output_format == "json":
            news_data.to_json("news_data.json", orient="records")
        else:
            logger.error(f"Invalid output format: {output_format}")
    else:
        logger.error("Error: Failed to retrieve news data.")


# Example usage (replace with your actual Newsdata API key)
api_key = "d1301d016d024dcb819e57ade041fe67"  # Replace with your Newsdata API key
start_date = "2024-08-24"
end_date = "2024-08-24"
generate_news_file(api_key, start_date, end_date)

2024-09-22 16:25:10,782 - ERROR - An error occurred while fetching news: HTTPSConnectionPool(host='api.newsdata.io', port=443): Max retries exceeded with url: /v1/news?apikey=d1301d016d024dcb819e57ade041fe67&q=Nifty&language=en&country=in&from=2024-08-24&to=2024-08-24 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000002384D419A30>: Failed to resolve 'api.newsdata.io' ([Errno 11001] getaddrinfo failed)"))
2024-09-22 16:25:10,783 - ERROR - Error: Failed to retrieve news data.


In [2]:
%pip install newsapi-python

Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Downloading newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: newsapi-python
Successfully installed newsapi-python-0.2.7
Note: you may need to restart the kernel to use updated packages.


In [5]:
%pip install requests panda logger

Collecting logger
  Downloading logger-1.4.tar.gz (1.2 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: logger
  Building wheel for logger (setup.py): started
  Building wheel for logger (setup.py): finished with status 'done'
  Created wheel for logger: filename=logger-1.4-py3-none-any.whl size=1762 sha256=299d35a23c72334671fbc8860c3ea64447462d05f8f792a2b25831f4d602c6ce
  Stored in directory: c:\users\asus\appdata\local\pip\cache\wheels\cc\ef\15\aadfb106e1cc7ac1d668efc189bcd98c444211847f7d91bd02
Successfully built logger
Installing collected packages: logger
Successfully installed logger-1.4
Note: you may need to restart the kernel to use updated packages.
