# Gnews Api

In [4]:
import os
import json
import urllib.request
from urllib.parse import quote
from urllib.error import HTTPError, URLError
from datetime import datetime
from typing import Optional, List


class GNewsDownloader:
    def __init__(self,
                 api_key: Optional[str] = None,
                 query: Optional[str] = None,
                 lang: str = "en",
                 country: Optional[str] = None,
                 start_date: Optional[str] = None,
                 end_date: Optional[str] = None,
                 max_results: int = 10,
                 workdir: Optional[str] = None):
        """
        初始化 GNewsDownloader
        """
        self.api_key = api_key or os.getenv("GNEWS_API_KEY")
        if not self.api_key:
            raise ValueError("API key is required. Please set it via parameter or GNEWS_API_KEY environment variable.")
        
        self.query = query
        if not self.query:
            raise ValueError("Query parameter is required.")
        
        self.lang = lang
        self.country = country
        self.start_date = start_date
        self.end_date = end_date
        self.max_results = max_results

        # 工作目录
        self.workdir = workdir or "./gnews_data"
        if not os.path.exists(self.workdir):
            os.makedirs(self.workdir)
        if not os.access(self.workdir, os.W_OK):
            raise ValueError(f"Work directory {self.workdir} is not writable.")

        # 构建URL
        self.base_url = "https://gnews.io/api/v4/search"
        self.request_url = self._build_request_url()

    def _build_request_url(self) -> str:
        """
        构建请求 URL，包含查询参数、语言、国家和时间区间。
        """
        url = f"{self.base_url}?q={quote(self.query)}&lang={self.lang}&max={self.max_results}&apikey={self.api_key}"
        if self.country:
            url += f"&country={self.country}"
        if self.start_date:
            url += f"&from={self.start_date}"
        if self.end_date:
            url += f"&to={self.end_date}"
        return url

    def fetch_news(self) -> List[dict]:
        """
        从 GNews API 获取新闻数据
        """
        print(f"Fetching news from: {self.request_url}")
        try:
            with urllib.request.urlopen(self.request_url) as response:
                data = json.loads(response.read().decode("utf-8"))
                return data.get("articles", [])
        except HTTPError as e:
            print(f"HTTP Error: {e.code} - {e.reason}")
        except URLError as e:
            print(f"URL Error: {e.reason}")
        except Exception as e:
            print(f"Unexpected error: {e}")
        return []

    def save_news(self, articles: List[dict]) -> bool:
        """
        保存新闻数据到本地 JSON 文件
        """
        if not articles:
            print("No articles to save.")
            return False

        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
        file_path = os.path.join(self.workdir, f"news_{timestamp}.json")

        try:
            with open(file_path, "w", encoding="utf-8") as file:
                json.dump(articles, file, ensure_ascii=False, indent=4)
            print(f"Saved {len(articles)} articles to {file_path}")
            return True
        except Exception as e:
            print(f"Error saving articles: {e}")
            return False

    def run(self):
        """
        主流程：获取新闻并保存到本地
        """
        articles = self.fetch_news()
        if articles:
            for article in articles:
                print(f"Title: {article['title']}")
                print(f"Description: {article['description']}")
                print(f"Published At: {article['publishedAt']}")
                print(f"Source: {article['source']['name']}")
                print(f"URL: {article['url']}")
                print("-" * 80)
            if self.save_news(articles):
                print("Articles saved successfully.")
            else:
                print("Failed to save articles.")
        else:
            print("No articles fetched.")


# 测试代码
if __name__ == "__main__":
    API_KEY = "a91fd1a70f894c56afe397fc0ac1c074"

    gnews_downloader = GNewsDownloader(
        api_key=API_KEY,
        query="Google Pixel",
        lang="en",
        country="us",
        start_date="2022-09-01",  # 开始日期
        end_date="2022-09-30",    # 结束日期
        max_results=5,
        workdir="./gnews_data"
    )

    gnews_downloader.run()

Fetching news from: https://gnews.io/api/v4/search?q=Google%20Pixel&lang=en&max=5&apikey=a91fd1a70f894c56afe397fc0ac1c074&country=us&from=2022-09-01&to=2022-09-30
Title: Samsung Galaxy S25 Ultra vs Google Pixel 9 Pro XL: A friendly but heated rivalry
Description: Here's a look ahead at Google's recently announced Pixel 9 Pro XL and how it might compare to the more distant Galaxy S25 Ultra from Samsung.
Published At: 2025-01-22T18:00:16Z
Source: PhoneArena
URL: https://www.phonearena.com/reviews/galaxy-s25-ultra-vs-pixel-9-pro-xl_id6440
--------------------------------------------------------------------------------
Title: Gemini gets multi-action commands, Astra on Pixel & Galaxy S25
Description: As part of Samsung’s Galaxy S25 launch, Google announced the latest Android, Circle to Search, and Gemini app features...
Published At: 2025-01-22T18:00:00Z
Source: 9to5Google
URL: https://9to5google.com/2025/01/22/gemini-samsung-galaxy-s25-launch/
---------------------------------------------

  timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")


# Google News Api

In [7]:
import os
import json
import requests
from datetime import datetime, timezone


class GoogleNewsDownloader:
    def __init__(self,
                 api_key: str,
                 query: str,
                 max_results: int = 10,
                 start_date: str = None,
                 end_date: str = None,
                 cache_dir: str = "./cache"):
        """
        初始化 GoogleNewsDownloader
        """
        self.api_key = api_key
        self.query = query
        self.max_results = max_results
        self.start_date = start_date  # 格式: MM/DD/YYYY
        self.end_date = end_date  # 格式: MM/DD/YYYY
        self.cache_dir = cache_dir

        # API 基础 URL
        self.base_url = "https://www.searchapi.io/api/v1/search"

        # 确保缓存目录存在
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)
        if not os.access(self.cache_dir, os.W_OK):
            raise ValueError(f"Cache directory {self.cache_dir} is not writable.")

    def _get_cache_path(self) -> str:
        """
        根据查询生成缓存文件路径
        """
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d")
        filename = f"{self.query.replace(' ', '_')}_{timestamp}.json"
        return os.path.join(self.cache_dir, filename)

    def fetch_news(self) -> dict:
        """
        从 Google News API 获取新闻数据
        """
        params = {
            "engine": "google_news",
            "q": self.query,
            "num": self.max_results,
            "api_key": self.api_key
        }

        # 如果设置了时间范围，加入到参数中
        if self.start_date:
            params["time_period_min"] = self.start_date
        if self.end_date:
            params["time_period_max"] = self.end_date

        print(f"Fetching news from: {self.base_url}")
        try:
            response = requests.get(self.base_url, params=params, timeout=10)
            if response.status_code == 200:
                print("Request successful!")
                return response.json()
            else:
                print(f"Request failed with status code: {response.status_code}")
        except requests.exceptions.RequestException as e:
            print(f"An error occurred during the request: {e}")
        return None

    def save_to_cache(self, data: dict):
        """
        将新闻数据保存到缓存文件
        """
        cache_path = self._get_cache_path()
        try:
            with open(cache_path, "w", encoding="utf-8") as file:
                json.dump(data, file, ensure_ascii=False, indent=4)
            print(f"Saved news data to cache: {cache_path}")
        except Exception as e:
            print(f"Failed to save cache: {e}")

    def load_from_cache(self) -> dict:
        """
        从缓存文件加载新闻数据
        """
        cache_path = self._get_cache_path()
        if os.path.exists(cache_path):
            print(f"Using cached data from: {cache_path}")
            try:
                with open(cache_path, "r", encoding="utf-8") as file:
                    return json.load(file)
            except Exception as e:
                print(f"Failed to load cache: {e}")
        return None

    def process_news(self, news_data: dict):
        """
        处理并打印新闻数据
        """
        if not news_data or "organic_results" not in news_data:
            print("No news data available.")
            return

        for article in news_data["organic_results"]:
            print(f"Title: {article.get('title')}")
            print(f"Source: {article.get('source')}")
            print(f"Date: {article.get('date')}")
            print(f"URL: {article.get('link')}")
            print("-" * 80)

    def run(self):
        """
        主流程：尝试从缓存加载数据，若缓存不存在则从 API 获取
        """
        # 尝试从缓存加载数据
        news_data = self.load_from_cache()
        if not news_data:
            # 如果缓存不可用，则从 API 获取数据
            news_data = self.fetch_news()
            if news_data:
                # 保存数据到缓存
                self.save_to_cache(news_data)

        # 处理新闻数据
        self.process_news(news_data)


# 测试代码
if __name__ == "__main__":
    # 替换为你的 Google News API 密钥
    API_KEY = "jTKr1YDvcC2PBUAtFmSRsk7q"

    # 初始化下载器
    downloader = GoogleNewsDownloader(
        api_key=API_KEY,
        query="Jeff Bezos news",  # 查询关键字
        max_results=5,  # 每次请求的最大新闻数量
        start_date="01/01/2023",  # 搜索的开始时间（格式: MM/DD/YYYY）
        end_date="12/31/2023",  # 搜索的结束时间（格式: MM/DD/YYYY）
        cache_dir="./cache"  # 缓存目录
    )

    # 运行下载器
    downloader.run()

Fetching news from: https://www.searchapi.io/api/v1/search
Request successful!
Saved news data to cache: ./cache\Jeff_Bezos_news_20250123.json
Title: A Decade Ago, Jeff Bezos Bought a Newspaper. Now He’s Paying Attention to It Again. (Published 2023)
Source: The New York Times
Date: Jul 22, 2023
URL: https://www.nytimes.com/2023/07/22/business/media/jeff-bezos-washington-post.html
--------------------------------------------------------------------------------
Title: Amazon recreated the garage where Jeff Bezos started the company in 1994. Here's what it looks like.
Source: AboutAmazon.com
Date: Oct 18, 2023
URL: https://www.aboutamazon.com/news/workplace/first-amazon-office-jeff-bezos-garage
--------------------------------------------------------------------------------
Title: Exclusive | Billionaire Jeff Bezos engaged to Lauren Sánchez after nearly 5 years together
Source: Page Six
Date: May 22, 2023
URL: https://pagesix.com/2023/05/22/jeff-bezos-engaged-to-lauren-sanchez-after-near

# Newsapi

In [9]:
import os
import json
import urllib.request
from urllib.parse import quote
from urllib.error import HTTPError, URLError
from datetime import datetime
from typing import Optional, List


class NewsAPIDownloader:
    def __init__(self,
                 api_key: Optional[str] = None,
                 query: Optional[str] = None,
                 lang: str = "en",
                 from_date: Optional[str] = None,
                 to_date: Optional[str] = None,
                 sort_by: str = "publishedAt",
                 max_results: int = 10,
                 workdir: Optional[str] = None):
        """
        初始化 NewsAPIDownloader
        """
        self.api_key = api_key or os.getenv("NEWS_API_KEY")
        if not self.api_key:
            raise ValueError("API key is required. Please set it via parameter or NEWS_API_KEY environment variable.")
        
        self.query = query
        if not self.query:
            raise ValueError("Query parameter is required.")
        
        self.lang = lang
        self.from_date = from_date
        self.to_date = to_date
        self.sort_by = sort_by
        self.max_results = max_results

        # 工作目录
        self.workdir = workdir or "./news_data"
        if not os.path.exists(self.workdir):
            os.makedirs(self.workdir)
        if not os.access(self.workdir, os.W_OK):
            raise ValueError(f"Work directory {self.workdir} is not writable.")

        # 构建URL
        self.base_url = "https://newsapi.org/v2/everything"
        self.request_url = self._build_request_url()

    def _build_request_url(self) -> str:
        """
        构建请求 URL，包含查询参数、语言和时间区间。
        """
        url = f"{self.base_url}?q={quote(self.query)}&language={self.lang}&sortBy={self.sort_by}&pageSize={self.max_results}&apiKey={self.api_key}"
        if self.from_date:
            url += f"&from={self.from_date}"
        if self.to_date:
            url += f"&to={self.to_date}"
        return url

    def fetch_news(self) -> List[dict]:
        """
        从 News API 获取新闻数据
        """
        print(f"Fetching news from: {self.request_url}")
        try:
            with urllib.request.urlopen(self.request_url) as response:
                data = json.loads(response.read().decode("utf-8"))
                if data.get("status") == "ok":
                    return data.get("articles", [])
                else:
                    print(f"Error in response: {data.get('message')}")
        except HTTPError as e:
            print(f"HTTP Error: {e.code} - {e.reason}")
        except URLError as e:
            print(f"URL Error: {e.reason}")
        except Exception as e:
            print(f"Unexpected error: {e}")
        return []

    def save_news(self, articles: List[dict]) -> bool:
        """
        保存新闻数据到本地 JSON 文件
        """
        if not articles:
            print("No articles to save.")
            return False

        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
        file_path = os.path.join(self.workdir, f"news_{timestamp}.json")

        try:
            with open(file_path, "w", encoding="utf-8") as file:
                json.dump(articles, file, ensure_ascii=False, indent=4)
            print(f"Saved {len(articles)} articles to {file_path}")
            return True
        except Exception as e:
            print(f"Error saving articles: {e}")
            return False

    def run(self):
        """
        主流程：获取新闻并保存到本地
        """
        articles = self.fetch_news()
        if articles:
            for article in articles:
                print(f"Title: {article['title']}")
                print(f"Description: {article['description']}")
                print(f"Published At: {article['publishedAt']}")
                print(f"Source: {article['source']['name']}")
                print(f"URL: {article['url']}")
                print("-" * 80)
            if self.save_news(articles):
                print("Articles saved successfully.")
            else:
                print("Failed to save articles.")
        else:
            print("No articles fetched.")


# 测试代码
if __name__ == "__main__":
    API_KEY = "9dfa4228ed7f4f7e8522d1ef7d9fb6b2"

    news_downloader = NewsAPIDownloader(
        api_key=API_KEY,
        query="Google Pixel",
        lang="en",
        from_date="2025-01-01",  # 开始日期
        to_date="2025-01-23",    # 结束日期
        sort_by="popularity",    # 排序方式
        max_results=5,           # 最大结果数
        workdir="./news_data"    # 工作目录
    )

    news_downloader.run()

Fetching news from: https://newsapi.org/v2/everything?q=Google%20Pixel&language=en&sortBy=popularity&pageSize=5&apiKey=9dfa4228ed7f4f7e8522d1ef7d9fb6b2&from=2025-01-01&to=2025-01-23
Title: Google Pixel 4a's update kills its battery life on purpose
Description: Google’s Pixel 4a has long been considered a great smartphone for those on a budget
, but it just received a software update that calls that into question. The update lowers the reported battery life
. This isn’t a side-effect of some new software. This is …
Published At: 2025-01-08T16:45:32Z
Source: Yahoo Entertainment
URL: https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_e1bb03c5-a7c3-47a0-b6ee-1f32e8153ea0
--------------------------------------------------------------------------------
Title: Hisense introduces its first-ever consumer microLED TV
Description: Hisense just introduced its first consumer microLED television at CES 2025 in Las Vegas. The 136MX includes a high-density array of over 24.88 microscop

  timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
