In [2]:
import requests
import json
import pandas as pd
from datetime import datetime, timedelta
import time
import os
from typing import Dict, List, Optional

class GuardianRealMadridAPI:
    """
    Guardian API client to fetch Real Madrid news
    84,000 requests/week - 12,000 requests/day
    """
    
    def __init__(self, api_key: str):
        self.api_key = api_key

        self.base_url = "https://content.guardianapis.com"
        self.rate_limit_delay = 0.1  # 100ms between requests (safe for rate limits)
        
    def search_real_madrid_news(self, 
                               days_back: int = 7,
                               page_size: int = 50,
                               max_pages: int = 10) -> List[Dict]:
        """
        Search for Real Madrid news in The Guardian
        
        Args:
            days_back: Days back to search
            page_size: Articles per page (max 50)
            max_pages: Maximum pages to fetch
        """
        
        # Date from when to search
        from_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
        
        articles = []
        
        for page in range(1, max_pages + 1):
            print(f"📰 Fetching page {page}...")
            
            params = {
                'api-key': self.api_key,
                'q': 'Real Madrid OR "Real Madrid" OR "Santiago Bernabeu"',
                'section': 'football',  # Sports section
                'from-date': from_date,
                'page-size': page_size,
                'page': page,
                'show-fields': 'headline,byline,body,thumbnail,publication',
                'show-tags': 'sport',
                'order-by': 'newest'
            }
            
            try:
                response = requests.get(f"{self.base_url}/search", params=params)
                response.raise_for_status()
                
                data = response.json()
                page_articles = data['response']['results']
                
                if not page_articles:
                    print(f"✅ No more articles on page {page}")
                    break
                    
                articles.extend(page_articles)
                print(f"   📄 Found {len(page_articles)} articles")
                
                # Rate limiting - be respectful with the API
                time.sleep(self.rate_limit_delay)
                
            except requests.exceptions.RequestException as e:
                print(f"❌ Error on page {page}: {e}")
                break
                
        print(f"🎯 Total: {len(articles)} articles fetched")
        return articles
    
    def get_real_madrid_transfers(self, days_back: int = 30) -> List[Dict]:
        """
        Search specifically for Real Madrid transfer news
        """
        
        from_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
        
        params = {
            'api-key': self.api_key,
            'q': 'Real Madrid AND (transfer OR signing OR "new player" OR "signs for")',
            'section': 'football',
            'from-date': from_date,
            'page-size': 50,
            'show-fields': 'headline,byline,body,thumbnail,publication',
            'order-by': 'newest'
        }
        
        try:
            response = requests.get(f"{self.base_url}/search", params=params)
            response.raise_for_status()
            return response.json()['response']['results']
        except Exception as e:
            print(f"❌ Error fetching transfers: {e}")
            return []
    
    def get_match_results(self, days_back: int = 14) -> List[Dict]:
        """
        Search for Real Madrid match results
        """
        
        from_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
        
        params = {
            'api-key': self.api_key,
            'q': 'Real Madrid AND (result OR "match report" OR "Real Madrid" AND score)',
            'section': 'football',
            'from-date': from_date,
            'page-size': 50,
            'show-fields': 'headline,byline,body,thumbnail',
            'order-by': 'newest'
        }
        
        try:
            response = requests.get(f"{self.base_url}/search", params=params)
            response.raise_for_status()
            return response.json()['response']['results']
        except Exception as e:
            print(f"❌ Error fetching match results: {e}")
            return []
    
    def clean_article_data(self, articles: List[Dict]) -> pd.DataFrame:
        """
        Clean and structure article data into a pandas DataFrame
        """
        
        cleaned_articles = []
        
        for article in articles:
            fields = article.get('fields', {})
            
            cleaned_article = {
                'id': article.get('id'),
                'title': fields.get('headline', article.get('webTitle', '')),
                'url': article.get('webUrl'),
                'publication_date': article.get('webPublicationDate'),
                'section': article.get('sectionName'),
                'author': fields.get('byline', 'Unknown'),
                'body_text': fields.get('body', ''),
                'thumbnail': fields.get('thumbnail'),
                'tags': [tag['webTitle'] for tag in article.get('tags', [])]
            }
            
            cleaned_articles.append(cleaned_article)
        
        return pd.DataFrame(cleaned_articles)
    
    def save_to_csv(self, df: pd.DataFrame, filename: str = None):
        """
        Save articles to CSV file
        """
        
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f'real_madrid_guardian_{timestamp}.csv'
        
        df.to_csv(filename, index=False, encoding='utf-8')
        print(f"💾 Saved {len(df)} articles to {filename}")
        
    def get_daily_summary(self, days_back: int = 1) -> Dict:
        """
        Get a daily summary of Real Madrid news
        """
        
        articles = self.search_real_madrid_news(days_back=days_back, max_pages=3)
        df = self.clean_article_data(articles)
        
        summary = {
            'total_articles': len(df),
            'latest_article': df.iloc[0]['title'] if len(df) > 0 else None,
            'publication_date': df.iloc[0]['publication_date'] if len(df) > 0 else None,
            'authors': df['author'].unique().tolist(),
            'articles': df.to_dict('records')
        }
        
        return summary

# Example usage and setup
def main():
    """
    Example usage of the Guardian Real Madrid API client
    """
    
    # 1. Get your free API key from: https://open-platform.theguardian.com/access/
    API_KEY = "082ab523-08e1-4db1-a93e-14f1404c1c9d"  # Replace with your actual API key
    
    # Initialize the client
    guardian = GuardianRealMadridAPI(API_KEY)
    
    print("🚀 Guardian API - Real Madrid News Fetcher")
    print("=" * 50)
    
    # Example 1: Get latest Real Madrid news (last 7 days)
    print("\n📰 Latest Real Madrid News (7 days):")
    articles = guardian.search_real_madrid_news(days_back=7, max_pages=3)
    
    if articles:
        # Clean and structure the data
        df = guardian.clean_article_data(articles)
        
        # Display summary
        print(f"\n📊 Summary:")
        print(f"   • Total articles: {len(df)}")
        print(f"   • Date range: Last 7 days")
        print(f"   • Latest article: {df.iloc[0]['title'][:80]}...")
        
        # Save to CSV
        guardian.save_to_csv(df)
        
        # Show top 5 headlines
        print(f"\n🔥 Top 5 Headlines:")
        for i, row in df.head(5).iterrows():
            print(f"   {i+1}. {row['title']}")
            print(f"      📅 {row['publication_date'][:10]} | ✍️  {row['author']}")
    
    # Example 2: Get transfer news (last 30 days)
    print(f"\n💰 Transfer News (30 days):")
    transfers = guardian.get_real_madrid_transfers(days_back=30)
    
    if transfers:
        transfer_df = guardian.clean_article_data(transfers)
        print(f"   • Found {len(transfer_df)} transfer-related articles")
        
        for i, row in transfer_df.head(3).iterrows():
            print(f"   🔄 {row['title']}")
    
    # Example 3: Get match results (last 14 days)
    print(f"\n⚽ Match Results (14 days):")
    matches = guardian.get_match_results(days_back=14)
    
    if matches:
        match_df = guardian.clean_article_data(matches)
        print(f"   • Found {len(match_df)} match-related articles")
        
        for i, row in match_df.head(3).iterrows():
            print(f"   🏆 {row['title']}")
    
    # Example 4: Daily summary
    print(f"\n📈 Daily Summary:")
    summary = guardian.get_daily_summary(days_back=1)
    print(f"   • Articles today: {summary['total_articles']}")
    
    if summary['latest_article']:
        print(f"   • Latest: {summary['latest_article'][:60]}...")

if __name__ == "__main__":
    main()

# Rate limit monitoring function
def monitor_rate_limits(guardian_client, requests_made: int):
    """
    Monitor your API usage to stay within limits
    """
    
    daily_limit = 12000
    weekly_limit = 84000
    
    remaining_daily = daily_limit - requests_made
    weekly_estimate = requests_made * 7  # Rough weekly estimate
    
    print(f"\n📊 Rate Limit Status:")
    print(f"   • Requests made today: {requests_made}")
    print(f"   • Remaining today: {remaining_daily}")
    print(f"   • Weekly estimate: {weekly_estimate}/84,000")
    
    if requests_made > daily_limit * 0.8:
        print("   ⚠️  Warning: Approaching daily limit!")
    
    if weekly_estimate > weekly_limit * 0.8:
        print("   ⚠️  Warning: Approaching weekly limit!")

# Installation requirements
"""
To install required packages:

pip install requests pandas

To get your free Guardian API key:
1. Go to: https://open-platform.theguardian.com/access/
2. Register for free account
3. Get your API key
4. Replace "your-guardian-api-key-here" with your actual key

Rate Limits:
- 12,000 requests per day
- 84,000 requests per week
- Free forever with registration
"""

🚀 Guardian API - Real Madrid News Fetcher

📰 Latest Real Madrid News (7 days):
📰 Fetching page 1...
   📄 Found 50 articles
📰 Fetching page 2...
   📄 Found 16 articles
📰 Fetching page 3...
❌ Error on page 3: 400 Client Error: Bad Request for url: https://content.guardianapis.com/search?api-key=082ab523-08e1-4db1-a93e-14f1404c1c9d&q=Real+Madrid+OR+%22Real+Madrid%22+OR+%22Santiago+Bernabeu%22&section=football&from-date=2025-06-30&page-size=50&page=3&show-fields=headline%2Cbyline%2Cbody%2Cthumbnail%2Cpublication&show-tags=sport&order-by=newest
🎯 Total: 66 articles fetched

📊 Summary:
   • Total articles: 66
   • Date range: Last 7 days
   • Latest article: Diana Gomes rescues Portugal dream with late equaliser to deny Italy victory...
💾 Saved 66 articles to real_madrid_guardian_20250707_225302.csv

🔥 Top 5 Headlines:
   1. Diana Gomes rescues Portugal dream with late equaliser to deny Italy victory
      📅 2025-07-07 | ✍️  Sophie Downey
   2. Sporting demand guaranteed €70m as Arsenal clos

'\nTo install required packages:\n\npip install requests pandas\n\nTo get your free Guardian API key:\n1. Go to: https://open-platform.theguardian.com/access/\n2. Register for free account\n3. Get your API key\n4. Replace "your-guardian-api-key-here" with your actual key\n\nRate Limits:\n- 12,000 requests per day\n- 84,000 requests per week\n- Free forever with registration\n'