In [11]:
import requests

def get_most_popular_articles(n, year, month, day):
    """
    Fetch the top N most viewed English Wikipedia articles for a given date.

    Queries the Wikimedia pageviews "top" endpoint and drops 'Main_Page' as
    well as every title starting with 'Special:' (e.g. 'Special:Search').

    Args:
        n: Maximum number of (title, views) pairs to return.
        year: Year of the date, as a string or int (e.g. "2024").
        month: Month of the date; zero-padded to two characters if shorter.
        day: Day of the date; zero-padded to two characters if shorter.

    Returns:
        A list of at most n (title, views) tuples, in the order the API
        listed them.

    Raises:
        Exception: If the API answers with a status code other than 200.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure or timeout).
    """
    # The endpoint path expects two-character month and day segments, so
    # callers passing 5 or "5" would otherwise build an invalid URL.
    # zfill leaves longer values untouched.
    month = str(month).zfill(2)
    day = str(day).zfill(2)
    url = f"https://wikimedia.org/api/rest_v1/metrics/pageviews/top/en.wikipedia/all-access/{year}/{month}/{day}"
    headers = {
        'User-Agent': 'WikiSandbox/ManagingBigData'  # Replace with your project and email
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)

    if response.status_code != 200:
        raise Exception(f"Error fetching top articles: {response.status_code}")

    data = response.json()
    # Filter out 'Main_Page' and 'Special:' entries
    articles = [
        (article['article'], article['views'])
        for article in data['items'][0]['articles']
        if not article['article'].startswith("Special:") and article['article'] != "Main_Page"
    ]
    return articles[:n]


def get_article_text(title):
    """
    Fetch the summary extract of an English Wikipedia article.

    Despite the function name, this calls the REST "page/summary" endpoint
    and returns its 'extract' field, not the full article body.

    Args:
        title: Article title; spaces are converted to underscores.

    Returns:
        The summary extract, 'No content available' when the response has no
        'extract' field, or a "No content available for ..." message when
        the API answers 404.

    Raises:
        Exception: If the API answers with a status other than 200 or 404.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure or timeout).
    """
    # Imported locally so the function stays self-contained.
    from urllib.parse import quote

    title = title.replace(" ", "_")  # Replace spaces with underscores for API compatibility
    # Percent-encode the title as a single path segment; otherwise titles
    # containing '/', '?', '#' or '%' (e.g. "AC/DC") change the URL's
    # structure and the wrong resource is requested.
    encoded_title = quote(title, safe="")
    url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
    headers = {
        'User-Agent': 'WikiSandbox/ManagingBigData'  # Replace with your project and email
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)

    if response.status_code == 404:
        return f"No content available for {title}. (404 Error)"
    elif response.status_code != 200:
        raise Exception(f"Error fetching article {title}: {response.status_code}")

    data = response.json()
    return data.get('extract', 'No content available')

def main():
    """
    Print the five most viewed English Wikipedia articles for 2024-05-01,
    each followed by a preview of up to 500 characters of its summary.
    """
    year, month, day = "2024", "05", "01"  # Date to query
    n = 5  # How many articles to list

    print(f"Fetching the top {n} articles on Wikipedia for {year}-{month}-{day}...")
    ranked = get_most_popular_articles(n, year, month, day)

    for rank, (title, views) in enumerate(ranked, 1):
        print(f"\n#{rank}: {title} ({views} views)")
        print("Fetching article text...")
        text = get_article_text(title)
        if not text:
            print("Summary: No content available")
            continue
        # The ellipsis is appended even when the text is under 500 characters.
        print(f"Summary: {text[:500]}...")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Fetching the top 5 articles on Wikipedia for 2024-05-01...

#1: International_Workers'_Day (553048 views)
Fetching article text...
Summary: International Workers' Day, also known as Labour Day in some countries and often referred to as May Day, is a celebration of labourers and the working classes that is promoted by the international labour movement and occurs every year on 1 May, or the first Monday in May....

#2: Labour_Day (463629 views)
Fetching article text...
Summary: Labour Day is an annual day of celebration of the achievements of workers. It has its origins in the labour union movement, specifically the eight-hour day movement, which advocated eight hours for work, eight hours for recreation, and eight hours for rest....

#3: Indian_Premier_League (350581 views)
Fetching article text...
Summary: The Indian Premier League (IPL), also known as the TATA IPL for sponsorship reasons, is a men's Twenty20 (T20) cricket league held annually in India. Founded by the BCCI (the Board o