In [1]:
import requests

def get_most_popular_articles(n, year, month, day, timeout=10):
    """
    Fetches the top N most viewed English Wikipedia articles for a given date.

    Queries the Wikimedia pageviews "top" REST endpoint and drops 'Main_Page'
    plus every entry whose title starts with 'Special:' (e.g. 'Special:Search').

    Args:
        n: Maximum number of (title, views) pairs to return. They are taken
            from the front of the filtered list, in the order the API
            returned them.
        year: Year as a string or int (e.g. "2024" or 2024).
        month: Month as a string or int. Zero-padded to two digits, so 5 and
            "05" build the same URL.
        day: Day as a string or int. Zero-padded the same way as `month`.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of (article_title, view_count) tuples, at most `n` long.
        Empty when the response carries no items.

    Raises:
        Exception: If the HTTP status code is not 200.
    """
    # Normalise to two-digit path segments. zfill leaves values that are
    # already padded (or longer, non-numeric values) unchanged.
    month = str(month).zfill(2)
    day = str(day).zfill(2)
    url = f"https://wikimedia.org/api/rest_v1/metrics/pageviews/top/en.wikipedia/all-access/{year}/{month}/{day}"
    headers = {
        'User-Agent': 'WikiSandbox/ManagingBigData'  # Replace with your project and email
    }
    # Without a timeout a stalled connection would block this call forever.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Error fetching top articles: {response.status_code}")

    # Guard against a successful response that has no 'items' entry at all.
    items = response.json().get('items') or []
    if not items:
        return []

    # Filter out 'Main_Page' and 'Special:' entries
    articles = [
        (article['article'], article['views'])
        for article in items[0]['articles']
        if not article['article'].startswith("Special:") and article['article'] != "Main_Page"
    ]
    return articles[:n]


def get_article_text(title, timeout=10):
    """
    Fetches the summary extract of an English Wikipedia article.

    Despite the function name, this calls the REST `page/summary` endpoint
    and returns its 'extract' field, not the full article body.

    Args:
        title: Article title. Spaces are replaced with underscores.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The 'extract' string from the response, or 'No content available'
        when the response has no such field. On HTTP 404 a
        "No content available for <title>. (404 Error)" message is returned
        instead of raising.

    Raises:
        Exception: If the HTTP status code is neither 200 nor 404.
    """
    from urllib.parse import quote  # local import keeps this function self-contained

    title = title.replace(" ", "_")  # Replace spaces with underscores for API compatibility
    # Percent-encode the title, including '/', so that titles containing
    # '/', '?' or '#' stay inside a single path segment of the URL.
    url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{quote(title, safe='')}"
    headers = {
        'User-Agent': 'WikiSandbox/ManagingBigData'  # Replace with your project and email
    }
    # Without a timeout a stalled connection would block this call forever.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code == 404:
        return f"No content available for {title}. (404 Error)"
    elif response.status_code != 200:
        raise Exception(f"Error fetching article {title}: {response.status_code}")

    data = response.json()
    return data.get('extract', 'No content available')

def main(n=1000, year="2024", month="05", day="01"):
    """
    Prints the top `n` Wikipedia articles for a date with a summary preview of each.

    Calls get_most_popular_articles once, then get_article_text once per
    returned article, printing the rank, title, view count and up to the
    first 500 characters of the returned text.

    Args:
        n: Number of top articles to fetch.
        year: Year of the date to query.
        month: Month of the date to query.
        day: Day of the date to query.
    """
    preview_len = 500  # Maximum number of summary characters shown per article

    print(f"Fetching the top {n} articles on Wikipedia for {year}-{month}-{day}...")
    popular_articles = get_most_popular_articles(n, year, month, day)

    for idx, (title, views) in enumerate(popular_articles, start=1):
        print(f"\n#{idx}: {title} ({views} views)")
        print("Fetching article text...")
        text = get_article_text(title)
        if not text:
            print("Summary: No content available")
            continue
        # Only mark the preview as truncated when characters were actually cut off.
        suffix = "..." if len(text) > preview_len else ""
        print(f"Summary: {text[:preview_len]}{suffix}")

# Run the demo only when this code is executed directly; importing it as a
# module leaves main() uncalled.
if __name__ == "__main__":
    main()


Fetching the top 1000 articles on Wikipedia for 2024-05-01...

#1: International_Workers'_Day (553048 views)
Fetching article text...
Summary: International Workers' Day, also known as Labour Day in some countries and often referred to as May Day, is a celebration of labourers and the working classes that is promoted by the international labour movement and occurs every year on 1 May, or the first Monday in May....

#2: Labour_Day (463629 views)
Fetching article text...
Summary: Labour Day is an annual day of celebration of the achievements of workers. It has its origins in the labour union movement, specifically the eight-hour day movement, which advocated eight hours for work, eight hours for recreation, and eight hours for rest....

#3: Indian_Premier_League (350581 views)
Fetching article text...
Summary: The Indian Premier League (IPL), also known as the TATA IPL for sponsorship reasons, is a men's Twenty20 (T20) cricket league held annually in India. Founded by the BCCI (the Boar

In [5]:
import requests

def get_full_article(title, timeout=10):
    """
    Fetches the full content of a Wikipedia article in wikitext format.

    Uses the MediaWiki Action API (`action=query`, `prop=revisions`) and
    returns the content of the 'main' slot of the first revision of the
    first page in the response.

    Args:
        title: Article title. Spaces are replaced with underscores.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The wikitext string, or "No content available for <title>." when the
        response contains no page with a 'revisions' entry.

    Raises:
        Exception: If the HTTP status code is not 200, or if the JSON body
            carries an 'error' object.
    """
    title = title.replace(" ", "_")  # Replace spaces with underscores for API compatibility
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "revisions",
        "rvslots": "*",
        "rvprop": "content",
        "format": "json",
        "titles": title
    }
    headers = {
        'User-Agent': 'WikiSandbox/ManagingBigData'  # Replace with your project info
    }

    # Without a timeout a stalled connection would block this call forever.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Error fetching article {title}: {response.status_code}")

    data = response.json()

    # The Action API can report a failure inside the JSON body even when the
    # HTTP status is 200; surface it instead of failing later on a missing key.
    error = data.get("error")
    if error:
        info = error.get("info", "unknown API error") if isinstance(error, dict) else error
        raise Exception(f"Error fetching article {title}: {info}")

    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})  # Get the first page, if any
    if "revisions" not in page:
        return f"No content available for {title}."

    return page["revisions"][0]["slots"]["main"]["*"]

def main(article_title="Artificial intelligence", preview_chars=1000):
    """
    Fetches one article via get_full_article and prints a preview of it.

    Only the first `preview_chars` characters are printed, so the whole
    article is not dumped into the cell output.

    Args:
        article_title: Title of the article to fetch.
        preview_chars: Number of leading characters of the content to print.
    """
    print(f"Fetching full article for: {article_title}")
    content = get_full_article(article_title)
    print(content[:preview_chars])  # Print only the leading preview of the article


# Run the demo only when this code is executed directly; importing it as a
# module leaves main() uncalled.
if __name__ == "__main__":
    main()


Fetching full article for: Artificial intelligence
{{Short description|Intelligence of machines}}
{{Redirect|AI|other uses|AI (disambiguation)|and|Artificial intelligence (disambiguation)|and|Intelligent agent}}<!-- related -->
{{Use dmy dates|date=July 2023}}{{Pp|small=yes}}<!-- details only a Wikipedian could love -->
{{Artificial intelligence}}<!-- portal -->

<!-- DEFINITIONS -->
'''Artificial intelligence''' ('''AI'''), in its broadest sense, is [[intelligence]] exhibited by [[machine]]s, particularly [[computer|computer systems]]. It is a [[field of research]] in [[computer science]] that develops and studies methods and [[software]] that enable machines to [[Machine perception|perceive their environment]] and use [[machine learning|learning]] and intelligence to take actions that maximize their chances of achieving defined goals.{{Sfnp|Russell|Norvig|2021|pp=1–4}} Such machines may be called AIs.

<!-- APPLICATIONS -->
High-profile [[applications of AI]] include advanced [[web s