In [37]:
import requests

def download_wikipedia(dump_url, choice, 
                       dump_file_path='wikipedia_dump_file.bz2'):
    """
    Download the full Wikipedia dump or only its first 5 MB.

    Input Parameters:
    - dump_url (str): The URL of the Wikipedia dump file to be downloaded.
    - choice (int): 0 downloads the full dump, 1 downloads a 5 MB subset
      (the first 5,000,000 bytes of the response body).
    - dump_file_path (str, optional): Path where the downloaded file will be
      saved (default: 'wikipedia_dump_file.bz2').

    Returns:
    - str: The path to the saved dump file.

    Raises:
    - ValueError: If choice is neither 0 nor 1.
    - requests.HTTPError: If the server answers with an error status
      (raised by raise_for_status()).
    """
    # Reject unknown modes up front: otherwise neither branch below would run
    # and an empty file would be reported as a successful download.
    if choice not in (0, 1):
        raise ValueError(
            f"choice must be 0 (full dump) or 1 (5 MB subset), got {choice!r}"
        )

    subset_limit = 5_000_000   # bytes written when choice == 1
    chunk_size = 1024 * 1024   # 1 MiB per read keeps the loop count manageable
    # (connect, read) timeouts in seconds; the read timeout applies to each
    # wait for data, not to the download as a whole.
    timeout = (10, 60)

    with requests.get(dump_url, stream=True, timeout=timeout) as r:
        # Check the status before opening the output file so that a failed
        # request does not create an empty file or truncate an existing dump.
        r.raise_for_status()

        with open(dump_file_path, 'wb') as f:
            if choice == 1:  # Download 5 MB subset
                remaining = subset_limit
                for chunk in r.iter_content(chunk_size):
                    if not chunk:
                        continue
                    # Trim the final chunk so the file never exceeds the limit.
                    f.write(chunk[:remaining])
                    remaining -= len(chunk)
                    if remaining <= 0:
                        break
            else:  # choice == 0: download the full dump
                for chunk in r.iter_content(chunk_size):
                    if chunk:
                        f.write(chunk)
                print("Downloaded full dump.")

    print(f"Dump saved to {dump_file_path}")
    return dump_file_path

# Ask which download mode to use; int() raises ValueError on non-numeric input.
user_choice = int(input("Enter 1 to download a 5MB subset or 0 for the full dump: "))
if user_choice not in (0, 1):
    # download_wikipedia only handles 0 and 1, so reject anything else before
    # starting a download.
    raise ValueError(f"Expected 0 or 1, got {user_choice}")
dump_url = 'https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2'
# `dump` holds the path of the saved file returned by download_wikipedia.
dump = download_wikipedia(dump_url, user_choice)