In [3]:
import requests
from bs4 import BeautifulSoup

In [6]:
def get_population_data(url, timeout=10):
    """Scrape a two-column population table from a web page.

    Downloads ``url``, looks up the first ``<table>`` matched by the
    ``wikitable sortable`` class, and maps the text of each data row's first
    ``<td>`` cell to the text of its second ``<td>`` cell.

    Args:
        url: Address of the page to fetch (the notebook passes Wikipedia's
            "World population" article).
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so an unresponsive host cannot hang the notebook forever.

    Returns:
        A dict of ``{first cell text: second cell text}`` (whitespace-stripped
        strings), or ``None`` when the response status is not 200 or the page
        has no matching table. A message is printed in both ``None`` cases.

    Raises:
        Any exception raised by ``requests.get`` (connection failure,
        timeout, ...) is left to propagate to the caller.
    """
    # Fetch the page the caller asked for, not a hard-coded address.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print("Failed to retrieve data from the provided URL")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # find() returns None when nothing matches; report it instead of crashing.
    population_table = soup.find('table', class_='wikitable sortable')
    if population_table is None:
        print("No population table found at the provided URL")
        return None

    population_data = {}

    # The first <tr> is treated as the header row and skipped.
    for row in population_table.find_all('tr')[1:]:
        cells = row.find_all('td')

        # Header-only rows (<th> cells) and short rows carry no
        # name/value pair, so skip them rather than raise.
        if len(cells) < 2:
            continue

        country = cells[0].text.strip()
        population = cells[1].text.strip()
        population_data[country] = population

    return population_data

In [7]:
# Example usage: scrape the "World population" article and list every entry.
wikipedia_url = "https://en.wikipedia.org/wiki/World_population"
population_data = get_population_data(wikipedia_url)

# A None result (failed request) and an empty dict both count as "no data".
if not population_data:
    print("No population data retrieved")
else:
    for country, population in population_data.items():
        print(f"{country}: {population}")


Asia: 104.1
Africa: 44.4
Europe: 73.4
Latin America: 24.1
Northern America[note 1]: 14.9
Oceania: 5
Antarctica: ~0


Web Scraping Population Statistics from Wikipedia

This Python-based web scraping project uses the BeautifulSoup and Requests libraries to extract population statistics from Wikipedia pages. Its parsing code navigates Wikipedia tables and extracts population data for various countries and territories. The extracted information is standardized for consistency, which makes it easier to integrate with downstream data processing and analysis pipelines.

Key Features:

Uses the BeautifulSoup and Requests libraries for web scraping
Implements parsing algorithms for navigating Wikipedia tables
Standardizes extracted information for consistency
Provides valuable demographic insights through an automated scraping process
Offers basic data analytics using the pandas library
Usage:

Clone the repository and run the Python script to scrape population data from Wikipedia pages.
Use the extracted data for further analysis, visualization, or integration into other projects.
Dependencies:

BeautifulSoup
Requests
pandas
matplotlib (for visualization)
Contributions:
Contributions and feedback are welcome! Feel free to submit pull requests or open issues for any improvements or bug fixes.

