<a href="https://colab.research.google.com/github/AmirHosseinAlikhahMishamandani/worldometers-data-scraper/blob/main/worldometers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Worldometers Data Scraper

This Python program scrapes data from https://www.worldometers.info to create a table containing information about countries, including their population, area, GDP (Nominal), and continent. It utilizes BeautifulSoup for web scraping.

## Data Acquisition

Collecting data from [worldometers.info](https://www.worldometers.info/world-population/population-by-country/)!

The data is obtained by web scraping, so the results may vary as the site updates its figures.

Date Accessed: Wednesday, January 17, 2024.


In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_data(url, timeout=30):
    """Scrape a population table into a Country/Population/Area DataFrame.

    Downloads ``url``, parses the table with id ``example2`` (falling back
    to the first table on the page), and keeps the first columns whose
    header text contains 'Country', 'Population' and 'Area', renamed to
    exactly those names. Cell values are the stripped text from the page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A new DataFrame with the columns 'Country', 'Population', 'Area'.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        ValueError: If the page has no table with a header and a body.
        KeyError: If a required column is not present in the table header.
    """
    # Without a timeout requests waits indefinitely on a stalled server.
    page = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of parsing an error page as if it were data.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find('table', id='example2') or soup.find('table')
    if table is None:
        raise ValueError(f"No <table> element found at {url}")
    thead = table.find('thead')
    tbody = table.find('tbody')
    if thead is None or tbody is None:
        raise ValueError(f"Table at {url} is missing <thead> or <tbody>")

    headers = [th.text.strip() for th in thead.find_all('th')]
    rows = tbody.find_all('tr')
    data = [[ele.text.strip() for ele in row.find_all('td')] for row in rows]
    temp_df = pd.DataFrame(data, columns=headers)

    # Map actual column names to the standard ones. Headers are matched by
    # substring because the page decorates them (year, units, ...).
    column_mappings = {}
    missing = []
    for standard_name in ('Country', 'Population', 'Area'):
        actual_name = next((col for col in headers if standard_name in col), None)
        if actual_name is None:
            missing.append(standard_name)
        else:
            column_mappings[actual_name] = standard_name
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found at {url}; headers were {headers}"
        )

    temp_df = temp_df.rename(columns=column_mappings)

    # Keep only the required columns; copy so callers can add columns to
    # the result without touching the full scraped table.
    return temp_df[['Country', 'Population', 'Area']].copy()

# Continent label paired with the page that lists its countries.
continents_urls = [
    ('Asia', 'https://www.worldometers.info/population/countries-in-asia-by-population/'),
    ('Africa', 'https://www.worldometers.info/population/countries-in-africa-by-population/'),
    ('Oceania','https://www.worldometers.info/population/countries-in-oceania-by-population/'),
    ('Northern America', 'https://www.worldometers.info/population/countries-in-northern-america-by-population/'),
    ('Latin America and the Caribbean','https://www.worldometers.info/population/countries-in-latin-america-and-the-caribbean-by-population/'),
    ('Europe','https://www.worldometers.info/population/countries-in-europe-by-population/'),
]

# Scrape every continent page and tag its rows with the continent label.
# The frames are collected and concatenated once: calling pd.concat inside
# the loop re-copies all accumulated rows on each pass, and seeding it with
# an empty DataFrame relies on concat behaviour that newer pandas releases
# warn about.
continent_frames = []
for continent, url in continents_urls:
    continent_data = fetch_data(url)
    continent_data['Continent'] = continent
    continent_frames.append(continent_data)

df = pd.concat(continent_frames, ignore_index=True)
# Put 'Continent' right after 'Country', the order the rest of the script uses.
df = df[['Country', 'Continent', 'Population', 'Area']]

def fetch_gdp_data(url, timeout=30):
    """Scrape a GDP-by-country table into a Country/GDP (Nominal) DataFrame.

    Downloads ``url``, parses the first table on the page, and keeps the
    'Country' column plus the first column whose header starts with
    'GDP (nominal' (for example 'GDP (nominal, 2022)'), renamed to
    'GDP (Nominal)'. Cell values are the stripped text from the page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A new DataFrame with the columns 'Country' and 'GDP (Nominal)'.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        ValueError: If the page has no table with a header and a body.
        KeyError: If the 'Country' or nominal GDP column is not present.
    """
    # Without a timeout requests waits indefinitely on a stalled server.
    page = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of parsing an error page as if it were data.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        raise ValueError(f"No <table> element found at {url}")
    thead = table.find('thead')
    tbody = table.find('tbody')
    if thead is None or tbody is None:
        raise ValueError(f"Table at {url} is missing <thead> or <tbody>")

    headers = [th.text.strip() for th in thead.find_all('th')]
    rows = tbody.find_all('tr')
    data = [[ele.text.strip() for ele in row.find_all('td')] for row in rows]
    gdp_df = pd.DataFrame(data, columns=headers)

    # The header embeds the reporting year, e.g. 'GDP (nominal, 2022)', so
    # match on the stable prefix instead of one specific year.
    gdp_column = next(
        (col for col in headers if col.lower().startswith('gdp (nominal')),
        None,
    )
    if 'Country' not in headers or gdp_column is None:
        raise KeyError(
            f"'Country' or nominal GDP column not found at {url}; "
            f"headers were {headers}"
        )

    # Keep the two columns of interest and give the GDP column a
    # year-independent name.
    return gdp_df[['Country', gdp_column]].rename(
        columns={gdp_column: 'GDP (Nominal)'}
    )

# Download the GDP table.
gdp_url = "https://www.worldometers.info/gdp/gdp-by-country/"
gdp_df = fetch_gdp_data(gdp_url)

# Attach the GDP figures to `df` (Country, Continent, Population, Area).
# A left join keeps every row of `df`; rows whose 'Country' value has no
# match in the GDP table get NaN in the GDP column.
df_merged = df.merge(gdp_df, how='left', on='Country')

# Preview the first rows, then write the full table to CSV without the
# index column.
print(df_merged.head())
df_merged.to_csv('Countries_data.csv', index=False)

      Country Continent     Population       Area        GDP (Nominal)
0       China      Asia  1,425,671,352  9,388,211  $17,963,200,000,000
1       India      Asia  1,428,627,663  2,973,190   $3,385,090,000,000
2   Indonesia      Asia    277,534,122  1,811,570   $1,319,100,000,000
3    Pakistan      Asia    240,485,658    770,880     $376,533,000,000
4  Bangladesh      Asia    172,954,319    130,170     $460,201,000,000
