In [122]:
from tabulate import tabulate
import pandas as pd

In [123]:
# Download the Wikipedia page and let pandas parse every HTML table on it
url = "https://en.wikipedia.org/wiki/List_of_continents_and_continental_subregions_by_population"
tables = pd.read_html(io=url)
# Report how many tables were parsed so the relevant ones can be located
print(f"Number of tables found: {len(tables)}")

Number of tables found: 28


In [124]:
# When extracting data from the Wikipedia page, the data is unstructured and needs to be cleaned

In [125]:
# Table for all regions: everything from the fourth parsed table onward is
# stacked into one long DataFrame with a fresh 0..n-1 index.
region_tables = tables[3:]
region = pd.concat(objs=region_tables, ignore_index=True)
# Keep a CSV snapshot of the stacked (still uncleaned) data
region.to_csv("region.csv", index=False)


In [126]:
# Give the population column its full name ('Pop.' -> 'Population')

region = region.rename({'Pop.': 'Population'}, axis="columns")


In [127]:
# The stacked region tables do not share the structure of the other tables:
# they carry no region name, only Year and Population.
# Every region's block of rows starts with the year 1950, so a running count of
# the 1950 rows gives each block its own consecutive number (1, 2, 3, ...).
region['Region_nr'] = region['Year'].eq(1950).cumsum()

In [128]:
# Region names listed in the order their blocks appear in the stacked table;
# enumerate(..., start=1) pairs each name with its Region_nr.
region_mapping = dict(enumerate(
    [
        "Eastern Africa",
        "Middle Africa",
        "Northern Africa",
        "Southern Africa",
        "Western Africa",
        "Total Africa",
        "Total Americas",
        "Caribbean",
        "Central America",
        "North America",
        "Total North America",
        "Total South America",
        "Central Asia",
        "Eastern Asia",
        "South-Eastern Asia",
        "Southern Asia",
        "Western Asia",
        "Total Asia",
        "Eastern Europe",
        "North Europe",
        "Southern Europe",
        "Western Europe",
        "Total Europe",
        "Total Oceania",
        "Total World",
    ],
    start=1,
))

# Add a 'Region' column by translating each region number into its name
region['Region'] = region['Region_nr'].map(region_mapping)

In [129]:
# Region numbers whose rows hold continent-level (or world) totals.
# NOTE(review): 7 ("Total Americas") is not in this set, so that aggregate
# stays in `region` alongside the sub-regions -- confirm this is intended.
continent_identifiers = {6, 11, 12, 18, 23, 24, 25}

# One boolean mask drives both halves of the split
is_continent = region['Region_nr'].isin(continent_identifiers)

# Continent rows go into their own frame, with 'Region' relabelled 'Continent'
continent = region[is_continent].copy().rename(columns={'Region': 'Continent'})

# Everything else remains in the region frame.
# NOTE(review): `region` is overwritten here, so re-running this cell without
# re-running the cells above yields an empty `continent` and an empty CSV.
region = region[~is_continent].copy()

# Persist the continent frame
continent.to_csv("continent.csv", index=False)

In [130]:
# Render the cleaned sub-region frame as an ASCII grid (index shown as first column)
print(tabulate(region, tablefmt='grid', headers='keys'))

+-----+--------+--------------+-----------+-------------+--------------------+
|     |   Year |   Population | ±% p.a.   |   Region_nr | Region             |
|   0 |   1950 |     66923000 | —         |           1 | Eastern Africa     |
+-----+--------+--------------+-----------+-------------+--------------------+
|   1 |   1960 |     84305000 | +2.34%    |           1 | Eastern Africa     |
+-----+--------+--------------+-----------+-------------+--------------------+
|   2 |   1970 |    110428000 | +2.74%    |           1 | Eastern Africa     |
+-----+--------+--------------+-----------+-------------+--------------------+
|   3 |   1980 |    147512000 | +2.94%    |           1 | Eastern Africa     |
+-----+--------+--------------+-----------+-------------+--------------------+
|   4 |   1990 |    198232000 | +3.00%    |           1 | Eastern Africa     |
+-----+--------+--------------+-----------+-------------+--------------------+
|   5 |   2000 |    259373000 | +2.72%    |         

In [131]:
# Render the continent frame as an ASCII grid (index shown as first column)
print(tabulate(continent, tablefmt='grid', headers='keys'))

+-----+--------+--------------+-----------+-------------+---------------------+
|     |   Year |   Population | ±% p.a.   |   Region_nr | Continent           |
|  40 |   1950 |    228902000 | —         |           6 | Total Africa        |
+-----+--------+--------------+-----------+-------------+---------------------+
|  41 |   1960 |    284887000 | +2.21%    |           6 | Total Africa        |
+-----+--------+--------------+-----------+-------------+---------------------+
|  42 |   1970 |    365626000 | +2.53%    |           6 | Total Africa        |
+-----+--------+--------------+-----------+-------------+---------------------+
|  43 |   1980 |    477965000 | +2.72%    |           6 | Total Africa        |
+-----+--------+--------------+-----------+-------------+---------------------+
|  44 |   1990 |    631614000 | +2.83%    |           6 | Total Africa        |
+-----+--------+--------------+-----------+-------------+---------------------+
|  45 |   2000 |    814063000 | +2.57%  

CLASS REGION

In [132]:
class Region:
    """
    Query helper around a population table.

    The wrapped DataFrame is read through its 'Region', 'Year' and
    'Population' columns. The growth methods also write a 'Growth Rate'
    column into that same DataFrame: no copy is made, so the frame handed
    to the constructor is modified by them.
    """

    def __init__(self, data):
        """
        Initialize the Region class with a DataFrame containing Year, Population, and Region.

        Parameters:
            data: DataFrame to query. It is stored by reference, not copied.
        """
        self.data = data

    def _population_of(self, region_name, year):
        """Return the population of the first row matching region and year, or None if there is none."""
        rows = self.data[(self.data['Region'] == region_name) & (self.data['Year'] == year)]
        if rows.empty:
            return None
        return rows['Population'].iloc[0]

    def display_population(self, region_name, year):
        """
        Display the population of a specific region in a specific year.

        Prints a "No data available" message when no row matches.
        """
        population = self._population_of(region_name, year)
        if population is not None:
            print(f"Population of {region_name} in {year}: {population:,}")
        else:
            print(f"No data available for {region_name} in {year}.")

    def population_comparison(self, region_name1, region_name2, year):
        """
        Compares the population between two regions in a specific year.

        Prints both populations followed by which one was greater. When either
        region has no row for the year, only a "not available" message is
        printed and the method returns without comparing.
        """
        population1 = self._population_of(region_name1, year)
        population2 = self._population_of(region_name2, year)

        # Stop here when data is missing: there is nothing to compare, and the
        # comparison below would otherwise run on undefined values.
        if population1 is None or population2 is None:
            print("Data for one or both regions for the specified year is not available.")
            return

        print(f"Population in {region_name1} in {year}: {population1:,}")
        print(f"Population in {region_name2} in {year}: {population2:,}")

        if population1 > population2:
            print(f"{region_name1}s population was greater than {region_name2}s")
        elif population2 > population1:
            print(f"{region_name2}s population was greater than {region_name1}s")
        elif population1 == population2:
            # Equal figures only mean "same region" when the names match;
            # two different regions can legitimately tie.
            if region_name1 == region_name2:
                print("You cannot compare the same region")
            else:
                print(f"{region_name1} and {region_name2} had the same population")

    def population_sort(self, year):
        """
        Sort regions by population size in a specific year

        Prints the rows for `year` ordered from smallest to largest
        population, or a "No data available" message when the year is absent.
        """
        unsorted_data = self.data[(self.data['Year'] == year)]
        if not unsorted_data.empty:
            sorted_data = unsorted_data.sort_values(by=['Population'], ascending=True)
            print(f"Here are regions sorted population in the year {year}:\n{sorted_data}")
        else:
            print(f"No data available for regions in {year}.")

    def growth_calculator(self, region_name, year):
        """
        Calculates the percentage population change of a region between the
        closest earlier year that has data and the given year.

        The result is the total change over that interval, NOT an annualized
        rate: with data recorded every 10 years it is the change per decade.

        Side effect: the result is written to the 'Growth Rate' column of
        self.data for the matching (region, year) rows; the column is created
        if it does not exist yet.

        Returns:
            The growth in percent, or None (after printing a message) when the
            region has no row for `year` or no earlier row to compare against.
        """
        # Filter data for the specified region
        region_data = self.data[self.data['Region'] == region_name]

        # Ensure the requested year exists in the data
        if year not in region_data['Year'].values:
            print(f"No data available for {region_name} in {year}.")
            return None

        # Find the closest previous year with data (latest year first)
        previous_year_data = region_data[region_data['Year'] < year].sort_values(by='Year', ascending=False)

        if previous_year_data.empty:
            print(f"No previous data available for {region_name} before {year}.")
            return None

        # Get the populations for the current and previous data points
        current_population = region_data[region_data['Year'] == year]['Population'].iloc[0]
        previous_population = previous_year_data['Population'].iloc[0]

        # Percentage change relative to the previous data point
        growth_rate = ((current_population - previous_population) / previous_population) * 100

        # Record the value on the shared frame so growth_sort can order by it
        self.data.loc[(self.data['Region'] == region_name) & (self.data['Year'] == year), 'Growth Rate'] = growth_rate

        print(f"Growth rate for {region_name} in {year}: {growth_rate:.2f}%")
        return growth_rate

    def growth_comparison(self, region1, region2, year):
        """
        Compares the growth rate between two regions in a specific year.

        Both rates come from growth_calculator (which prints them and stores
        them in the 'Growth Rate' column). Prints a fallback message when
        either rate could not be computed.
        """
        growth1 = self.growth_calculator(region1, year)
        growth2 = self.growth_calculator(region2, year)

        if growth1 is not None and growth2 is not None:
            if growth1 > growth2:
                print(f"{region1} had a higher growth rate ({growth1:.2f}%) than {region2} ({growth2:.2f}%) in {year}.")
            elif growth2 > growth1:
                print(f"{region2} had a higher growth rate ({growth2:.2f}%) than {region1} ({growth1:.2f}%) in {year}.")
            else:
                print(f"{region1} and {region2} had the same growth rate ({growth1:.2f}%) in {year}.")
        else:
            print("Comparison could not be made due to insufficient data.")

    def growth_sort(self, year):
        """
        Sorts the DataFrame by growth rate for a specific year and returns the sorted DataFrame.

        growth_calculator is run for every distinct region first (printing one
        line per region and filling the 'Growth Rate' column); the rows for
        `year` are then printed and returned in descending growth order.
        """
        # Ensure the 'Growth Rate' column exists. NaN (rather than None) keeps
        # the column numeric regardless of which method created it first.
        if 'Growth Rate' not in self.data.columns:
            self.data['Growth Rate'] = float('nan')

        # Calculate growth rate for all regions for the specified year
        for region_name in self.data['Region'].unique():
            self.growth_calculator(region_name, year)  # Updates the 'Growth Rate' column

        # Filter rows for the specified year and sort by 'Growth Rate'
        year_data = self.data[self.data['Year'] == year]
        sorted_data = year_data.sort_values(by='Growth Rate', ascending=False)

        print(f"Regions sorted by growth rate in {year}:")
        print(tabulate(sorted_data))

        return sorted_data


    

In [133]:
# Wrap the cleaned sub-region frame in the query helper
region_manager = Region(data=region)

# Look up one region's population for one year
region_manager.display_population(region_name="Eastern Africa", year=1950)


Population of Eastern Africa in 1950: 66,923,000


In [134]:
# Wrap the cleaned sub-region frame in the query helper
region_manager = Region(data=region)

# Put two regions' populations side by side for the same year
region_manager.population_comparison(
    region_name1="North America",
    region_name2="Central America",
    year=1970,
)


Population in North America in 1970: 231,029,000
Population in Central America in 1970: 69,702,000
North Americas population was greater than Central Americas


In [135]:
# Wrap the cleaned sub-region frame in the query helper
region_manager = Region(data=region)

# List every region for the year 2000, smallest population first
region_manager.population_sort(year=2000)

Here are regions sorted population in the year 2000:
     Year  Population ±% p.a.  Region_nr              Region
61   2000    38314000  +1.14%          8           Caribbean
29   2000    51451000  +2.04%          4     Southern Africa
101  2000    55117000  +0.90%         13        Central Asia
157  2000    94397000  +0.25%         20        North Europe
13   2000    96113000  +3.09%          2       Middle Africa
69   2000   138780000  +1.91%          9     Central America
165  2000   145058000  +0.11%         21     Southern Europe
21   2000   171891000  +2.06%          3     Northern Africa
173  2000   183163000  +0.42%         22      Western Europe
133  2000   184957000  +2.22%         17        Western Asia
37   2000   235235000  +2.69%          5      Western Africa
5    2000   259373000  +2.72%          1      Eastern Africa
149  2000   303789000  −0.20%         19      Eastern Europe
77   2000   313724000  +1.12%         10       North America
117  2000   526179000  +1.67%   

In [136]:
# Growth of a single region up to a chosen year

region_name = "Eastern Africa"  # Region to inspect
year = 1970  # Year to inspect

region_manager = Region(data=region)

# Last expression of the cell, so the returned value is displayed as well
region_manager.growth_calculator(region_name=region_name, year=year)

Growth rate for Eastern Africa in 1970: 30.99%


30.986299744973607

In [137]:
# Growth of two regions compared for the same year

region1 = "Western Africa"  # First region
region2 = "Eastern Africa"  # Second region
year = 2000

region_manager.growth_comparison(region1=region1, region2=region2, year=year)

Growth rate for Western Africa in 2000: 30.45%
Growth rate for Eastern Africa in 2000: 30.84%
Eastern Africa had a higher growth rate (30.84%) than Western Africa (30.45%) in 2000.


In [138]:

region_manager = Region(data=region)
year = 1970

# Rank every region by its growth up to the chosen year
print("\nSorting all regions by growth rates...")
region_manager.growth_sort(year=year)


Sorting all regions by growth rates...
Growth rate for Eastern Africa in 1970: 30.99%
Growth rate for Middle Africa in 1970: 26.79%
Growth rate for Northern Africa in 1970: 30.12%
Growth rate for Southern Africa in 1970: 29.05%
Growth rate for Western Africa in 1970: 24.80%
Growth rate for Total Americas in 1970: 22.14%
Growth rate for Caribbean in 1970: 22.11%
Growth rate for Central America in 1970: 35.61%
Growth rate for North America in 1970: 13.16%
Growth rate for Central Asia in 1970: 34.69%
Growth rate for Eastern Asia in 1970: 24.10%
Growth rate for South-Eastern Asia in 1970: 31.65%
Growth rate for Southern Asia in 1970: 24.86%
Growth rate for Western Asia in 1970: 30.05%
Growth rate for Eastern Europe in 1970: 8.98%
Growth rate for North Europe in 1970: 6.74%
Growth rate for Southern Europe in 1970: 8.26%
Growth rate for Western Europe in 1970: 8.92%
Regions sorted by growth rate in 1970:
---  ----  ---------  ------  --  ------------------  --------
 66  1970   69702000  +3

Unnamed: 0,Year,Population,±% p.a.,Region_nr,Region,Growth Rate
66,1970,69702000,+3.09%,9,Central America,35.607004
98,1970,33156000,+3.02%,13,Central Asia,34.692883
114,1970,281521000,+2.79%,15,South-Eastern Asia,31.651531
2,1970,110428000,+2.74%,1,Eastern Africa,30.9863
18,1970,82883000,+2.67%,3,Northern Africa,30.120728
130,1970,86037000,+2.66%,17,Western Asia,30.051696
26,1970,25454000,+2.58%,4,Southern Africa,29.050902
10,1970,40846000,+2.40%,2,Middle Africa,26.787931
122,1970,741603000,+2.25%,16,Southern Asia,24.860972
34,1970,106015000,+2.24%,5,Western Africa,24.802816
