In [1]:
import numpy as np
import pandas as pd

In [3]:
# Load the population/area dataset. The path is relative to the notebook's
# working directory.
data_population = pd.read_csv('population_area.csv')
# NOTE: Jupyter only renders the *last* expression of a cell, so a bare
# `data_population.head()` in the middle of this cell is computed and then
# silently discarded. To preview the frame, end a cell of its own with
# `data_population.head()`.

# Distinct labels of the 'Country' and 'Indicator' columns, in order of first
# appearance in the file.
countries = data_population['Country'].unique()
indicators = data_population['Indicator'].unique()
print(indicators)

['Population mid-year estimates (millions)'
 'Population mid-year estimates for males (millions)'
 'Population mid-year estimates for females (millions)'
 'Sex ratio (males per 100 females)'
 'Population aged 0 to 14 years old (percentage)'
 'Population aged 60+ years old (percentage)' 'Population density'
 'Surface area (thousand km2)']


In [None]:
# Show every distinct label found in the 'Country' column, in order of first
# appearance.
pd.unique(data_population['Country'])

array(['Total, all countries or areas', 'Africa', 'Northern Africa',
       'Sub-Saharan Africa', 'Eastern Africa', 'Middle Africa',
       'Southern Africa', 'Western Africa', 'Americas',
       'Northern America', 'Latin America & the Caribbean', 'Caribbean',
       'Central America', 'South America', 'Asia', 'Central Asia',
       'Eastern Asia', 'South-central Asia', 'South-eastern Asia',
       'Southern Asia', 'Western Asia', 'Europe', 'Eastern Europe',
       'Northern Europe', 'Southern Europe', 'Western Europe', 'Oceania',
       'Australia and New Zealand', 'Melanesia', 'Micronesia',
       'Polynesia', 'Afghanistan', 'Albania', 'Algeria', 'American Samoa',
       'Andorra', 'Angola', 'Anguilla', 'Antigua and Barbuda',
       'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria',
       'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados',
       'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan',
       'Bolivia (Plurin. State of)', 'Bonaire, St. Eustatiu

In [6]:
import pandas as pd
import plotly.express as px

# Plot the chosen indicators over time for a hand-picked list of countries.
# Steps: load the CSV, filter to the requested countries/indicators, coerce
# the 'Value' column to numbers, then draw one plotly line per
# (country, indicator) pair.

# 1. Load Data
# Path of the CSV file, relative to the notebook's working directory.
data_file = 'population_area.csv'

# Read the CSV into a pandas DataFrame using the default parser settings.
df = pd.read_csv(data_file)

# Clean up column names (remove leading/trailing spaces).
df.columns = df.columns.str.strip()

# 2. Choose Countries and Indicators
# Sorted list of every country/region label in the data, kept as a reference
# when editing the selection below. NaN labels are dropped first because
# sorted() cannot compare NaN (a float) with strings.
available_countries = sorted(df['Country'].dropna().unique())

countries_input = 'United States of America,Canada,Japan,Germany,India,Indonesia,Brazil,South Africa,Niger'
indicators_inputs = ['Population aged 0 to 14 years old (percentage)','Sex ratio (males per 100 females)']

# Parse the comma-separated string into a list of names, stripping whitespace
# and ignoring empty entries.
selected_countries = [name.strip() for name in countries_input.split(',') if name.strip()]

if not selected_countries:
    print("No country names entered. Exiting.")
else:
    # 3. Filter Data for ALL selected countries and the requested indicators.
    # .copy() gives an independent frame, so the assignments below do not
    # trigger SettingWithCopyWarning.
    selection_mask = df['Country'].isin(selected_countries) & df['Indicator'].isin(indicators_inputs)
    df_filtered = df[selection_mask].copy()

    # 4. Handle No/Partial Data
    found_countries = df_filtered['Country'].unique()
    not_found = set(selected_countries) - set(found_countries)

    # Test the frame itself for emptiness: calling .any() on an array of
    # strings is fragile and is not available on every array type that
    # unique() can return.
    if df_filtered.empty:
        print(f"No data found for any of the specified countries: {', '.join(selected_countries)}")
    else:
        if not_found:
            # sorted() keeps the warning text stable between runs (set order is arbitrary).
            print(f"\nWarning: No data found for the following requested countries: {', '.join(sorted(not_found))}")
            print(f"Plotting data for: {', '.join(found_countries)}")
        else:
            print(f"\nFound data for: {', '.join(found_countries)}")

        # 5. Data Cleaning
        # Values may be stored as text with thousands separators ("1,234.5").
        # Only do the string cleanup when the column is not already numeric
        # (.str raises AttributeError on numeric columns), and convert to str
        # first so non-string cells in a mixed-type column are not silently
        # turned into NaN by the .str accessor.
        raw_values = df_filtered['Value']
        if not pd.api.types.is_numeric_dtype(raw_values):
            raw_values = raw_values.astype(str).str.replace(',', '', regex=False)
        df_filtered['Value'] = pd.to_numeric(raw_values, errors='coerce')

        # Drop rows whose value could not be parsed, then sort by country and
        # year for potentially cleaner line connections.
        df_filtered = df_filtered.dropna(subset=['Value']).sort_values(['Country', 'year'])

        # Check again if data remains after cleaning
        if df_filtered.empty:
            print("No valid numeric indicator data found for the selected countries after cleaning.")
        else:
            # 6. Plot Data
            print("\nGenerating plot for selected countries...")

            # --- Plotting Strategy ---
            # We need to distinguish between countries AND indicators.
            # Option 1: Color by Country, Line Style by Indicator (good if few indicators)
            # Option 2: Color by Indicator, Line Style by Country (good if few countries)
            # Option 1 is used here, with marker symbols added for clarity.

            title_countries = ', '.join(found_countries)
            if len(title_countries) > 60:  # Truncate title if too long
                title_countries = title_countries[:57] + "..."

            fig = px.line(
                df_filtered,
                x='year',
                y='Value',
                color='Country',         # Different color for each country
                line_dash='Indicator',   # Different line style for each indicator
                symbol='Indicator',      # Different marker shape for each indicator
                markers=True,            # Show markers on the lines
                title=f'Indicators for {title_countries} over Time',
                hover_data=['Indicator'] # Show indicator name clearly on hover
            )

            # Customize layout (optional)
            fig.update_layout(
                xaxis_title='Year',
                yaxis_title='Indicator Value',
                legend_title='Legend' # Generic legend title
            )
            # Improve legend clarity (optional, might make it large)
            # fig.update_layout(legend={'traceorder': 'grouped'})

            # 7. Show Plot
            fig.show()


Found data for: Brazil, Canada, Germany, India, Indonesia, Japan, Niger, South Africa, United States of America

Generating plot for selected countries...


In [3]:
!pip install voila


Collecting voila
  Downloading voila-0.5.8-py3-none-any.whl.metadata (9.5 kB)
Collecting jupyter-client<9,>=7.4.4 (from voila)
  Downloading jupyter_client-8.6.3-py3-none-any.whl.metadata (8.3 kB)
Collecting jupyter-server<3,>=1.18 (from voila)
  Downloading jupyter_server-2.15.0-py3-none-any.whl.metadata (8.4 kB)
Collecting jupyterlab-server<3,>=2.3.0 (from voila)
  Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)
Collecting nbclient>=0.4.0 (from voila)
  Downloading nbclient-0.10.2-py3-none-any.whl.metadata (8.3 kB)
Collecting nbconvert<8,>=6.4.5 (from voila)
  Downloading nbconvert-7.16.6-py3-none-any.whl.metadata (8.5 kB)
Collecting websockets>=9.0 (from voila)
  Downloading websockets-15.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting tornado>=6.2 (from jupyter-client<9,>=7.4.4->voila)
  Downloading tornado-6.4.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x8