In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go

from plotly.subplots import make_subplots
import warnings

# Suppress FutureWarning messages

In [3]:
warnings.simplefilter(action='ignore', category=FutureWarning)
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

# Graph

In [5]:
df = pd.read_csv('/content/world_population.csv')

In [None]:
df.head()

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [7]:
df.shape

(234, 17)

In [6]:
df.isna().sum()

Unnamed: 0,0
Rank,0
CCA3,0
Country/Territory,0
Capital,0
Continent,0
2022 Population,0
2020 Population,0
2015 Population,0
2010 Population,0
2000 Population,0


In [8]:
print(f"Amount of duplicates: {df.duplicated().sum()}")

Amount of duplicates: 0


In [9]:
df.columns

Index(['Rank', 'CCA3', 'Country/Territory', 'Capital', 'Continent',
       '2022 Population', '2020 Population', '2015 Population',
       '2010 Population', '2000 Population', '1990 Population',
       '1980 Population', '1970 Population', 'Area (km²)', 'Density (per km²)',
       'Growth Rate', 'World Population Percentage'],
      dtype='object')

In [10]:
df.drop(['CCA3', 'Capital'], axis=1, inplace=True)

In [11]:
df.tail()

Unnamed: 0,Rank,Country/Territory,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
229,226,Wallis and Futuna,Oceania,11572,11655,12182,13142,14723,13454,11315,9377,142,81.493,0.9953,0.0
230,172,Western Sahara,Africa,575986,556048,491824,413296,270375,178529,116775,76371,266000,2.1654,1.0184,0.01
231,46,Yemen,Asia,33696614,32284046,28516545,24743946,18628700,13375121,9204938,6843607,527968,63.8232,1.0217,0.42
232,63,Zambia,Africa,20017675,18927715,16248230,13792086,9891136,7686401,5720438,4281671,752612,26.5976,1.028,0.25
233,74,Zimbabwe,Africa,16320537,15669666,14154937,12839771,11834676,10113893,7049926,5202918,390757,41.7665,1.0204,0.2


In [12]:
countries_by_continent = df['Continent'].value_counts().reset_index()

# Create the bar chart

In [16]:
!pip install plotly.express
import plotly.express as px

# Define the custom palette. For example:
custom_palette = ['red', 'green', 'blue', 'orange', 'purple']

fig = px.bar(
    countries_by_continent,
    x='Continent',
    y='count',
    color='Continent',
    text='count',
    title='Number of Countries by Continent',
    color_discrete_sequence=custom_palette # Now this variable is defined
)

Collecting plotly.express
  Downloading plotly_express-0.4.1-py2.py3-none-any.whl.metadata (1.7 kB)
Downloading plotly_express-0.4.1-py2.py3-none-any.whl (2.9 kB)
Installing collected packages: plotly.express
Successfully installed plotly.express-0.4.1


# Customize the layout

In [17]:
fig.update_layout(
xaxis_title='Continents',
yaxis_title='Number of Countries',
plot_bgcolor='rgba(0,0,0,0)', # Set the background color to transparent
font_family='Arial', # Set font family
title_font_size=20 # Set title font size
)

# Show the plot

In [18]:
fig.show()

In [19]:
continent_population_percentage = df.groupby('Continent')['World Population Percentage'].sum().reset_index()

# Create the pie chart

In [20]:
fig = go.Figure(data=[go.Pie(labels=continent_population_percentage['Continent'],
values=continent_population_percentage['World Population Percentage'])])

# Update layout

In [21]:
fig.update_layout(
title='World Population Percentage by Continent',
template='plotly',
paper_bgcolor='rgba(255,255,255,0)', # Set the paper background color to transparent
plot_bgcolor='rgba(255,255,255,0)' # Set the plot background color to transparent
)

# Update pie colors

In [22]:
fig.update_traces(marker=dict(colors=custom_palette, line=dict(color='#FFFFFF',
width=1)))

# Show the plot

In [23]:
fig.show()

# Melt the DataFrame to have a long format

In [24]:
df_melted = df.melt(id_vars=['Continent'],

value_vars=['2022 Population', '2020 Population', '2015 Population',
'2010 Population', '2000 Population', '1990 Population',
'1980 Population', '1970 Population'],

var_name='Year',
value_name='Population')

# Convert 'Year' to a more suitable format
df_melted['Year'] = df_melted['Year'].str.split().str[0].astype(int)

# Convert 'Year' to a more suitable format

In [25]:
df_melted['Year'] = df_melted['Year'].str.split().str[0].astype(int)

AttributeError: Can only use .str accessor with string values!

In [26]:
# Convert 'Year' to a more suitable format
df_melted['Year'] = df_melted['Year']
# Changed split() to split(' ')
df_melted['Year'] = df_melted['Year'].astype(int)

# Aggregate population by continent and year

In [27]:
population_by_continent = df_melted.groupby(['Continent',
'Year']).sum().reset_index()

In [28]:
fig = px.line(population_by_continent, x='Year', y='Population', color='Continent',

title='Population Trends by Continent Over Time',
labels={'Population': 'Population', 'Year': 'Year'},
color_discrete_sequence=custom_palette)

fig.update_layout(

template='plotly_white',
xaxis_title='Year',
yaxis_title='Population',
font_family='Arial',
title_font_size=20,
)

fig.update_traces(line=dict(width=3))

fig.show()

In [None]:
features=['1970 Population' ,'2020 Population']
for feature in features:
fig = px.choropleth(df,

locations='Country/Territory',
locationmode='country names',
color=feature,
hover_name='Country/Territory',
template='plotly_white',
title = feature)

fig.show()

IndentationError: expected an indented block after 'for' statement on line 2 (<ipython-input-45-6e920d868608>, line 3)

In [1]:
features=['1970 Population' ,'2020 Population']
for feature in features:
    # indented block of code
    fig = px.choropleth(df,

    locations='Country/Territory',
    locationmode='country names',
    color=feature,
    hover_name='Country/Territory',
    template='plotly_white',
    title = feature)

    fig.show()

NameError: name 'px' is not defined

In [29]:
growth = (df.groupby(by='Country/Territory')['2022 Population'].sum()-df.groupby(by='Country/Territory')['1970 Population'].sum()).sort_values(ascending=False).head(8)

In [30]:
fig=px.bar(x=growth.index,
y=growth.values,
text=growth.values,
color=growth.values,
title='Growth Of Population From 1970 to 2020 (Top 8)',
template='plotly_white')
fig.update_layout(xaxis_title='Country',

yaxis_title='Population Growth')

fig.show()

In [31]:
top_8_populated_countries_1970 = df.groupby('Country/Territory')['1970
Population'].sum().sort_values(ascending=False).head(8)
top_8_populated_countries_2022 = df.groupby('Country/Territory')['2022
Population'].sum().sort_values(ascending=False).head(8)

features = {'top_8_populated_countries_1970': top_8_populated_countries_1970,
'top_8_populated_countries_2022': top_8_populated_countries_2022}

for feature_name, feature_data in features.items():
year = feature_name.split('_')[-1] # Extract the year from the feature name
fig = px.bar(x=feature_data.index,
y=feature_data.values,
text=feature_data.values,
color=feature_data.values,
title=f'Top 8 Most Populated Countries ({year})',
template='plotly_white')
fig.update_layout(xaxis_title='Country',

yaxis_title='Population Growth')

fig.show()

SyntaxError: unterminated string literal (detected at line 1) (<ipython-input-31-7bd8e417d798>, line 1)

In [32]:
top_8_populated_countries_1970 = df.groupby('Country/Territory')['1970 Population'].sum().sort_values(ascending=False).head(8)
top_8_populated_countries_2022 = df.groupby('Country/Territory')['2022 Population'].sum().sort_values(ascending=False).head(8)

features = {'top_8_populated_countries_1970': top_8_populated_countries_1970, 'top_8_populated_countries_2022': top_8_populated_countries_2022}

for feature_name, feature_data in features.items():
    # Indented block of code within the for loop
    year = feature_name.split('_')[-1] # Extract the year from the feature name
    fig = px.bar(x=feature_data.index,
    y=feature_data.values,
    text=feature_data.values,
    color=feature_data.values,
    title=f'Top 8 Most Populated Countries ({year})',
    template='plotly_white')
    fig.update_layout(xaxis_title='Country',
    yaxis_title='Population Growth')

    fig.show()

In [33]:
sorted_df_growth = df.sort_values(by='Growth Rate', ascending=False)

top_fastest = sorted_df_growth.head(6)
top_slowest = sorted_df_growth.tail(6)

In [34]:
def plot_population_trends(countries):
    # Calculate the number of rows needed
    n_cols = 2
    n_rows = (len(countries) + n_cols - 1) // n_cols

    # Add code here to define what the function should do
    # when called, for example:
    print(f'Number of rows: {n_rows}')

# Create subplots

In [35]:
def plot_population_trends(countries):
    # Calculate the number of rows needed
    n_cols = 2
    n_rows = (len(countries) + n_cols - 1) // n_cols

    # Add code here to define what the function should do
    # when called, for example:
    print(f'Number of rows: {n_rows}')

# Filter data for the selected country



In [36]:
def plot_population_trends(countries): # added countries as an argument
    # Calculate the number of rows needed
    n_cols = 2
    n_rows = (len(countries) + n_cols - 1) // n_cols

    # Add code here to define what the function should do
    # when called, for example:
    print(f'Number of rows: {n_rows}')

    for country in countries: # iterate over the countries argument
        country_df = df[df['Country/Territory'] == country] # this line will now work as country is defined
        # add code here to use country_df
        print(country_df.head())

countries = ['United States', 'China'] # example list of countries
plot_population_trends(countries) # call the function with the list of countries

Number of rows: 1
     Rank Country/Territory      Continent  2022 Population  2020 Population  \
221     3     United States  North America        338289857        335942003   

     2015 Population  2010 Population  2000 Population  1990 Population  \
221        324607776        311182845        282398554        248083732   

     1980 Population  1970 Population  Area (km²)  Density (per km²)  \
221        223140018        200328340     9372610            36.0935   

     Growth Rate  World Population Percentage  
221       1.0038                         4.24  
    Rank Country/Territory Continent  2022 Population  2020 Population  \
41     1             China      Asia       1425887337       1424929781   

    2015 Population  2010 Population  2000 Population  1990 Population  \
41       1393715448       1348191368       1264099069       1153704252   

    1980 Population  1970 Population  Area (km²)  Density (per km²)  \
41        982372466        822534450     9706961           1

# Melt the DataFrame to have a long format


In [37]:
country_melted = country_df.melt(id_vars=['Country/Territory'],

value_vars=['2022 Population', '2020 Population', '2015 Population',
'2010 Population', '2000 Population', '1990 Population',
]
['1980 Population', '1970 Population'],
value_name='Population', var_name='Year'
)


list indices must be integers or slices, not tuple; perhaps you missed a comma?


list indices must be integers or slices, not tuple; perhaps you missed a comma?


list indices must be integers or slices, not tuple; perhaps you missed a comma?



NameError: name 'country_df' is not defined

In [38]:
def plot_population_trends(countries):
    # Calculate the number of rows needed
    n_cols = 2
    n_rows = (len(countries) + n_cols - 1) // n_cols

    # Add code here to define what the function should do
    # when called, for example:
    print(f'Number of rows: {n_rows}')

    for country in countries: # iterate over the countries argument
        country_df = df[df['Country/Territory'] == country] # this line will now work as country is defined
        # add code here to use country_df
        print(country_df.head())

        # Move the following lines inside the function to access country_df
        country_melted = country_df.melt(id_vars=['Country/Territory'],
        value_vars=['2022 Population', '2020 Population', '2015 Population',
        '2010 Population', '2000 Population', '1990 Population',
        '1980 Population', '1970 Population'],
        value_name='Population', var_name='Year'
        )
        print(country_melted.head()) # Example: Print the head of the melted DataFrame

countries = ['United States', 'China'] # example list of countries
plot_population_trends(countries) # call the function with the list of countries

Number of rows: 1
     Rank Country/Territory      Continent  2022 Population  2020 Population  \
221     3     United States  North America        338289857        335942003   

     2015 Population  2010 Population  2000 Population  1990 Population  \
221        324607776        311182845        282398554        248083732   

     1980 Population  1970 Population  Area (km²)  Density (per km²)  \
221        223140018        200328340     9372610            36.0935   

     Growth Rate  World Population Percentage  
221       1.0038                         4.24  
  Country/Territory             Year  Population
0     United States  2022 Population   338289857
1     United States  2020 Population   335942003
2     United States  2015 Population   324607776
3     United States  2010 Population   311182845
4     United States  2000 Population   282398554
    Rank Country/Territory Continent  2022 Population  2020 Population  \
41     1             China      Asia       1425887337       1

# Convert 'Year' to a more suitable format

In [39]:
country_melted['Year'] = country_melted['Year'].str.split().str[0].astype(int)

NameError: name 'country_melted' is not defined

In [40]:
def plot_population_trends(countries):
    # Calculate the number of rows needed
    n_cols = 2
    n_rows = (len(countries) + n_cols - 1) // n_cols

    # Add code here to define what the function should do
    # when called, for example:
    print(f'Number of rows: {n_rows}')

    for country in countries: # iterate over the countries argument
        country_df = df[df['Country/Territory'] == country] # this line will now work as country is defined
        # add code here to use country_df
        print(country_df.head())

        # Move the following lines inside the function to access country_df
        country_melted = country_df.melt(id_vars=['Country/Territory'],
        value_vars=['2022 Population', '2020 Population', '2015 Population',
        '2010 Population', '2000 Population', '1990 Population',
        '1980 Population', '1970 Population'],
        value_name='Population', var_name='Year'
        )
        print(country_melted.head()) # Example: Print the head of the melted DataFrame

        # Process country_melted within the function
        country_melted['Year'] = country_melted['Year'].str.split().str[0].astype(int)
        print(country_melted.head())

countries = ['United States', 'China'] # example list of countries
plot_population_trends(countries) # call the function with the list of countries

Number of rows: 1
     Rank Country/Territory      Continent  2022 Population  2020 Population  \
221     3     United States  North America        338289857        335942003   

     2015 Population  2010 Population  2000 Population  1990 Population  \
221        324607776        311182845        282398554        248083732   

     1980 Population  1970 Population  Area (km²)  Density (per km²)  \
221        223140018        200328340     9372610            36.0935   

     Growth Rate  World Population Percentage  
221       1.0038                         4.24  
  Country/Territory             Year  Population
0     United States  2022 Population   338289857
1     United States  2020 Population   335942003
2     United States  2015 Population   324607776
3     United States  2010 Population   311182845
4     United States  2000 Population   282398554
  Country/Territory  Year  Population
0     United States  2022   338289857
1     United States  2020   335942003
2     United States  

# Create a line plot for each country

In [None]:
line_fig = px.line(country_melted, x='Year', y='Population',

color='Country/Territory',

labels={'Population': 'Population', 'Year': 'Year'},
color_discrete_sequence=custom_palette)

NameError: name 'country_melted' is not defined

In [41]:
!pip install plotly
import plotly.express as px

def plot_population_trends(countries):
    # Calculate the number of rows needed
    n_cols = 2
    n_rows = (len(countries) + n_cols - 1) // n_cols

    # Add code here to define what the function should do
    # when called, for example:
    print(f'Number of rows: {n_rows}')

    for country in countries: # iterate over the countries argument
        country_df = df[df['Country/Territory'] == country] # this line will now work as country is defined
        # add code here to use country_df
        print(country_df.head())

        # Move the following lines inside the function to access country_df
        country_melted = country_df.melt(id_vars=['Country/Territory'],
        value_vars=['2022 Population', '2020 Population', '2015 Population',
        '2010 Population', '2000 Population', '1990 Population',
        '1980 Population', '1970 Population'],
        value_name='Population', var_name='Year'
        )
        print(country_melted.head()) # Example: Print the head of the melted DataFrame

        # Process country_melted within the function
        country_melted['Year'] = country_melted['Year'].str.split().str[0].astype(int)
        print(country_melted.head())

        # Create and return the line figure within the function
        line_fig = px.line(country_melted, x='Year', y='Population',
                           color='Country/Territory',
                           labels={'Population': 'Population', 'Year': 'Year'})

        return line_fig # Return the figure from the function

countries = ['United States', 'China'] # example list of countries
line_fig = plot_population_trends(countries) # call the function and store the returned figure
line_fig.show() # Display the figure

Number of rows: 1
     Rank Country/Territory      Continent  2022 Population  2020 Population  \
221     3     United States  North America        338289857        335942003   

     2015 Population  2010 Population  2000 Population  1990 Population  \
221        324607776        311182845        282398554        248083732   

     1980 Population  1970 Population  Area (km²)  Density (per km²)  \
221        223140018        200328340     9372610            36.0935   

     Growth Rate  World Population Percentage  
221       1.0038                         4.24  
  Country/Territory             Year  Population
0     United States  2022 Population   338289857
1     United States  2020 Population   335942003
2     United States  2015 Population   324607776
3     United States  2010 Population   311182845
4     United States  2000 Population   282398554
  Country/Territory  Year  Population
0     United States  2022   338289857
1     United States  2020   335942003
2     United States  

# Update the line plot to fit the subplot

In [44]:
n_cols = 2  # Example value, adjust as needed
n_rows = 2  # Example value, adjust as needed
fig = make_subplots(rows=n_rows, cols=n_cols) # Create a subplot figure

for i in range(1, len(line_fig.data) + 1): # Define i and iterate over the figures
    row = (i - 1) // n_cols + 1
    col = (i - 1) % n_cols + 1
    for trace in line_fig.data:
        fig.add_trace(trace, row=row, col=col) # Indent this line to include it in the for loop

In [45]:
# Assuming you want to add traces from line_fig to a new figure with subplots
import plotly.graph_objects as go

# Create a figure with subplots
fig = go.Figure()
# Assuming you have n_cols and n_rows defined somewhere
fig = make_subplots(rows=n_rows, cols=n_cols)

# Loop through traces in line_fig and add them to subplots
for i, trace in enumerate(line_fig.data):
    row = (i) // n_cols + 1  # Calculate row index starting from 0
    col = (i) % n_cols + 1   # Calculate column index
    fig.add_trace(trace, row=row, col=col)

fig.show()

In [46]:
# Assuming you want to add traces from line_fig to a new figure with subplots
import plotly.graph_objects as go
from plotly.subplots import make_subplots # import the make_subplots function

# Define n_rows and n_cols here
n_cols = 2
n_rows = 1 # You'll need to calculate this based on the number of countries you want to plot

# Create a figure with subplots
fig = make_subplots(rows=n_rows, cols=n_cols)

# Loop through traces in line_fig and add them to subplots
for i, trace in enumerate(line_fig.data):
    row = (i) // n_cols + 1  # Calculate row index starting from 0
    col = (i) % n_cols + 1   # Calculate column index
    fig.add_trace(trace, row=row, col=col)

fig.show()

# Update the layout of the subplots

In [47]:
fig.update_layout(
title='Population Trends of Selected Countries Over Time',
template='plotly_white',
font_family='Arial',
title_font_size=20,
showlegend=False,
height=600*n_rows, # Adjust height for bigger plots
)

fig.update_traces(line=dict(width=3))
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Population')

fig.show()

In [48]:
fastest = top_fastest[['Country/Territory', 'Growth Rate']].sort_values(by='Growth Rate', ascending=False).reset_index(drop=True)
fastest

Unnamed: 0,Country/Territory,Growth Rate
0,Moldova,1.0691
1,Poland,1.0404
2,Niger,1.0378
3,Syria,1.0376
4,Slovakia,1.0359
5,DR Congo,1.0325


In [49]:
plot_population_trends(['Moldova', 'Poland', 'Niger', 'Syria', 'Slovakia', 'DR Congo'])

Number of rows: 3
     Rank Country/Territory Continent  2022 Population  2020 Population  \
133   135           Moldova    Europe          3272996          3084847   

     2015 Population  2010 Population  2000 Population  1990 Population  \
133          3277388          3678186          4251573          4480199   

     1980 Population  1970 Population  Area (km²)  Density (per km²)  \
133          4103240          3711140       33846            96.7026   

     Growth Rate  World Population Percentage  
133       1.0691                         0.04  
  Country/Territory             Year  Population
0           Moldova  2022 Population     3272996
1           Moldova  2020 Population     3084847
2           Moldova  2015 Population     3277388
3           Moldova  2010 Population     3678186
4           Moldova  2000 Population     4251573
  Country/Territory  Year  Population
0           Moldova  2022     3272996
1           Moldova  2020     3084847
2           Moldova  2015     3

In [50]:
slowest = top_slowest[['Country/Territory', 'Growth Rate']].sort_values(by='Growth Rate', ascending=False).reset_index(drop=True)
slowest

Unnamed: 0,Country/Territory,Growth Rate
0,Latvia,0.9876
1,Lithuania,0.9869
2,Bulgaria,0.9849
3,American Samoa,0.9831
4,Lebanon,0.9816
5,Ukraine,0.912


In [51]:
plot_population_trends(['Latvia', 'Lithuania', 'Bulgaria', 'American Samoa',
'Lebanon', 'Ukraine'])

Number of rows: 3
     Rank Country/Territory Continent  2022 Population  2020 Population  \
111   151            Latvia    Europe          1850651          1897052   

     2015 Population  2010 Population  2000 Population  1990 Population  \
111          1991955          2101530          2392530          2689391   

     1980 Population  1970 Population  Area (km²)  Density (per km²)  \
111          2572037          2397414       64559             28.666   

     Growth Rate  World Population Percentage  
111       0.9876                         0.02  
  Country/Territory             Year  Population
0            Latvia  2022 Population     1850651
1            Latvia  2020 Population     1897052
2            Latvia  2015 Population     1991955
3            Latvia  2010 Population     2101530
4            Latvia  2000 Population     2392530
  Country/Territory  Year  Population
0            Latvia  2022     1850651
1            Latvia  2020     1897052
2            Latvia  2015     1

In [56]:
and_by_country = df.groupby('Country/Territory')['Area (km²)'].sum().sort_values(ascending=False) # Changed 'Area (km2)' to 'Area (km²)'
most_land = and_by_country.head(5) # Changed 'land_by_country' to 'and_by_country'
least_land = and_by_country.tail(5) # Changed 'land_by_country' to 'and_by_country'

# Create subplots

In [57]:
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("Countries with Most Land",
"Countries with Least Land"))

# Plot countries with the most land

In [58]:
fig.add_trace(go.Bar(x=most_land.index, y=most_land.values, name='Most Land',
marker_color=custom_palette[0]), row=1, col=1)

# Plot countries with the least land

In [59]:
fig.add_trace(go.Bar(x=least_land.index, y=least_land.values, name='Least Land',
marker_color=custom_palette[1]), row=1, col=2)

In [60]:
fig.update_layout(
title_text="Geographical Distribution of Land Area by Country",
showlegend=False,
template='plotly_white'
)

fig.update_yaxes(title_text="Area (km2)", row=1, col=1)
fig.update_yaxes(title_text="Area (km2)", row=1, col=2)

fig.show()

In [64]:
# Check for typos and correct the column name if necessary.
# For example if the column name is 'Area(km²)' use the following
# df['Area per Person'] = df['Area(km²)'] / df['2022 Population'] # Changed 'Area(km2)' to 'Area(km²)'
# To verify the column names present in your dataframe use:
print(df.columns)

# Assuming the column name is 'Area (km²)' based on the available data
df['Area per Person'] = df['Area (km²)'] / df['2022 Population'] # Corrected column name to 'Area (km²)'
country_area_per_person = df.groupby('Country/Territory')['Area per Person'].sum()
most_land_available = country_area_per_person.sort_values(ascending=False).head(5)
least_land_available = country_area_per_person.sort_values(ascending=False).tail(5)

Index(['Rank', 'Country/Territory', 'Continent', '2022 Population',
       '2020 Population', '2015 Population', '2010 Population',
       '2000 Population', '1990 Population', '1980 Population',
       '1970 Population', 'Area (km²)', 'Density (per km²)', 'Growth Rate',
       'World Population Percentage'],
      dtype='object')


In [None]:
# Check the DataFrame columns
df.columns

Index(['Rank', 'Country/Territory', 'Continent', '2022 Population',
       '2020 Population', '2015 Population', '2010 Population',
       '2000 Population', '1990 Population', '1980 Population',
       '1970 Population', 'Area (km²)', 'Density (per km²)', 'Growth Rate',
       'World Population Percentage'],
      dtype='object')

link code

# Create subplots

In [66]:
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("Countries with Most Land Available Per Capita", "Countries with Least Land Available Per Capita"))

# Plot countries with the most land

In [67]:
fig.add_trace(go.Bar(x=most_land_available.index, y=most_land_available.values,
name='Most Land', marker_color=custom_palette[2]), row=1, col=1)

# Plot countries with the least land

In [68]:
fig.add_trace(go.Bar(x=least_land_available.index, y=least_land_available.values,
name='Least Land', marker_color=custom_palette[3]), row=1, col=2)

In [69]:
fig.update_layout(
title_text="Distribution of Available Land Area by Country Per Capita",
showlegend=False,
template='plotly_white'
)


In [70]:
fig.update_yaxes(title_text="Land Available Per Person", row=1, col=1)
fig.update_yaxes(title_text="Land Available Per Person", row=1, col=2)

In [71]:
fig.show()