In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [5]:
data = pd.read_csv('world_population.csv')
data.head()

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [6]:
data.shape

(234, 17)

In [8]:
data.isna().sum()

Rank                           0
CCA3                           0
Country/Territory              0
Capital                        0
Continent                      0
2022 Population                0
2020 Population                0
2015 Population                0
2010 Population                0
2000 Population                0
1990 Population                0
1980 Population                0
1970 Population                0
Area (km²)                     0
Density (per km²)              0
Growth Rate                    0
World Population Percentage    0
dtype: int64

In [9]:
print(f"Amount of duplicates : {data.duplicated().sum()}")

Amount of duplicates : 0


In [10]:
data.columns

Index(['Rank', 'CCA3', 'Country/Territory', 'Capital', 'Continent',
       '2022 Population', '2020 Population', '2015 Population',
       '2010 Population', '2000 Population', '1990 Population',
       '1980 Population', '1970 Population', 'Area (km²)', 'Density (per km²)',
       'Growth Rate', 'World Population Percentage'],
      dtype='object')

In [11]:
# Remove the 'CCA3' and 'Capital' columns as they are not needed for the analysis.
data.drop(['CCA3', 'Capital'], axis=1, inplace=True)

In [12]:
data.head()

Unnamed: 0,Rank,Country/Territory,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,Afghanistan,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,Albania,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,Algeria,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,American Samoa,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,Andorra,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [13]:
data.tail()

Unnamed: 0,Rank,Country/Territory,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
229,226,Wallis and Futuna,Oceania,11572,11655,12182,13142,14723,13454,11315,9377,142,81.493,0.9953,0.0
230,172,Western Sahara,Africa,575986,556048,491824,413296,270375,178529,116775,76371,266000,2.1654,1.0184,0.01
231,46,Yemen,Asia,33696614,32284046,28516545,24743946,18628700,13375121,9204938,6843607,527968,63.8232,1.0217,0.42
232,63,Zambia,Africa,20017675,18927715,16248230,13792086,9891136,7686401,5720438,4281671,752612,26.5976,1.028,0.25
233,74,Zimbabwe,Africa,16320537,15669666,14154937,12839771,11834676,10113893,7049926,5202918,390757,41.7665,1.0204,0.2


In [26]:
custom_palette = ['#0b3d91', '#e0f7fa', '#228b22', '#1e90ff', '#8B4513', '#D2691E','#DAA520', '#556B2F']

In [15]:
countries_by_continent = data['Continent'].value_counts().reset_index()

In [20]:
# create the bar chart
fig = px.bar(
    countries_by_continent,
    x = 'Continent',
    y = 'count',
    color = 'Continent',
    text = 'count',
    color_discrete_sequence = custom_palette

)

In [22]:
# Adjust the layout for a customized appearance 
fig.update_layout(
    xaxis_title = 'Continents',
    yaxis_title = 'Number of Countries',
    plot_bgcolor = 'rgba(0,0,0,0)', # from background color to transparent
    font_family = 'Arial' , # set font family
    title_font_size = 20 # Set title font size
)

fig.show()

In [23]:
continent_population_percentage = data.groupby('Continent')['World Population Percentage'].sum().reset_index()

In [24]:
# Create the pie chart
fig = go.Figure(data=[go.Pie(labels=continent_population_percentage['Continent'], 
values=continent_population_percentage['World Population Percentage'])])

In [None]:
# Adjust the layout for a customized appearance 
fig.update_layout(
    title = 'World Population Percentage by Continent',
    template = 'plotly',
    paper_bgcolor = 'rgba(255, 255, 255, 0)', # From paper background color to transparent
    plot_bgcolor = 'rgba(255, 255, 255, 0)', # From plot background color to transparent
)

In [28]:
# Update pie colors
fig.update_traces(marker = dict(colors = custom_palette, line = dict(color = '#FFFFFF', width = 1)))
fig.show()

In [29]:
# Melt DataFrame to have a long format
df_melted = data.melt(id_vars=['Continent'], value_vars=['2022 Population', '2020 Population', '2015 Population',  
'2010 Population', '2000 Population', '1990 Population', '1980 Population', '1970 Population'], var_name='Year', value_name='Population')

In [30]:
# Convert 'Year' to a more suitable format
df_melted['Year'] = df_melted['Year'].str.split().str[0].astype(int)

In [57]:
# Aggregate population by continent and year
population_by_continent = df_melted.groupby(['Continent','Year']).sum().reset_index()

In [55]:
fig = px.line(population_by_continent,
      x = 'Year', y = 'Population', color = 'Continent',        
      title ='Population Trends by Continent Over Time',
      labels = {'Population': 'Population', 'Year': 'Year'},
      color_discrete_sequence = custom_palette    
)

In [56]:
# Update 
fig.update_layout(
    template = 'plotly_white',
    xaxis_title = 'Year',
    yaxis_title = 'Population',
    font_family = 'Arial',
    title_font_size = 20,
)

fig.update_traces(line=dict(width=3))
fig.show()

In [71]:
# World Population Comparison: 1970 to 2020

features = ['1970 Population', '2020 Population']
for feature in features:
    fig = px.choropleth(data,
    locations='Country/Territory',
    locationmode='country names',
    color=feature,
    hover_name='Country/Territory',
    template='plotly_white',
    title = feature)
    
fig.show()  


In [77]:
growth = (data.groupby(by='Country/Territory'))['2022 Population'].sum()-data.groupby(by='Country/Territory')['1970 Population'].sum().sort_values(ascending=False)
growth.head()

Country/Territory
Afghanistan       30375800
Albania             517590
Algeria           31107310
American Samoa       17198
Andorra              59964
dtype: int64

In [80]:
# Create bar chart
fig=px.bar(x=growth.index,
    y = growth.values,
    text =growth.values,
    color =growth.values,
    title ='Growth Of Population From 1970 to 2020 (Top 8)',
    template ='plotly_white')

fig.update_layout(xaxis_title='Country',yaxis_title='Population Growth')
fig.show()


In [79]:
top_8_populated_countries_1970 = data.groupby('Country/Territory')['1970 Population'].sum().sort_values(ascending=False).head(8)
top_8_populated_countries_2022 = data.groupby('Country/Territory')['2022 Population'].sum().sort_values(ascending=False).head(8)

In [81]:
features = {'top_8_populated_countries_1970': top_8_populated_countries_1970,'top_8_populated_countries_2022': top_8_populated_countries_2022}

In [82]:
for feature_name, feature_data in features.items():
    year = feature_name.split('_')[-1] # Extract the year from the feature name
    fig = px.bar(x=feature_data.index,
    y=feature_data.values,
    text=feature_data.values,
    color=feature_data.values,
    title=f'Top 8 Most Populated Countries ({year})',
    template='plotly_white')
    fig.update_layout(xaxis_title='Country',
    yaxis_title='Population Growth')
    
fig.show()