In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.subplots as sp

In [2]:
# Load the world-population CSV into the working frame used by every later cell.
# NOTE(review): the path is absolute ('/content/...'), which looks like a hosted-notebook
# location — adjust it when running somewhere else.
data = pd.read_csv('/content/world_population.csv')

In [3]:
# Preview the first ten rows to check that the file parsed as expected.
data.head(10)

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0
5,42,AGO,Angola,Luanda,Africa,35588987,33428485,28127721,23364185,16394062,11828638,8330047,6029700,1246700,28.5466,1.0315,0.45
6,224,AIA,Anguilla,The Valley,North America,15857,15585,14525,13172,11047,8316,6560,6283,91,174.2527,1.0066,0.0
7,201,ATG,Antigua and Barbuda,Saint John’s,North America,93763,92664,89941,85695,75055,63328,64888,64516,442,212.1335,1.0058,0.0
8,33,ARG,Argentina,Buenos Aires,South America,45510318,45036032,43257065,41100123,37070774,32637657,28024803,23842803,2780400,16.3683,1.0052,0.57
9,140,ARM,Armenia,Yerevan,Asia,2780469,2805608,2878595,2946293,3168523,3556539,3135123,2534377,29743,93.4831,0.9962,0.03


In [4]:
# (rows, columns) of the loaded frame.
data.shape

(234, 17)

In [5]:
# Count missing values per column.
data.isna().sum()

Unnamed: 0,0
Rank,0
CCA3,0
Country/Territory,0
Capital,0
Continent,0
2022 Population,0
2020 Population,0
2015 Population,0
2010 Population,0
2000 Population,0


In [6]:
# Print column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Rank                         234 non-null    int64  
 1   CCA3                         234 non-null    object 
 2   Country/Territory            234 non-null    object 
 3   Capital                      234 non-null    object 
 4   Continent                    234 non-null    object 
 5   2022 Population              234 non-null    int64  
 6   2020 Population              234 non-null    int64  
 7   2015 Population              234 non-null    int64  
 8   2010 Population              234 non-null    int64  
 9   2000 Population              234 non-null    int64  
 10  1990 Population              234 non-null    int64  
 11  1980 Population              234 non-null    int64  
 12  1970 Population              234 non-null    int64  
 13  Area (km²)          

In [7]:
# Report how many fully duplicated rows the frame contains.
duplicate_rows = data.duplicated().sum()
print(f"Amount of duplicates:{duplicate_rows}")

Amount of duplicates:0


In [8]:
# List the column labels available for the analysis below.
data.columns

Index(['Rank', 'CCA3', 'Country/Territory', 'Capital', 'Continent',
       '2022 Population', '2020 Population', '2015 Population',
       '2010 Population', '2000 Population', '1990 Population',
       '1980 Population', '1970 Population', 'Area (km²)', 'Density (per km²)',
       'Growth Rate', 'World Population Percentage'],
      dtype='object')

In [9]:
# Shared colour sequence (hex codes) passed to the plotly charts in this notebook.
custom_palette = [
    '#0b3d91',
    '#e0f7fa',
    '#228b22',
    '#1e90ff',
    '#8B4513',
    '#D2691E',
    '#DAA520',
    '#556B2F',
]

In [10]:
# Number of rows (countries/territories) per continent, as a two-column frame.
continent_counts = data['Continent'].value_counts()
countries_by_continent = continent_counts.reset_index()
countries_by_continent

Unnamed: 0,Continent,count
0,Africa,57
1,Asia,50
2,Europe,50
3,North America,40
4,Oceania,23
5,South America,14


In [11]:
import plotly.express as px

In [12]:
# Bar chart: number of countries/territories per continent.
fig = px.bar(
    countries_by_continent,
    x='Continent',
    y='count',
    color='Continent',
    text='count',
    title='Number of countries by Continent',
    color_discrete_sequence=custom_palette,
)

# Style the figure BEFORE rendering it. The previous version called fig.show()
# both before and after update_layout, which rendered an unstyled copy followed
# by a styled duplicate.
fig.update_layout(
    xaxis_title='Continents',
    yaxis_title='Number of Countries',
    plot_bgcolor='rgba(0,0,0,0)',
    font_family='Arial',
    title_font_size=20,
)
fig.show()

In [13]:
# Sum each country's share of world population within its continent.
continent_population_per = data.groupby('Continent', as_index=False)[
    'World Population Percentage'
].sum()
continent_population_per

Unnamed: 0,Continent,World Population Percentage
0,Africa,17.87
1,Asia,59.19
2,Europe,9.33
3,North America,7.51
4,Oceania,0.55
5,South America,5.48


In [14]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [15]:
# Pie chart: share of world population held by each continent.
fig = go.Figure(
    data=[
        go.Pie(
            labels=continent_population_per['Continent'],
            values=continent_population_per['World Population Percentage'],
        )
    ]
)

# Apply title, background and slice colours first, then render once. The previous
# version also called fig.show() before styling, producing a duplicate unstyled chart.
fig.update_layout(
    title='World Population Percentage by Continent',
    template='plotly',
    paper_bgcolor='rgba(255,255,255,0)',
    plot_bgcolor='rgba(255,255,255,0)',
)
fig.update_traces(marker=dict(colors=custom_palette, line=dict(color='#FFFFFF', width=1)))
fig.show()

In [16]:
# Re-display the first ten rows for reference before the per-country analysis.
data.head(10)

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0
5,42,AGO,Angola,Luanda,Africa,35588987,33428485,28127721,23364185,16394062,11828638,8330047,6029700,1246700,28.5466,1.0315,0.45
6,224,AIA,Anguilla,The Valley,North America,15857,15585,14525,13172,11047,8316,6560,6283,91,174.2527,1.0066,0.0
7,201,ATG,Antigua and Barbuda,Saint John’s,North America,93763,92664,89941,85695,75055,63328,64888,64516,442,212.1335,1.0058,0.0
8,33,ARG,Argentina,Buenos Aires,South America,45510318,45036032,43257065,41100123,37070774,32637657,28024803,23842803,2780400,16.3683,1.0052,0.57
9,140,ARM,Armenia,Yerevan,Asia,2780469,2805608,2878595,2946293,3168523,3556539,3135123,2534377,29743,93.4831,0.9962,0.03


In [17]:
# Display the last ten rows of the frame.
data.tail(10)

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
224,43,UZB,Uzbekistan,Tashkent,Asia,34627652,33526656,30949417,28614227,24925554,20579100,15947129,12011361,447400,77.3975,1.016,0.43
225,181,VUT,Vanuatu,Port-Vila,Oceania,326740,311685,276438,245453,192074,150882,118156,87019,12189,26.8061,1.0238,0.0
226,234,VAT,Vatican City,Vatican City,Europe,510,520,564,596,651,700,733,752,1,510.0,0.998,0.0
227,51,VEN,Venezuela,Caracas,South America,28301696,28490453,30529716,28715022,24427729,19750579,15210443,11355475,916445,30.882,1.0036,0.35
228,16,VNM,Vietnam,Hanoi,Asia,98186856,96648685,92191398,87411012,79001142,66912613,52968270,41928849,331212,296.4472,1.0074,1.23
229,226,WLF,Wallis and Futuna,Mata-Utu,Oceania,11572,11655,12182,13142,14723,13454,11315,9377,142,81.493,0.9953,0.0
230,172,ESH,Western Sahara,El Aaiún,Africa,575986,556048,491824,413296,270375,178529,116775,76371,266000,2.1654,1.0184,0.01
231,46,YEM,Yemen,Sanaa,Asia,33696614,32284046,28516545,24743946,18628700,13375121,9204938,6843607,527968,63.8232,1.0217,0.42
232,63,ZMB,Zambia,Lusaka,Africa,20017675,18927715,16248230,13792086,9891136,7686401,5720438,4281671,752612,26.5976,1.028,0.25
233,74,ZWE,Zimbabwe,Harare,Africa,16320537,15669666,14154937,12839771,11834676,10113893,7049926,5202918,390757,41.7665,1.0204,0.2


In [18]:
# World maps coloured by population, one per selected year column.
features = ['1970 Population', '2020 Population']

for feature in features:
    fig = px.choropleth(
        data,
        locations='Country/Territory',
        locationmode='country names',
        color=feature,
        hover_name='Country/Territory',
        template='plotly_white',
        # Use the column name as the title; the previous code passed the literal
        # string 'feature', so both maps carried the same meaningless title.
        title=feature,
    )
    fig.show()

In [19]:
# Absolute population change 1970 -> 2022 per country, ten largest increases.
population_totals = data.groupby(by='Country/Territory')[['2022 Population', '1970 Population']].sum()
population_change = population_totals['2022 Population'] - population_totals['1970 Population']
growth = population_change.sort_values(ascending=False).head(10)
growth

Unnamed: 0_level_0,0
Country/Territory,Unnamed: 1_level_1
India,859671872
China,603352887
Pakistan,176533990
Nigeria,162971948
Indonesia,160272945
United States,137961517
Brazil,118943623
Bangladesh,103644512
Ethiopia,95071678
DR Congo,78858479


In [20]:
# Bar chart of the ten countries with the largest absolute population increase.
fig = px.bar(
    x=growth.index,
    y=growth.values,
    text=growth.values,
    color=growth.values,
    title='Top 10 Growth Of Population From 1970 To 2022',
    template='plotly_white',
)

# Set the axis titles before rendering. Previously fig.show() ran first (chart without
# axis titles) and the trailing update_layout expression then rendered the figure again.
fig.update_layout(xaxis_title='Countries', yaxis_title='Population Growth')
fig.show()

In [21]:
# Ten most populous countries in 1970.
top_10_countries_pop_1970 = (
    data.groupby(by='Country/Territory')['1970 Population']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
top_10_countries_pop_1970

Unnamed: 0_level_0,1970 Population
Country/Territory,Unnamed: 1_level_1
China,822534450
India,557501301
United States,200328340
Russia,130093010
Indonesia,115228394
Japan,105416839
Brazil,96369875
Germany,78294583
Bangladesh,67541860
Pakistan,59290872


In [22]:
# Ten most populous countries in 2022.
top_10_countries_pop_2022 = (
    data.groupby(by='Country/Territory')['2022 Population']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
top_10_countries_pop_2022

Unnamed: 0_level_0,2022 Population
Country/Territory,Unnamed: 1_level_1
China,1425887337
India,1417173173
United States,338289857
Indonesia,275501339
Pakistan,235824862
Nigeria,218541212
Brazil,215313498
Bangladesh,171186372
Russia,144713314
Mexico,127504125


In [23]:
# One bar chart per top-10 series; the year shown in the title is the last
# underscore-separated token of the dictionary key.
features = {'top_10_countries_pop_1970': top_10_countries_pop_1970,
            'top_10_countries_pop_2022': top_10_countries_pop_2022}

for feature_name, feature_data in features.items():
    year = feature_name.split('_')[-1]
    fig = px.bar(
        x=feature_data.index,
        y=feature_data.values,
        text=feature_data.values,
        color=feature_data.values,
        title=f'Top 10 Most Populated Countries ({year})',
        template='plotly_white',
    )

    # The bars are absolute population counts, not growth, so label the axis accordingly
    # (the previous label 'Population Growth' was copied from the growth chart).
    fig.update_layout(xaxis_title='Countries', yaxis_title='Population')
    fig.show()

In [24]:
# Quick look at the frame before reshaping it to long format.
data.head(5)

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [25]:
# Reshape wide -> long: one row per (country row, year column), keeping only the continent.
population_columns = [
    '2022 Population', '2020 Population', '2015 Population', '2010 Population',
    '2000 Population', '1990 Population', '1980 Population', '1970 Population',
]
data_melted = data.melt(
    id_vars=['Continent'],
    value_vars=population_columns,
    var_name='Year',
    value_name='Population',
)
data_melted.head(10)

Unnamed: 0,Continent,Year,Population
0,Asia,2022 Population,41128771
1,Europe,2022 Population,2842321
2,Africa,2022 Population,44903225
3,Oceania,2022 Population,44273
4,Europe,2022 Population,79824
5,Africa,2022 Population,35588987
6,North America,2022 Population,15857
7,North America,2022 Population,93763
8,South America,2022 Population,45510318
9,Asia,2022 Population,2780469


In [26]:
# Turn labels such as '2022 Population' into the integer year 2022.
# Casting to str first keeps this cell idempotent: on a re-run 'Year' is already
# an int column, and the bare `.str` accessor would raise AttributeError.
data_melted['Year'] = data_melted['Year'].astype(str).str.split().str[0].astype(int)
data_melted.head(10)

Unnamed: 0,Continent,Year,Population
0,Asia,2022,41128771
1,Europe,2022,2842321
2,Africa,2022,44903225
3,Oceania,2022,44273
4,Europe,2022,79824
5,Africa,2022,35588987
6,North America,2022,15857
7,North America,2022,93763
8,South America,2022,45510318
9,Asia,2022,2780469


In [27]:
# Total population per continent for each year in the long-format frame.
population_by_continent = data_melted.groupby(['Continent', 'Year'], as_index=False).sum()
population_by_continent

Unnamed: 0,Continent,Year,Population
0,Africa,1970,365444348
1,Africa,1980,481536377
2,Africa,1990,638150629
3,Africa,2000,818946032
4,Africa,2010,1055228072
5,Africa,2015,1201102442
6,Africa,2020,1360671810
7,Africa,2022,1426730932
8,Asia,1970,2144906290
9,Asia,1980,2635334228


In [28]:
# Line chart: total population of each continent over the available years.
fig = px.line(
    population_by_continent,
    x='Year',
    y='Population',
    color='Continent',
    title='Population Trends By Continent Over Time',
    labels={'Population': 'Population', 'Year': 'Year'},
    color_discrete_sequence=custom_palette,
)

# Style first, render once. The previous version also called fig.show() before
# styling, which displayed a duplicate unstyled chart.
fig.update_layout(
    template='plotly_white',
    xaxis_title='Year',
    yaxis_title='Population',
    font_family='Arial',
    title_font_size=20,
)
fig.update_traces(line=dict(width=3))
fig.show()

In [29]:
# Rows ordered from highest to lowest 'Growth Rate'.
sorted_data_growth = data.sort_values('Growth Rate', ascending=False)

In [30]:
# The six rows with the highest growth rate (the frame is already sorted descending).
top_fastest = sorted_data_growth.iloc[:6]
top_fastest

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
133,135,MDA,Moldova,Chisinau,Europe,3272996,3084847,3277388,3678186,4251573,4480199,4103240,3711140,33846,96.7026,1.0691,0.04
164,37,POL,Poland,Warsaw,Europe,39857145,38428366,38553146,38597353,38504431,38064255,35521429,32482943,312679,127.4698,1.0404,0.5
148,54,NER,Niger,Niamey,Africa,26207977,24333639,20128124,16647543,11622665,8370647,6173177,4669708,1267000,20.6851,1.0378,0.33
202,60,SYR,Syria,Damascus,Asia,22125249,20772595,19205178,22337563,16307654,12408996,8898954,6319199,185180,119.4797,1.0376,0.28
189,116,SVK,Slovakia,Bratislava,Europe,5643453,5456681,5424444,5396424,5376690,5261305,4973883,4522867,49037,115.0856,1.0359,0.07
55,15,COD,DR Congo,Kinshasa,Africa,99010212,92853164,78656904,66391257,48616317,35987541,26708686,20151733,2344858,42.2244,1.0325,1.24


In [31]:
# The six rows with the lowest growth rate (the end of the descending sort).
top_slowest = sorted_data_growth.iloc[-6:]
top_slowest

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
111,151,LVA,Latvia,Riga,Europe,1850651,1897052,1991955,2101530,2392530,2689391,2572037,2397414,64559,28.666,0.9876,0.02
117,141,LTU,Lithuania,Vilnius,Europe,2750055,2820267,2963765,3139019,3599637,3785847,3521206,3210147,65300,42.1142,0.9869,0.03
30,108,BGR,Bulgaria,Sofia,Europe,6781953,6979175,7309253,7592273,8097691,8767778,8980606,8582950,110879,61.1654,0.9849,0.09
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
112,119,LBN,Lebanon,Beirut,Asia,5489739,5662923,6398940,4995800,4320642,3593700,2963702,2381791,10452,525.2334,0.9816,0.07
218,38,UKR,Ukraine,Kiev,Europe,39701739,43909666,44982564,45683020,48879755,51589817,49973920,47279086,603500,65.7858,0.912,0.5


In [47]:
def plot_population_trends(data, countries, n_cols=2):
    """Show one population-over-time line chart per country on a subplot grid.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Country/Territory' column and the eight
        '<year> Population' columns listed in ``year_columns`` below.
    countries : list of str
        Country names to plot. Each gets its own subplot, titled with the
        name, placed left-to-right and then top-to-bottom. A name with no
        matching row yields an empty subplot.
    n_cols : int, default 2
        Number of subplot columns; the row count is derived from it.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Loop-invariant: the wide-format columns that get melted for every country.
    year_columns = ['2022 Population', '2020 Population', '2015 Population',
                    '2010 Population', '2000 Population', '1990 Population',
                    '1980 Population', '1970 Population']

    # Ceiling division: enough rows to hold every country.
    n_rows = (len(countries) + n_cols - 1) // n_cols

    fig = sp.make_subplots(rows=n_rows, cols=n_cols, subplot_titles=countries,
                           horizontal_spacing=0.1, vertical_spacing=0.1)

    for position, country in enumerate(countries):
        country_rows = data[data['Country/Territory'] == country]

        # Wide -> long so that each year becomes one point on the line.
        country_melted = country_rows.melt(id_vars=['Country/Territory'],
                                           value_vars=year_columns,
                                           var_name='Year', value_name='Population')

        # '2022 Population' -> 2022
        country_melted['Year'] = country_melted['Year'].str.split().str[0].astype(int)

        line_fig = px.line(country_melted, x='Year', y='Population', color='Country/Territory',
                           labels={'Population': 'Population', 'Year': 'Year'},
                           color_discrete_sequence=custom_palette)

        # Zero-based grid position -> plotly's one-based (row, col).
        grid_row, grid_col = divmod(position, n_cols)

        # Copy the traces of the throwaway express figure into the shared grid.
        for trace in line_fig.data:
            fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

    fig.update_layout(title='Population Trends of Selected Countries Over Time',
                      template='plotly_white',
                      font_family='Arial',
                      title_font_size=20,
                      showlegend=False,
                      height=600 * n_rows)

    fig.update_traces(line=dict(width=3))
    fig.update_xaxes(title_text='Year')
    fig.update_yaxes(title_text='Population')
    fig.show()

# Plot the six fastest-growing countries, then the six slowest-growing ones.
for countries in (
    ['Moldova', 'Poland', 'Niger', 'Syria', 'Slovakia', 'DR Congo'],
    ['Latvia', 'Lithuania', 'Bulgaria', 'American Samoa', 'Lebanon', 'Ukraine'],
):
    plot_population_trends(data, countries)

In [37]:
# Name and growth rate of the fastest-growing countries, highest first, with a fresh 0..n index.
fastest = (
    top_fastest.loc[:, ['Country/Territory', 'Growth Rate']]
    .sort_values('Growth Rate', ascending=False)
    .reset_index(drop=True)
)
fastest

Unnamed: 0,Country/Territory,Growth Rate
0,Moldova,1.0691
1,Poland,1.0404
2,Niger,1.0378
3,Syria,1.0376
4,Slovakia,1.0359
5,DR Congo,1.0325


In [39]:
# Name and growth rate of the slowest-growing countries, highest first, with a fresh 0..n index.
slowest = (
    top_slowest.loc[:, ['Country/Territory', 'Growth Rate']]
    .sort_values('Growth Rate', ascending=False)
    .reset_index(drop=True)
)
slowest

Unnamed: 0,Country/Territory,Growth Rate
0,Latvia,0.9876
1,Lithuania,0.9869
2,Bulgaria,0.9849
3,American Samoa,0.9831
4,Lebanon,0.9816
5,Ukraine,0.912


In [40]:
# Show the first rows again before the land-area analysis.
data.head(5)

Unnamed: 0,Rank,CCA3,Country/Territory,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [54]:
# Area per country, largest first; keep the five largest and the five smallest.
area_totals = data.groupby('Country/Territory')['Area (km²)'].sum()
land_by_country = area_totals.sort_values(ascending=False)
most_land, least_land = land_by_country.iloc[:5], land_by_country.iloc[-5:]

In [69]:
# Side-by-side bars: five largest countries by area (left) and five smallest (right).
most_land_bar = go.Bar(
    x=most_land.index,
    y=most_land.values,
    name='Most Land',
    marker_color=custom_palette[0],
)
least_land_bar = go.Bar(
    x=least_land.index,
    y=least_land.values,
    name='Least Land',
    marker_color=custom_palette[1],
)

fig = sp.make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Country with most lands", "Country with least lands"),
)
fig.add_trace(most_land_bar, row=1, col=1)
fig.add_trace(least_land_bar, row=1, col=2)
fig.update_layout(
    title_text="Geographical Distribution of Land Area by Country",
    showlegend=False,
    template='plotly_white',
)
for subplot_col in (1, 2):
    fig.update_yaxes(title_text="Area (km²)", row=1, col=subplot_col)
fig.show()

In [70]:
# Area (km²) divided by 2022 population, stored as a new column on `data`.
data['Area per Person'] = data['Area (km²)'].div(data['2022 Population'])
country_area_per_person = data.groupby('Country/Territory')['Area per Person'].sum()

# Rank once, highest first, then take both ends of the ranking.
area_per_person_ranked = country_area_per_person.sort_values(ascending=False)
most_land_available = area_per_person_ranked.head(5)
least_land_available = area_per_person_ranked.tail(5)

In [71]:
# Side-by-side bars: most (left) and least (right) area per person.
# Each entry: (series to plot, trace name, bar colour).
per_capita_panels = [
    (most_land_available, 'Most Land', custom_palette[2]),
    (least_land_available, 'Least Land', custom_palette[3]),
]

fig = sp.make_subplots(
    rows=1,
    cols=2,
    subplot_titles=(
        "Countries with Most Land Available Per Capita",
        "Countries with Least Land Available Per Capita",
    ),
)

for panel_col, (panel_series, panel_name, panel_colour) in enumerate(per_capita_panels, start=1):
    fig.add_trace(
        go.Bar(x=panel_series.index, y=panel_series.values,
               name=panel_name, marker_color=panel_colour),
        row=1, col=panel_col,
    )
    fig.update_yaxes(title_text="Land Available Per Person", row=1, col=panel_col)

fig.update_layout(
    title_text="Distribution of Available Land Area by Country Per Capita",
    showlegend=False,
    template='plotly_white',
)

fig.show()