In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


In [None]:
# Run once if wbdata is not installed (%pip targets the kernel's environment):
# %pip install wbdata --quiet


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import wbdata
import pandas as pd

# World Bank indicator code mapped to the column label used in the frame
indicators = {'SP.POP.TOTL':'Population, total'}
# Country codes to request
countries = ['PK', 'IND', 'BGD', 'AFG', 'IRN', 'CHN', 'NPL', 'LKA', 'BTN', 'MMR']

# Fetch the series, move the index levels into ordinary columns,
# and give those columns presentation-friendly names
df = wbdata.get_dataframe(indicators, country=countries)
df = df.reset_index()
df = df.rename(columns={'country':'Country', "date": "Year"})

# Make sure Year is numeric so the range filter below compares numbers
df['Year'] = pd.to_numeric(df['Year'])

# Keep only the years 1960 through 2023 (both ends inclusive)
year_in_range = df['Year'].between(1960, 2023)
df = df[year_in_range]

df.head()



Unnamed: 0,Country,Year,"Population, total"
0,Afghanistan,2023,42239854.0
1,Afghanistan,2022,41128771.0
2,Afghanistan,2021,40099462.0
3,Afghanistan,2020,38972230.0
4,Afghanistan,2019,37769499.0


# We will build the dataset using only the World Bank (WB) API.

In [25]:
import wbdata
import pandas as pd

# World Bank indicator codes mapped to the column names used in the frame
indicator = {
    'SP.POP.TOTL': 'total_population',  # Population
    'SM.POP.NETM': 'net_migration',     # Net migration
}
# Country codes to request
countries = ['PK', 'IND', 'BGD', 'AFG', 'IRN', 'CHN', 'NPL', 'LKA', 'BTN', 'MMR']

# Fetch both indicators in a single request
data = wbdata.get_dataframe(indicator, country=countries)

# Reset index to turn the index levels into ordinary columns
data.reset_index(inplace=True)

# Rename columns for clarity
data.rename(columns={'country': 'Country', 'date': 'Year'}, inplace=True)

# Ensure Year column is numeric so the range filter compares numbers
data['Year'] = pd.to_numeric(data['Year'])

# Filter data between 1960 and 2023 (inclusive)
data = data[(data['Year'] >= 1960) & (data['Year'] <= 2023)]

# Save to a CSV file (optional).
# Raw string: the Windows path contains backslashes followed by letters
# (\D, \O, \m) that are invalid escape sequences in a normal string literal.
# The resulting path value is exactly the same as before.
# NOTE(review): this is a machine-specific absolute path; consider a
# configurable, relative data directory so the notebook runs elsewhere.
output_csv = r"D:\Drive D\One drive folder\OneDrive - Higher Education Commission\Drive G\Data science coding\Data sets\migration_data.csv"
data.to_csv(output_csv, index=False)

# Display the first few rows of data
print(data.head())

       Country  Year  total_population  net_migration
0  Afghanistan  2023        42239854.0       -65846.0
1  Afghanistan  2022        41128771.0       -65846.0
2  Afghanistan  2021        40099462.0      -183672.0
3  Afghanistan  2020        38972230.0       166821.0
4  Afghanistan  2019        37769499.0        -8082.0


In [29]:
# Summarise the fetched frame: column dtypes, non-null counts and memory usage
data.info()

<class 'wbdata.client.DataFrame'>
RangeIndex: 640 entries, 0 to 639
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country           640 non-null    object 
 1   Year              640 non-null    int64  
 2   total_population  640 non-null    float64
 3   net_migration     640 non-null    float64
dtypes: float64(2), int64(1), object(1)
memory usage: 20.1+ KB


In [30]:
# Work on a copy so the in-place operations applied to df later on
# (e.g. sort_values(..., inplace=True)) leave `data` untouched
df=data.copy()

In [31]:
# List the distinct country names present in the data
df['Country'].unique()

array(['Afghanistan', 'Bangladesh', 'Bhutan', 'China', 'India',
       'Iran, Islamic Rep.', 'Sri Lanka', 'Myanmar', 'Nepal', 'Pakistan'],
      dtype=object)

In [32]:
# Summary statistics (count, mean, std, min, quartiles, max) for both indicators
df[['total_population', 'net_migration']].describe()

Unnamed: 0,total_population,net_migration
count,640.0,640.0
mean,241496800.0,-117657.0
std,409977200.0,353267.9
min,221266.0,-2290411.0
25%,17747730.0,-184523.8
50%,45289980.0,-44308.5
75%,156428500.0,2700.0
max,1428628000.0,1834556.0


In [35]:
# Histogram of total_population across every row of df, using 50 bins
fig = px.histogram(
    df,
    x='total_population',
    nbins=50,
    title='Total Population Distribution',
)
fig.show()

In [36]:
# Histogram of net_migration across every row of df, using 50 bins.
# The title now names the plotted variable; it previously read
# 'Population Distribution', copied from the population histogram.
fig = px.histogram(
    df,
    x='net_migration',
    nbins=50,
    title='Net Migration Distribution',
)
fig.show()

In [40]:
# Total population per year, one coloured line per country,
# rendered at a fixed 800x400 size
fig = px.line(
    df,
    x='Year',
    y='total_population',
    color='Country',
    title='Total Population Over Time',
).update_layout(width=800, height=400)
fig.show()

In [41]:
# Same population-over-time chart, but with a logarithmic y-axis (log_y=True),
# rendered at a fixed 800x400 size
fig = px.line(
    df,
    x='Year',
    y='total_population',
    color='Country',
    log_y=True,
    title='Total Population Over Time',
).update_layout(width=800, height=400)
fig.show()

In [42]:
# Population over time on a log y-axis, with one facet row per country
fig = px.line(
    df,
    x='Year',
    y='total_population',
    color='Country',
    log_y=True,
    facet_row='Country',
    title='Total Population Over Time',
)

# facet_row stacks one panel per country, so a fixed 400 px height leaves
# each panel only a sliver; scale the height with the number of panels.
n_facets = df['Country'].nunique()
fig.update_layout(width=800, height=max(400, 150 * n_facets))
fig.show()

In [43]:
# Population over time on a log y-axis, with one facet panel per country.
# facet_col alone puts every country side by side in a single row, which
# leaves each panel very narrow; wrap after five panels and widen the figure.
fig = px.line(
    df,
    x='Year',
    y='total_population',
    color='Country',
    log_y=True,
    facet_col='Country',
    facet_col_wrap=5,
    title='Total Population Over Time',
)

# Figure size chosen to give the wrapped grid of panels room to breathe
fig.update_layout(width=1200, height=500)
fig.show()

In [50]:
from plotly.subplots import make_subplots

# Distinct country names, computed once and reused for the grid size,
# the subplot titles and the trace loop
countries = df['Country'].unique()
num_countries = len(countries)
num_cols = 2
# Ceiling division: enough rows to hold every country
num_rows = -(-num_countries // num_cols)

# Create the subplot grid. subplot_titles is documented as a list of strings,
# so pass a plain list rather than the NumPy array returned by unique().
fig = make_subplots(rows=num_rows, cols=num_cols,
                    subplot_titles=list(countries),
                    shared_xaxes=True,
                    shared_yaxes=True)

# Add one population trace per country, filling the grid row by row
for i, country in enumerate(countries):
    grid_row, grid_col = divmod(i, num_cols)
    country_data = df[df['Country'] == country]
    # Reuse the single trace that plotly express builds for this country
    trace = px.line(country_data, x='Year', y='total_population').data[0]
    fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

# Update layout
fig.update_layout(height=1200, width=800, title_text="Total Population Over Time")

# Show plot
fig.show()

In [51]:
# Scatter of net migration against total population, coloured by country
fig = px.scatter(
    df,
    x='total_population',
    y='net_migration',
    color='Country',
    title='Population vs. Net Migration',
)
fig.show()

In [53]:
# Sort rows by year (presumably so the animation frames play in
# chronological order; confirm against plotly's frame ordering)
df.sort_values("Year", inplace=True)

# Fix the axis ranges from the full data set. Without range_x / range_y,
# points in later frames can fall outside axes fitted to the first frame.
pop_max = df['total_population'].max()
mig_min = df['net_migration'].min()
mig_max = df['net_migration'].max()
mig_pad = 0.05 * (mig_max - mig_min)

# Animated scatter: one frame per year, marker size follows total population
fig = px.scatter(
    df,
    x='total_population',
    y='net_migration',
    color='Country',
    animation_frame='Year',
    size='total_population',
    range_x=[0, pop_max * 1.05],
    range_y=[mig_min - mig_pad, mig_max + mig_pad],
    title='Population vs. Net Migration',
)
fig.show()

In [54]:
# Order the rows chronologically before drawing the lines
df.sort_values(by='Year', inplace=True)

# Net migration per year, one coloured line per country
fig = px.line(
    df,
    x='Year',
    y='net_migration',
    color='Country',
    title='Net Migration Over Time',
)

# No fig.show() here: the cell relies on its last expression being displayed
fig.update_layout(width=800, height=400)

In [None]:
# Sum total_population over all years for each country, largest first.
# NOTE(review): this adds up yearly population figures, so the result is a
# cumulative total across years rather than any single year's population;
# confirm that this is the intended ranking metric.
top_countries_population = (
    df.groupby('Country')['total_population']
    .sum()
    .sort_values(ascending=False)
)
top_countries_population

Country
China                 7.098402e+10
India                 5.862843e+10
Pakistan              8.221178e+09
Bangladesh            7.072180e+09
Iran, Islamic Rep.    3.498892e+09
Myanmar               2.535070e+09
Nepal                 1.301186e+09
Afghanistan           1.202076e+09
Sri Lanka             1.081652e+09
Bhutan                3.325062e+07
Name: total_population, dtype: float64

In [62]:
# Sum net_migration over all years for each country, largest first
top_countries = (
    df.groupby('Country')['net_migration']
    .sum()
    .sort_values(ascending=False)
)
top_countries

Country
Iran, Islamic Rep.     6246124.0
Bhutan                   -7684.0
Afghanistan           -2570326.0
Sri Lanka             -3077939.0
Myanmar               -3739778.0
Nepal                 -4840949.0
India                -11247041.0
China                -17070233.0
Pakistan             -18487914.0
Bangladesh           -20504716.0
Name: net_migration, dtype: float64

In [64]:
# Bar chart of the per-country net-migration totals held in top_countries.
# NOTE(review): y is passed as a bare array, so the y-axis label is presumably
# auto-generated rather than 'net_migration'; confirm and add labels= if needed.
px.bar(top_countries, x=top_countries.index, y=top_countries.values, title='Net migration by Country')

In [68]:
# Bar chart of each country's total population, restricted to the 2023 rows
fig = px.bar(
    df.loc[df['Year'] == 2023],
    x='Country',
    y='total_population',
    color='Country',
    title='Population of Countries in 2023',
)
fig.show()

In [74]:
# Total population per year, one coloured line per country, at default size
fig = px.line(
    df,
    x='Year',
    y='total_population',
    color='Country',
    title="Population",
)
fig.show()