# Migration Analysis in East Africa
***
<img src="https://images.unsplash.com/photo-1587290538095-53583b6753e5?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxzZWFyY2h8Mnx8TWlncmF0aW9ufGVufDB8fDB8fHww&auto=format&fit=crop&w=600&q=600" alt="crowded street" style="width:500px;">

<a id="WebScraping"><h2>Getting Data</h2></a>
***
<a id="ScraperImport"><h3>Importing Libraries</h3></a>

In [81]:

import pandas as pd
import time
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.precision", 2)

# Data Wrapper
import wbdata

# visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# styling
%matplotlib inline
sns.set_theme(style="dark")
mpl.rcParams['axes.unicode_minus'] = False
pd.set_option('display.max_columns',None)
plt.style.use('seaborn-dark-palette')
plt.style.use('dark_background')

<a id="indicatorspecific"><h3>Import Data from WBDATA API</h3></a>

In [95]:
# Indicator codes to request, mapped to the column labels used in the DataFrame
indicators = {
    "SM.POP.NETM": "Net migration",
    "SM.POP.TOTL": "International migrant stock, total",
    "SM.POP.TOTL.ZS": "International migrant stock (% of population)",
}

# Three-letter codes of the countries to request
country_codes = [
    "SOM", "KEN", "ETH", "SDN", "SSD", "ERI",
    "DJI", "RWA", "UGA", "BDI", "TZA",
]

# Download the indicators, flatten the index into ordinary columns and give
# the country/date columns their display names.
df = (
    wbdata.get_dataframe(
        indicators,
        country=country_codes,
        convert_date=True,
        keep_levels=True,
    )
    .reset_index()
    .rename(columns={"country": "Country", "date": "Year"})
)

# Save a local copy; the analysis cells below reload the data from this file
df.to_csv('east_africa_mig_data.csv', index=False, header=True)

# Show the downloaded frame
print(df)

     Country       Year  Net migration  International migrant stock, total  \
0    Burundi 2022-01-01            NaN                                 NaN   
1    Burundi 2021-01-01       -14415.0                                 NaN   
2    Burundi 2020-01-01       -10186.0                                 NaN   
3    Burundi 2019-01-01        14241.0                                 NaN   
4    Burundi 2018-01-01        68145.0                                 NaN   
..       ...        ...            ...                                 ...   
688   Uganda 1964-01-01        -3172.0                                 NaN   
689   Uganda 1963-01-01        -3057.0                                 NaN   
690   Uganda 1962-01-01        -6470.0                                 NaN   
691   Uganda 1961-01-01        -7893.0                                 NaN   
692   Uganda 1960-01-01        -7586.0                            771730.0   

     International migrant stock (% of population)  
0         

<a id="EDA"><h2>Exploratory Data Analysis</h2></a>
***
<a id="structure"><h3>Checking Data Structure</h3></a>

In [97]:
# Reload the saved data set. The rename only takes effect if the file carries
# a 'Country Name' header; when the column is already called 'Country' it is
# a no-op.
df = (
    pd.read_csv('east_africa_mig_data.csv')
    .rename(columns={'Country Name': 'Country'})
)

# Column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 693 entries, 0 to 692
Data columns (total 5 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Country                                        693 non-null    object 
 1   Year                                           693 non-null    object 
 2   Net migration                                  682 non-null    float64
 3   International migrant stock, total             122 non-null    float64
 4   International migrant stock (% of population)  57 non-null     float64
dtypes: float64(3), object(2)
memory usage: 27.2+ KB


In [98]:
# Report (rows, columns), then preview the last three rows of the frame
print(df.shape)
df.iloc[-3:]

(693, 5)


Unnamed: 0,Country,Year,Net migration,"International migrant stock, total",International migrant stock (% of population)
690,Uganda,1962-01-01,-6470.0,,
691,Uganda,1961-01-01,-7893.0,,
692,Uganda,1960-01-01,-7586.0,771730.0,


In [99]:
# Split the rows labelled 'World' from the individual countries and tag every
# 'World' column except 'Year' with a " (World)" suffix.
# NOTE(review): no world aggregate is requested in country_codes, and the
# recorded output of this cell is an empty table - confirm whether world
# figures were meant to be downloaded as well.
world_data = (
    df.loc[df['Country'].eq('World')]
    .round(2)
    .rename(columns=lambda name: name if name == 'Year' else f"{name} (World)")
)
east_africa_countries_data = df.loc[df['Country'].ne('World')]
world_data

Unnamed: 0,Country (World),Year,Net migration (World),"International migrant stock, total (World)",International migrant stock (% of population) (World)


<a id="summary"><h3>Data Summary</h3></a>

In [100]:
# Summary statistics of the indicator columns, one statistic per row.
# 'Country' is dropped first; describe() already returns the statistics as the
# row index, so the friendlier labels are applied to the index directly.
# (Previously the rename result was discarded, so the labels never changed.)
summary_df = (
    df.drop(columns=['Country'])
    .describe()
    .rename(index={'mean': 'Average', 'min': 'Minimum', 'max': 'Maximum'})
)

# Display the summary DataFrame
print(summary_df)

       Net migration  International migrant stock, total  \
count       6.82e+02                            1.22e+02   
mean       -1.56e+04                            3.78e+05   
std         1.58e+05                            3.58e+05   
min        -2.16e+06                            7.70e+03   
25%        -4.03e+04                            6.13e+04   
50%        -1.76e+02                            2.83e+05   
75%         9.95e+03                            6.02e+05   
max         1.37e+06                            1.61e+06   

       International migrant stock (% of population)  
count                                          57.00  
mean                                            3.47  
std                                             4.23  
min                                             0.23  
25%                                             0.92  
50%                                             2.25  
75%                                             3.80  
max                

<a id="plotfunctions"><h3>Functions for Plots</h3></a>

In [128]:
import numpy as np
import plotly.graph_objs as go

def create_line_chart_static(df_grouped, title, columns, colors, yaxis_title='People'):
    """Draw one line-with-markers trace per country and display the figure.

    Parameters
    ----------
    df_grouped : pandas.DataFrame
        Must contain 'Country' and 'Year' columns plus the column named by
        ``columns[0]``.
    title : str
        Chart title.
    columns : sequence of str
        ``columns[0]`` names the column plotted on the y-axis. Any further
        entries are accepted for backward compatibility and are not used.
    colors : sequence of str
        Line colors; cycled when there are more countries than colors.
    yaxis_title : str, optional
        Label for the y-axis. Defaults to 'People'.

    Returns
    -------
    None
        The figure is shown with ``fig.show()``. It is deliberately not
        returned, so calling this as the last expression of a notebook cell
        does not render the chart a second time.
    """
    variable = columns[0]

    # sort=False keeps countries in order of first appearance, so each country
    # is paired with the same color as when iterating over unique() values.
    # Rows whose Country is missing are skipped.
    traces = [
        go.Scatter(
            x=country_rows['Year'],
            y=country_rows[variable],
            mode='lines+markers',
            name=country,
            line=dict(color=colors[position % len(colors)]),
        )
        for position, (country, country_rows) in enumerate(
            df_grouped.groupby('Country', sort=False)
        )
    ]

    # Page geometry, axes and base font
    layout = {
        'autosize': True,
        'width': 900,
        'height': 700,
        'title': title,
        'xaxis': {'title': 'Year', 'showgrid': False},
        'yaxis': {'title': yaxis_title, 'showgrid': True},
        'plot_bgcolor': '#f8f8f8',
        'paper_bgcolor': '#f8f8f8',
        'font': {'family': 'Poppins', 'size': 12, 'color': 'black'},
    }

    # Create the figure
    fig = go.Figure(data=traces, layout=layout)

    # Footer annotations, title font, legend box and margins
    fig.update_layout(
        annotations=[
            {
                'x': 0,
                'y': 0.1,
                'xref': 'paper',
                'yref': 'paper',
                'xanchor': 'left',
                'yanchor': 'top',
                'text': "Copyright © 2023 Najib Abdullahi",
                'showarrow': False,
                'font': dict(size=10, color="#1100FF"),
                'align': "left",
            },
            {
                'x': 1.06,
                'y': 0.09,
                'xref': 'paper',
                'yref': 'paper',
                'xanchor': 'right',
                'yanchor': 'top',
                'text': "Source: World Bank",
                'showarrow': False,
                'font': dict(size=10, color="#1100FF"),
                'align': "right",
            }
        ],
        title_font=dict(size=24, color='#000000'),
        legend_font=dict(size=12),
        legend_itemclick='toggle',
        legend_itemdoubleclick='toggleothers',
        legend_traceorder='normal',
        legend_tracegroupgap=10,
        legend_bordercolor='#000000',
        legend_borderwidth=1,
        legend_bgcolor='#ffffff',
        legend_xanchor='left',
        legend_yanchor='top',
        # Legend sits just outside the top-right corner of the plot area
        legend_x=1,
        legend_y=1.05,
        margin=dict(l=50, r=50, t=80, b=50, pad=4),
    )
    fig.update_yaxes(gridcolor='#c9c1b8')

    # Display the plot
    fig.show()

In [168]:
def create_bar_chart_static(df, y_column, x_columns, color_sequence,
                            title='Total Net Migration by Country',
                            yaxis_title='Total Net Migration'):
    """Draw one bar per country and display the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per bar; must contain a 'Country' column and ``y_column``.
    y_column : str
        Name of the column providing the bar heights.
    x_columns : sequence of str
        Accepted for backward compatibility; not used. Bars are always
        placed by the 'Country' column.
    color_sequence : sequence of str
        Bar colors, passed to the bars' ``marker_color``.
    title : str, optional
        Chart title. Defaults to 'Total Net Migration by Country'.
    yaxis_title : str, optional
        Label for the y-axis. Defaults to 'Total Net Migration'.

    Returns
    -------
    None
        The figure is shown with ``fig.show()``.
    """
    # Bar heights come from y_column rather than a hard-coded column name
    traces = [go.Bar(
        x=df['Country'],
        y=df[y_column],
        marker_color=color_sequence
    )]

    # Page geometry, axes and base font. The axes are labelled for a
    # per-country bar chart; a copy-pasted second layout used to overwrite
    # this one with the line chart's 'Year' / 'People' labels.
    layout = {
        'autosize': True,
        'width': 900,
        'height': 700,
        'title': title,
        'xaxis': {'title': 'Country', 'showgrid': False},
        'yaxis': {'title': yaxis_title, 'showgrid': True},
        'plot_bgcolor': '#f8f8f8',
        'paper_bgcolor': '#f8f8f8',
        'font': {'family': 'Poppins', 'size': 12, 'color': 'black'},
    }

    # Create the figure
    fig = go.Figure(data=traces, layout=layout)

    # Footer annotations, title font, legend box and margins
    fig.update_layout(
        annotations=[
            {
                'x': 0,
                'y': 0.1,
                'xref': 'paper',
                'yref': 'paper',
                'xanchor': 'left',
                'yanchor': 'top',
                'text': "Copyright © 2023 Najib Abdullahi",
                'showarrow': False,
                'font': dict(size=10, color="#1100FF"),
                'align': "left",
            },
            {
                'x': 1.06,
                'y': 0.09,
                'xref': 'paper',
                'yref': 'paper',
                'xanchor': 'right',
                'yanchor': 'top',
                'text': "Source: World Bank",
                'showarrow': False,
                'font': dict(size=10, color="#1100FF"),
                'align': "right",
            }
        ],
        title_font=dict(size=24, color='#000000'),
        legend_font=dict(size=12),
        legend_itemclick='toggle',
        legend_itemdoubleclick='toggleothers',
        legend_traceorder='normal',
        legend_tracegroupgap=10,
        legend_bordercolor='#000000',
        legend_borderwidth=1,
        legend_bgcolor='#ffffff',
        legend_xanchor='left',
        legend_yanchor='top',
        legend_x=1,
        legend_y=1.05,
        margin=dict(l=50, r=50, t=80, b=50, pad=4),
    )
    fig.update_yaxes(gridcolor='#c9c1b8')

    # Display the plot
    fig.show()

In [149]:
# Line palette, cycled per country by the plotting helper
colors = [
    '#C10000', '#d39b81', '#0476D9', '#75A3BF',
    '#F2668B', '#025E73', '#011F26', '#026873',
    '#03A688', '#D4C2AD', '#66796B', '#BB0000',
]

# Keep only the columns the chart needs
df_selected = east_africa_countries_data[['Year', 'Net migration', 'Country']]
columns = df_selected.columns

# columns[1:] starts with the series to plot ('Net migration')
create_line_chart_static(df_selected, 'Net migration', columns[1:], colors)

In [169]:
# Bar palette, one entry per country
colors = [
    '#C10000', '#d39b81', '#0476D9', '#75A3BF',
    '#F2668B', '#025E73', '#011F26', '#026873',
    '#03A688', '#D4C2AD', '#66796B', '#BB0000',
]

# Keep only the columns the chart needs
df_selected = east_africa_countries_data[['Year', 'Net migration', 'Country']]
columns = df_selected.columns

# Sum net migration over all years for each country
total_net_migration = (
    df_selected
    .groupby('Country')['Net migration']
    .sum()
    .reset_index()
    .round(2)
)

create_bar_chart_static(total_net_migration, 'Net migration', columns[1:], colors)

In [110]:
# Line palette, cycled per country by the plotting helper
colors = ['#C10000', '#d39b81','#0476D9','#75A3BF','#F2668B','#025E73','#011F26','#026873','#03A688','#D4C2AD','#66796B','#BB0000']
columns = ['Year','International migrant stock, total', 'Country']

# Build the plotting frame as a new object: assign() returns a copy, so the
# parsed 'Year' column is never written into a slice of
# east_africa_countries_data (the chained-assignment pattern pandas warns about).
df_selected = (
    east_africa_countries_data[columns]
    .assign(Year=lambda frame: pd.to_datetime(frame['Year']))
    # Keep observations up to and including 2016
    .loc[lambda frame: frame['Year'].dt.year <= 2016]
)
columns=df_selected.columns

# columns[1:] starts with the series to plot
create_line_chart_static(df_selected,'International migrant stock',columns[1:],colors)

In [114]:
# Line palette, cycled per country by the plotting helper
colors = ['#C10000', '#d39b81','#0476D9','#75A3BF','#F2668B','#025E73','#011F26','#026873','#03A688','#D4C2AD','#66796B','#BB0000']
columns = ['Year','International migrant stock (% of population)', 'Country']

# Build the plotting frame as a new object: assign() returns a copy, so the
# parsed 'Year' column is never written into a slice of
# east_africa_countries_data (the chained-assignment pattern pandas warns about).
df_selected = (
    east_africa_countries_data[columns]
    .assign(Year=lambda frame: pd.to_datetime(frame['Year']))
    # Keep observations from 1989 through 2016 inclusive
    .loc[lambda frame: frame['Year'].dt.year.between(1989, 2016)]
)
columns=df_selected.columns

# NOTE(review): create_line_chart_static labels the y-axis 'People' by
# default, which is misleading for a percentage series.
create_line_chart_static(df_selected,'International migrant stock (% of population)',columns[1:],colors)