# Asylum seekers: During international migration crisis


In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from dash import Dash, dcc, html, Input, Output

![darien](img/barth-bailey-d2uHXWTkGn4-unsplash.jpg)

# Table of contents:
* [Data sources](#data_sources)
* [Total asylum seekers over the years](#total_asylum)
* [Migration crisis](#migration_crisis)
* [Country of origin](#country_of_origin)

# Data sources: <a class="anchor" id="data_sources"></a>
<img src="img/unhcr-logo-png-transparent.png" width="400" height="340"><br>
<img src="img/The_World_Bank_logo.svg" width="500" height="100">

# Purpose of the analysis:
This analysis describes how people migrate worldwide in search of asylum, showing their countries of origin, their countries of destination, etc.


## Import data

In [3]:
# Load the cleaned datasets. Forward slashes are used so the relative paths
# resolve on Windows, Linux and macOS alike (backslashes only work on Windows).
asylum = pd.read_csv('./Data/Clean/Asylum_data.csv')
population = pd.read_csv('./Data/Clean/Population_data.csv')

# More than 100,000,000 people have sought asylum


In [4]:
# Grand total of asylum seekers in the dataset, with thousands separators.
total_asylum_seekers = asylum['count'].sum()
print(f"{total_asylum_seekers:,d}")

108,224,695


## Asylum seekers over the years <a class="anchor" id="total_asylum"></a>

In [5]:
def Peak_finder(data: pd.DataFrame, growth_threshold: float = 0.5, minimum_count: int = 1000):
    """Yield the periods in which the yearly count spikes.

    A peak starts when the count grows by more than ``growth_threshold``
    (as a fraction of the previous row's count) and the previous count is
    above ``minimum_count``. It ends at the first row whose count does not
    exceed the previous one. A peak that is still open when the data runs
    out is closed at the last available year instead of being dropped.

    Args:
        data: frame with a ``year`` and a ``count`` column, one row per year,
            already in chronological order.
        growth_threshold: minimum relative growth between two consecutive
            rows for a peak to start (0.5 means +50%).
        minimum_count: the previous row's count must be greater than this
            for a peak to start.

    Yields:
        dict with the keys ``'start'`` (the year just before the jump) and
        ``'end'`` (the year the count stopped growing).
    """
    if data.empty:
        return

    # Work on plain lists: positional access is independent of the frame's
    # index labels, and the years keep their own dtype.
    years = data['year'].tolist()
    counts = data['count'].tolist()

    previous_year, previous_value = years[0], counts[0]
    peak_start = None  # year in which the currently open peak began, if any

    for year, count in zip(years[1:], counts[1:]):
        if peak_start is None:
            growth = count - previous_value
            if previous_value > minimum_count and growth > previous_value * growth_threshold:
                peak_start = previous_year
        elif count <= previous_value:
            yield {'start': peak_start, 'end': year}
            peak_start = None
        previous_year, previous_value = year, count

    # The data ended while the count was still rising: report the peak up to
    # the last year seen.
    if peak_start is not None:
        yield {'start': peak_start, 'end': previous_year}


In [6]:
def _year_hover_text(year, countries: pd.DataFrame) -> str:
    """Build the hover HTML for one year.

    ``countries`` holds one row per origin country for that year, already
    sorted by ``count`` in descending order. Years with fewer than three
    origin countries simply list the ones available.
    """
    top_three = countries.head(3)
    top_lines = "".join(
        f"{name}: {count:,}<br>"
        for name, count in zip(top_three['country_of_origin_name'], top_three['count'])
    )
    return (
        f"<b>Year: {year}<br>"
        f"Total: {countries['count'].sum():,}<br><br>"
        "Top three origin countries:</b><br>"
        + top_lines
        + f"Other: {countries.iloc[3:]['count'].sum():,}"
    )


# Sort once by count (descending) so every per-year group keeps its largest
# origin countries first.
custom_for_template = asylum.groupby(['country_of_origin_name', 'year']).agg({'count': 'sum'}).reset_index().sort_values('count', ascending=False)
custom_for_template = custom_for_template.groupby('year')

# "By default the group keys are sorted during the groupby operation." Pandas docs https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html
# The texts therefore come out in ascending year order.
extra_hover_text = [
    _year_hover_text(year, countries)
    for year, countries in custom_for_template
]


In [7]:
# Plot the timeline
## Data: total asylum seekers per year (groupby returns the years sorted)
timeline = asylum.groupby('year').agg({'count': 'sum'}).reset_index()
# NOTE: this rebinding is not used by the plot below; it is kept so the name
# still refers to the per-country/per-year totals after this cell runs.
custom_for_template = asylum.groupby(['country_of_origin_name', 'year']).agg({'count': 'sum'}).reset_index()

## Setting trace
trace = go.Scatter(x=timeline['year'], y=timeline['count'])
fig = go.Figure(trace)

## Highlight every detected migration crisis.
## add_vrect spans the full plot height, so no hard-coded y limit is needed.
for peak in Peak_finder(timeline):
    fig.add_vrect(
        x0=peak['start'], x1=peak['end'],
        fillcolor="tomato", opacity=0.5,
        layer="below", line_width=0,
    )

## One pre-built hover text per year (same order as the timeline rows)
fig.update_traces(
    customdata=extra_hover_text,
    hovertemplate="%{customdata}"
)

## Defining title
fig.update_layout(
    title='Total asylum seeker population over the years (highlighted migration crisis)',
    xaxis={'title': {'text': "Years"}, 'showgrid': False},
    yaxis={'title': {'text': 'Asylum Seekers'}, 'rangemode': 'tozero', 'showgrid': False}
)

fig.show()

# Closer look at migration crisis <a class="anchor" id="migration_crisis"></a>

In [8]:
def Specific_Timeframe_Data(df: pd.DataFrame, start_year: int, end_year: int) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``year`` lies in ``[start_year, end_year]``.

    Both bounds are inclusive. The original index labels are preserved.
    """
    within_period = df['year'].between(start_year, end_year)
    return df[within_period]

def yearly_data_asylum(df: pd.DataFrame) -> pd.DataFrame:
    """Sum ``count`` per origin country and year.

    Returns a frame with the columns ``country_of_origin_name``, ``year``
    and ``count``; every other column is dropped.
    """
    per_country_and_year = df.groupby(['country_of_origin_name', 'year'])
    totals = per_country_and_year[['count']].sum()
    return totals.reset_index()

def only_the_top_for_year(df: pd.DataFrame, minimum_participation: float = 0.05) -> pd.DataFrame:
    """
        Rename to 'Other' every country whose share of a year's total is below the minimum participation.

        The share is computed per year: the country's summed 'count' for that year
        divided by the summed 'count' of all countries in that year.
        The input DataFrame is left untouched; a modified copy is returned.

        Args:
            df (pandas.DataFrame): the DataFrame to work on; needs the columns
                'year', 'country_of_origin_name' and 'count'
            minimum_participation (float): share of the yearly total below which
                a country is grouped into 'Other' (0.05 means 5%)

        Returns:
            pandas.DataFrame: a copy of df with the country names changed.
    """
    result = df.copy()

    # Broadcast the yearly total and the per-country yearly total onto every row,
    # so the share can be compared in one vectorised step.
    yearly_total = result.groupby('year')['count'].transform('sum')
    country_total = result.groupby(['year', 'country_of_origin_name'])['count'].transform('sum')

    below_minimum = (country_total / yearly_total) < minimum_participation
    result.loc[below_minimum, 'country_of_origin_name'] = 'Other'

    return result

def Top_four_countries(df: pd.DataFrame) -> pd.DataFrame:
    """
        Keep, for every year, the four countries with the highest summed 'count' and rename the rest to 'Other'.

        The ranking is done independently per year, so a country can be kept in
        one year and grouped into 'Other' in another.
        The input DataFrame is left untouched; a modified copy is returned.

        Args:
            df (pandas.DataFrame): the DataFrame to work on; needs the columns
                'year', 'country_of_origin_name' and 'count'

        Returns:
            pandas.DataFrame: a copy of df with the country names changed.
    """
    TOP_N = 4
    result = df.copy()

    # Rank on the untouched input so renames made for one year never leak
    # into the ranking (or the rows) of another year.
    for year, yearly_df in df.groupby('year'):
        totals = yearly_df.groupby('country_of_origin_name')['count'].sum()
        # A stable sort keeps ties in alphabetical order, making the result deterministic.
        top_countries = totals.sort_values(ascending=False, kind='stable').head(TOP_N).index

        not_in_top = (df['year'] == year) & ~df['country_of_origin_name'].isin(top_countries)
        result.loc[not_in_top, 'country_of_origin_name'] = 'Other'

    return result
            
def wrap(df: pd.DataFrame, start_year: int, end_year: int) -> pd.DataFrame:
    """Build the per-country, per-year totals for one period.

    Steps: keep the rows between ``start_year`` and ``end_year``, group the
    countries with a small yearly share into 'Other', then sum ``count`` per
    country and year.
    """
    period_rows = Specific_Timeframe_Data(df, start_year, end_year)
    # Top_four_countries is an alternative grouping strategy; it is not applied here.
    with_small_countries_grouped = only_the_top_for_year(period_rows)
    return yearly_data_asylum(with_small_countries_grouped)


In [9]:
app = Dash(__name__)

# Every detected migration crisis becomes one "start-end" dropdown entry.
migration_crisis = list(Peak_finder(timeline))
options = [f"{years['start']}-{years['end']}" for years in migration_crisis]
app.layout = html.Div([
    html.H2('Migration crisis'),
    html.P('Select period:'),
    dcc.Dropdown(
        id="dropdown",
        options=options,
        # Pre-select the first period; fall back to no selection when no
        # crisis was detected instead of failing on an empty list.
        value=options[0] if options else None,
        clearable=False,
    ),
    dcc.Graph(id="graph"),

], style={'backgroundColor':'white'})

@app.callback(
    Output("graph", "figure"),
    Input("dropdown", "value"))
def update_bar_chart(years):
    """Redraw the stacked bar chart for the period picked in the dropdown.

    Args:
        years: the dropdown value, a "start-end" string such as "2014-2017".
            An empty selection produces an empty chart.

    Returns:
        A plotly figure with one stacked bar trace per country of origin.
    """
    fig = go.Figure()
    fig.update_layout(
        barmode='stack',
        title='Total asylum seekers by country of origin (countries that represent less than 5% are grouped in "Other")',
        xaxis={'title': {'text': "Years"}, 'showgrid': False},
        yaxis={'title': {'text': 'Asylum Seekers'}},
    )
    if not years:
        return fig

    years = years.split('-')
    period_data = wrap(asylum, int(years[0]), int(years[1])).sort_values('count')

    # Compute each year's total once instead of re-filtering the frame for every bar.
    year_totals = period_data.groupby('year')['count'].sum()

    for country, data in period_data.groupby('country_of_origin_name'):
        custom = [
            f"<b>{country}</b><br>"
            f"Total: {count:,}<br>"
            f"<b>{(count / year_totals.loc[year]):%}</b> of the year asylum seeker population"
            for year, count in zip(data['year'], data['count'])
        ]
        fig.add_trace(
            go.Bar(name=country, x=data['year'], y=data['count'], customdata=custom, hovertemplate="%{customdata}"))

    return fig



# Start the Dash app configured above.
# NOTE(review): how this behaves inside a notebook (inline vs. separate server) depends on the installed Dash version — confirm.
app.run()

# Countries of origin with the most asylum seekers <a class="anchor" id="country_of_origin"></a>

In [10]:
# Total asylum seekers per country of origin, smallest first so the largest
# bar ends up at the top of the horizontal chart; only countries with more
# than 100,000 asylum seekers are shown.
as_by_country = (
    asylum.groupby('country_of_origin_name')
    .agg({'count': 'sum'})
    .sort_values('count', ascending=True)
    .reset_index()
    .loc[lambda totals: totals['count'] > 100000]
)

fig = px.bar(
    as_by_country,
    x='count',
    y='country_of_origin_name',
    orientation='h',
)
# TODO: Log scale
fig.update_layout(height=2000)
fig.show()