# Asylum seekers: During interantional migration crisis


In [13]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import nbformat

![title](img/barth-bailey-d2uHXWTkGn4-unsplash.jpg)

# Reason of the analysis:
This analysis tries to describe how people are migrating worldwide, showing the country of origin, country of destination, etc.


## Import data

In [14]:
asylum = pd.read_csv('.\\Data\\Clean\\Asylum_data.csv')
population = pd.read_csv(".\\Data\\Clean\\Population_data.csv")

# More than 100,000,000 people have seek asylum


In [15]:
print(format(asylum['count'].sum(), ",d"))

108,224,695


## How does this asylum seekers look over the years??

In [16]:
def Peak_finder(data: pd.DataFrame):
    percentaje_to_check = 0.5
    previous_value = data['count'][0]
    previous_year = data['year'][0]

    inside_peak = False
    current_highlight = {}
    # Start of the highlight:
    for index, row in data.iterrows():
        if not inside_peak:
            # can be a small function call peak finder:
            if (row['count'] - previous_value) > (previous_value * percentaje_to_check):
                current_highlight = {'start': previous_year, 'end': 0}
                inside_peak = True
        else:
            if (row['count']) <= (previous_value):
                current_highlight['end'] = row['year']
                yield current_highlight
                inside_peak = False
                current_highlight = {}
        previous_value = row['count']
        previous_year = row['year']


In [21]:
custom_for_template = asylum.groupby(['country_of_origin_name', 'year']).agg({'count': 'sum'}).reset_index().sort_values('count', ascending=False)
# asylum
custom_for_template = custom_for_template.groupby('year')
# ye = custom_for_template.get_group(1965).sort_values('count', ascending=False)
# ye[3:]["count"].sum()
# "By default the group keys are sorted during the groupby operation." Pandas docs https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html
extra_hover_text = [
    
        "<br>" +
        f"{countries.iloc[0]['country_of_origin_name']}: {countries.iloc[0]['count']}<br>" +
        f"{countries.iloc[1]['country_of_origin_name']}: {countries.iloc[1]['count']}<br>" +
        f"{countries.iloc[2]['country_of_origin_name']}: {countries.iloc[2]['count']}<br>" +
        f"Other: {countries[3:]['count'].sum()}"

     for year, countries in custom_for_template
]
# print(extra_hover_text)
for x in extra_hover_text:
    print(x)

<br>Angola: 20277<br>Iraq: 17<br>Dem. Rep. of the Congo: 13<br>Other: 46
<br>Sudan: 13000<br>Rwanda: 7000<br>Angola: 5000<br>Other: 270
<br>Congo, Republic of: 58700<br>Rwanda: 38300<br>Angola: 25000<br>Other: 41790
<br>Sudan: 61700<br>Guinea-Bissau: 20000<br>Angola: 20000<br>Other: 33570
<br>Congo, Republic of: 45000<br>Angola: 34800<br>Sudan: 32500<br>Other: 31510
<br>Ethiopia: 28600<br>Sudan: 13170<br>Angola: 12400<br>Other: 14010
<br>Sudan: 15510<br>Angola: 15060<br>Zambia: 15000<br>Other: 16930
<br>Angola: 20480<br>Guinea-Bissau: 11050<br>Sudan: 9500<br>Other: 11370
<br>Ethiopia: 23364<br>Mozambique: 16720<br>Unknown/other: 7699<br>Other: 24730
<br>Guinea-Bissau: 13000<br>Sudan: 5901<br>Unknown/other: 5695<br>Other: 16802
<br>Burundi: 129037<br>Malawi: 20005<br>Angola: 8955<br>Other: 34567
<br>Unknown/other: 15427<br>Rwanda: 10380<br>Angola: 5780<br>Other: 23776
<br>Angola: 52800<br>Unknown/other: 30597<br>Rwanda: 11140<br>Other: 21739
<br>Lao People's Dem. Rep.: 57070<br>Ethiopia

In [23]:
# Plot the timelime
## Data
timeline = asylum.groupby('year').agg({'count': 'sum'}).reset_index()
custom_for_template = asylum.groupby(['country_of_origin_name', 'year']).agg({'count': 'sum'}).reset_index()
## Setting trace
trace = go.Scatter(x=timeline['year'], y=timeline['count'])
fig = go.Figure(trace)



for peak in Peak_finder(timeline):
    fig.add_shape(type="rect",
                x0=peak['start'], y0=0, x1=peak['end'], y1=10000000,
                fillcolor="tomato", opacity=0.5,
                layer="below", line_width=0)


fig.update_traces(
    customdata=extra_hover_text,
    hovertemplate="Year: %{x}<br>Total: %{y}<br>%{customdata}"

)

## Defining title
fig.update_layout(
    title='Total asylumn seeker population over the years',
    xaxis={'title': {'text': "Years"}},
    yaxis={'title': {'text': 'Asylum Seekers'}, 'rangemode': 'tozero'},

)

fig.show()

# Adding country population

In [None]:
def Country_population_data(country_code: str, population_data: pd.DataFrame, asylum_data: pd.DataFrame) -> pd.DataFrame:
    country_population = population_data[population_data["Country Code"] == country_code].drop(columns=["Country Name", "Country Code"]).transpose().reset_index()
    country_population.columns = ['year', 'population']
    country_population['year'] = country_population['year'].astype(int)

    country = asylum_data[asylum_data["country_of_origin_abbr"] == country_code].groupby("year", as_index=False)["count"].sum()
    
    country = country.merge(right= country_population, how='left', on='year')
    country.drop(country[country['population'].isna()].index, inplace=True)
    country["percentage_of_population_migration"] = (country["count"] / country["population"]) * 100

    country.rename(columns={'count': "displaced"}, inplace=True)
    country["country_of_origin_abbr"] = country_code
    return country

def Get_country_population_df(pop, asy) -> pd.DataFrame:
    countries = asy["country_of_origin_abbr"].unique()
    countries_not_in_the_analysis = []
    full_data = pd.DataFrame()
    for country in countries:
        if country in pop['Country Code'].values:
            full_data = pd.concat([full_data, Country_population_data(country, population_data=pop, asylum_data=asy)])
        else:
            countries_not_in_the_analysis.append(asylum[asylum['country_of_origin_abbr'] == country]['country_of_origin_name'].iloc[0])
        
    print('These countries were remove from the asylum due to not appear in the WorldBank population dataset:')
    print(countries_not_in_the_analysis)
    return full_data
    

In [None]:
tt = Get_country_population_df(population, asylum)
tt[tt['country_of_origin_abbr'] == 'ITA']

These countries were remove from the asylum due to not appear in the WorldBank population dataset:
['Unknown/other', 'Stateless', 'State of Palestine', 'Western Sahara', 'French Guiana', 'Niue', 'Cook Islands', 'Guadeloupe', 'Holy See (the)', 'Anguilla', 'Bonaire, Sint Eustatius and Saba']


Unnamed: 0,year,displaced,population,percentage_of_population_migration,country_of_origin_abbr
0,1970,5,53821850.0,9e-06,ITA
1,1972,5,54381345.0,9e-06,ITA
2,1974,5,55110868.0,9e-06,ITA
3,1984,6,56576718.0,1.1e-05,ITA
4,1986,5,56596155.0,9e-06,ITA
5,1989,21,56671781.0,3.7e-05,ITA
6,1990,60,56719240.0,0.000106,ITA
7,1991,18,56758521.0,3.2e-05,ITA
8,1992,22,56797087.0,3.9e-05,ITA
9,1993,5,56831821.0,9e-06,ITA


In [None]:

for i, x in tt.groupby(['year']):
    print(x)
    

   year  displaced   population  percentage_of_population_migration  \
0  1962          5   11800771.0                        4.237011e-05   
0  1962      20277    5354310.0                        3.787043e-01   
0  1962         13    2907943.0                        4.470514e-04   
0  1962          5  665770000.0                        7.510101e-07   
0  1962          8    1113175.0                        7.186651e-04   
0  1962         13   16075724.0                        8.086728e-05   
0  1962         17    7413875.0                        2.292998e-04   
0  1962          5    3030091.0                        1.650115e-04   
0  1962         10    8919028.0                        1.121198e-04   

  country_of_origin_abbr  
0                    DZA  
0                    AGO  
0                    BDI  
0                    CHN  
0                    COG  
0                    COD  
0                    IRQ  
0                    SOM  
0                    SDN  
   year  displaced 

In [None]:
toprin = tt.groupby('country_of_origin_abbr').agg({"percentage_of_population_migration": "sum"}).reset_index()
toprin[toprin["country_of_origin_abbr"] == "UKR"]
toprin = toprin.sort_values('percentage_of_population_migration')

In [None]:

fig = px.scatter(toprin, x='country_of_origin_abbr', y='percentage_of_population_migration')
fig.show()

In [None]:
venezuela = Country_population_data("VEN", population, asylum)
venezuela

Unnamed: 0,year,displaced,population,percentage_of_population_migration,country_of_origin_abbr
0,1973,5,12463537.0,4e-05,VEN
1,1984,10,17008841.0,5.9e-05,VEN
2,1986,5,17923536.0,2.8e-05,VEN
3,1989,61,19348921.0,0.000315,VEN
4,1990,171,19827010.0,0.000862,VEN
5,1991,246,20304896.0,0.001212,VEN
6,1992,290,20780948.0,0.001396,VEN
7,1993,297,21254520.0,0.001397,VEN
8,1994,471,21725188.0,0.002168,VEN
9,1995,1084,22193821.0,0.004884,VEN


In [None]:
af = Country_population_data('AFG', population, asylum)
af.style.format(precision=4, thousands=".", decimal=",")


Unnamed: 0,year,displaced,population,percentage_of_population_migration,country_of_origin_abbr
0,1.972,5,"11.853.696,0000",0,AFG
1,1.976,15,"13.059.851,0000",1,AFG
2,1.977,36,"13.340.756,0000",3,AFG
3,1.978,141,"13.611.441,0000",10,AFG
4,1.979,602,"13.655.567,0000",44,AFG
5,1.98,1.308.405,"13.169.311,0000",99353,AFG
6,1.981,2.354.516,"11.937.581,0000",197236,AFG
7,1.982,515.423,"10.991.378,0000",46893,AFG
8,1.983,204.003,"10.917.982,0000",18685,AFG
9,1.984,108.450,"11.190.221,0000",9691,AFG


In [None]:
syria = Country_population_data('SYR', population, asylum)
syria

Unnamed: 0,year,displaced,population,percentage_of_population_migration,country_of_origin_abbr
0,1970,176,6380609.0,0.002758,SYR
1,1971,209,6598094.0,0.003168,SYR
2,1972,143,6826024.0,0.002095,SYR
3,1973,5,7060321.0,7.1e-05,SYR
4,1974,5,7302160.0,6.8e-05,SYR
5,1975,10,7554364.0,0.000132,SYR
6,1976,247,7817320.0,0.00316,SYR
7,1977,254,8090501.0,0.003139,SYR
8,1978,373,8372759.0,0.004455,SYR
9,1979,84,8661909.0,0.00097,SYR


In [None]:
ukrine = Country_population_data("UKR", population, asylum)
ukrine.style.format(precision=4, thousands=".", decimal=",")

Unnamed: 0,year,displaced,population,percentage_of_population_migration,country_of_origin_abbr
0,1.981,5,"50.189.281,0000",0,UKR
1,1.99,5,"52.054.092,0000",0,UKR
2,1.991,115,"52.170.961,0000",2,UKR
3,1.992,3.318,"52.323.671,0000",63,UKR
4,1.993,6.322,"52.350.126,0000",121,UKR
5,1.994,3.781,"52.075.998,0000",73,UKR
6,1.995,2.595,"51.665.539,0000",50,UKR
7,1.996,3.128,"51.226.109,0000",61,UKR
8,1.997,3.380,"50.786.885,0000",67,UKR
9,1.998,2.423,"50.379.731,0000",48,UKR


In [None]:
from plotly.subplots import make_subplots

fit = make_subplots(rows=1, cols=1)
fit.add_trace(go.Scatter(y=venezuela['percentage_of_population_migration'], x=venezuela["year"]))
fit.add_trace(go.Scatter(y=ukrine["percentage_of_population_migration"], x=ukrine["year"]))
fit.add_trace(go.Scatter(y=af["percentage_of_population_migration"], x=af["year"]))

fit.show()

# Adding population movement

In [None]:
asylum

Unnamed: 0,country_of_origin_abbr,country_of_origin_name,country_of_asylum_abbr,country_of_asylum_name,region_of_asylum,category,year,count
0,DZA,Algeria,MLT,Malta,Europe,Refugee,1962,5
1,AGO,Angola,COD,Dem. Rep. of the Congo,Southern Africa,Refugee,1962,20000
2,AGO,Angola,NAM,Namibia,Southern Africa,Refugee,1962,277
3,BDI,Burundi,NAM,Namibia,Southern Africa,Refugee,1962,13
4,CHN,China,NPL,Nepal,Asia and the Pacific,Refugee,1962,5
...,...,...,...,...,...,...,...,...
103303,ZWE,Zimbabwe,POL,Poland,Europe,Asylum-seekers,2024,9
103304,ZWE,Zimbabwe,ZAF,South Africa,Southern Africa,Asylum-seekers,2024,89
103305,ZWE,Zimbabwe,SWE,Sweden,Europe,Asylum-seekers,2024,5
103306,ZWE,Zimbabwe,USA,United States of America,Americas,Asylum-seekers,2024,641


In [None]:
def Country_by_destination(country_abbr: str, asy: pd.DataFrame) -> pd.DataFrame:
    country = asy[asy["country_of_origin_abbr"] == country_abbr]
    country_destination = country.groupby("country_of_asylum_name")["count"].sum().reset_index().sort_values(by="count", ascending=False)
    return country_destination

af_des = Country_by_destination("UKR", asylum)
af_des.head(20)

Unnamed: 0,country_of_asylum_name,count
68,Russian Federation,1797280
63,Poland,1273423
29,Germany,1232215
19,Czechia,586657
79,United Kingdom,249132
73,Spain,224870
38,Italy,194074
11,Bulgaria,187758
66,Rep. of Moldova,169543
67,Romania,168063


In [None]:
asylum["category"].unique()

array(['Refugee', 'Asylum-seekers', 'People in refugee-like situation',
       'Other people in need of international protection'], dtype=object)