In [2]:
#Import dependencies.
import pandas as pd
import os
import csv
import geopandas as gpd
from shapely.geometry import Point

# Population Data Cleaning.

In [3]:
#Read dataframe.
conflict=pd.read_csv("Resources/cleaned conflict.csv",header=0)
demographics=pd.read_csv("Resources/demographics.csv",header=0)
disasters=pd.read_csv("Resources/cleaned disasters.csv",header=0)
decision=pd.read_csv("Resources/cleaned_asylum.csv",header=0)

In [4]:
country_list = demographics['Country of origin'].unique().tolist()

In [5]:
demographics.head(1)

Unnamed: 0,Year,Country of origin,Country of origin (ISO),Country of asylum,Country of asylum (ISO),Female 0 - 4,Female 5 - 11,Female 12 - 17,Female 18 - 59,Female 60,Female other,Female total,Male 0 - 4,Male 5 - 11,Male 12 - 17,Male 18 - 59,Male 60,Male other,Male total,Total
0,2001,Iran (Islamic Rep. of),IRN,Afghanistan,AFG,0,0,0,0,0,0,0,0,0,0,5,0,0,5,5


In [6]:
# Group the data by Country of origin and Year and calculate the sum of the Total
summed_df = demographics.groupby(['Country of origin','Country of origin (ISO)', 'Country of asylum', 'Country of asylum (ISO)','Year', 'Female 0 - 4', 'Female 5 - 11', 'Female 12 - 17', 'Female 18 - 59', 'Female 60', 'Female other', 'Female total', 'Male 0 - 4', 'Male 5 - 11', 'Male 12 - 17', 'Male 18 - 59', 'Male 60', 'Male other', 'Male total'], as_index=False)['Total'].sum()

# Group by Country of origin to get the full count of every year and drop if the Total count is below 50,000
filtered_df = summed_df.groupby('Country of origin').filter(lambda x: x['Total'].sum() >= 50000)

# Reset the index to ungroup the data
reduced_demographics = pd.DataFrame(filtered_df.reset_index(drop=True))

In [7]:
grouped_conflict = conflict.groupby(['Year', 'Country of origin']).sum()
grouped_demographics = reduced_demographics.groupby(['Year', 'Country of origin','Country of origin (ISO)','Country of asylum','Country of asylum (ISO)']).sum()
grouped_disasters = disasters.groupby(['Year', 'Country of origin']).sum()
grouped_decision = decision.groupby(['Year', 'Country of origin','Country of origin (ISO)', 'Country of asylum','Country of asylum (ISO)']).sum()
grouped_conflict = pd.DataFrame(grouped_conflict.reset_index())
grouped_demographics_df = pd.DataFrame(grouped_demographics.reset_index())
grouped_disasters_df = pd.DataFrame(grouped_disasters.reset_index())
grouped_decision_df = pd.DataFrame(grouped_decision.reset_index())
grouped_demographics_df.head(1)

Unnamed: 0,Year,Country of origin,Country of origin (ISO),Country of asylum,Country of asylum (ISO),Female 0 - 4,Female 5 - 11,Female 12 - 17,Female 18 - 59,Female 60,Female other,Female total,Male 0 - 4,Male 5 - 11,Male 12 - 17,Male 18 - 59,Male 60,Male other,Male total,Total
0,2001,Afghanistan,AFG,Australia,AUS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6262


In [8]:
new_country_list = grouped_demographics_df['Country of origin'].unique().tolist()
print(len(new_country_list))

87


In [9]:
dropped_countries = list(set(country_list) - set(new_country_list))
dropped_countries.sort()
print(len(dropped_countries))

114


In [10]:

filtered_decision = grouped_decision_df[['Year','Country of origin','Country of origin (ISO)','Country of asylum','Country of asylum (ISO)','Recognized decisions','Complementary protection','Rejected decisions','Otherwise closed', 'Total decisions']]
filtered_demographics = grouped_demographics_df[['Year','Country of origin', 'Country of origin (ISO)', 'Country of asylum', 'Country of asylum (ISO)','Female 0 - 4','Female 5 - 11','Female 12 - 17','Female 18 - 59','Female 60','Female other','Female total','Male 0 - 4','Male 5 - 11','Male 12 - 17','Male 18 - 59','Male 60','Male other', 'Male total', 'Total']]
filtered_conflict = grouped_conflict[['Year','Country of origin', 'Deaths civilians']]
filtered_disasters = grouped_disasters_df[['Year','Country of origin','Total deaths','Total affected']]
filtered_demographics.head(1)

Unnamed: 0,Year,Country of origin,Country of origin (ISO),Country of asylum,Country of asylum (ISO),Female 0 - 4,Female 5 - 11,Female 12 - 17,Female 18 - 59,Female 60,Female other,Female total,Male 0 - 4,Male 5 - 11,Male 12 - 17,Male 18 - 59,Male 60,Male other,Male total,Total
0,2001,Afghanistan,AFG,Australia,AUS,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6262


In [11]:
#adding all the male and female by age totals. 
filtered_demographics['MaleFemaleTotal'] = filtered_demographics[['Female total','Male total']].sum(axis=1)
#creating an Unknown demographic calculated field. 
filtered_demographics['unknown_demographic'] = filtered_demographics['Total'] - filtered_demographics['MaleFemaleTotal']
col_to_move = filtered_demographics.pop('unknown_demographic')  
filtered_demographics.insert(16, 'unknown_demographic', col_to_move)
filtered_demographics = filtered_demographics.drop('MaleFemaleTotal', axis=1)
filtered_demographics=filtered_demographics.replace(to_replace="TÃ¼rkiye", value="Turkey")
filtered_demographics = pd.merge(filtered_demographics, filtered_decision , on=['Year', 'Country of origin','Country of origin (ISO)', 'Country of asylum','Country of asylum (ISO)',], how='left')
filtered_demographics_df = pd.DataFrame(filtered_demographics)
filtered_demographics_df.head(1)

Unnamed: 0,Year,Country of origin,Country of origin (ISO),Country of asylum,Country of asylum (ISO),Female 0 - 4,Female 5 - 11,Female 12 - 17,Female 18 - 59,Female 60,...,unknown_demographic,Male 60,Male other,Male total,Total,Recognized decisions,Complementary protection,Rejected decisions,Otherwise closed,Total decisions
0,2001,Afghanistan,AFG,Australia,AUS,0,0,0,0,0,...,6262,0,0,0,6262,1914.0,0.0,522.0,10.0,2446.0


In [12]:
#Creating a function using geopandas to check to see if counteries share a border or not. 

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
def share_borders(country1, country2):
    try:
        # Get the geometry of country1
        geom1 = world.loc[world['iso_a3'] == country1, 'geometry'].iloc[0]

        # Get the geometry of country2
        geom2 = world.loc[world['iso_a3'] == country2, 'geometry'].iloc[0]

        # Check if the geometries intersect
        return geom1.intersects(geom2)
    except IndexError:
        # If either country is not found in the GeoDataFrame, return False
        return False

shares_border = []
# Loop through each row of the DataFrame
for index, row in filtered_demographics_df.iterrows():
    origin = row['Country of origin (ISO)']
    asylum = row['Country of asylum (ISO)']
    
    # Check if the two countries share a border
    if share_borders(origin, asylum):
        shares_border.append(1) #true 
    else:
        shares_border.append(0) #false 

filtered_demographics_df['share_borders'] = shares_border

# #drop the ISO columns
filtered_demographics_df = filtered_demographics_df.drop('Country of origin (ISO)', axis=1)
filtered_demographics_df = filtered_demographics_df.drop('Country of asylum (ISO)', axis=1)

#move the new shared border column next to the country of origin and country of asylum
col_to_move = filtered_demographics_df.pop('share_borders')  
filtered_demographics_df.insert(3, 'share_borders', col_to_move)
demographic_ml_df = pd.DataFrame(filtered_demographics_df)

In [13]:
demographic_ml_df.head()

Unnamed: 0,Year,Country of origin,Country of asylum,share_borders,Female 0 - 4,Female 5 - 11,Female 12 - 17,Female 18 - 59,Female 60,Female other,...,unknown_demographic,Male 60,Male other,Male total,Total,Recognized decisions,Complementary protection,Rejected decisions,Otherwise closed,Total decisions
0,2001,Afghanistan,Australia,0,0,0,0,0,0,0,...,6262,0,0,0,6262,1914.0,0.0,522.0,10.0,2446.0
1,2001,Afghanistan,Austria,0,0,0,0,0,0,0,...,1049,0,0,0,1049,432.0,0.0,335.0,0.0,767.0
2,2001,Afghanistan,Azerbaijan,0,8,12,10,38,0,0,...,0,0,0,175,243,226.0,0.0,45.0,0.0,271.0
3,2001,Afghanistan,Belarus,0,9,40,36,81,6,0,...,0,5,0,282,454,85.0,0.0,25.0,51.0,161.0
4,2001,Afghanistan,Belgium,0,0,0,0,0,0,144,...,0,0,213,213,357,100.0,0.0,42.0,0.0,142.0


In [14]:
shared_border_grouped = demographic_ml_df.groupby('share_borders').sum()
shared_border_grouped 

Unnamed: 0_level_0,Year,Female 0 - 4,Female 5 - 11,Female 12 - 17,Female 18 - 59,Female 60,Female other,Female total,Male 0 - 4,Male 5 - 11,...,unknown_demographic,Male 60,Male other,Male total,Total,Recognized decisions,Complementary protection,Rejected decisions,Otherwise closed,Total decisions
share_borders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,114724399,1081515,1663271,1361374,6200209,509575,2588933,13404877,1142255,1753684,...,36516284,497329,3433159,18327587,68248748,3934883.0,1755266.0,7782998.0,4417971.0,17895555.0
1,8397905,10748411,16186121,11413465,34478033,3097368,4395539,80318937,11056258,16765832,...,50288122,3008117,4473484,81656851,212263910,1308422.0,551702.0,659819.0,1272378.0,3791676.0


In [15]:
#rename all the columns to best practing naming conventions 

demographic_ml_df = demographic_ml_df.rename(columns={'Year' : 'year','Country of origin' : 'country_origin','Country of asylum' : 'country_asylum','Female 0 - 4' : 'female_0to4','Female 5 - 11' : 'female_5to11','Female 12 - 17' : 'female_12to17','Female 18 - 59' : 'female_18to59','Female 60' :	'female_60','Female other' : 'female_other','Male 0 - 4' : 'male_0to4','Male 5 - 11' : 'male_5to11','Male 12 - 17' : 'male_12to17','Male 18 - 59' : 'male_18to59','Male 60' : 'male_60','Male other' : 'male_other','Total' : 'total','Recognized decisions':'recognized_decisions','Complementary protection':'complementary_protection','Rejected decisions':'rejected','Otherwise closed':'otherwise_closed','Total decisions':'total_decisions'})
demographic_ml_df.head(1)

Unnamed: 0,year,country_origin,country_asylum,share_borders,female_0to4,female_5to11,female_12to17,female_18to59,female_60,female_other,...,unknown_demographic,male_60,male_other,Male total,total,recognized_decisions,complementary_protection,rejected,otherwise_closed,total_decisions
0,2001,Afghanistan,Australia,0,0,0,0,0,0,0,...,6262,0,0,0,6262,1914.0,0.0,522.0,10.0,2446.0


In [16]:
filtered_conflict = filtered_conflict.rename(columns={'Year':'year', 'Country of origin' : 'country_origin','Deaths civilians': 'conflict_deaths','Deaths civilians': 'conflict_deaths'})
filtered_disasters = filtered_disasters.rename(columns={'Year':'year','Country of origin' : 'country_origin','Total deaths': 'disaster_deaths', 'Total affected':'disaster_affected'})
filtered_disasters.head(1)

Unnamed: 0,year,country_origin,disaster_deaths,disaster_affected
0,2001,Afghanistan,331.0,200270.0


In [17]:
filtered_conflict.head(1)

Unnamed: 0,year,country_origin,conflict_deaths
0,2001,Afghanistan,864


In [18]:
#data merged by year totals. 
merged = pd.merge(filtered_conflict, filtered_disasters , on=['year','country_origin'], how='left')
col_to_move = merged.pop('conflict_deaths')  
merged.insert(4, 'conflict_deaths', col_to_move)
for col in merged.columns:
    if merged[col].dtype == float:
        merged[col] = merged[col].fillna(0).astype(int)
event_ml = merged
event_ml= event_ml.rename(columns={'Year':'year',
                                          'Country of origin': 'country_origin',
                                        })
event_ml_df = pd.DataFrame(event_ml)
event_ml_df 

Unnamed: 0,year,country_origin,disaster_deaths,disaster_affected,conflict_deaths
0,2001,Afghanistan,331,200270,864
1,2001,Algeria,921,45423,32
2,2001,Angola,48,39928,1038
3,2001,Azerbaijan,0,0,0
4,2001,Bangladesh,232,530150,6
...,...,...,...,...,...
1092,2021,Turkey,9,561088,0
1093,2021,Uganda,0,0,7
1094,2021,Ukraine,1,1700,18
1095,2021,Venezuela,0,0,19


In [61]:
event_ml_df = event_ml_df[event_ml_df['country_origin'].isin(demographic_ml_df['country_origin'].unique())]


In [19]:
demographic_ml_df.to_csv('Resources/demographic_ml_df.csv')
event_ml_df.to_csv('Resources/event_ml_df.csv')

In [20]:
demographic_ml_df

Unnamed: 0,year,country_origin,country_asylum,share_borders,female_0to4,female_5to11,female_12to17,female_18to59,female_60,female_other,...,unknown_demographic,male_60,male_other,Male total,total,recognized_decisions,complementary_protection,rejected,otherwise_closed,total_decisions
0,2001,Afghanistan,Australia,0,0,0,0,0,0,0,...,6262,0,0,0,6262,1914.0,0.0,522.0,10.0,2446.0
1,2001,Afghanistan,Austria,0,0,0,0,0,0,0,...,1049,0,0,0,1049,432.0,0.0,335.0,0.0,767.0
2,2001,Afghanistan,Azerbaijan,0,8,12,10,38,0,0,...,0,0,0,175,243,226.0,0.0,45.0,0.0,271.0
3,2001,Afghanistan,Belarus,0,9,40,36,81,6,0,...,0,5,0,282,454,85.0,0.0,25.0,51.0,161.0
4,2001,Afghanistan,Belgium,0,0,0,0,0,0,144,...,0,0,213,213,357,100.0,0.0,42.0,0.0,142.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61194,2021,Zimbabwe,Sweden,0,0,0,0,0,0,0,...,16,0,0,0,16,0.0,0.0,16.0,0.0,16.0
61195,2021,Zimbabwe,Switzerland,0,0,9,0,0,0,0,...,0,0,0,0,9,0.0,0.0,5.0,0.0,5.0
61196,2021,Zimbabwe,Thailand,0,0,0,0,0,0,0,...,0,0,0,9,9,,,,,
61197,2021,Zimbabwe,United Kingdom of Great Britain and Northern I...,0,0,0,0,0,0,0,...,1174,0,0,0,1174,44.0,5.0,53.0,10.0,112.0


In [21]:
death_ml_df = demographic_ml_df.groupby(['year','country_origin']).agg({'total': 'sum'})
death_ml_df = pd.DataFrame(death_ml_df.reset_index())

shares_border = demographic_ml_df.groupby(['year','country_origin','share_borders']).agg({'total': 'sum'})
shares_border_df = pd.DataFrame(shares_border.reset_index())
shares_border_df

recognized = demographic_ml_df.groupby(['year','country_origin','share_borders', 'recognized_decisions']).agg({'total': 'sum'})
recognized_df = pd.DataFrame(recognized.reset_index())

In [23]:

asylum_results = demographic_ml_df.groupby(['country_origin']).agg({'total': 'sum','recognized_decisions': 'sum','complementary_protection': 'sum','rejected': 'sum','otherwise_closed': 'sum'})
asylum_results.head(1)

Unnamed: 0_level_0,total,recognized_decisions,complementary_protection,rejected,otherwise_closed
country_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,55888611,287094.0,301774.0,506305.0,494961.0


How many refugees are expected in a country as result of a conflict or disaster 

In [24]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# load the data into a pandas DataFrame
demographic_summary = demographic_ml_df.groupby(['year','country_origin']).agg({'total': 'sum'}).reset_index()

# merge the two datasets on the year and country columns
df_merged = pd.merge(demographic_summary, event_ml_df, on=['year', 'country_origin'])

# select the relevant columns for analysis
X = df_merged[['conflict_deaths', 'disaster_deaths']]
y = df_merged['total']

# fit a linear regression model to predict the number of refugees based on deaths data
model = LinearRegression().fit(X, y)

# use the model to predict the number of refugees based on deaths data for a new set of inputs
new_X = pd.DataFrame({'conflict_deaths': [1000], 'disaster_deaths': [500]}) # example input with 10000 conflict deaths and 5000 disaster deaths
predicted_y = model.predict(new_X)

print(predicted_y.round()) # print the predicted number of refugees based on the given input

[416590.]


In [25]:
y

0      3809763
1         8399
2       470616
3       268755
4         5544
        ...   
736     825290
737       2412
738     105019
739       7887
740      27562
Name: total, Length: 741, dtype: int64

In [26]:
demographic_summary

Unnamed: 0,year,country_origin,total
0,2001,Afghanistan,3809763
1,2001,Albania,7627
2,2001,Algeria,8399
3,2001,Angola,470616
4,2001,Armenia,7206
...,...,...,...
1811,2021,Venezuela (Bolivarian Republic of),199202
1812,2021,Viet Nam,317737
1813,2021,Western Sahara,117041
1814,2021,Yemen,37615


In [30]:
demographic_ml_df.shape

(61199, 25)

In [37]:
event_ml_df

Unnamed: 0,year,country_origin,disaster_deaths,disaster_affected,conflict_deaths
0,2001,Afghanistan,331,200270,864
1,2001,Algeria,921,45423,32
2,2001,Angola,48,39928,1038
3,2001,Azerbaijan,0,0,0
4,2001,Bangladesh,232,530150,6
...,...,...,...,...,...
1092,2021,Turkey,9,561088,0
1093,2021,Uganda,0,0,7
1094,2021,Ukraine,1,1700,18
1095,2021,Venezuela,0,0,19


In [56]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# load the data into a pandas DataFrame
demographic_summary = demographic_ml_df.groupby(['year', 'country_origin']).agg({'total': 'sum'}).reset_index()

countries_list = demographic_summary['country_origin'].unique().tolist()
predicted_value_by_country = {}


for country in countries_list:
    # split the dataset by country origin
    country_split = demographic_summary[demographic_summary['country_origin'] == country]

    # merge the two datasets on the year and country columns
    df_merged = pd.merge(country_split, event_ml_df[['year', 'country_origin', 'conflict_deaths', 'disaster_deaths']], on=['year', 'country_origin'], how='inner')

    # select the relevant columns for analysis
    X = df_merged[['conflict_deaths', 'disaster_deaths']]
    y = df_merged['total']

    # fit a linear regression model to predict the number of refugees based on deaths data
    model = LinearRegression().fit(X, y)

    # use the model to predict the number of refugees based on deaths data for a new set of inputs
    new_X = pd.DataFrame({'conflict_deaths': [1000], 'disaster_deaths': [500]}) # example input with 1000 conflict deaths and 500 disaster deaths
    predicted_value = model.predict(new_X)

    predicted_value_by_country[country] = predicted_value.round()

# printing the dictionary containing predicted value (rounded) by each country_origin
predicted_value_by_country

ValueError: Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required.

In [57]:

print(demographic_summary['country_origin'].unique())

['Afghanistan' 'Albania' 'Algeria' 'Angola' 'Armenia' 'Azerbaijan'
 'Bangladesh' 'Belarus' 'Bhutan' 'Bosnia and Herzegovina' 'Burkina Faso'
 'Burundi' 'Cambodia' 'Cameroon' 'Central African Rep.' 'Chad' 'China'
 'Colombia' 'Congo' "Cote d'Ivoire" 'Croatia' 'Cuba'
 'Dem. Rep. of the Congo' 'Egypt' 'El Salvador' 'Eritrea' 'Ethiopia'
 'Gambia' 'Georgia' 'Ghana' 'Guatemala' 'Guinea' 'Haiti' 'Honduras'
 'Hungary' 'India' 'Indonesia' 'Iran (Islamic Rep. of)' 'Iraq'
 'Kazakhstan' 'Kenya' 'Kyrgyzstan' "Lao People's Dem. Rep." 'Lebanon'
 'Liberia' 'Libya' 'Mali' 'Mauritania' 'Mexico' 'Morocco' 'Myanmar'
 'Nepal' 'Nicaragua' 'Niger' 'Nigeria' 'North Macedonia' 'Pakistan'
 'Palestinian' 'Peru' 'Philippines' 'Poland' 'Rep. of Moldova' 'Romania'
 'Russian Federation' 'Rwanda' 'Senegal'
 'Serbia and Kosovo: S/RES/1244 (1999)' 'Sierra Leone' 'Somalia'
 'Sri Lanka' 'Stateless' 'Sudan' 'Syrian Arab Rep.' 'Tajikistan' 'Tibetan'
 'Timor-Leste' 'Togo' 'Turkey' 'Uganda' 'Ukraine' 'Uzbekistan'
 'Venezuela (

In [58]:
print(event_ml_df['country_origin'].unique())

['Afghanistan' 'Algeria' 'Angola' 'Azerbaijan' 'Bangladesh' 'Burundi'
 'Cameroon' 'Chad' 'Colombia' 'Congo' 'Dem. Rep. of the Congo' 'Ethiopia'
 'Ghana' 'Guatemala' 'Guinea' 'India' 'Indonesia' 'Iraq' 'Kenya' 'Lebanon'
 'Liberia' 'Nepal' 'Nigeria' 'Pakistan' 'Peru' 'Philippines' 'Rwanda'
 'Senegal' 'Sierra Leone' 'Somalia' 'Sri Lanka' 'Sudan' 'Turkey' 'Uganda'
 'Georgia' 'Honduras' 'Mali' 'Mexico' 'Eritrea' 'Morocco' 'Bhutan' 'Egypt'
 'Haiti' 'Niger' 'Uzbekistan' 'Armenia' 'China' 'Gambia' 'Tajikistan'
 'Togo' 'Kyrgyzstan' 'Mauritania' 'Libya' 'South Sudan' 'Ukraine'
 'Burkina Faso']


In [33]:
countries_list

['Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Armenia',
 'Azerbaijan',
 'Bangladesh',
 'Belarus',
 'Bhutan',
 'Bosnia and Herzegovina',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Central African Rep.',
 'Chad',
 'China',
 'Colombia',
 'Congo',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Dem. Rep. of the Congo',
 'Egypt',
 'El Salvador',
 'Eritrea',
 'Ethiopia',
 'Gambia',
 'Georgia',
 'Ghana',
 'Guatemala',
 'Guinea',
 'Haiti',
 'Honduras',
 'Hungary',
 'India',
 'Indonesia',
 'Iran (Islamic Rep. of)',
 'Iraq',
 'Kazakhstan',
 'Kenya',
 'Kyrgyzstan',
 "Lao People's Dem. Rep.",
 'Lebanon',
 'Liberia',
 'Libya',
 'Mali',
 'Mauritania',
 'Mexico',
 'Morocco',
 'Myanmar',
 'Nepal',
 'Nicaragua',
 'Niger',
 'Nigeria',
 'North Macedonia',
 'Pakistan',
 'Palestinian',
 'Peru',
 'Philippines',
 'Poland',
 'Rep. of Moldova',
 'Romania',
 'Russian Federation',
 'Rwanda',
 'Senegal',
 'Serbia and Kosovo: S/RES/1244 (1999)',
 'Sierra Leone',
 'Somalia',
 'Sri Lanka',
 'Stateless'

In [34]:
country_split

Unnamed: 0,year,country_origin,total
1,2001,Albania,7627
87,2002,Albania,10758
173,2003,Albania,10382
259,2004,Albania,10474
345,2005,Albania,12718
431,2006,Albania,14079
517,2007,Albania,15335
603,2008,Albania,15007
689,2009,Albania,15707
775,2010,Albania,14771


In [36]:
df_merged

Unnamed: 0,year,country_origin,total,disaster_deaths,disaster_affected,conflict_deaths
