In [1]:
# Import required Python packages and modules
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
import random
# Set the random seed
seed = 44
random.seed(seed)
np.random.seed(seed)

# Refugee data prep

In [2]:
# Reading refugee origin df
ref_origin_location = pd.read_csv('refugee_population_origin_updated.csv')
ref_origin_location

Unnamed: 0,Entity,Code,Year,Refugee population by country or territory of origin
0,Afghanistan,AFG,1979,500000
1,Afghanistan,AFG,1980,1734921
2,Afghanistan,AFG,1981,3879984
3,Afghanistan,AFG,1982,4488214
4,Afghanistan,AFG,1983,4712735
...,...,...,...,...
7090,Zimbabwe,ZWE,2018,15618
7091,Zimbabwe,ZWE,2019,10045
7092,Zimbabwe,ZWE,2020,8575
7093,Zimbabwe,ZWE,2021,8115


Checking properties of dataframe.

In [3]:
ref_origin_location.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7095 entries, 0 to 7094
Data columns (total 4 columns):
 #   Column                                                Non-Null Count  Dtype 
---  ------                                                --------------  ----- 
 0   Entity                                                7095 non-null   object
 1   Code                                                  6667 non-null   object
 2   Year                                                  7095 non-null   int64 
 3   Refugee population by country or territory of origin  7095 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 221.8+ KB


In [4]:
ref_origin_location.describe()

Unnamed: 0,Year,Refugee population by country or territory of origin
count,7095.0,7095.0
mean,2002.919521,312576.4
std,13.179365,1638159.0
min,1960.0,5.0
25%,1995.0,98.0
50%,2005.0,1535.0
75%,2014.0,33699.5
max,2022.0,35300390.0


In [5]:
ref_origin_location.isnull().any()

Entity                                                  False
Code                                                     True
Year                                                    False
Refugee population by country or territory of origin    False
dtype: bool

Loading in other dataframe.

In [6]:
# Reading refugee destination df
ref_destination_location = pd.read_csv('refugee_population_destination_updated.csv')
ref_destination_location

Unnamed: 0,Entity,Code,Year,Refugee population by country or territory of asylum
0,Afghanistan,AFG,1990,50
1,Afghanistan,AFG,1991,38
2,Afghanistan,AFG,1992,60025
3,Afghanistan,AFG,1993,32131
4,Afghanistan,AFG,1994,19134
...,...,...,...,...
7395,Zimbabwe,ZWE,2018,7795
7396,Zimbabwe,ZWE,2019,8956
7397,Zimbabwe,ZWE,2020,9261
7398,Zimbabwe,ZWE,2021,9483


In [7]:
ref_destination_location.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7400 entries, 0 to 7399
Data columns (total 4 columns):
 #   Column                                                Non-Null Count  Dtype 
---  ------                                                --------------  ----- 
 0   Entity                                                7400 non-null   object
 1   Code                                                  6971 non-null   object
 2   Year                                                  7400 non-null   int64 
 3   Refugee population by country or territory of asylum  7400 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 231.4+ KB


In [8]:
ref_origin_location.describe()

Unnamed: 0,Year,Refugee population by country or territory of origin
count,7095.0,7095.0
mean,2002.919521,312576.4
std,13.179365,1638159.0
min,1960.0,5.0
25%,1995.0,98.0
50%,2005.0,1535.0
75%,2014.0,33699.5
max,2022.0,35300390.0


In [9]:
ref_destination_location.isnull().any()

Entity                                                  False
Code                                                     True
Year                                                    False
Refugee population by country or territory of asylum    False
dtype: bool

In [10]:
# Countries in ref_destination_location but not in ref_origin_location
countries_only_in_ref_destination = ref_destination_location[~ref_destination_location['Entity'].isin(ref_origin_location['Entity'])]['Entity']

print("Countries only in ref_destination_location:")
print(countries_only_in_ref_destination)

Countries only in ref_destination_location:
4252         Micronesia (country)
4253         Micronesia (country)
5786    Sint Maarten (Dutch part)
5787    Sint Maarten (Dutch part)
5788    Sint Maarten (Dutch part)
5789    Sint Maarten (Dutch part)
5790    Sint Maarten (Dutch part)
5791    Sint Maarten (Dutch part)
5792    Sint Maarten (Dutch part)
5793    Sint Maarten (Dutch part)
5794    Sint Maarten (Dutch part)
Name: Entity, dtype: object


In [11]:
len(countries_only_in_ref_destination)

11

In [12]:
len(countries_only_in_ref_destination.unique())

2

In [13]:
ref_destination_location[ref_destination_location.Entity == 'Micronesia (country)']

Unnamed: 0,Entity,Code,Year,Refugee population by country or territory of asylum
4252,Micronesia (country),FSM,2015,5
4253,Micronesia (country),FSM,2016,5


In [14]:
# Countries in ref_origin_destination_location but not in ref_destination_location
countries_only_in_origin_destination = ref_origin_location[~ref_origin_location['Entity'].isin(ref_destination_location['Entity'])]['Entity']

print("Countries only in ref_origin_location:")
print(countries_only_in_origin_destination)

Countries only in ref_origin_location:
142     Andorra
143     Andorra
144     Andorra
145     Andorra
146     Andorra
         ...   
6563     Tuvalu
6564     Tuvalu
6565     Tuvalu
6566     Tuvalu
6567     Tuvalu
Name: Entity, Length: 321, dtype: object


In [15]:
len(countries_only_in_origin_destination)

321

In [16]:
len(countries_only_in_origin_destination.unique())

17

In [17]:
print(countries_only_in_origin_destination.unique())

['Andorra' 'Barbados' 'Brunei' 'Dominica' 'East Timor' 'Kiribati'
 'Maldives' 'Marshall Islands' 'Myanmar' 'North Korea' 'Puerto Rico'
 'Saint Vincent and the Grenadines' 'San Marino' 'Sao Tome and Principe'
 'Seychelles' 'Tonga' 'Tuvalu']


In [18]:
# Years in ref_destination_location but not in ref_origin_location
years_only_in_ref_destination = ref_destination_location[~ref_destination_location['Year'].isin(ref_origin_location['Year'])]['Year']

print("Years only in ref_destination_location:", years_only_in_ref_destination)

Years only in ref_destination_location: Series([], Name: Year, dtype: int64)


What you see is that there are only 2 entities (terrorities/countries) in the destination df that are NOT in the origin df. In total, this makes up a difference of 11 rows more in the destination df. However, there are 17 countries in the origin df that are NOT in the destination df. In total, this makes up a difference of 321 more rows in the origin df.

But in reality, the total difference (length data frames) is 7400-7095 = 305 data rows more in the destination df. This means that for some (or most) countries in the destination df, there are more years of data (more rows) per country/territory. In most of the cases. So in total, all years exist in all dfs, but per country the destination df has more data than the origin, so that has to be about more years of data.

--> There are more countries in origin than in destination df, BUT in destination is more data in terms of years than there is the origin df.

In [19]:
# Group by 'entity' and extract years
grouped_origin = ref_origin_location.groupby('Entity')['Year'].apply(set).reset_index()
grouped_origin

Unnamed: 0,Entity,Year
0,Afghanistan,"{1979, 1980, 1981, 1982, 1983, 1984, 1985, 198..."
1,Albania,"{1964, 1965, 1966, 1967, 1968, 1969, 1970, 197..."
2,Algeria,"{1981, 1982, 1983, 1984, 1985, 1988, 1989, 199..."
3,Andorra,"{2022, 2002, 2003, 2005, 2006, 2007, 2008, 200..."
4,Angola,"{1960, 1961, 1962, 1963, 1964, 1965, 1966, 196..."
...,...,...
208,Vietnam,"{1964, 1965, 1966, 1967, 1968, 1969, 1970, 197..."
209,World,"{1990, 1991, 1992, 1993, 1994, 1995, 1996, 199..."
210,Yemen,"{1978, 1979, 1980, 1986, 1987, 1988, 1989, 199..."
211,Zambia,"{1968, 1969, 1970, 1971, 1972, 1973, 1974, 197..."


In [20]:
grouped_destination = ref_destination_location.groupby('Entity')['Year'].apply(set).reset_index()
grouped_destination

Unnamed: 0,Entity,Year
0,Afghanistan,"{1990, 1991, 1992, 1993, 1994, 1995, 1996, 199..."
1,Albania,"{1992, 1993, 1994, 1995, 1996, 1997, 1998, 199..."
2,Algeria,"{1970, 1975, 1976, 1977, 1978, 1979, 1980, 198..."
3,Angola,"{1976, 1977, 1978, 1979, 1980, 1981, 1982, 198..."
4,Antigua and Barbuda,"{2016, 2015}"
...,...,...
193,Vietnam,"{1971, 1972, 1973, 1977, 1978, 1979, 1980, 198..."
194,World,"{1990, 1991, 1992, 1993, 1994, 1995, 1996, 199..."
195,Yemen,"{1977, 1978, 1979, 1980, 1981, 1982, 1983, 198..."
196,Zambia,"{1965, 1966, 1967, 1968, 1969, 1970, 1971, 197..."


In [21]:
# Merge the two DataFrames on 'entity' for comparison
merged_refugee = pd.merge(grouped_origin, grouped_destination, on='Entity', suffixes=('_origin', '_destination'), how='outer')
merged_refugee

Unnamed: 0,Entity,Year_origin,Year_destination
0,Afghanistan,"{1979, 1980, 1981, 1982, 1983, 1984, 1985, 198...","{1990, 1991, 1992, 1993, 1994, 1995, 1996, 199..."
1,Albania,"{1964, 1965, 1966, 1967, 1968, 1969, 1970, 197...","{1992, 1993, 1994, 1995, 1996, 1997, 1998, 199..."
2,Algeria,"{1981, 1982, 1983, 1984, 1985, 1988, 1989, 199...","{1970, 1975, 1976, 1977, 1978, 1979, 1980, 198..."
3,Andorra,"{2022, 2002, 2003, 2005, 2006, 2007, 2008, 200...",
4,Angola,"{1960, 1961, 1962, 1963, 1964, 1965, 1966, 196...","{1976, 1977, 1978, 1979, 1980, 1981, 1982, 198..."
...,...,...,...
210,Vietnam,"{1964, 1965, 1966, 1967, 1968, 1969, 1970, 197...","{1971, 1972, 1973, 1977, 1978, 1979, 1980, 198..."
211,World,"{1990, 1991, 1992, 1993, 1994, 1995, 1996, 199...","{1990, 1991, 1992, 1993, 1994, 1995, 1996, 199..."
212,Yemen,"{1978, 1979, 1980, 1986, 1987, 1988, 1989, 199...","{1977, 1978, 1979, 1980, 1981, 1982, 1983, 198..."
213,Zambia,"{1968, 1969, 1970, 1971, 1972, 1973, 1974, 197...","{1965, 1966, 1967, 1968, 1969, 1970, 1971, 197..."


In [22]:
# Calculate the difference in the number of years per entity
merged_refugee['difference'] = merged_refugee.apply(lambda row: len(row['Year_destination']) - len(row['Year_origin'])
                                                    if not (pd.isna(row['Year_origin']) or pd.isna(row['Year_destination'])) 
                                                    else np.nan, 
                                                    axis=1)
merged_refugee[['Entity', 'difference']]


Unnamed: 0,Entity,difference
0,Afghanistan,-15.0
1,Albania,-27.0
2,Algeria,9.0
3,Andorra,
4,Angola,-16.0
...,...,...
210,Vietnam,-17.0
211,World,0.0
212,Yemen,6.0
213,Zambia,13.0


In [23]:
merged_refugee.difference.sum()

615.0

This makes sense, because in total, the destination df has 305 more rows than the origin df, but the origin destination has around 300 (321-11=310 exactly) more rows for countries, but then there are in total 615 more years data for the destination df. So in total around 300 (615-321+11 = 305 exactly) more rows.

In [24]:
# Show all rows for this specific print statement
with pd.option_context('display.max_rows', None):
    print(merged_refugee[['Entity', 'difference']])

                                Entity  difference
0                          Afghanistan       -15.0
1                              Albania       -27.0
2                              Algeria         9.0
3                              Andorra         NaN
4                               Angola       -16.0
5                  Antigua and Barbuda       -20.0
6                            Argentina        11.0
7                              Armenia       -18.0
8                            Australia        30.0
9                              Austria        36.0
10                          Azerbaijan         0.0
11                             Bahamas         4.0
12                             Bahrain       -12.0
13                          Bangladesh        -1.0
14                            Barbados         NaN
15                             Belarus         0.0
16                             Belgium        35.0
17                              Belize        16.0
18                             

All in all, there is too much difference to combine the numbers on how many refugees a country host in a particular year and how many refugees there are elsewhere that are born in that country. The data (mostly in years) does not allign. For certain years you could make an overview of this, but it is most logical to do for now to threat the two data sources/columns differently.

The above analysis may differ if we scope the data for the right years that are also availabe in climate data (which we will see and use later). Now all the data is used, starting from 1960, therefore there is a lot of missing data.. If you start from 2003, differences may be smaller. However, it is also not really necessary to combine the numbers in one dataframe for the plots and analysis that will follow.

So now first, loading other data in! And see the time frame we have in years.

# Loading in climate data

In [25]:
# Reading world risk index df
world_risk_index= pd.read_csv('worldriskindex-trend.csv')
world_risk_index

Unnamed: 0,WRI.Country,ISO3.Code,Year,W,E,V,S,C,A,S_01,...,AI_04a_Norm,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base
0,Afghanistan,AFG,2000,4.25,0.25,72.41,64.20,78.17,75.65,71.25,...,100.00,24,91.54,24,100.00,5,81.55,85.47,70.41,14.53
1,Albania,ALB,2000,6.55,2.38,18.00,12.72,44.07,10.40,51.82,...,19.61,97,19.81,97,27.05,91,49.54,1232.02,0.01,0.00
2,Algeria,DZA,2000,9.80,2.65,36.21,22.80,51.88,40.15,47.80,...,40.23,86,42.01,86,43.09,78,44.33,1759.71,15.96,0.30
3,Andorra,AND,2000,0.32,0.02,5.11,2.34,2.18,26.18,27.62,...,12.85,98,27.52,95,36.20,84,30.46,6112.59,60.03,7.22
4,Angola,AGO,2000,10.99,2.18,55.44,35.59,62.75,76.28,61.34,...,84.23,31,100.00,21,74.69,21,56.16,751.90,93.27,325.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4627,Vanuatu,VUT,2023,10.90,5.80,20.49,13.77,12.06,51.81,60.99,...,68.00,62,66.77,62,47.40,75,60.30,1318.79,31.68,4.60
4628,Viet Nam,VNM,2023,24.39,26.73,22.25,21.55,12.50,40.90,49.52,...,48.08,83,48.37,81,36.05,85,47.90,3698.73,24.27,3.20
4629,Yemen,YEM,2023,24.39,9.12,65.24,60.26,69.29,66.50,67.34,...,59.76,72,63.47,66,60.51,52,65.42,841.23,77.87,29.10
4630,Zambia,ZMB,2023,2.94,0.28,30.78,34.17,13.91,61.36,61.77,...,37.12,91,42.03,87,42.72,81,63.80,1028.14,59.22,11.00


The world risk index data is available from 2000 till 2023. The refugee data is available from 1960 till 2022. The choice in this study is to investigate 20 years, from 2003 up to and including 2022.

In [26]:
world_risk_index = world_risk_index[world_risk_index['Year']<=2022]
world_risk_index

Unnamed: 0,WRI.Country,ISO3.Code,Year,W,E,V,S,C,A,S_01,...,AI_04a_Norm,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base
0,Afghanistan,AFG,2000,4.25,0.25,72.41,64.20,78.17,75.65,71.25,...,100.00,24,91.54,24,100.00,5,81.55,85.47,70.41,14.53
1,Albania,ALB,2000,6.55,2.38,18.00,12.72,44.07,10.40,51.82,...,19.61,97,19.81,97,27.05,91,49.54,1232.02,0.01,0.00
2,Algeria,DZA,2000,9.80,2.65,36.21,22.80,51.88,40.15,47.80,...,40.23,86,42.01,86,43.09,78,44.33,1759.71,15.96,0.30
3,Andorra,AND,2000,0.32,0.02,5.11,2.34,2.18,26.18,27.62,...,12.85,98,27.52,95,36.20,84,30.46,6112.59,60.03,7.22
4,Angola,AGO,2000,10.99,2.18,55.44,35.59,62.75,76.28,61.34,...,84.23,31,100.00,21,74.69,21,56.16,751.90,93.27,325.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4434,Vanuatu,VUT,2022,10.90,5.80,20.49,13.77,12.06,51.81,60.99,...,68.00,62,66.77,62,47.40,75,60.30,1318.79,31.68,4.60
4435,Viet Nam,VNM,2022,24.39,26.73,22.25,21.55,12.50,40.90,49.52,...,48.08,83,48.37,81,36.05,85,47.90,3698.73,24.27,3.20
4436,Yemen,YEM,2022,24.39,9.12,65.24,60.26,69.29,66.50,67.34,...,59.76,72,63.47,66,60.51,52,65.42,841.23,77.87,29.10
4437,Zambia,ZMB,2022,2.94,0.28,30.78,34.17,13.91,61.36,61.77,...,37.12,91,42.03,87,42.72,81,63.80,1028.14,59.22,11.00


In [27]:
world_risk_index = world_risk_index[world_risk_index['Year']>=2003]
world_risk_index

Unnamed: 0,WRI.Country,ISO3.Code,Year,W,E,V,S,C,A,S_01,...,AI_04a_Norm,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base
579,Afghanistan,AFG,2003,4.22,0.25,71.17,62.53,74.70,77.17,67.49,...,77.27,41,83.88,41,92.14,6,69.97,252.88,86.91,35.7
580,Albania,ALB,2003,7.95,2.38,26.55,9.53,47.08,41.69,51.80,...,19.31,97,19.06,97,27.40,93,45.54,1794.80,41.28,2.4
581,Algeria,DZA,2003,10.04,2.64,38.15,22.54,52.96,46.51,48.03,...,41.51,87,41.52,87,21.29,96,41.83,2429.99,51.00,4.3
582,Andorra,AND,2003,0.26,0.02,3.40,1.68,2.02,11.56,4.69,...,0.01,99,0.01,99,36.65,86,25.51,7375.33,56.36,6.1
583,Angola,AGO,2003,11.01,2.22,54.64,36.10,60.73,74.42,60.66,...,72.90,46,100.00,21,73.69,26,53.27,1134.39,92.98,98.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4434,Vanuatu,VUT,2022,10.90,5.80,20.49,13.77,12.06,51.81,60.99,...,68.00,62,66.77,62,47.40,75,60.30,1318.79,31.68,4.6
4435,Viet Nam,VNM,2022,24.39,26.73,22.25,21.55,12.50,40.90,49.52,...,48.08,83,48.37,81,36.05,85,47.90,3698.73,24.27,3.2
4436,Yemen,YEM,2022,24.39,9.12,65.24,60.26,69.29,66.50,67.34,...,59.76,72,63.47,66,60.51,52,65.42,841.23,77.87,29.1
4437,Zambia,ZMB,2022,2.94,0.28,30.78,34.17,13.91,61.36,61.77,...,37.12,91,42.03,87,42.72,81,63.80,1028.14,59.22,11.0


# Merging climate data with refugee data

We start with merging the world risk index data with the origin location refugee data

In [28]:
ref_origin_location = ref_origin_location[ref_origin_location['Year'] >= 2003]
ref_origin_location

Unnamed: 0,Entity,Code,Year,Refugee population by country or territory of origin
24,Afghanistan,AFG,2003,2136039
25,Afghanistan,AFG,2004,2414460
26,Afghanistan,AFG,2005,2166139
27,Afghanistan,AFG,2006,2107510
28,Afghanistan,AFG,2007,3057655
...,...,...,...,...
7090,Zimbabwe,ZWE,2018,15618
7091,Zimbabwe,ZWE,2019,10045
7092,Zimbabwe,ZWE,2020,8575
7093,Zimbabwe,ZWE,2021,8115


Now we have the complete data frames (total) for the analysis, we check the missing values.

In [29]:
ref_origin_location[ref_origin_location.isna().any(axis=1)]

Unnamed: 0,Entity,Code,Year,Refugee population by country or territory of origin
1825,East Asia and Pacific (WB),,2003,708146
1826,East Asia and Pacific (WB),,2004,711437
1827,East Asia and Pacific (WB),,2005,728168
1828,East Asia and Pacific (WB),,2006,806559
1829,East Asia and Pacific (WB),,2007,726465
...,...,...,...,...
6763,Upper-middle-income countries,,2018,1169550
6764,Upper-middle-income countries,,2019,1214177
6765,Upper-middle-income countries,,2020,1276535
6766,Upper-middle-income countries,,2021,1207833


In [30]:
ref_origin_location.isna().sum()

Entity                                                    0
Code                                                    260
Year                                                      0
Refugee population by country or territory of origin      0
dtype: int64

In [31]:
ref_origin_location[ref_origin_location[['Entity', 'Year', 'Refugee population by country or territory of origin']].isna().any(axis=1)]

Unnamed: 0,Entity,Code,Year,Refugee population by country or territory of origin


Only missing values in country code, which is no problem, because this are for example continents.

Eventually, we want to merge the refugee and world risk index data, so we check how many countries are only in one dataframe or the other.

In [32]:
# Countries in ref_origin_location but not in world_risk_index df
countries_in_org_not_risk = ref_origin_location[~ref_origin_location['Entity'].isin(world_risk_index['WRI.Country'])]['Entity']

print("Countries only in ref_destination_location, NOT in world risk index df:")
print(set(countries_in_org_not_risk))
print(len(set(countries_in_org_not_risk)))

Countries only in ref_destination_location, NOT in world risk index df:
{'North America (WB)', 'Latin America and Caribbean (WB)', 'South Asia (WB)', 'Curacao', 'Cayman Islands', 'United States', 'North Korea', 'Congo', 'Europe and Central Asia (WB)', 'Libya', 'Low-income countries', 'Hong Kong', 'East Asia and Pacific (WB)', 'Vietnam', 'Russia', 'Bolivia', 'Moldova', 'High-income countries', 'World', 'Lower-middle-income countries', 'United Kingdom', 'Turks and Caicos Islands', 'South Korea', 'Czechia', 'Middle-income countries', 'Syria', 'Macao', 'East Timor', 'Sub-Saharan Africa (WB)', 'Middle East and North Africa (WB)', 'Palestine', 'Tanzania', 'Iran', 'Laos', 'Brunei', 'European Union (27)', 'Venezuela', 'Upper-middle-income countries'}
38


A lot of the 38 names above are not countries, but continents, which we will not include anyway. So it is no poblem that these do not occur in both dfs.

And sometimes, the country code is in both df's, but the country name is different. For example, for Russia this is the case. Code: RUS.

In [33]:
# Countries in ref_origin_location but not in world_risk_index df #based on country code
countries_in_org_not_risk_based_on_code = ref_origin_location[~ref_origin_location['Code'].isin(world_risk_index['ISO3.Code'])]['Code']

print("Countries only in ref_destination_location, NOT in world risk index df:")
print(set(countries_in_org_not_risk_based_on_code))
print(len(set(countries_in_org_not_risk_based_on_code)))

Countries only in ref_destination_location, NOT in world risk index df:
{'CYM', 'MAC', 'HKG', 'TCA', nan, 'CUW', 'PSE', 'OWID_WRL'}
8


We check also the other way around: countries that are only in the world risk index and not the ref origin location df.

In [34]:
# Countries in world_risk_index df but not in ref_origin_location
countries_in_worldrisk_not_origin = world_risk_index[~world_risk_index['WRI.Country'].isin(ref_origin_location['Entity'])]['WRI.Country']

print("Countries only in world risk index, NOT in ref_origin_location:")
print(set(countries_in_worldrisk_not_origin))
print(len(set(countries_in_worldrisk_not_origin)))

Countries only in world risk index, NOT in ref_origin_location:
{'Bolivarian Republic of Venezuela', 'United States of America', 'United Kingdom of Great Britain and Northern Ireland', 'Iran (Islamic Republic of)', 'San Marino', 'Plurinational State of Bolivia', 'Republic of Congo', 'Brunei Darussalam', 'Republic of Moldova', 'United Republic of Tanzania', 'Czech Republic', 'Libyan Arab Jamahiriya', 'Syrian Arab Republic', 'Viet Nam', "Lao People's Democratic Republic", 'Federated States of Micronesia', 'Russian Federation', "Democratic People's Republic of Korea", 'Timor-Leste', 'Republic of Korea'}
20


In [35]:
# Countries in world_risk_index df but not in ref_origin_location_2000 #based on country code
countries_in_worldrisk_not_origin_based_on_code = world_risk_index[~world_risk_index['ISO3.Code'].isin(ref_origin_location['Code'])]['ISO3.Code']

print("Countries only in world risk index, NOT in ref_origin_location df:")
print(set(countries_in_worldrisk_not_origin_based_on_code))
print(len(set(countries_in_worldrisk_not_origin_based_on_code)))

Countries only in world risk index, NOT in ref_origin_location df:
{'SMR', 'FSM'}
2


In the end, only few, small countries (islands or palestina for example) are not in both countries. The only well known countries that do not overlap between the datasets are Palestine and Hong Kong. Further, there are only some minor countries, small islands in Oceania that are not listed in both datasets. 

We go on with checking for missing values.

In [36]:
world_risk_index.isna().sum()

WRI.Country    0
ISO3.Code      0
Year           0
W              0
E              0
              ..
AI_04c_Base    0
AI_05a_Norm    0
AI_05a_Base    0
AI_05b_Norm    0
AI_05b_Base    0
Length: 248, dtype: int64

In [37]:
# Show all rows for this specific print statement
with pd.option_context('display.max_rows', None):
    print(world_risk_index.isna().sum())

WRI.Country       0
ISO3.Code         0
Year              0
W                 0
E                 0
V                 0
S                 0
C                 0
A                 0
S_01              0
S_02              0
S_03              0
S_04              0
S_05              0
C_01              0
C_02              0
C_03              0
A_01              0
A_02              0
A_03              0
EI_01             0
EI_02             0
EI_03             0
EI_04             0
EI_05             0
EI_06             0
EI_07             0
SI_01             0
SI_02             0
SI_03             0
SI_04             0
SI_05             0
SI_06             0
SI_07             0
SI_08             0
SI_09             0
SI_10             0
SI_11             0
SI_12             0
SI_13             0
SI_14             0
CI_01             0
CI_02             0
CI_03             0
CI_04             0
CI_05             0
CI_06             0
CI_07             0
AI_01             0
AI_02             0


We drop the one column with missing values: SI_14a_Norm. The variable is: Internally Displaced Persons Due To Violence And Conflict. We actually do not need the IDP data, because we do cross country analysis. Also, IDPs are already segmented in categories 'climate' and 'conflict', which is not suitable for this research.

In [38]:
world_risk_index = world_risk_index.drop(columns=['SI_14a_Norm'])

In [39]:
world_risk_index.isna().sum().sum()

0

Now we rename columns to prepare the merge.

In [40]:
# Change the name of one column
ref_origin_location = ref_origin_location.rename(columns={'Entity':'Country'})
ref_origin_location

Unnamed: 0,Country,Code,Year,Refugee population by country or territory of origin
24,Afghanistan,AFG,2003,2136039
25,Afghanistan,AFG,2004,2414460
26,Afghanistan,AFG,2005,2166139
27,Afghanistan,AFG,2006,2107510
28,Afghanistan,AFG,2007,3057655
...,...,...,...,...
7090,Zimbabwe,ZWE,2018,15618
7091,Zimbabwe,ZWE,2019,10045
7092,Zimbabwe,ZWE,2020,8575
7093,Zimbabwe,ZWE,2021,8115


In [41]:
# Change the name of columns
world_risk_index= world_risk_index.rename(columns={'WRI.Country':'Country'})
world_risk_index= world_risk_index.rename(columns={'ISO3.Code':'Code'})
world_risk_index

Unnamed: 0,Country,Code,Year,W,E,V,S,C,A,S_01,...,AI_04a_Norm,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base
579,Afghanistan,AFG,2003,4.22,0.25,71.17,62.53,74.70,77.17,67.49,...,77.27,41,83.88,41,92.14,6,69.97,252.88,86.91,35.7
580,Albania,ALB,2003,7.95,2.38,26.55,9.53,47.08,41.69,51.80,...,19.31,97,19.06,97,27.40,93,45.54,1794.80,41.28,2.4
581,Algeria,DZA,2003,10.04,2.64,38.15,22.54,52.96,46.51,48.03,...,41.51,87,41.52,87,21.29,96,41.83,2429.99,51.00,4.3
582,Andorra,AND,2003,0.26,0.02,3.40,1.68,2.02,11.56,4.69,...,0.01,99,0.01,99,36.65,86,25.51,7375.33,56.36,6.1
583,Angola,AGO,2003,11.01,2.22,54.64,36.10,60.73,74.42,60.66,...,72.90,46,100.00,21,73.69,26,53.27,1134.39,92.98,98.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4434,Vanuatu,VUT,2022,10.90,5.80,20.49,13.77,12.06,51.81,60.99,...,68.00,62,66.77,62,47.40,75,60.30,1318.79,31.68,4.6
4435,Viet Nam,VNM,2022,24.39,26.73,22.25,21.55,12.50,40.90,49.52,...,48.08,83,48.37,81,36.05,85,47.90,3698.73,24.27,3.2
4436,Yemen,YEM,2022,24.39,9.12,65.24,60.26,69.29,66.50,67.34,...,59.76,72,63.47,66,60.51,52,65.42,841.23,77.87,29.1
4437,Zambia,ZMB,2022,2.94,0.28,30.78,34.17,13.91,61.36,61.77,...,37.12,91,42.03,87,42.72,81,63.80,1028.14,59.22,11.0


We merge on either country name or country code, because both or one of the two can overlap.

In [42]:
# Merge DataFrames on 'Year' and 'Code'
refugeeorigin_worldrisk = pd.merge(ref_origin_location, world_risk_index, on=['Year', 'Code'], how='inner')
refugeeorigin_worldrisk

Unnamed: 0,Country_x,Code,Year,Refugee population by country or territory of origin,Country_y,W,E,V,S,C,...,AI_04a_Norm,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base
0,Afghanistan,AFG,2003,2136039,Afghanistan,4.22,0.25,71.17,62.53,74.70,...,77.27,41,83.88,41,92.14,6,69.97,252.88,86.91,35.7
1,Afghanistan,AFG,2004,2414460,Afghanistan,4.20,0.25,70.48,63.15,73.40,...,72.62,50,71.58,50,100.00,2,73.91,184.09,77.87,16.4
2,Afghanistan,AFG,2005,2166139,Afghanistan,4.12,0.25,67.94,62.22,69.49,...,71.87,58,71.73,58,100.00,9,73.35,203.37,67.55,10.6
3,Afghanistan,AFG,2006,2107510,Afghanistan,4.14,0.25,68.48,61.59,75.41,...,71.07,58,71.09,58,86.07,18,77.87,179.24,56.41,6.8
4,Afghanistan,AFG,2007,3057655,Afghanistan,4.17,0.25,69.62,61.97,76.98,...,70.00,63,69.70,63,100.00,4,77.87,200.60,62.88,8.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3562,Zimbabwe,ZWE,2018,15618,Zimbabwe,2.50,0.20,31.27,30.67,15.24,...,44.93,89,43.43,89,45.21,78,81.55,271.45,70.58,10.6
3563,Zimbabwe,ZWE,2019,10045,Zimbabwe,2.55,0.20,32.58,32.41,15.45,...,43.26,90,41.92,90,51.49,75,82.72,245.70,93.27,255.3
3564,Zimbabwe,ZWE,2020,8575,Zimbabwe,2.54,0.20,32.23,31.81,15.39,...,44.29,86,43.58,86,49.38,74,84.04,210.90,92.64,557.2
3565,Zimbabwe,ZWE,2021,8115,Zimbabwe,2.53,0.20,32.04,31.69,15.34,...,43.86,86,43.35,86,48.28,74,82.72,288.13,89.84,98.5


In [43]:
refugeeorigin_worldrisk = refugeeorigin_worldrisk.drop(columns=['Country_y'])
refugeeorigin_worldrisk = refugeeorigin_worldrisk.rename(columns={'Country_x':'Country'})
refugeeorigin_worldrisk

Unnamed: 0,Country,Code,Year,Refugee population by country or territory of origin,W,E,V,S,C,A,...,AI_04a_Norm,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base
0,Afghanistan,AFG,2003,2136039,4.22,0.25,71.17,62.53,74.70,77.17,...,77.27,41,83.88,41,92.14,6,69.97,252.88,86.91,35.7
1,Afghanistan,AFG,2004,2414460,4.20,0.25,70.48,63.15,73.40,75.53,...,72.62,50,71.58,50,100.00,2,73.91,184.09,77.87,16.4
2,Afghanistan,AFG,2005,2166139,4.12,0.25,67.94,62.22,69.49,72.54,...,71.87,58,71.73,58,100.00,9,73.35,203.37,67.55,10.6
3,Afghanistan,AFG,2006,2107510,4.14,0.25,68.48,61.59,75.41,69.13,...,71.07,58,71.09,58,86.07,18,77.87,179.24,56.41,6.8
4,Afghanistan,AFG,2007,3057655,4.17,0.25,69.62,61.97,76.98,70.73,...,70.00,63,69.70,63,100.00,4,77.87,200.60,62.88,8.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3562,Zimbabwe,ZWE,2018,15618,2.50,0.20,31.27,30.67,15.24,65.41,...,44.93,89,43.43,89,45.21,78,81.55,271.45,70.58,10.6
3563,Zimbabwe,ZWE,2019,10045,2.55,0.20,32.58,32.41,15.45,69.09,...,43.26,90,41.92,90,51.49,75,82.72,245.70,93.27,255.3
3564,Zimbabwe,ZWE,2020,8575,2.54,0.20,32.23,31.81,15.39,68.36,...,44.29,86,43.58,86,49.38,74,84.04,210.90,92.64,557.2
3565,Zimbabwe,ZWE,2021,8115,2.53,0.20,32.04,31.69,15.34,67.64,...,43.86,86,43.35,86,48.28,74,82.72,288.13,89.84,98.5


In [44]:
refugeeorigin_worldrisk= refugeeorigin_worldrisk.rename(columns={'Refugee population by country or territory of origin':'#refugees_origin'})

In [45]:
refugeeorigin_worldrisk.to_csv('refugeeorigin_worldrisk.csv',index=False)

Now we start preparing the merge for the world risk index with the refugee destination data. 

(All the tests we did previously for differences between world risk index and ref_origin_location in countries will NOT be conducted again for differences between world_risk_index and ref_destionation_location, because we know already that, in terms of listed countries, the difference between origin and destination is acceptable and therefore also between destination and world risk index.) 

In [46]:
ref_destination_location = ref_destination_location[ref_destination_location['Year']>=2000]
ref_destination_location

Unnamed: 0,Entity,Code,Year,Refugee population by country or territory of asylum
8,Afghanistan,AFG,2001,10
9,Afghanistan,AFG,2003,24
10,Afghanistan,AFG,2004,31
11,Afghanistan,AFG,2005,31
12,Afghanistan,AFG,2006,34
...,...,...,...,...
7395,Zimbabwe,ZWE,2018,7795
7396,Zimbabwe,ZWE,2019,8956
7397,Zimbabwe,ZWE,2020,9261
7398,Zimbabwe,ZWE,2021,9483


In [47]:
ref_destination_location = ref_destination_location.rename(columns={'Refugee population by country or territory of asylum':'#refugees_destination'})
ref_destination_location

Unnamed: 0,Entity,Code,Year,#refugees_destination
8,Afghanistan,AFG,2001,10
9,Afghanistan,AFG,2003,24
10,Afghanistan,AFG,2004,31
11,Afghanistan,AFG,2005,31
12,Afghanistan,AFG,2006,34
...,...,...,...,...
7395,Zimbabwe,ZWE,2018,7795
7396,Zimbabwe,ZWE,2019,8956
7397,Zimbabwe,ZWE,2020,9261
7398,Zimbabwe,ZWE,2021,9483


In [48]:
# Merge DataFrames on 'Year' and 'Code'
refugeedestination_worldrisk = pd.merge(ref_destination_location, world_risk_index, on=['Year', 'Code'], how='inner')
refugeedestination_worldrisk

Unnamed: 0,Entity,Code,Year,#refugees_destination,Country,W,E,V,S,C,...,AI_04a_Norm,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base
0,Afghanistan,AFG,2003,24,Afghanistan,4.22,0.25,71.17,62.53,74.70,...,77.27,41,83.88,41,92.14,6,69.97,252.88,86.91,35.7
1,Afghanistan,AFG,2004,31,Afghanistan,4.20,0.25,70.48,63.15,73.40,...,72.62,50,71.58,50,100.00,2,73.91,184.09,77.87,16.4
2,Afghanistan,AFG,2005,31,Afghanistan,4.12,0.25,67.94,62.22,69.49,...,71.87,58,71.73,58,100.00,9,73.35,203.37,67.55,10.6
3,Afghanistan,AFG,2006,34,Afghanistan,4.14,0.25,68.48,61.59,75.41,...,71.07,58,71.09,58,86.07,18,77.87,179.24,56.41,6.8
4,Afghanistan,AFG,2007,43,Afghanistan,4.17,0.25,69.62,61.97,76.98,...,70.00,63,69.70,63,100.00,4,77.87,200.60,62.88,8.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3092,Zimbabwe,ZWE,2018,7795,Zimbabwe,2.50,0.20,31.27,30.67,15.24,...,44.93,89,43.43,89,45.21,78,81.55,271.45,70.58,10.6
3093,Zimbabwe,ZWE,2019,8956,Zimbabwe,2.55,0.20,32.58,32.41,15.45,...,43.26,90,41.92,90,51.49,75,82.72,245.70,93.27,255.3
3094,Zimbabwe,ZWE,2020,9261,Zimbabwe,2.54,0.20,32.23,31.81,15.39,...,44.29,86,43.58,86,49.38,74,84.04,210.90,92.64,557.2
3095,Zimbabwe,ZWE,2021,9483,Zimbabwe,2.53,0.20,32.04,31.69,15.34,...,43.86,86,43.35,86,48.28,74,82.72,288.13,89.84,98.5


In [49]:
refugeedestination_worldrisk = refugeedestination_worldrisk.drop(columns=['Entity'])

In [50]:
len(refugeedestination_worldrisk.Country.unique())

173

In [51]:
len(refugeeorigin_worldrisk.Country.unique())

191

In [52]:
refugeedestination_worldrisk.to_csv('refugeedestination_worldrisk.csv',index=False)

# Data prep with geo data

In [53]:
geo_worldriskindex = gpd.read_file('worldriskindex_trend.gpkg')
geo_worldriskindex = geo_worldriskindex.rename(columns={'ISO':'Code'})
geo_worldriskindex

Unnamed: 0,Code,WRI.Country,Year,W,E,V,S,C,A,S_01,...,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base,geometry
0,ABW,,,,,,,,,,...,,,,,,,,,,"MULTIPOLYGON (((-69.97820 12.46986, -70.06292 ..."
1,AFG,Afghanistan,2000,4.25,0.25,72.41,64.2,78.17,75.65,71.25,...,24,91.54,24,100,5,81.55,85.47,70.41,14.53,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
2,AGO,Angola,2000,10.99,2.18,55.44,35.59,62.75,76.28,61.34,...,31,100,21,74.69,21,56.16,751.9,93.27,325,"MULTIPOLYGON (((12.00986 -5.03066, 12.17323 -4..."
3,AIA,,,,,,,,,,...,,,,,,,,,,"MULTIPOLYGON (((-63.06847 18.23680, -63.00681 ..."
4,ALB,Albania,2000,6.55,2.38,18,12.72,44.07,10.4,51.82,...,97,19.81,97,27.05,91,49.54,1232.02,0.01,0,"MULTIPOLYGON (((19.37540 41.85014, 19.34141 41..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
241,CYP,Cyprus,2000,3.73,1,13.89,8.87,8.07,37.44,40.32,...,97,19.81,97,34.79,85,37.12,3129.75,53.86,4.9,"MULTIPOLYGON (((33.89257 34.95486, 33.79900 35..."
242,ITA,Italy,2000,10.63,8.85,12.78,8.76,7.56,31.49,29,...,87,19.81,97,34.79,85,32.45,5613.1,42.34,2.6,"MULTIPOLYGON (((12.47319 37.73653, 12.42403 37..."
243,ISR,Israel,2000,4.59,0.88,23.93,14.31,31.31,30.59,30.67,...,93,32.15,93,19.99,95,30.03,6141.31,30.88,1.1,"MULTIPOLYGON (((34.22903 31.33431, 34.45208 31..."
244,SRB,Serbia,2000,1.95,0.17,22.35,19.19,12.86,45.24,56.21,...,95,13.11,98,43.09,78,56.41,734.53,85.59,70,"MULTIPOLYGON (((20.03311 42.54832, 20.07159 42..."


In [54]:
geoworld_originref = pd.merge(refugeeorigin_worldrisk, geo_worldriskindex[['Code', 'geometry']], on='Code', how='inner')
geoworld_originref = gpd.GeoDataFrame(geoworld_originref, geometry='geometry')
geoworld_originref

Unnamed: 0,Country,Code,Year,#refugees_origin,W,E,V,S,C,A,...,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base,geometry
0,Afghanistan,AFG,2003,2136039,4.22,0.25,71.17,62.53,74.70,77.17,...,41,83.88,41,92.14,6,69.97,252.88,86.91,35.7,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
1,Afghanistan,AFG,2004,2414460,4.20,0.25,70.48,63.15,73.40,75.53,...,50,71.58,50,100.00,2,73.91,184.09,77.87,16.4,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
2,Afghanistan,AFG,2005,2166139,4.12,0.25,67.94,62.22,69.49,72.54,...,58,71.73,58,100.00,9,73.35,203.37,67.55,10.6,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
3,Afghanistan,AFG,2006,2107510,4.14,0.25,68.48,61.59,75.41,69.13,...,58,71.09,58,86.07,18,77.87,179.24,56.41,6.8,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
4,Afghanistan,AFG,2007,3057655,4.17,0.25,69.62,61.97,76.98,70.73,...,63,69.70,63,100.00,4,77.87,200.60,62.88,8.7,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3562,Zimbabwe,ZWE,2018,15618,2.50,0.20,31.27,30.67,15.24,65.41,...,89,43.43,89,45.21,78,81.55,271.45,70.58,10.6,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."
3563,Zimbabwe,ZWE,2019,10045,2.55,0.20,32.58,32.41,15.45,69.09,...,90,41.92,90,51.49,75,82.72,245.70,93.27,255.3,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."
3564,Zimbabwe,ZWE,2020,8575,2.54,0.20,32.23,31.81,15.39,68.36,...,86,43.58,86,49.38,74,84.04,210.90,92.64,557.2,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."
3565,Zimbabwe,ZWE,2021,8115,2.53,0.20,32.04,31.69,15.34,67.64,...,86,43.35,86,48.28,74,82.72,288.13,89.84,98.5,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."


In [55]:
geoworld_originref.to_csv('geoworld_originref.csv',index=False)

In [56]:
geoworld_destinationref = pd.merge(refugeedestination_worldrisk, geo_worldriskindex[['Code', 'geometry']], on='Code', how='inner')
geoworld_destinationref = gpd.GeoDataFrame(geoworld_destinationref, geometry='geometry')
geoworld_destinationref

Unnamed: 0,Code,Year,#refugees_destination,Country,W,E,V,S,C,A,...,AI_04a_Base,AI_04b_Norm,AI_04b_Base,AI_04c_Norm,AI_04c_Base,AI_05a_Norm,AI_05a_Base,AI_05b_Norm,AI_05b_Base,geometry
0,AFG,2003,24,Afghanistan,4.22,0.25,71.17,62.53,74.70,77.17,...,41,83.88,41,92.14,6,69.97,252.88,86.91,35.7,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
1,AFG,2004,31,Afghanistan,4.20,0.25,70.48,63.15,73.40,75.53,...,50,71.58,50,100.00,2,73.91,184.09,77.87,16.4,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
2,AFG,2005,31,Afghanistan,4.12,0.25,67.94,62.22,69.49,72.54,...,58,71.73,58,100.00,9,73.35,203.37,67.55,10.6,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
3,AFG,2006,34,Afghanistan,4.14,0.25,68.48,61.59,75.41,69.13,...,58,71.09,58,86.07,18,77.87,179.24,56.41,6.8,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
4,AFG,2007,43,Afghanistan,4.17,0.25,69.62,61.97,76.98,70.73,...,63,69.70,63,100.00,4,77.87,200.60,62.88,8.7,"MULTIPOLYGON (((60.89944 29.83750, 60.86578 29..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3092,ZWE,2018,7795,Zimbabwe,2.50,0.20,31.27,30.67,15.24,65.41,...,89,43.43,89,45.21,78,81.55,271.45,70.58,10.6,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."
3093,ZWE,2019,8956,Zimbabwe,2.55,0.20,32.58,32.41,15.45,69.09,...,90,41.92,90,51.49,75,82.72,245.70,93.27,255.3,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."
3094,ZWE,2020,9261,Zimbabwe,2.54,0.20,32.23,31.81,15.39,68.36,...,86,43.58,86,49.38,74,84.04,210.90,92.64,557.2,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."
3095,ZWE,2021,9483,Zimbabwe,2.53,0.20,32.04,31.69,15.34,67.64,...,86,43.35,86,48.28,74,82.72,288.13,89.84,98.5,"MULTIPOLYGON (((29.36831 -22.19781, 29.24846 -..."


In [57]:
type(geoworld_destinationref)

geopandas.geodataframe.GeoDataFrame

In [58]:
geoworld_destinationref.to_csv('geoworld_destinationref.csv',index=False)