In [42]:
import pandas as pd

### Slavery data PreProcessing

In [43]:
slavery_2015 = pd.read_csv("../Datasets/slavery_2016.csv")[["Country", "POPULATION", "ESTIMATED NUMBER IN MODERN SLAVERY"]]
slavery_2017 = pd.read_csv("../Datasets/slavery_2018.csv")[["Country ", "Est. prevalence of population in modern slavery (victims per 1,000 population)"]]
slavery_2021 = pd.read_csv("../Datasets/slavery_2023.csv")[["Country", "Estimated prevalence of modern slavery per 1,000 population"]]

slavery_2015.columns = slavery_2015.columns.str.strip()
slavery_2017.columns = slavery_2017.columns.str.strip()
slavery_2021.columns = slavery_2021.columns.str.strip()

In [44]:
slavery_2015["Estimated prevalence of modern slavery per 1,000 population"] = (slavery_2015["ESTIMATED NUMBER IN MODERN SLAVERY"]/slavery_2015["POPULATION"])*1000
slavery_2015 = slavery_2015[["Country", "Estimated prevalence of modern slavery per 1,000 population"]]

In [45]:
slavery_2015.columns = ["Country", "2015"]
slavery_2017.columns = ["Country", "2017"]
slavery_2021.columns = ["Country", "2021"]

In [46]:
# Find common entries in the common column  
# NOTE: I HAVE GIVEN MORE IMPORTANCE TO COUNTRIES IN THE SALVERY DATASET. (Since slavery data is the main comparing data here)
common_entries = set(slavery_2015['Country']).intersection(slavery_2017['Country']).intersection(slavery_2021['Country'])

In [47]:
# Filter the DataFrames to keep only rows with common entries
slavery_2015_filtered = slavery_2015[slavery_2015['Country'].isin(common_entries)]
slavery_2017_filtered = slavery_2017[slavery_2017['Country'].isin(common_entries)]
slavery_2021_filtered = slavery_2021[slavery_2021['Country'].isin(common_entries)]

In [48]:
# Merge the filtered DataFrames on the common column
temp1 = pd.merge(slavery_2015_filtered, slavery_2017_filtered, on='Country', how='inner')
slavery = pd.merge(temp1, slavery_2021_filtered, on='Country', how='inner')

In [49]:
slavery.head()

Unnamed: 0,Country,2015,2017,2021
0,Afghanistan,11.30138,22.2,13.0
1,Albania,2.959394,6.9,11.8
2,Algeria,6.259611,2.7,1.9
3,Angola,6.382384,7.2,4.1
4,Armenia,4.671968,5.3,8.9


In [50]:
slavery.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 145 entries, 0 to 144
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  145 non-null    object 
 1   2015     145 non-null    float64
 2   2017     145 non-null    float64
 3   2021     139 non-null    float64
dtypes: float64(3), object(1)
memory usage: 4.7+ KB


In [51]:
slavery[slavery.isnull().any(axis=1)]

Unnamed: 0,Country,2015,2017,2021
20,Cape Verde,4.606526,4.1,
44,Iceland,1.208459,2.1,
64,Luxembourg,0.177936,1.5,
72,Montenegro,4.019293,5.9,
122,Barbados,2.112676,2.7,
142,Suriname,4.604052,2.3,


### Other parameters data preprocessing

In [52]:
corruption = pd.read_csv("../Datasets/corruption.csv")
democracy = pd.read_csv("../Datasets/democracy.csv")
gdppercapita = pd.read_csv("../Datasets/gdppercapita.csv")
lfpr = pd.read_csv("../Datasets/lfpr.csv")
migration = pd.read_csv("../Datasets/migration.csv")

corruption.columns = corruption.columns.str.strip()
democracy.columns = democracy.columns.str.strip()
gdppercapita.columns = gdppercapita.columns.str.strip()
lfpr.columns = lfpr.columns.str.strip()
migration.columns = migration.columns.str.strip()

In [53]:
lfpr = lfpr[lfpr['Type_LFPR'].isin(['Total'])]  # removing male, female entries
lfpr = lfpr[["Country", "Year", "LFPR"]]

In [54]:
lfpr.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6079 entries, 2 to 18172
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  6079 non-null   object 
 1   Year     6079 non-null   int64  
 2   LFPR     6079 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 190.0+ KB


In [55]:
years = [2015, 2017, 2021]

In [56]:
corruption = corruption[corruption['Year'].isin(years)]
democracy = democracy[democracy['Year'].isin(years)]
gdppercapita = gdppercapita[gdppercapita['Year'].isin(years)]
lfpr = lfpr[lfpr['Year'].isin(years)]
migration = migration[migration['Year'].isin(years)]

In [57]:
# Find common entries in the common column
common_entries = set(corruption['Country']).intersection(democracy['Country']).intersection(gdppercapita['Country']).intersection(lfpr['Country']).intersection(migration['Country'])

In [58]:
# Filter the DataFrames to keep only rows with common countries
corruption_filtered = corruption[corruption['Country'].isin(common_entries)]
democracy_filtered = democracy[democracy['Country'].isin(common_entries)]
gdppercapita_filtered = gdppercapita[gdppercapita['Country'].isin(common_entries)]
lfpr_filtered = lfpr[lfpr['Country'].isin(common_entries)]
migration_filtered = migration[migration['Country'].isin(common_entries)]

In [59]:
# Create a complete DataFrame with all combinations of Country and Year
all_combinations = pd.DataFrame([(country, year) for country in common_entries for year in years], columns=['Country', 'Year'])

In [60]:
all_combinations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 429 entries, 0 to 428
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Country  429 non-null    object
 1   Year     429 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 6.8+ KB


In [61]:
# Merge each DataFrame with the complete combinations to fill missing rows with NaN
corruption_complete = pd.merge(all_combinations, corruption_filtered, on=['Country', 'Year'], how='left')
democracy_complete = pd.merge(all_combinations, democracy_filtered, on=['Country', 'Year'], how='left')
gdppercapita_complete = pd.merge(all_combinations, gdppercapita_filtered, on=['Country', 'Year'], how='left')
lfpr_complete = pd.merge(all_combinations, lfpr_filtered, on=['Country', 'Year'], how='left')
migration_complete = pd.merge(all_combinations, migration_filtered, on=['Country', 'Year'], how='left')

In [62]:
# Merge the filtered DataFrames on the common column
temp2 = pd.merge(corruption_complete, democracy_complete, on=['Country', 'Year'], how='inner')
temp3 = pd.merge(temp2, gdppercapita_complete, on=['Country', 'Year'], how='inner')
temp4 = pd.merge(temp3, lfpr_complete, on=['Country', 'Year'], how='inner')
comparing_data = pd.merge(temp4, migration_complete, on=['Country', 'Year'], how='inner')

In [63]:
comparing_data.head()

Unnamed: 0,Country,Year,Corruption,Democracy score,GDP per capita,LFPR,Migration
0,Albania,2015,36.0,5.91,11878.495,64.775,-12240
1,Albania,2017,38.0,5.98,12771.054,67.665,-9768
2,Albania,2021,35.0,6.11,14596.016,68.684,-10612
3,Ethiopia,2015,33.0,3.83,1750.6727,82.588,62784
4,Ethiopia,2017,35.0,3.42,1987.9688,82.437,71769


In [64]:
comparing_data.head()

Unnamed: 0,Country,Year,Corruption,Democracy score,GDP per capita,LFPR,Migration
0,Albania,2015,36.0,5.91,11878.495,64.775,-12240
1,Albania,2017,38.0,5.98,12771.054,67.665,-9768
2,Albania,2021,35.0,6.11,14596.016,68.684,-10612
3,Ethiopia,2015,33.0,3.83,1750.6727,82.588,62784
4,Ethiopia,2017,35.0,3.42,1987.9688,82.437,71769


In [65]:
comparing_data[comparing_data.isnull().any(axis=1)]

Unnamed: 0,Country,Year,Corruption,Democracy score,GDP per capita,LFPR,Migration
165,Fiji,2015,,5.69,11903.502,60.66,-11047
166,Fiji,2017,,5.85,12822.014,59.878,-11605
258,Equatorial Guinea,2015,,1.77,24563.5,57.21,17694
264,Eswatini,2015,,3.09,8088.7344,53.082,-10464


### lfpr (male, female, both) and slavery preprocess

In [66]:
lfpr = pd.read_csv("../Datasets/lfpr.csv")

In [67]:
lfpr.head()

Unnamed: 0,Country,Year,LFPR,Type_LFPR
0,Afghanistan,1991,16.101,Female
1,Afghanistan,1991,80.99,Male
2,Afghanistan,1991,48.039,Total
3,Afghanistan,1992,16.17,Female
4,Afghanistan,1992,80.94,Male


In [68]:
lfpr_total = lfpr[lfpr['Type_LFPR'].isin(['Total'])]  # removing male, female entries
lfpr_male = lfpr[lfpr['Type_LFPR'].isin(['Male'])]
lfpr_female = lfpr[lfpr['Type_LFPR'].isin(['Female'])]

In [69]:
lfpr_total = lfpr_total[["Country", "Year", "LFPR"]]
lfpr_male = lfpr_male[["Country", "Year", "LFPR"]]
lfpr_female = lfpr_female[["Country", "Year", "LFPR"]]

In [70]:
lfpr_total = lfpr_total[lfpr_total['Year'].isin([2015, 2017, 2021])]
lfpr_male = lfpr_male[lfpr_male['Year'].isin([2015, 2017, 2021])]
lfpr_female = lfpr_female[lfpr_female['Year'].isin([2015, 2017, 2021])]

In [71]:
common_lfpr_slavery = set(slavery['Country']).intersection(lfpr_total['Country'])

In [72]:
# Filter the DataFrames to keep only rows with common entries
slavery_filtered = slavery[slavery['Country'].isin(common_lfpr_slavery)]

lfpr_total_filtered = lfpr_total[lfpr_total['Country'].isin(common_lfpr_slavery)]
lfpr_male_filtered = lfpr_male[lfpr_male['Country'].isin(common_lfpr_slavery)]
lfpr_female_filtered = lfpr_female[lfpr_female['Country'].isin(common_lfpr_slavery)]

In [73]:
# Pivot the DataFrame
lfpr_total_pivot = lfpr_total_filtered.pivot(index='Country', columns='Year', values='LFPR').reset_index()
lfpr_male_pivot = lfpr_male_filtered.pivot(index='Country', columns='Year', values='LFPR').reset_index()
lfpr_female_pivot = lfpr_female_filtered.pivot(index='Country', columns='Year', values='LFPR').reset_index()

# Rename the columns for clarity
lfpr_total_pivot.columns.name = None
lfpr_total_pivot.columns = ['Country', 2015, 2017, 2021]

lfpr_male_pivot.columns.name = None
lfpr_male_pivot.columns = ['Country', 2015, 2017, 2021]

lfpr_female_pivot.columns.name = None
lfpr_female_pivot.columns = ['Country', 2015, 2017, 2021]

In [74]:
slavery_reduced = slavery_filtered[["Country", "2015", "2017", "2021"]]
slavery_reduced.columns = ["Country", 2015, 2017, 2021]

In [75]:
slavery_reduced = slavery_reduced.sort_values(by='Country')

lfpr_total_pivot = lfpr_total_pivot.sort_values(by='Country')
lfpr_male_pivot = lfpr_male_pivot.sort_values(by='Country')
lfpr_female_pivot = lfpr_female_pivot.sort_values(by='Country')

In [76]:
slavery_reduced.info()

<class 'pandas.core.frame.DataFrame'>
Index: 138 entries, 0 to 120
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  138 non-null    object 
 1   2015     138 non-null    float64
 2   2017     138 non-null    float64
 3   2021     133 non-null    float64
dtypes: float64(3), object(1)
memory usage: 5.4+ KB


In [77]:
lfpr_total_pivot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138 entries, 0 to 137
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  138 non-null    object 
 1   2015     138 non-null    float64
 2   2017     138 non-null    float64
 3   2021     138 non-null    float64
dtypes: float64(3), object(1)
memory usage: 4.4+ KB


In [78]:
lfpr_male_pivot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138 entries, 0 to 137
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  138 non-null    object 
 1   2015     138 non-null    float64
 2   2017     138 non-null    float64
 3   2021     138 non-null    float64
dtypes: float64(3), object(1)
memory usage: 4.4+ KB


In [79]:
lfpr_female_pivot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138 entries, 0 to 137
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Country  138 non-null    object 
 1   2015     138 non-null    float64
 2   2017     138 non-null    float64
 3   2021     138 non-null    float64
dtypes: float64(3), object(1)
memory usage: 4.4+ KB


In [80]:
# Standardize Data
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [81]:
value_columns = [2015, 2017, 2021]
scaler = StandardScaler()

# Z-score normalization
lfpr_total_pivot.loc[:, value_columns] = scaler.fit_transform(lfpr_total_pivot[value_columns])
lfpr_male_pivot.loc[:, value_columns] = scaler.fit_transform(lfpr_male_pivot[value_columns])
lfpr_female_pivot.loc[:, value_columns] = scaler.fit_transform(lfpr_female_pivot[value_columns])

slavery_reduced.loc[:, value_columns] = scaler.fit_transform(slavery_reduced[value_columns])

In [82]:
slavery_reduced.head()

Unnamed: 0,Country,2015,2017,2021
0,Afghanistan,1.425598,1.78606,0.777161
1,Albania,-0.479608,0.101637,0.633768
2,Algeria,0.274121,-0.360754,-0.549223
3,Angola,0.302161,0.134665,-0.286337
121,Argentina,-0.232309,-0.514885,-0.274387


In [83]:
lfpr_total_pivot.head()

Unnamed: 0,Country,2015,2017,2021
0,Afghanistan,-1.692285,-1.669052,-2.122162
1,Albania,-0.178399,0.063976,0.169596
2,Algeria,-2.028683,-1.983959,-2.01918
3,Angola,0.93795,0.855275,0.918185
4,Argentina,0.009534,-0.048575,0.170895


In [84]:
slavery_reduced.describe()

Unnamed: 0,2015,2017,2021
count,138.0,138.0,133.0
mean,-1.415937e-16,5.792468000000001e-17,6.344132000000001e-17
std,1.003643,1.003643,1.003781
min,-1.11628,-0.6249776,-0.7165151
25%,-0.4992309,-0.4240577,-0.3938812
50%,-0.153779,-0.2341472,-0.1548931
75%,0.3023753,0.09888431,0.1318927
max,7.918369,9.580649,10.01405


In [85]:
lfpr_total_pivot.describe()

Unnamed: 0,2015,2017,2021
count,138.0,138.0,138.0
mean,8.173816e-16,-3.2180380000000005e-17,1.274343e-15
std,1.003643,1.003643,1.003643
min,-3.039078,-3.02392,-2.940041
25%,-0.4944412,-0.5129598,-0.5868746
50%,0.1134599,0.09877447,0.167604
75%,0.7187791,0.7009256,0.7449823
max,1.982372,1.954956,1.898635


### Finding correlation between lfpr and slavery

In [86]:
slavLfpr_total_2015_corr = slavery_reduced[2015].corr(lfpr_total_pivot[2015])
slavLfpr_male_2015_corr = slavery_reduced[2015].corr(lfpr_male_pivot[2015])
slavLfpr_female_2015_corr = slavery_reduced[2015].corr(lfpr_female_pivot[2015])

In [87]:
print("slavLfpr_total_2015_corr: ", slavLfpr_total_2015_corr)
print("slavLfpr_male_2015_corr: ", slavLfpr_male_2015_corr)
print("slavLfpr_female_2015_corr: ", slavLfpr_female_2015_corr)

slavLfpr_total_2015_corr:  0.023039783134268153
slavLfpr_male_2015_corr:  -0.01612454755860695
slavLfpr_female_2015_corr:  0.04696767688851087


In [88]:
slavLfpr_total_2017_corr = slavery_reduced[2017].corr(lfpr_total_pivot[2017])
slavLfpr_male_2017_corr = slavery_reduced[2017].corr(lfpr_male_pivot[2017])
slavLfpr_female_2017_corr = slavery_reduced[2017].corr(lfpr_female_pivot[2017])

In [89]:
print("slavLfpr_total_2017_corr: ", slavLfpr_total_2017_corr)
print("slavLfpr_male_2017_corr: ", slavLfpr_male_2017_corr)
print("slavLfpr_female_2017_corr: ", slavLfpr_female_2017_corr)

slavLfpr_total_2017_corr:  0.03660022550745558
slavLfpr_male_2017_corr:  0.00940518756239009
slavLfpr_female_2017_corr:  0.056578909528607


In [90]:
slavLfpr_total_2021_corr = slavery_reduced[2021].corr(lfpr_total_pivot[2021])
slavLfpr_male_2021_corr = slavery_reduced[2021].corr(lfpr_male_pivot[2021])
slavLfpr_female_2021_corr = slavery_reduced[2021].corr(lfpr_female_pivot[2021])

In [91]:
print("slavLfpr_total_2021_corr: ", slavLfpr_total_2021_corr)
print("slavLfpr_male_2021_corr: ", slavLfpr_male_2021_corr)
print("slavLfpr_female_2021_corr: ", slavLfpr_female_2021_corr)

slavLfpr_total_2021_corr:  0.00014366701498395978
slavLfpr_male_2021_corr:  -0.023372178683769895
slavLfpr_female_2021_corr:  0.02341750376760223


### gdppercapita and slavery preprocess

In [92]:
gdppercapita = pd.read_csv("../Datasets/gdppercapita.csv")

In [93]:
gdppercapita.head()

Unnamed: 0,Country,Year,GDP per capita
0,Afghanistan,2002.0,1280.4631
1,Afghanistan,2003.0,1292.3335
2,Afghanistan,2004.0,1260.0605
3,Afghanistan,2005.0,1352.3207
4,Afghanistan,2006.0,1366.9932


In [94]:
gdppercapita = gdppercapita[gdppercapita['Year'].isin([2015, 2017, 2021])]

In [95]:
common_gdppercapita_slavery = set(slavery['Country']).intersection(gdppercapita['Country'])

In [96]:
diff_set1 = common_lfpr_slavery - common_gdppercapita_slavery
diff_set2 = common_gdppercapita_slavery - common_lfpr_slavery
print("Countries in set1 but not in set2:", diff_set1)
print("Countries in set2 but not in set1:", diff_set2)

Countries in set1 but not in set2: {'Timor-Leste', 'South Sudan', 'Eritrea', 'Cuba'}
Countries in set2 but not in set1: {'Slovakia', 'Kosovo', 'Kyrgyzstan', 'Cape Verde', 'Gambia', 'Egypt'}


In [97]:
# Filter the DataFrames to keep only rows with common entries
slavery_filtered = slavery[slavery['Country'].isin(common_gdppercapita_slavery)]

gdppercapita_filtered = gdppercapita[gdppercapita['Country'].isin(common_gdppercapita_slavery)]

In [98]:
# Pivot the DataFrame
gdppercapita_pivot = gdppercapita_filtered.pivot(index='Country', columns='Year', values='GDP per capita').reset_index()

In [99]:
slavery_reduced = slavery_filtered[["Country", "2015", "2017", "2021"]]

slavery_reduced.columns = ["Country", 2015, 2017, 2021]
gdppercapita_pivot.columns = ["Country", 2015, 2017, 2021]

slavery_reduced = slavery_reduced.sort_values(by='Country')
gdppercapita_pivot = gdppercapita_pivot.sort_values(by='Country')

In [100]:
gdppercapita_pivot.describe()

Unnamed: 0,2015,2017,2021
count,140.0,140.0,140.0
mean,20369.611508,21020.987877,21810.009802
std,21105.582828,21468.031571,22798.554487
min,781.5793,750.7876,714.0667
25%,4465.25305,4790.60245,4908.8628
50%,12572.712,13596.941,14037.8935
75%,28991.882,30004.1825,32780.4675
max,113182.73,114985.84,118510.0


In [101]:
slavery_reduced.describe()

Unnamed: 0,2015,2017,2021
count,140.0,140.0,134.0
mean,5.0252,5.187143,5.873881
std,4.344604,5.037555,4.101236
min,0.171711,0.3,0.5
25%,2.904559,2.175,3.225
50%,4.542213,3.9,5.2
75%,6.382604,6.475,7.6
max,39.73012,40.0,32.0


In [102]:
value_columns = [2015, 2017, 2021]
scaler = StandardScaler()

# Z-score normalization
gdppercapita_pivot.loc[:, value_columns] = scaler.fit_transform(gdppercapita_pivot[value_columns])
slavery_reduced.loc[:, value_columns] = scaler.fit_transform(slavery_reduced[value_columns])

### Finding correlation between gdppercapita and slavery

In [103]:
slavGdp_2015_corr = slavery_reduced[2015].corr(gdppercapita_pivot[2015])
slavGdp_2017_corr = slavery_reduced[2017].corr(gdppercapita_pivot[2017])
slavGdp_2021_corr = slavery_reduced[2021].corr(gdppercapita_pivot[2021])

print("slavGdp_2015_corr: ", slavGdp_2015_corr)
print("slavGdp_2017_corr: ", slavGdp_2017_corr)
print("slavGdp_2021_corr: ", slavGdp_2021_corr)

slavGdp_2015_corr:  0.02623128277039244
slavGdp_2017_corr:  -0.09641837328440842
slavGdp_2021_corr:  -0.04043755971998401


### migration and slavery preprocess

In [104]:
migration = pd.read_csv("../Datasets/migration.csv")

In [105]:
migration.head()

Unnamed: 0,Country,Year,Migration
0,Afghanistan,1960,2606
1,Afghanistan,1961,6109
2,Afghanistan,1962,7016
3,Afghanistan,1963,6681
4,Afghanistan,1964,7079


In [106]:
migration = migration[migration['Year'].isin([2015, 2017, 2021])]

In [107]:
common_migration_slavery = set(slavery['Country']).intersection(migration['Country'])

In [108]:
diff_set1 = common_lfpr_slavery - common_migration_slavery
diff_set2 = common_migration_slavery - common_lfpr_slavery
print("Countries in set1 but not in set2:", diff_set1)
print("Countries in set2 but not in set1:", diff_set2)

Countries in set1 but not in set2: set()
Countries in set2 but not in set1: {'Kosovo'}


In [109]:
# Filter the DataFrames to keep only rows with common entries
slavery_filtered = slavery[slavery['Country'].isin(common_migration_slavery)]

migration_filtered = migration[migration['Country'].isin(common_migration_slavery)]

In [110]:
# Pivot the DataFrame
migration_pivot = migration_filtered.pivot(index='Country', columns='Year', values='Migration').reset_index()

In [111]:
slavery_reduced = slavery_filtered[["Country", "2015", "2017", "2021"]]

slavery_reduced.columns = ["Country", 2015, 2017, 2021]
migration_pivot.columns = ["Country", 2015, 2017, 2021]

slavery_reduced = slavery_reduced.sort_values(by='Country')
migration_pivot = migration_pivot.sort_values(by='Country')

In [112]:
migration_pivot.describe()

Unnamed: 0,2015,2017,2021
count,139.0,139.0,139.0
mean,-13325.91,-2414.331,-2181.748201
std,218421.9,172504.7,83431.971912
min,-2172159.0,-1574581.0,-471395.0
25%,-25328.0,-21208.5,-14977.0
50%,-3018.0,-1398.0,-1775.0
75%,15511.5,22386.0,9810.0
max,644996.0,430803.0,312735.0


In [113]:
value_columns = [2015, 2017, 2021]
scaler = StandardScaler()

# Z-score normalization
migration_pivot.loc[:, value_columns] = scaler.fit_transform(migration_pivot[value_columns])
slavery_reduced.loc[:, value_columns] = scaler.fit_transform(slavery_reduced[value_columns])

  8.66033617e-02 -6.41307946e-02  1.02369608e+00  5.90213047e-01
  6.62114036e-02  1.95630036e-01 -1.25524997e+00  6.09319173e-02
  1.24092768e-01  3.23173969e-01  7.66968584e-02 -9.48841464e-02
  7.10865427e-02  8.31204630e-02 -9.32798155e-03 -2.95361455e-02
 -1.02520865e+00 -1.78951579e-01  2.54246577e-01  1.13062751e+00
 -2.29223867e-01 -1.63540442e-01  3.17205255e-01 -6.52713098e-01
 -1.27452465e-01  7.99086607e-02 -3.77207689e-04 -4.80664609e-03
  8.26196240e-02  1.64008257e-01  6.91475147e-02 -7.80394149e-02
  1.28958718e-01 -1.74402674e-01  1.42531914e-01 -1.82521780e-01
  8.10252100e-02  3.49713840e-01  1.18358392e-01  1.47645985e-01
  1.16557209e-01  4.09239435e-05  1.88961913e+00  8.22776215e-05
 -7.34675361e-02 -1.60066733e-01 -8.27576018e-03  5.48299524e-02
  3.12537611e-02 -8.90119242e-02  3.13088994e-02  1.26321272e-01
  6.80171808e-02 -2.12175656e+00 -1.13332482e-01 -2.83231043e-02
  1.21561004e-01  1.16593968e-01  2.17598029e-01  9.17549192e-03
  7.51446443e-01  2.31658

### Finding correlation between migration and slavery

In [114]:
slavMigration_2015_corr = slavery_reduced[2015].corr(migration_pivot[2015])
slavMigration_2017_corr = slavery_reduced[2017].corr(migration_pivot[2017])
slavMigration_2021_corr = slavery_reduced[2021].corr(migration_pivot[2021])

print("slavMigration_2015_corr: ", slavMigration_2015_corr)
print("slavMigration_2017_corr: ", slavMigration_2017_corr)
print("slavMigration_2021_corr: ", slavMigration_2021_corr)

slavMigration_2015_corr:  -0.04227350699987872
slavMigration_2017_corr:  0.0043265376303777275
slavMigration_2021_corr:  -0.04038406787972066


### corruption and slavery preprocess

In [115]:
corruption = pd.read_csv("../Datasets/corruption.csv")

In [116]:
corruption.head()

Unnamed: 0,Country,Year,Corruption
0,Afghanistan,2012,8.0
1,Afghanistan,2013,8.0
2,Afghanistan,2014,12.0
3,Afghanistan,2015,11.0
4,Afghanistan,2016,15.0


In [117]:
corruption = corruption[corruption['Year'].isin([2015, 2017, 2021])]

In [118]:
common_corruption_slavery = set(slavery['Country']).intersection(corruption['Country'])

In [119]:
diff_set1 = common_lfpr_slavery - common_corruption_slavery
diff_set2 = common_corruption_slavery - common_lfpr_slavery
print("Countries in set1 but not in set2:", diff_set1)
print("Countries in set2 but not in set1:", diff_set2)

Countries in set1 but not in set2: set()
Countries in set2 but not in set1: {'Kyrgyzstan', 'Kosovo', 'Gambia', 'Yemen', 'Egypt'}


In [120]:
# Equatorial Guinea has Nan values
common_corruption_slavery.remove("Equatorial Guinea")

In [121]:
# Filter the DataFrames to keep only rows with common entries
slavery_filtered = slavery[slavery['Country'].isin(common_corruption_slavery)]

corruption_filtered = corruption[corruption['Country'].isin(common_corruption_slavery)]

In [122]:
# Pivot the DataFrame
corruption_pivot = corruption_filtered.pivot(index='Country', columns='Year', values='Corruption').reset_index()

In [123]:
slavery_reduced = slavery_filtered[["Country", "2015", "2017", "2021"]]

slavery_reduced.columns = ["Country", 2015, 2017, 2021]
corruption_pivot.columns = ["Country", 2015, 2017, 2021]

slavery_reduced = slavery_reduced.sort_values(by='Country')
corruption_pivot = corruption_pivot.sort_values(by='Country')

In [124]:
# Providing new rankings

# Replace the values with ranks (1 for the lowest, etc.)
slavery_ranked = slavery_reduced.copy()
slavery_ranked.iloc[:, 1:] = slavery_reduced.iloc[:, 1:].rank(axis=0, method='min')

corruption_ranked = corruption_pivot.copy()
corruption_ranked.iloc[:, 1:] = corruption_pivot.iloc[:, 1:].rank(axis=0, method='min')

In [125]:
corruption_ranked.describe()

Unnamed: 0,2015,2017,2021
count,141.0,142.0,142.0
mean,69.787234,70.380282,70.352113
std,41.152644,41.389301,41.275273
min,1.0,1.0,1.0
25%,32.0,32.0,34.75
50%,68.0,69.0,71.0
75%,104.0,106.75,106.75
max,140.0,142.0,140.0


In [126]:
slavery_ranked.describe()

Unnamed: 0,2015,2017,2021
count,142.0,142.0,137.0
mean,71.5,70.704225,68.357664
std,41.135953,41.426135,39.80536
min,1.0,1.0,1.0
25%,36.25,36.0,35.0
50%,71.5,71.0,69.0
75%,106.75,106.5,103.0
max,142.0,142.0,137.0


### Finding correlation between migration and slavery

In [127]:
slavCorruption_2015_corr = slavery_ranked[2015].corr(corruption_ranked[2015])
slavCorruption_2017_corr = slavery_ranked[2017].corr(corruption_ranked[2017])
slavCorruption_2021_corr = slavery_ranked[2021].corr(corruption_ranked[2021])

print("slavCorruption_2015_corr: ", slavCorruption_2015_corr)
print("slavCorruption_2017_corr: ", slavCorruption_2017_corr)
print("slavCorruption_2021_corr: ", slavCorruption_2021_corr)

slavCorruption_2015_corr:  -0.16137567476901077
slavCorruption_2017_corr:  -0.022211976676156927
slavCorruption_2021_corr:  -0.08214036927882974


### Democracy and slavery preprocess

In [128]:
democracy = pd.read_csv("../Datasets/democracy.csv")

In [129]:
democracy.head()

Unnamed: 0,Country,Year,Democracy score
0,Afghanistan,2006,3.06
1,Afghanistan,2008,3.02
2,Afghanistan,2010,2.48
3,Afghanistan,2011,2.48
4,Afghanistan,2012,2.48


In [130]:
democracy = democracy[democracy['Year'].isin([2015, 2017, 2021])]

In [131]:
common_democracy_slavery = set(slavery['Country']).intersection(democracy['Country'])

In [132]:
diff_set1 = common_lfpr_slavery - common_democracy_slavery
diff_set2 = common_democracy_slavery - common_lfpr_slavery
print("Countries in set1 but not in set2:", diff_set1)
print("Countries in set2 but not in set1:", diff_set2)

Countries in set1 but not in set2: {'Timor-Leste', 'South Sudan', 'Somalia', 'Barbados'}
Countries in set2 but not in set1: {'Slovakia', 'Kyrgyzstan', 'Cape Verde', 'Gambia', 'Yemen', 'Egypt'}


In [133]:
# Filter the DataFrames to keep only rows with common entries
slavery_filtered = slavery[slavery['Country'].isin(common_democracy_slavery)]

democracy_filtered = democracy[democracy['Country'].isin(common_democracy_slavery)]

In [134]:
# Pivot the DataFrame
democracy_pivot = democracy_filtered.pivot(index='Country', columns='Year', values='Democracy score').reset_index()

democracy_final = democracy_pivot.dropna()

In [135]:
slavery_reduced = slavery_filtered[["Country", "2015", "2017", "2021"]]

slavery_reduced.columns = ["Country", 2015, 2017, 2021]
democracy_final.columns = ["Country", 2015, 2017, 2021]

slavery_reduced = slavery_reduced.sort_values(by='Country')
democracy_final = democracy_final.sort_values(by='Country')

In [136]:
democracy_final.describe()

Unnamed: 0,2015,2017,2021
count,140.0,140.0,140.0
mean,5.682643,5.612143,5.416571
std,2.174846,2.175687,2.293766
min,1.5,1.5,0.32
25%,3.85,3.7275,3.3925
50%,5.965,5.955,5.73
75%,7.405,7.2425,7.165
max,9.93,9.87,9.75


In [137]:
value_columns = [2015, 2017, 2021]
scaler = StandardScaler()

# Z-score normalization
democracy_final.loc[:, value_columns] = scaler.fit_transform(democracy_final[value_columns])
slavery_reduced.loc[:, value_columns] = scaler.fit_transform(slavery_reduced[value_columns])

### Finding correlation between democracy and slavery

In [138]:
slavDemocracy_2015_corr = slavery_reduced[2015].corr(democracy_final[2015])
slavDemocracy_2017_corr = slavery_reduced[2017].corr(democracy_final[2017])
slavDemocracy_2021_corr = slavery_reduced[2021].corr(democracy_final[2021])

print("slavDemocracy_2015_corr: ", slavDemocracy_2015_corr)
print("slavDemocracy_2017_corr: ", slavDemocracy_2017_corr)
print("slavDemocracy_2021_corr: ", slavDemocracy_2021_corr)

slavDemocracy_2015_corr:  -0.03053967081589
slavDemocracy_2017_corr:  0.04275064921316011
slavDemocracy_2021_corr:  0.012178865613448513
