# Data Preparation

In [1]:
# Import necessary libraries

%matplotlib inline

import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly.express as px

import plotly.io as pio
pio.renderers.default = 'notebook'

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

In [70]:
# Load data

kidnap_count = pd.read_csv('/home/muhammad/global-kidnap/Data/National Kidnapping.csv')
kidnap_rate = pd.read_csv('./Data/National Kidnap Rate.csv')
population = pd.read_csv('./Data/Population.csv')
gdp = pd.read_csv('./Data/GDP.csv')

# Check the first five rows of the life expectancy dataset
kidnap_count.head()

Unnamed: 0,location,variable,Unit,Date,Value
0,Rwanda,Kidnapping at the National Level Count,Number,2008,8
1,Rwanda,Kidnapping at the National Level Count,Number,2009,8
2,Rwanda,Kidnapping at the National Level Count,Number,2010,12
3,Rwanda,Kidnapping at the National Level Count,Number,2011,12
4,Rwanda,Kidnapping at the National Level Count,Number,2012,10


In [71]:
# Check the number of rows

kidnap_count.shape

(1435, 5)

From the above, we found that there are **266** rows (records) and **66** attributes (columns) in df_lif_exp dataframe

In [72]:
# Check the first 5 rows in the df_popu dataframe

kidnap_rate.head()

Unnamed: 0,location,variable,Unit,Date,Value
0,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2008,0.084
1,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2009,0.082
2,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2010,0.12
3,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2011,0.117
4,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2012,0.095


In [73]:
# Check the number of rows

kidnap_rate.shape

(1435, 5)

In [74]:
population.head()

Unnamed: 0,country,series,Unit,Date,Value
0,American Samoa,"Population, total",Number,1960,20127
1,American Samoa,"Population, total",Number,1961,20605
2,American Samoa,"Population, total",Number,1962,21246
3,American Samoa,"Population, total",Number,1963,22029
4,American Samoa,"Population, total",Number,1964,22850


In [75]:
population.shape

(13717, 5)

In [76]:
gdp.head()

Unnamed: 0,country,series,Unit,Date,Value
0,"Congo, Dem. Rep.",GDP (constant 2010 US$),constant 2010 US$,1970,25829140000.0
1,"Congo, Dem. Rep.",GDP (constant 2010 US$),constant 2010 US$,1990,29404730000.0
2,"Congo, Dem. Rep.",GDP (constant 2010 US$),constant 2010 US$,2000,16528060000.0
3,"Congo, Dem. Rep.",GDP (constant 2010 US$),constant 2010 US$,2005,19923850000.0
4,"Congo, Dem. Rep.",GDP (constant 2010 US$),constant 2010 US$,2010,26093580000.0


In [77]:
gdp.shape

(2733, 5)

From the above, we found that there are **266** rows (records) and **66** attributes (columns) in df_popu dataframe

In [78]:
# Check for available datasets in the GeoPandas dataset function

gpd.datasets.available

['naturalearth_lowres', 'naturalearth_cities', 'nybb']

In [79]:
# Read in the 'naturalearth_lowres' dataset

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world.head()

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
0,920938,Oceania,Fiji,FJI,8374.0,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,53950935,Africa,Tanzania,TZA,150600.0,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,603253,Africa,W. Sahara,ESH,906.5,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,35623680,North America,Canada,CAN,1674000.0,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,326625791,North America,United States of America,USA,18560000.0,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


In [80]:
# Check the number of rows

world.shape

(177, 6)

From the above, we found that there are **177** rows (records) and **6** attributes (columns) in the 'world' dataframe

In [81]:
# Create a new dataframe from 'world' picking only ['name', 'iso_a3', 'geometry']

df_world = world[['name', 'iso_a3', 'geometry', 'continent']]
df_world.head()

Unnamed: 0,name,iso_a3,geometry,continent
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000...",Oceania
1,Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...",Africa
2,W. Sahara,ESH,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948...",Africa
3,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742...",North America
4,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",North America


In [82]:
# Rename 'iso_a3' and 'geometry' columns

df_world.rename(columns={'name':'country', 'iso_a3':'Country Code', 'geometry':'Geometry', 'continent':'Continent'}, inplace=True)

In [83]:
df_world = df_world[['country', 'Continent']]
df_world.head()

Unnamed: 0,country,Continent
0,Fiji,Oceania
1,Tanzania,Africa
2,W. Sahara,Africa
3,Canada,North America
4,United States of America,North America


In [90]:
# Merge the world dataset on life expectany. This picks only countries (in the life expectancy dataframe) 
# that have matching country code (in the world dataframe)

kidnap = kidnap_rate.merge(kidnap_count, on=['location', 'Date'])
kidnap.head()

Unnamed: 0,location,variable_x,Unit_x,Date,Value_x,variable_y,Unit_y,Value_y
0,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2008,0.084,Kidnapping at the National Level Count,Number,8
1,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2009,0.082,Kidnapping at the National Level Count,Number,8
2,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2010,0.12,Kidnapping at the National Level Count,Number,12
3,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2011,0.117,Kidnapping at the National Level Count,Number,12
4,Rwanda,Kidnapping at the National Level Rate,"rates per 100,000 population",2012,0.095,Kidnapping at the National Level Count,Number,10


In [91]:
kidnap = kidnap[['location', 'Date', 'Value_x', 'Value_y']]
kidnap.rename(columns={'location':'country', 'Value_x':'Kidnapping Rate', 'Value_y':'Kidnapping Count'}, inplace=True)        
kidnap.head()

Unnamed: 0,country,Date,Kidnapping Rate,Kidnapping Count
0,Rwanda,2008,0.084,8
1,Rwanda,2009,0.082,8
2,Rwanda,2010,0.12,12
3,Rwanda,2011,0.117,12
4,Rwanda,2012,0.095,10


In [92]:
kidnap = kidnap.merge(population, on=['country', 'Date'])
kidnap.head()

Unnamed: 0,country,Date,Kidnapping Rate,Kidnapping Count,series,Unit,Value
0,Rwanda,2008,0.084,8,"Population, total",Number,9524532
1,Rwanda,2009,0.082,8,"Population, total",Number,9782770
2,Rwanda,2010,0.12,12,"Population, total",Number,10039338
3,Rwanda,2011,0.117,12,"Population, total",Number,10293333
4,Rwanda,2012,0.095,10,"Population, total",Number,10549668


In [93]:
kidnap = kidnap.merge(gdp, on=['country', 'Date'])
kidnap.head()

Unnamed: 0,country,Date,Kidnapping Rate,Kidnapping Count,series_x,Unit_x,Value_x,series_y,Unit_y,Value_y
0,Rwanda,2010,0.12,12,"Population, total",Number,10039338,GDP (constant 2010 US$),constant 2010 US$,6015646000.0
1,Rwanda,2011,0.117,12,"Population, total",Number,10293333,GDP (constant 2010 US$),constant 2010 US$,6494395000.0
2,Rwanda,2012,0.095,10,"Population, total",Number,10549668,GDP (constant 2010 US$),constant 2010 US$,7055609000.0
3,Rwanda,2013,0.194,21,"Population, total",Number,10811538,GDP (constant 2010 US$),constant 2010 US$,7388623000.0
4,Algeria,2005,0.305,101,"Population, total",Number,33149720,GDP (constant 2010 US$),constant 2010 US$,124380900000.0


In [94]:
kidnap = kidnap[['country', 'Date', 'Kidnapping Rate', 'Kidnapping Count', 'Value_x', 'Value_y']]
kidnap.rename(columns={'Value':'Population', 'Date':'Year', 'Value_x':'Population', 'Value_y':'GDP'}, inplace=True)
kidnap.head()

Unnamed: 0,country,Year,Kidnapping Rate,Kidnapping Count,Population,GDP
0,Rwanda,2010,0.12,12,10039338,6015646000.0
1,Rwanda,2011,0.117,12,10293333,6494395000.0
2,Rwanda,2012,0.095,10,10549668,7055609000.0
3,Rwanda,2013,0.194,21,10811538,7388623000.0
4,Algeria,2005,0.305,101,33149720,124380900000.0


In [95]:
kidnap.shape

(780, 6)

In [96]:
# Merge the world dataset on life expectany. This picks only countries (in the population dataframe) 
# that have matching country code (in the world dataframe)

kidnap = df_world.merge(kidnap, on='country')
kidnap.head()

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP
0,Canada,North America,2005,12.113,3896,32243753,1243745000000.0
1,Canada,North America,2010,12.595,4301,34004889,1399482000000.0
2,Canada,North America,2011,10.944,3780,34339328,1443522000000.0
3,Canada,North America,2012,10.415,3637,34714222,1468960000000.0
4,Canada,North America,2013,9.154,3231,35082954,1503174000000.0


In [97]:
#kidnap['kidnap_%_Change'] = kidnap['Kidnapping Count'].pct_change()
kidnap['kidnap_rate_%_Change'] = kidnap['Kidnapping Rate'].pct_change()
kidnap.head()

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
0,Canada,North America,2005,12.113,3896,32243753,1243745000000.0,
1,Canada,North America,2010,12.595,4301,34004889,1399482000000.0,0.039792
2,Canada,North America,2011,10.944,3780,34339328,1443522000000.0,-0.131084
3,Canada,North America,2012,10.415,3637,34714222,1468960000000.0,-0.048337
4,Canada,North America,2013,9.154,3231,35082954,1503174000000.0,-0.121075


For data consistency, we will drop the year **2005** from our dataframe

In [105]:
kidnap = kidnap[kidnap['Year'].between(2010, 2018)]
kidnap.head()

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
1,Canada,North America,2010,12.595,4301,34004889,1399482000000.0,0.039792
2,Canada,North America,2011,10.944,3780,34339328,1443522000000.0,-0.131084
3,Canada,North America,2012,10.415,3637,34714222,1468960000000.0,-0.048337
4,Canada,North America,2013,9.154,3231,35082954,1503174000000.0,-0.121075
5,Canada,North America,2014,9.225,3290,35437435,1546316000000.0,0.007756


In [106]:
# Check for the number of rows

kidnap.shape

(604, 8)

In [107]:
kidnap.isnull().sum()

country                  0
Continent                0
Year                     0
Kidnapping Rate          0
Kidnapping Count         0
Population               0
GDP                      0
kidnap_rate_%_Change    16
dtype: int64

In [108]:
kidnap.nunique(dropna=True)

country                  89
Continent                 6
Year                      9
Kidnapping Rate         482
Kidnapping Count        305
Population              604
GDP                     604
kidnap_rate_%_Change    543
dtype: int64

In [132]:
kidnap.nlargest(5, 'Kidnapping Rate')

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
225,Lebanon,Asia,2016,177.7355,10460,6714281,50703210000.0,10.55327
432,Turkey,Asia,2014,42.669,32953,77229262,814743700000.0,0.038478
431,Turkey,Asia,2013,41.088,31196,75925454,776392100000.0,1.283682
222,Lebanon,Asia,2013,20.531,1214,5913016,48636790000.0,0.249376
428,Turkey,Asia,2010,18.784,13586,72326992,614171100000.0,0.459859


In [133]:
kidnap.nsmallest(5, 'Kidnapping Rate')

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
155,Guyana,South America,2012,0.0,0,755388,4032798000.0,-1.0
156,Guyana,South America,2013,0.0,0,759281,4180128000.0,
157,Guyana,South America,2015,0.0,0,767433,4279840000.0,
158,Guyana,South America,2016,0.0,0,771363,4442795000.0,
159,Guyana,South America,2017,0.0,0,775218,4608711000.0,


In [109]:
kidnap.corr()

Unnamed: 0,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
Year,1.0,0.009828,-0.044942,-0.071548,-0.012369,0.045924
Kidnapping Rate,0.009828,1.0,0.277231,0.033325,0.05102,0.026771
Kidnapping Count,-0.044942,0.277231,1.0,0.76802,0.2082,0.020061
Population,-0.071548,0.033325,0.76802,1.0,0.346715,-0.005499
GDP,-0.012369,0.05102,0.2082,0.346715,1.0,-0.034269
kidnap_rate_%_Change,0.045924,0.026771,0.020061,-0.005499,-0.034269,1.0


In [111]:
fig = px.bar(kidnap, x='Kidnapping Rate', y='country',
             hover_data=['Population', 'GDP',],  
             color='Kidnapping Count', animation_frame='Year', template='plotly_dark', color_continuous_scale='Viridis',
             labels={'Percentage Change':'Percentage Change in Life Expectancy'}, height=400)
fig.show()

In [112]:
fig = px.bar(kidnap, x='Kidnapping Rate', y='Continent',
             hover_data=['Population', ],  
             color='Kidnapping Count', animation_frame='Year', template='plotly_dark', color_continuous_scale='Viridis',
             labels={'Percentage Change':'Percentage Change in Life Expectancy'}, height=400)
fig.show()

In [114]:
# Plot a scatter plot of the average life expectancy for each country between 2000 and 2010 
# (taking the average for each country over the years)

fig = px.scatter(kidnap, x='Kidnapping Count', y='Population', color='country', animation_frame='Year',
                 hover_data=['Kidnapping Rate', 'Continent', 'GDP', ])
fig.update_layout(title='Scatter Plot of Population VS Number of Kidnaps from year 2003 to 2018',
                  template="plotly_dark")
fig.show()

In [134]:
# Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010
fig = px.choropleth(kidnap,locations='country',locationmode='country names', 
                    color='Kidnapping Rate', color_continuous_scale='Viridis', animation_frame='Year',
                    hover_data=['Continent', 'Kidnapping Count', 'Population', 'GDP',])
fig.update_layout(title='Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010',
                  template="plotly_dark")
fig.show()

In [123]:
# Create a new dataframe containing the average Population, Kidnapping Count, Kidnapping Rate, etc. 
# over the years under study

df_kidnap = kidnap[['country', 'Continent', 'Kidnapping Rate', 'Kidnapping Count', 'Population', 'GDP',
                    'kidnap_rate_%_Change',]].groupby('country',
                as_index=False).mean()
df_kidnap.head()

Unnamed: 0,country,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
0,Albania,0.149,4.333333,2888828.0,11344350000.0,0.421225
1,Algeria,0.663833,254.5,37802440.0,152712300000.0,0.367784
2,Armenia,1.536556,44.777778,2911813.0,10170380000.0,0.159965
3,Australia,2.399556,562.333333,23477830.0,1320698000000.0,-0.125083
4,Austria,0.055778,4.777778,8580963.0,382563100000.0,0.312303


In [124]:
df_kidnap.nlargest(5, 'Kidnapping Rate')

Unnamed: 0,country,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
46,Lebanon,38.684071,2265.285714,5873390.0,48136100000.0,1.878094
83,Turkey,19.945109,23346.571429,75990470.0,765897600000.0,0.083673
7,Belgium,10.4585,1169.625,11173750.0,452767900000.0,0.022362
16,Canada,10.370875,3673.125,35242060.0,1513743000000.0,-0.017528
61,Pakistan,9.845889,19182.0,195533700.0,263177900000.0,15.736113


In [125]:
df_kidnap.nsmallest(5, 'Kidnapping Rate')

Unnamed: 0,country,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
72,Senegal,0.014,2.0,14578450.0,17774770000.0,-0.854167
65,Philippines,0.021333,20.333333,95583160.0,239961300000.0,0.158942
49,Madagascar,0.023167,5.5,22671250.0,10553170000.0,
54,Myanmar,0.024143,12.428571,52281690.0,56852790000.0,
25,Finland,0.032444,1.777778,5451628.0,239130400000.0,0.572403


In [127]:
fig = px.bar(df_kidnap, x='Kidnapping Rate', y='country',
             hover_data=['Kidnapping Count', 'Population', 'GDP', 'kidnap_rate_%_Change',] ,  
             color='Kidnapping Count', template='plotly_dark', color_continuous_scale='Viridis',
             labels={'Percentage Change':'Percentage Change in Life Expectancy'}, height=400)
fig.show()

In [128]:
# Plot a scatter plot of the average life expectancy for each country between 2000 and 2010 
# (taking the average for each country over the years)

fig = px.scatter(df_kidnap, x='Kidnapping Count', y='Population', color='country', color_continuous_scale='Viridis',
                 hover_data=['Kidnapping Rate', 'GDP', 'kidnap_rate_%_Change'])
fig.update_layout(title='Scatter Plot of Average Population VS Average Number of Kidnaps from year 2003 to 2018',
                  template="plotly_dark")
fig.show()

In [129]:
df_kidnap.corr()

Unnamed: 0,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
Kidnapping Rate,1.0,0.310092,0.054757,0.090444,0.054588
Kidnapping Count,0.310092,1.0,0.862714,0.256232,-0.010829
Population,0.054757,0.862714,1.0,0.350309,-0.030593
GDP,0.090444,0.256232,0.350309,1.0,-0.068336
kidnap_rate_%_Change,0.054588,-0.010829,-0.030593,-0.068336,1.0


## Africa

In [135]:
africa = kidnap[kidnap['Continent'] == 'Africa']
africa.head()

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
41,Kenya,Africa,2010,0.188,79,42030684,48919390000.0,-0.546988
42,Kenya,Africa,2011,0.127,55,43178270,51907510000.0,-0.324468
43,Kenya,Africa,2012,0.169,75,44343469,54276160000.0,0.330709
44,Kenya,Africa,2013,0.13,59,45519986,57466880000.0,-0.230769
45,Kenya,Africa,2014,0.071,33,46700063,60545450000.0,-0.453846


In [136]:
africa.nunique(dropna=True)

country                 14
Continent                1
Year                     9
Kidnapping Rate         65
Kidnapping Count        57
Population              65
GDP                     65
kidnap_rate_%_Change    65
dtype: int64

In [139]:
africa.nlargest(5, 'Kidnapping Rate')

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
56,South Africa,Africa,2017,9.569,5455,57009751,323398800000.0,0.07759
54,South Africa,Africa,2015,9.051,5013,55386369,317620500000.0,0.052197
55,South Africa,Africa,2016,8.88,4991,56207649,318888100000.0,-0.018893
53,South Africa,Africa,2014,8.602,4692,54544184,313873700000.0,25.714286
194,Benin,Africa,2017,4.743,530,11175192,12435940000.0,337.785714


In [140]:
africa.nsmallest(5, 'Kidnapping Rate')

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
226,Madagascar,Africa,2010,0.0,0,21151640,9925208000.0,-1.0
228,Madagascar,Africa,2012,0.004,1,22346641,10385450000.0,-0.2
227,Madagascar,Africa,2011,0.005,1,21743970,10081870000.0,inf
193,Senegal,Africa,2015,0.014,2,14578450,17774770000.0,-0.854167
48,Kenya,Africa,2018,0.021,11,51392570,75515430000.0,-0.723684


In [148]:
africa.corr()

Unnamed: 0,Year,Kidnapping Rate,Kidnapping Count,Population,GDP,kidnap_rate_%_Change
Year,1.0,0.412193,0.330102,0.001841,0.119535,0.207806
Kidnapping Rate,0.412193,1.0,0.927787,0.077148,0.450875,0.211661
Kidnapping Count,0.330102,0.927787,1.0,0.253968,0.62834,0.027477
Population,0.001841,0.077148,0.253968,1.0,0.851651,-0.085465
GDP,0.119535,0.450875,0.62834,0.851651,1.0,-0.062598
kidnap_rate_%_Change,0.207806,0.211661,0.027477,-0.085465,-0.062598,1.0


In [141]:
fig = px.bar(africa, x='Kidnapping Rate', y='country',
             hover_data=['Kidnapping Count', 'Population', 'GDP',],  
             color='Kidnapping Count', template='plotly_dark', color_continuous_scale='Viridis',
             animation_frame='Year',
             labels={'Percentage Change':'Percentage Change in Life Expectancy'}, height=400)
fig.show()

In [142]:
# Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010
fig = px.choropleth(africa,locations='country',locationmode='country names', 
                    color='Kidnapping Count', color_continuous_scale='Viridis', animation_frame='Year',
                    hover_data=['Kidnapping Rate', 'Population', 'GDP',])
fig.update_layout(title='Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010',
                  template="plotly_dark")
fig.show()

In [147]:
# Plot a scatter plot of the average life expectancy for each country between 2000 and 2010 
# (taking the average for each country over the years)

fig = px.scatter(africa, x='Kidnapping Count', y='Population', color='country', color_continuous_scale='Viridis', 
                 animation_frame='Year', hover_data=['Kidnapping Rate', 'GDP', 'kidnap_rate_%_Change'])
fig.update_layout(title='Scatter Plot of Average Population VS Average Number of Kidnaps from year 2010 to 2018',
                  template="plotly_dark")
fig.show()

In [153]:
# Create a new dataframe containing the average life expectancy for each country between 2000 and 2010

df_africa = africa[['country', 'Continent', 'Kidnapping Rate', 'Kidnapping Count', 'Population',
                    'GDP', 'kidnap_rate_%_Change']].groupby('country',
                as_index=False).mean()
df_africa.rename(columns={'Kidnapping Rate':'Average Kidnapping Rate', 'Kidnapping Count':'Average Kidnapping Count', 
                          'Population':'Average Population', 'GDP':'Average GDP', 
                          'kidnap_rate_%_Change':'Average kidnap_rate_%_Change'}, inplace=True)
df_africa.head()

Unnamed: 0,country,Average Kidnapping Rate,Average Kidnapping Count,Average Population,Average GDP,Average kidnap_rate_%_Change
0,Algeria,0.663833,254.5,37802440.0,152712300000.0,0.367784
1,Benin,4.743,530.0,11175190.0,12435940000.0,337.785714
2,Botswana,0.165,3.333333,2030377.0,12733390000.0,-0.182121
3,Burundi,0.5528,51.8,9252921.0,2965764000.0,0.157974
4,Cameroon,2.81725,636.875,22410380.0,28643340000.0,0.910847


In [154]:
df_africa.nlargest(5, 'Average Kidnapping Rate')

Unnamed: 0,country,Average Kidnapping Rate,Average Kidnapping Count,Average Population,Average GDP,Average kidnap_rate_%_Change
12,South Africa,9.0255,5037.75,55786990.0,318445300000.0,6.456295
1,Benin,4.743,530.0,11175190.0,12435940000.0,337.785714
4,Cameroon,2.81725,636.875,22410380.0,28643340000.0,0.910847
8,Morocco,2.065333,717.0,34726310.0,100739400000.0,0.234673
5,Guinea-Bissau,1.548,26.25,1715083.0,1031991000.0,2.167189


In [155]:
df_africa.nsmallest(5, 'Average Kidnapping Rate')

Unnamed: 0,country,Average Kidnapping Rate,Average Kidnapping Count,Average Population,Average GDP,Average kidnap_rate_%_Change
11,Senegal,0.014,2.0,14578450.0,17774770000.0,-0.854167
7,Madagascar,0.023167,5.5,22671250.0,10553170000.0,
6,Kenya,0.1265,57.5,46408070.0,60458270000.0,-0.047397
10,Rwanda,0.1315,13.75,10423470.0,6738568000.0,-0.030053
2,Botswana,0.165,3.333333,2030377.0,12733390000.0,-0.182121


In [156]:
df_africa.corr()

Unnamed: 0,Average Kidnapping Rate,Average Kidnapping Count,Average Population,Average GDP,Average kidnap_rate_%_Change
Average Kidnapping Rate,1.0,0.897064,0.03719,0.378259,0.364456
Average Kidnapping Count,0.897064,1.0,0.251659,0.604992,-0.005952
Average Population,0.03719,0.251659,1.0,0.88934,-0.161968
Average GDP,0.378259,0.604992,0.88934,1.0,-0.166758
Average kidnap_rate_%_Change,0.364456,-0.005952,-0.161968,-0.166758,1.0


In [157]:
fig = px.bar(df_africa, x='Average Kidnapping Rate', y='country',
             hover_data=['Average Kidnapping Count', 'Average Population', 'Average GDP', ],  
             color='Average Kidnapping Count', template='plotly_dark', color_continuous_scale='Viridis',
             labels={'Percentage Change':'Percentage Change in Life Expectancy'}, height=400)
fig.show()

In [158]:
# Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010
fig = px.choropleth(df_africa,locations='country',locationmode='country names', 
                    color='Average Kidnapping Count', color_continuous_scale='Viridis', 
                    hover_data=['Average Kidnapping Rate', 'Average Population', 'Average GDP'])
fig.update_layout(title='Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010',
                  template="plotly_dark")
fig.show()

## Asia

In [54]:
asia = kidnap[kidnap['Continent']=='Asia']
asia.head()

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population,kidnap_%_Change,kidnap_rate_%_Change
15,Kazakhstan,Asia,2005,0.597,92,15147029,-0.975648,-0.941954
16,Kazakhstan,Asia,2006,0.54,84,15308085,-0.086957,-0.095477
17,Kazakhstan,Asia,2007,0.535,84,15484192,0.0,-0.009259
18,Kazakhstan,Asia,2008,0.561,89,15776938,0.059524,0.048598
19,Kazakhstan,Asia,2009,0.748,120,16092822,0.348315,0.333333


In [55]:
asia.nunique(dropna=True)

country                  30
Continent                 1
Year                     16
Kidnapping Rate         223
Kidnapping Count        166
Population              257
kidnap_%_Change         217
kidnap_rate_%_Change    232
dtype: int64

## Europe

In [64]:
# Create a new dataframe for countires having population greater than 295,516,599

europe = kidnap[kidnap['Continent']=='Europe']
europe.head()

Unnamed: 0,country,Continent,Year,Kidnapping Rate,Kidnapping Count,Population
235,France,Europe,2003,3.371,2031,62244880
236,France,Europe,2004,3.531,2143,62704901
237,France,Europe,2005,3.292,2012,63179356
238,France,Europe,2006,3.72,2288,63621376
239,France,Europe,2007,3.409,2109,64016227


In [65]:
# Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010
fig = px.choropleth(europe,locations='country',locationmode='country names', 
                    color='Kidnapping Count', color_continuous_scale='Viridis', animation_frame='Year',
                    hover_data=['Kidnapping Rate', 'Population', 'Continent'])
fig.update_layout(title='Choropleth Map of Life Expectancy for Countries with Year ranging from 2000 to 2010',
                  template="simple_white")
fig.show()

## Task 1.2.3: 

In [184]:
population_2005_3 = df_merged_data[df_merged_data['Population'].between(63179356, 1147609924)]
population_2005_3.head()

Unnamed: 0,Country Name,Population,Life Expectancy
3,United States,295516599.0,75.0
7,Indonesia,226289468.0,65.579
17,Russian Federation,143518814.0,58.92
23,Mexico,106005199.0,72.575
25,Brazil,186127108.0,68.166


In [185]:
population_2005_3.shape

(18, 3)

In [225]:
# Choropleth Map of the World
fig = px.choropleth(population_2005_3,locations='Country Name',locationmode='country names',
                    color='Life Expectancy', color_continuous_scale='Viridis',)
fig.update_layout(title='Choropleth Map of Life Expectancy for Countries having population between 63179356 and 1147609924 in the year 2005',
                  template="simple_white")
fig.show()

## Task 1.2.4:

In [187]:
# Check for the country with the highest population in the year 2005

def find_max_country(df_merged_data):
    return df_merged_data[df_merged_data['Population'] == max(df_merged_data['Population'])].index[0]

find_max_country(df_merged_data)

print(df_merged_data.loc[find_max_country(df_merged_data)])

Country Name              China
Population         1303720000.0
Life Expectancy          71.297
Name: 134, dtype: object


In [188]:
# Create a new dataframe for the country with the highest population in the year 2005 
# (taking into consideration records from 2000 to 2010)

df_China = final_data[final_data['Country Name']=='China']
df_China.head()

Unnamed: 0,Country Name,Country Code,Geometry,Year,Life Expectancy,Population
134,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2000,69.595,1262645000.0
301,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2001,69.981,1271850000.0
468,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2002,70.351,1280400000.0
635,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2003,70.697,1288400000.0
802,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2004,71.013,1296075000.0


In [190]:
df_China.tail()

Unnamed: 0,Country Name,Country Code,Geometry,Year,Life Expectancy,Population
1637,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2006,71.551,1311020000.0
1804,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2007,71.788,1317885000.0
1971,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2008,72.017,1324655000.0
2138,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2009,72.244,1331260000.0
2305,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2010,72.476,1337705000.0


In [192]:
df_China.nunique(dropna=True)

Country Name        1
Country Code        1
Geometry            1
Year               11
Life Expectancy    11
Population         11
dtype: int64

In [193]:
df_China['Percentage Change'] = df_China['Life Expectancy'].pct_change()
df_China.head()

Unnamed: 0,Country Name,Country Code,Geometry,Year,Life Expectancy,Population,Percentage Change
134,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2000,69.595,1262645000.0,
301,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2001,69.981,1271850000.0,0.005546
468,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2002,70.351,1280400000.0,0.005287
635,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2003,70.697,1288400000.0,0.004918
802,China,CHN,"MULTIPOLYGON (((109.47521 18.19770, 108.65521 ...",2004,71.013,1296075000.0,0.00447


In [194]:
fig = px.bar(df_China, x='Percentage Change', y='Year',
             hover_data=['Population', 'Life Expectancy'], color='Life Expectancy',
             labels={'Percentage Change':'Percentage Change in Life Expectancy'}, height=400)
fig.show()

## Task 1.2.5: 

In [195]:
# Create a new dataframe containing the average life expectancy for each country between 2000 and 2010

df_Countries = final_data[['Country Name', 'Year', 'Geometry', 'Population', 'Life Expectancy']].groupby('Country Name',
                as_index=False).mean()
df_Countries.head()

Unnamed: 0,Country Name,Population,Life Expectancy
0,Afghanistan,25346330.0,57.090071
1,Albania,3004509.0,72.697929
2,Algeria,33268140.0,71.694
3,Angola,19578290.0,48.144429
4,Argentina,38878950.0,70.999929


In [227]:
# Plot a scatter plot of the average life expectancy for each country between 2000 and 2010 
# (taking the average for each country over the years)

fig = px.scatter(final_data, x='Population', y='Life Expectancy', color='Country Name', animation_frame='Year',)
fig.update_layout(title='Scatter Plot of Population VS Life Expectancy ranging from year 2000 to 2010',
                  template="plotly_dark")
fig.show()