# Data visualization of world contraceptive use

In [None]:
import geopandas 
import pandas as pd
import seaborn as sns
import numpy as np
import fiona
import pycountry 
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [None]:
# Low-resolution Natural Earth country polygons bundled with geopandas;
# they serve as the canvas for the world map.
# NOTE(review): geopandas.datasets was deprecated in geopandas 0.14 and
# removed in 1.0 - confirm the pinned geopandas version before upgrading.
world = geopandas.read_file(
    geopandas.datasets.get_path('naturalearth_lowres')
)

In [None]:
# Load the three worksheets of the contraceptive workbook.
# A lone "." in a cell is read as a missing value.
with pd.ExcelFile('Contraceptive_2019.xls') as xls:
    country2019, area2019, country_trend = (
        pd.read_excel(xls, sheet_name=sheet, na_values=["."])
        for sheet in ('Sheet1', 'Sheet2', "Sheet3")
    )

## Merge data

In [None]:
# The contraceptive data do not contain a column of unique identifiers,
# so names are matched approximately (fuzzy merge) to join the two datasets.
def fuzzy_merge(data1, data2, key1, key2, threshold=95, limit=1):
    """Attach the best fuzzy matches from ``data2[key2]`` to each row of ``data1``.

    Parameters
    ----------
    data1 : DataFrame
        Frame whose ``key1`` column holds the strings to look up.
    data2 : DataFrame
        Frame whose ``key2`` column holds the candidate strings.
    key1, key2 : str
        Column names in ``data1`` and ``data2`` respectively.
    threshold : int, default 95
        Minimum fuzzywuzzy score a candidate needs to be accepted.
    limit : int, default 1
        Maximum number of candidates requested per lookup.

    Returns
    -------
    DataFrame
        A copy of ``data1`` with an extra ``'matches'`` column holding the
        accepted candidates joined by ``', '`` (an empty string when no
        candidate reaches ``threshold``). ``data1`` itself is not modified.
    """
    # Missing candidate names (NaN) cannot be scored, so leave them out.
    candidates = data2[key2].dropna().tolist()

    def accepted_matches(query):
        # Nothing to score against, or the lookup value is not text
        # (e.g. NaN): report "no match" instead of raising.
        if not candidates or not isinstance(query, str):
            return ""
        scored = process.extract(query, candidates, limit=limit)
        return ', '.join(
            choice for choice, score in scored if score >= threshold
        )

    # assign() returns a new frame, leaving the caller's frame untouched.
    return data1.assign(matches=data1[key1].apply(accepted_matches))

In [None]:
# Look up each map country name among the contraceptive data's area names.
match = fuzzy_merge(data1=world, data2=country2019, key1='name', key2='area')

In [None]:
# Display the map countries that found no match in the contraceptive data.
match.loc[match['matches'].eq("")]

In [None]:
# Left join on the fuzzy-matched name so every map country is kept,
# whether or not contraceptive data was found for it.
World_con = match.merge(
    country2019,
    how='left',
    left_on='matches',
    right_on='area',
)

In [None]:
# Clean the data: drop the two join-key columns, then leave Antarctica
# out of the frame.
World_con = World_con.drop(columns=['matches', 'area'])
World_con = World_con.loc[World_con['continent'].ne('Antarctica')]

In [None]:
# Summary statistics of the merged, cleaned data (quick sanity check).
World_con.describe()

## Contraceptive prevalence in the world
### General trend 
#### A world map to show the estimated prevalence of contraceptive use among women of reproductive age

In [None]:
fig, ax = plt.subplots(nrows=1, figsize=(16, 17))

# Carve a narrow axes off the right edge of the map so the colour bar
# lines up with the plot.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Choropleth coloured by the 'Any method' column.
World_con.plot(
    column='Any method',
    cmap='GnBu',
    legend=True,
    ax=ax,
    cax=cax,
    linewidth=0.3,
    edgecolor='0.7',
    alpha=0.95,
)
ax.axis('off')  # hide the axis frame and ticks

# Plot title and data-source annotation.
ax.set_title(
    'Estimated prevalence of contraceptive use among women of reproductive age (15-49 years), 2019(%)',
    fontdict={
        'fontsize': '16',
        'fontweight': '5',
        'horizontalalignment': 'center',
    },
)
ax.annotate(
    'Source: United  Nations, Population  Division  (2019).',
    xy=(0.1, 0.28),
    xycoords='figure fraction',
    horizontalalignment='left',
    verticalalignment='top',
    fontsize=10,
    color='#555555',
)


#### Contraceptive use prevalence among women in geographic regions

In [None]:
# Rows 1-8 and the first two columns of the area sheet, ordered from the
# highest to the lowest 'Any method' value.
# NOTE(review): presumably these rows are the geographic regions - verify
# against the workbook layout.
g_contra = (
    area2019.iloc[1:9, :2]
    .sort_values(by="Any method", ascending=False)
)

In [None]:
# Horizontal bar chart: one bar per area, bar length = 'Any method' value.
g = sns.catplot(
    data=g_contra,
    kind="bar",
    x="Any method",
    y="area",
    palette=sns.cubehelix_palette(8, start=.5, rot=-.5),
    height=4,
    aspect=2,
)
g.set_ylabels("")
g.set_xlabels("")
g.fig.subplots_adjust(top=0.9)  # leave room above the axes for the title
g.fig.suptitle('Contraceptive use prevalence in geographic regions: all method (%)', fontsize=18)

#### Contraceptive use prevalence among women by income levels

In [None]:
# Rows 16-20 of the area sheet, keeping columns 0-4 plus 7 and 8.
# NOTE(review): presumably these rows are the income-level groups - verify
# against the workbook layout.
inc_contra = area2019.iloc[16:21, [*range(5), 7, 8]]

In [None]:
# Display the selected rows and columns to check the slice.
inc_contra

In [None]:
# Global seaborn theme and font sizes for the dot plot.
sns.set(
    style="whitegrid",
    rc={
        'font.size': 15,
        'axes.labelsize': 15,
        'legend.fontsize': 15,
        'axes.titlesize': 15,
        'xtick.labelsize': 13,
        'ytick.labelsize': 15,
    },
)

# One narrow panel per value column (columns 1-6), all sharing the
# 'area' rows on the y axis.
inc = sns.PairGrid(
    data=inc_contra,
    x_vars=inc_contra.columns[1:7],
    y_vars=["area"],
    height=5,
    aspect=.5,
)
inc.map(
    sns.stripplot,
    size=14,
    orient="h",
    palette="ch:s=1,r=-.1,h=1_r",
    linewidth=1,
    edgecolor="w",
)
inc.set(ylabel="")

# Keep only the horizontal grid lines in each panel.
for ax in inc.axes.flat:
    ax.xaxis.grid(False)
    ax.yaxis.grid(True)

inc.fig.subplots_adjust(top=0.9)  # leave room above the panels for the title
inc.fig.suptitle('Contraceptive use prevalence among women by income levels(%)', fontsize=18)

### Country-specific trends

In [None]:
## Select 12 countries to see specific trend
# The 'ISO code' column is compared against integer codes, so each
# alpha-3 code is translated to its numeric code via pycountry first.
countryname = [
    int(pycountry.countries.get(alpha_3=code).numeric)
    for code in ['USA', 'CAN', 'GBR', 'FRA', 'DEU', 'MEX',
                 'JPN', 'KOR', 'CHN', 'IND', 'AUS', 'TUR']
]
subcountry = country_trend.loc[
    country_trend['ISO code'].isin(countryname),
    [
        'ISO code',
        'area',
        'Survey\nend year',
        'Any method',
        'Female\nsterilization',
        'Pill',
        'Male condom',
    ],
]

In [None]:
# Give the awkward headers (embedded newlines, spaces) short names.
# Only columns whose name actually changes are listed; every other column
# keeps its existing name.
subcountry = subcountry.rename(
    columns={
        'ISO code': 'iso',
        'Survey\nend year': 'year',
        'Female\nsterilization': 'Female sterilization',
    }
)


In [None]:
# Mean of each method column per area, over all rows of that area.
# The aggregation is named by string: passing the NumPy callable
# (np.mean) triggers a FutureWarning on pandas >= 2.1, while 'mean'
# yields the same result on every pandas version.
pd.pivot_table(
    subcountry,
    values=['Any method', 'Female sterilization', 'Pill', 'Male condom'],
    index=['area'],
    aggfunc='mean',
)

In [None]:
## Only examine the trend since 1980
subcountry = subcountry.loc[subcountry['year'] > 1979]

# Reshape to long form: one row per (area, iso, year, method) with the
# method name in 'variable' and its figure in 'value'.
long_subc = subcountry.melt(
    id_vars=['area', 'iso', 'year'],
    value_vars=['Any method', 'Female sterilization', 'Pill', 'Male condom'],
)
                          

In [None]:
# Global seaborn theme, line width and font sizes for the trend plots.
sns.set(style="ticks",rc={"lines.linewidth": 2,'xtick.labelsize': 12, 'ytick.labelsize': 12,'font.size': 15, 'axes.labelsize': 12, 'legend.fontsize': 12})

# One line-plot facet per area (4 per row); each method gets its own
# colour and dash style.
g = sns.relplot(
    data=long_subc,
    x="year",
    y="value",
    hue="variable",
    style="variable",
    kind="line",
    col="area",
    col_wrap=4,
    height=3,
)
(g.set_ylabels("prevalence(%)")
  .set_xlabels(""))

# Title each facet with the area it actually shows. Facets follow the
# order in which areas appear in the data, so pairing them with an
# alphabetically sorted name list could put the wrong name on a plot.
g.set_titles(col_template="{col_name}")

g.fig.subplots_adjust(top=0.9)  # leave room above the facets for the title
g.fig.suptitle('Contraceptive use prevalence trend of selected countries', fontsize=18)

# Remove the legend title ("variable"). Depending on the seaborn version
# the title is either a real legend title or the first text entry, so
# clear it by content rather than by position; blanking texts[0]
# unconditionally would erase the first method label on newer versions.
legend = getattr(g, "_legend", None)
if legend is not None:
    legend.set_title("")
    for entry in legend.texts:
        if entry.get_text() == "variable":
            entry.set_text("")
