##### Importing suicide statistics from WHO API

In [1]:
import requests
import json
import pandas as pd
# Fetch the 2016 records of WHO GHO indicator MH_12 (the suicide statistics
# used throughout this notebook) as JSON.
# The timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() reports an HTTP error here rather than as a JSON
# decoding failure on the following line.
r = requests.get('http://apps.who.int/gho/athena/api/GHO/MH_12/.json?filter=Year:2016', timeout=30)
r.raise_for_status()
data = r.json()

##### Converting JSON data to Python List

In [2]:
# Flatten the API response into one record per fact with the keys
# 'Country Code', 'Sex' and 'Suicide Rate'.
list_of_data = []
for fact in data['fact']:
    # Reset for every fact: without this, a fact that has no COUNTRY or SEX
    # dimension would silently reuse the values left over from the previous
    # iteration and be credited to the wrong country.
    country_code = None
    sex = None
    for category in fact['Dim']:
        if category['category'] == 'COUNTRY':
            country_code = category['code']
        elif category['category'] == "SEX":
            sex = category['code']
    if country_code is None or sex is None:
        # A fact without both dimensions cannot be attributed to a
        # country/sex pair, so it is skipped.
        continue
    suicide_rate = fact['value']['numeric']
    list_of_data.append({'Country Code': country_code, 'Sex': sex, "Suicide Rate": suicide_rate})

# Distinct country codes seen in the records.
country_codes = {record['Country Code'] for record in list_of_data}

# One row per country; the three rate fields start as empty strings and are
# filled in by a later cell.
data_dictionary = [
    {'Country Code': code, 'Male Suicide Rate': '', 'Female Suicide Rate': '', 'Combined Suicide Rate': ''}
    for code in country_codes
]

##### Creating a Python list with a dictionary for each country

In [3]:
# Copy every record's rate into the matching country's row.
# Index the rows by country code once, so each record costs one dictionary
# lookup instead of a scan over every row.
rows_by_code = {row['Country Code']: row for row in data_dictionary}

# Which column each SEX code of the API populates; records with any other
# code are ignored.
column_for_sex = {
    'MLE': 'Male Suicide Rate',
    'FMLE': "Female Suicide Rate",
    'BTSX': 'Combined Suicide Rate',
}

# The loop variable is intentionally not named `data`, so the API response
# bound to `data` is not overwritten by this cell.
for record in list_of_data:
    row = rows_by_code.get(record['Country Code'])
    column = column_for_sex.get(record['Sex'])
    if row is not None and column is not None:
        row[column] = record['Suicide Rate']

###### Converting to Pandas DataFrame

In [4]:
# Build the frame from the per-country rows, key it by country code and
# sort it by that key.
suicide_rates = (
    pd.DataFrame(data_dictionary)
    .set_index(['Country Code'])
    .sort_index()
)

In [5]:
# Preview the first rows of the frame built in the previous cell.
suicide_rates.head()

Unnamed: 0_level_0,Combined Suicide Rate,Female Suicide Rate,Male Suicide Rate
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AFG,6.4,2.1,10.6
AGO,8.9,4.6,14.0
ALB,5.6,4.3,7.0
ARE,2.7,0.8,3.5
ARG,9.1,3.5,15.0


###### Adding country names

In [6]:
# Fetch the indicator response and read its dimension metadata, which
# carries a display name for every country code.
r = requests.get('http://apps.who.int/gho/athena/api/GHO/MH_12/.json?filter=Year:2016', timeout=30)
r.raise_for_status()
data = r.json()

# Find the COUNTRY dimension by label instead of trusting that it is always
# the fifth entry; fall back to that position when no entry is labelled so.
# NOTE(review): assumes every dimension entry is a dict with a 'label' key,
# like the code entries read below -- confirm against the API response.
dimensions = data['dimension']
country_dimension = next(
    (dimension for dimension in dimensions if dimension.get('label') == 'COUNTRY'),
    None,
)
if country_dimension is None:
    country_dimension = dimensions[4]

# Each code entry supplies the country code ('label') and its readable
# name ('display').
country_names = [
    {'Country Code': code['label'], 'Country Name': code['display']}
    for code in country_dimension['code']
]

country_names_df = pd.DataFrame(country_names).set_index(['Country Code'])

# Inner join: countries without a name entry are dropped.
suicide_rates = suicide_rates.join(country_names_df, on=['Country Code'], how='inner')

# Put the name first, followed by the three rates.
suicide_rates = suicide_rates[['Country Name', 'Combined Suicide Rate', 'Male Suicide Rate', 'Female Suicide Rate']]

In [7]:
# Preview the first rows now that the country names have been joined in.
suicide_rates.head()

Unnamed: 0_level_0,Country Name,Combined Suicide Rate,Male Suicide Rate,Female Suicide Rate
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AFG,Afghanistan,6.4,10.6,2.1
AGO,Angola,8.9,14.0,4.6
ALB,Albania,5.6,7.0,4.3
ARE,United Arab Emirates,2.7,3.5,0.8
ARG,Argentina,9.1,15.0,3.5


##### Importing data on the percentage of the population living in cities from the WHO API

In [8]:
# Fetch WHO GHO indicator AIR_3 as JSON.
# The timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() reports an HTTP error here rather than as a JSON
# decoding failure on the following line.
r = requests.get('http://apps.who.int/gho/athena/api/GHO/AIR_3/?format=json', timeout=30)
r.raise_for_status()
data = r.json()

###### Converting JSON data to Python list

In [9]:
# Reduce each fact to its country code and numeric value.
data_dictionary = []
for fact in data['fact']:
    # Reset for every fact: without this, a fact that has no COUNTRY
    # dimension would reuse the code left over from the previous iteration
    # (or raise NameError if it is the very first fact).
    countrycode = None
    for category in fact['Dim']:
        if category['category'] == "COUNTRY":
            countrycode = category['code']
    if countrycode is None:
        # Nothing to attribute this value to, so skip the fact.
        continue
    data_dictionary.append({"Country Code": countrycode, "% Living in Cities > 100k": fact['value']['numeric']})

###### Converting info to Pandas DataFrame

In [10]:
# One row per record collected in the previous cell.
percent_living_in_cities = pd.DataFrame(data_dictionary)

In [11]:
# Key the frame by country code so it can be joined onto the suicide
# statistics, then preview it.
percent_living_in_cities = percent_living_in_cities.set_index(['Country Code'])
percent_living_in_cities.head()

Unnamed: 0_level_0,% Living in Cities > 100k
Country Code,Unnamed: 1_level_1
AFG,15.56
ALB,9.34
DZA,24.73
AND,38.7
AGO,19.97


###### Combining with suicide stats DataFrame

In [12]:
# Inner join on the country code: only countries present in both frames are
# kept, and the city-population column is appended.
suicide_rates = suicide_rates.join(percent_living_in_cities, on=['Country Code'], how='inner')

In [13]:
# Display the frame after the city-population join.
suicide_rates

Unnamed: 0_level_0,Country Name,Combined Suicide Rate,Male Suicide Rate,Female Suicide Rate,% Living in Cities > 100k
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AFG,Afghanistan,6.40000,10.6,2.1,15.56
AGO,Angola,8.90000,14.0,4.6,19.97
ALB,Albania,5.60000,7.0,4.3,9.34
ARE,United Arab Emirates,2.70000,3.5,0.8,70.03
ARG,Argentina,9.10000,15.0,3.5,74.17
ARM,Armenia,5.70000,10.1,2.0,53.58
ATG,Antigua and Barbuda,4.30386,0.0,0.9,33.07
AUS,Australia,11.70000,17.4,6.0,71.69
AUT,Austria,11.40000,17.5,5.7,37.26
AZE,Azerbaijan,2.60000,4.3,1.0,23.27


###### Scraping internet data from the CIA World Factbook using BeautifulSoup

In [14]:
from bs4 import BeautifulSoup
# Download the Factbook page; the timeout stops a stalled connection from
# hanging the kernel and raise_for_status() fails fast on an HTTP error
# instead of parsing an error page.
r = requests.get('https://www.cia.gov/library/publications/the-world-factbook/fields/204.html#AF', timeout=30)
r.raise_for_status()
c = r.content
# Name the parser explicitly: without it BeautifulSoup picks whichever
# parser happens to be installed and emits a warning, so results can differ
# between machines.
soup = BeautifulSoup(c, "lxml")



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


In [15]:
# Every table row on the page except the first one.
data = soup.find_all('tr')[1:]

##### Scraping country names and internet connectivity info

In [16]:
# Pull the country name and a percentage out of every table row.
data_dictionary = []
for country in data:
    try:
        country_name = country.findAll('td', {'class': 'country'})[0].text
        country_name_formatted = country_name.replace('\n', "")
        # The percentage is read from the second 'subfield-number' span.
        internet_percentage = country.findAll('span', {'class': 'subfield-number'})[1].text
        internet_percentage_formatted = float(internet_percentage.replace("%", ""))
    except (IndexError, ValueError):
        # Best effort: skip rows that lack the expected cells (IndexError)
        # or whose value is not a plain number (ValueError). Anything else,
        # including KeyboardInterrupt, is allowed to propagate.
        continue
    data_dictionary.append({'country': country_name_formatted, "percentage of population with internet access": internet_percentage_formatted})

##### Converting to Pandas DataFrame

In [17]:
# One row per country scraped in the previous cell.
internet_stats_df = pd.DataFrame(data_dictionary)

In [18]:
# Rename by explicit mapping rather than by position, so each label lands
# on the right column regardless of the order in which pandas arranged the
# columns when the frame was built.
internet_stats_df = internet_stats_df.rename(columns={
    'country': 'Country Name',
    'percentage of population with internet access': "% of Population with Internet Access",
})
# Key the frame by country name so it can be joined onto the other data.
internet_stats_df = internet_stats_df.set_index(['Country Name'])
internet_stats_df.head()

Unnamed: 0_level_0,% of Population with Internet Access
Country Name,Unnamed: 1_level_1
Afghanistan,10.6
Albania,66.4
Algeria,42.9
American Samoa,31.3
Andorra,97.9


##### Combining with other data collected

In [19]:
# Inner join on the 'Country Name' column: a row is kept only when its name
# exactly matches an index label of internet_stats_df.
# NOTE(review): the two sources may spell some country names differently,
# which would silently drop those countries -- verify the row count.
suicide_rates = suicide_rates.join(internet_stats_df, on=['Country Name'], how='inner')

In [20]:
# Display the frame after the internet-access join.
suicide_rates

Unnamed: 0_level_0,Country Name,Combined Suicide Rate,Male Suicide Rate,Female Suicide Rate,% Living in Cities > 100k,% of Population with Internet Access
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AFG,Afghanistan,6.40000,10.6,2.1,15.56,10.6
AGO,Angola,8.90000,14.0,4.6,19.97,13.0
ALB,Albania,5.60000,7.0,4.3,9.34,66.4
ARE,United Arab Emirates,2.70000,3.5,0.8,70.03,90.6
ARG,Argentina,9.10000,15.0,3.5,74.17,70.2
ARM,Armenia,5.70000,10.1,2.0,53.58,62.0
ATG,Antigua and Barbuda,4.30386,0.0,0.9,33.07,65.2
AUS,Australia,11.70000,17.4,6.0,71.69,88.2
AUT,Austria,11.40000,17.5,5.7,37.26,84.3
AZE,Azerbaijan,2.60000,4.3,1.0,23.27,78.2


###### Scraping religious stats

In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Download the page; the timeout stops a stalled connection from hanging
# the kernel and raise_for_status() fails fast on an HTTP error instead of
# parsing an error page.
r = requests.get('https://rationalwiki.org/wiki/Importance_of_religion_by_country', timeout=30)
r.raise_for_status()
c = r.content
# Name the parser explicitly: without it BeautifulSoup picks whichever
# parser happens to be installed and emits a warning, so results can differ
# between machines.
soup = BeautifulSoup(c, "lxml")



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


In [22]:
# Take every cell of the first table marked with the 'wikitable' class.
wikitables = soup.find_all('table', {'class': 'wikitable'})
data = wikitables[0].find_all('td')

In [23]:
# The cells are consumed in groups of three: position 0 of each group is the
# country name, position 1 is the percentage, and position 2 is not used.
religous_stats = []
for i, cell in enumerate(data):
    # i % 3 selects the position within the group directly; this avoids
    # rebuilding a list of indices on every iteration just to test membership.
    position = i % 3
    if position == 0:
        country = cell.text.strip()
    elif position == 1:
        percentage_religious = float(cell.text.replace("%", ""))
        religous_stats.append({'Country Name': country, "% Religious": percentage_religious})

In [24]:
# Build the frame from the scraped records, key it by country name so it
# can be joined onto the other data, then preview it.
religious_df = pd.DataFrame(religous_stats).set_index(['Country Name'])
religious_df.head()

Unnamed: 0_level_0,% Religious
Country Name,Unnamed: 1_level_1
Estonia,16.0
Sweden,16.5
Denmark,18.0
Czech Republic,20.5
Norway,20.5


###### Combine with rest of data

In [25]:
# Left join on the 'Country Name' column: every existing row is kept, and
# '% Religious' is left missing where religious_df has no matching name.
suicide_rates = suicide_rates.join(religious_df, on=['Country Name'], how='left')


In [26]:
# Display the frame after the religion join.
suicide_rates

Unnamed: 0_level_0,Country Name,Combined Suicide Rate,Male Suicide Rate,Female Suicide Rate,% Living in Cities > 100k,% of Population with Internet Access,% Religious
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AFG,Afghanistan,6.40000,10.6,2.1,15.56,10.6,97.0
AGO,Angola,8.90000,14.0,4.6,19.97,13.0,88.0
ALB,Albania,5.60000,7.0,4.3,9.34,66.4,32.5
ARE,United Arab Emirates,2.70000,3.5,0.8,70.03,90.6,91.0
ARG,Argentina,9.10000,15.0,3.5,74.17,70.2,66.0
ARM,Armenia,5.70000,10.1,2.0,53.58,62.0,72.5
ATG,Antigua and Barbuda,4.30386,0.0,0.9,33.07,65.2,
AUS,Australia,11.70000,17.4,6.0,71.69,88.2,32.0
AUT,Austria,11.40000,17.5,5.7,37.26,84.3,55.0
AZE,Azerbaijan,2.60000,4.3,1.0,23.27,78.2,49.5
