In [1]:
'''
1. Step: Before running anything in this notebook, create a new SQL schema called "newcovid" in the local MySQL database
2. Step: Import required libraries
3. Step: Establish a connection to the newly created MySQL DB "newcovid"
'''

import pandas as pd
import pymysql
import getpass
import requests
import json
from flatten_json import flatten 
import matplotlib
%matplotlib inline

# Establish a connection to the local MySQL database. The password is read
# interactively via getpass so it is never stored in the notebook.
# `password` / `database` are PyMySQL's primary keyword names; `passwd` / `db`
# are only kept as MySQLdb-compatibility aliases.
conn = pymysql.connect(host="localhost",
                       port=3306,
                       user="root",
                       password=getpass.getpass(),
                       database="newcovid")

········


In [43]:
''' JSON import from Eurostat API

Definition of the URL string components according to the Eurostat API REST request requirements
(https://ec.europa.eu/eurostat/web/json-and-unicode-web-services/getting-started/rest-request)

1) Variable 'Eurostat_base' to use as URL-base 
2) Dictionary 'Eurostat_dict' whose values are the second URL component, looked up by identifiable keys,
   which will themselves later serve as names for the respective SQL tables
3) Empty list 'df_list', which will later serve as an intermediate container for the dataframes
   once they are created from the JSON files that we receive from the Eurostat API.

'''

# Common prefix of every Eurostat REST request; a dataset query is appended to it.
Eurostat_base = "http://ec.europa.eu/eurostat/wdds/rest/data/v2.1/json/en/"

# Output name -> "<dataset code>?<filter parameters>".
# The name is reused by eurostat() as the stem of the CSV file it writes.
Eurostat_dict = dict([
    ("Eurostat_population_density_2018",
     "tps00003?unit=PER_KM2&precision=1&time=2018"),
    ("Eurostat_air_transport_passengers_2018",
     "ttr00012?tra_meas=PAS_CRD&precision=1&tra_cov=TOTAL&schedule=TOT&unit=PAS&time=2018"),
    ("Eurostat_gdp_head_2018",
     "nama_10_pc?na_item=B1GQ&precision=1&unit=CLV10_EUR_HAB&time=2018"),
    ("Eurostat_health_euro_expenditure_per_capita_and_totals_2017",
     "tps00207?precision=1&unit=EUR_HAB&unit=MIO_EUR&unit=PC_GDP&icha11_hc=TOT_HC&time=2017"),
    ("Eurostat_total_nights_spent_by_residents_non_residents_2018",
     "tin00175?c_resid=FOR&c_resid=NAT&c_resid=TOTAL&precision=1&unit=NR&time=2018&nace_r2=I551-I553"),
    ("Eurostat_total_deaths_by_pneumonia_2016",
     "tps00128?precision=1&sex=F&sex=M&sex=T&unit=RT&time=2016&age=TOTAL&icd10=J12-J18"),
    # NOTE(review): this query filters on time=2016, but the name carries no year.
    ("Eurostat_standardised_death_rate_due_to_chronic_diseases_by_sex",
     "sdg_03_40?sex=F&sex=M&sex=T&precision=1&time=2016"),
    # NOTE(review): the name says 2018, but the query has no time= filter - confirm.
    ("Eurostat_total_population_on_January1st_2018",
     "proj_18np?precision=1&age=TOTAL"),
    ("Eurostat_no_of_practising_physicians_per_100k_inhabitants_2017",
     "tps00044?precision=1&isco08=OC221&unit=P_HTHAB&wstatus=PRACT&time=2017"),
    ("Eurostat_share_of_people_with_good_or_very_good_perceived_health_by_sex_above_16_2019",
     "sdg_03_20?precision=1&sex=F&sex=M&sex=T&unit=PC&quantile=TOTAL&time=2019&age=Y_GE16&levels=VG_G"),
    ("Eurostat_overcrowding_rate_by_sex_without_single-person_households_2019",
     "tessi176?precision=1&sex=F&sex=M&sex=T&incgrp=TOTAL&unit=PC&time=2019&age=TOTAL"),
    ("Eurostat_population_by_age_group_2019",
     "tps00010?indic_de=PC_Y0_14&indic_de=PC_Y15_24&indic_de=PC_Y25_49&indic_de=PC_Y50_64&indic_de=PC_Y65_79&indic_de=PC_Y80_MAX&precision=1&time=2019"),
])

# Starts empty; intended as an intermediate container for dataframes.
df_list = []

''' We now create a function, which 
a) receives a string(URL-base) and a dictionary of strings(secondary URL components),
b) iterates through the dictionary,
c) concatenates the URL components, 
d) reads the JSON files from the API, 
e) transforms it into a pandas DataFrame,
f) writes the respective DataFrame to a CSV file named after the dictionary key, in the current working directory
'''

def eurostat(base_str, url_dict):
    """Download Eurostat datasets and store each one as a CSV file.

    For every entry of ``url_dict`` the value is appended to ``base_str`` to
    build the request URL. The response is read with pyjstat, converted to a
    dataframe and written to ``<key>.csv`` (relative path, without the index
    column).

    Parameters
    ----------
    base_str : str
        Common URL prefix shared by all requests.
    url_dict : dict of str to str
        Maps an output name (used as the CSV file stem) to the URL suffix
        that is appended to ``base_str``.

    Returns
    -------
    None
        The function is used for its side effect of writing CSV files.

    Notes
    -----
    No errors are caught here: if reading or writing one dataset fails, the
    exception propagates and the remaining entries are not processed.
    """
    # pyjstat is imported here rather than at the top of the notebook.
    from pyjstat import pyjstat

    # Iterate over items() so each suffix is available without a second lookup.
    for name, query in url_dict.items():
        dataset = pyjstat.Dataset.read(base_str + query)
        frame = dataset.write('dataframe')
        frame.to_csv(name + '.csv', index=False)


In [44]:
# Fetch every dataset configured in Eurostat_dict from the Eurostat_base endpoint
eurostat(base_str=Eurostat_base, url_dict=Eurostat_dict)