# Performance Comparison
Compare the time taken to download a World Bank indicator in raw JSON format versus JSON-stat format.

In [0]:
# imports
import pandas as pd # Dataframe
import json # Parsing json to object
import requests # Making HTTP get requests
import pycountry
from pyjstat import pyjstat
import timeit

In [0]:
def download_indicator_json(indicator_name):
    """Download every page of a World Bank indicator into one DataFrame.

    Requests the raw JSON format of the World Bank v2 API page by page
    (1000 records per page) and turns each returned record into one row.

    Args:
        indicator_name: Indicator code inserted into the request URL,
            e.g. "SG.GEN.PARL.ZS".

    Returns:
        A pandas DataFrame with the columns "country_region", "iso_code",
        "year", "value" and "unit", one row per downloaded record.

    Raises:
        Exception: If a page request returns a non-200 status code.
    """
    columns = ["country_region", "iso_code", "year", "value", "unit"]
    # Rows are collected in a plain list and converted once at the end;
    # growing a DataFrame one row at a time copies it on every insert.
    rows = []
    page = 1
    while True:
        # TODO: replace "all" with a variable holding the ISO3 codes.
        url = f"http://api.worldbank.org/v2/country/all/indicator/{indicator_name}?format=json&per_page=1000&gapfill=N&page={page}"
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception(f"Failed to download indicator {indicator_name}")

        data = json.loads(response.text)
        total_pages = data[0]["pages"]
        print(f"Processing {indicator_name}: Page {page} / {total_pages}")

        # Guard against a missing or null records element so an empty
        # page does not raise while iterating.
        records = data[1] if len(data) > 1 else None
        for item in records or []:
            rows.append({
                "country_region": item["country"]["value"],
                "iso_code": item["countryiso3code"],
                "year": item["date"],
                "value": item["value"],
                "unit": item["unit"],
            })

        # Stop right after the last page; comparing with ">" would issue
        # one extra request for a page that does not exist.
        if page >= total_pages:
            break
        page += 1

    return pd.DataFrame(rows, columns=columns)

In [0]:
def download_indicator_jsonstat(indicator_name):
    """Fetch a World Bank indicator in JSON-stat format as a dataframe.

    Builds the request URL for ``indicator_name``, lets pyjstat read the
    dataset from that URL, and returns the dataset written out in
    pyjstat's 'dataframe' output form.
    """
    endpoint = f"http://api.worldbank.org/v2/country/all/indicator/{indicator_name}?format=jsonstat&gapfill=N"
    return pyjstat.Dataset.read(endpoint).write('dataframe')

In [0]:
# Run each downloader exactly once and record its wall-clock duration.
# The raw JSON variant is timed first, then the JSON-stat variant.
time_one = timeit.timeit(
    "download_indicator_json('SG.GEN.PARL.ZS')",
    globals=globals(),
    number=1,
)
time_two = timeit.timeit(
    "download_indicator_jsonstat('SG.GEN.PARL.ZS')",
    globals=globals(),
    number=1,
)

# Collect both measurements in a two-row table: one row per downloader.
results_df = pd.DataFrame(
    [
        ("Raw JSON Results", time_one),
        ("JSONSTAT Results", time_two),
    ],
    columns=["Function", "Time Taken (seconds)"],
)

In [0]:
# Show the timing comparison table as the cell's output.
results_df