In [63]:
import os
from requests import get
from json import dumps
import json

In [2]:
# Raw downloads live under the directory the notebook was started from.
pwd = os.getcwd()
filepath = f"{pwd}/data_sets/raw_data_sets/"

In [3]:
ENDPOINT = "https://api.coronavirus.data.gov.uk/v1/data"
AREA_TYPE = 'nation'
AREA_NAME = ['england', 'scotland', 'wales', 'northern ireland']
fltr = [] # Empty list to loop though our areas for download.

### URL STRUCTURES FOR METRICS IN EACH FILE - CATEGORIZED BY:  
- Total Cumulative Cases, 
- Hospital Data, 
- Deaths Broken Down By Sex (ENG DATA ONLY), 
- New Daily Cases

In [129]:
# Total Case Metrics Up To Todays Date
total_case_data_structure = {
    "date" : "date",
    "area" : "areaName",
    "total_cases" : "cumCasesByPublishDate",
    "case_rate_per_100k" : "cumCasesByPublishDateRate",
    "death_28_days" : "cumDeaths28DaysByPublishDate",
    "death_rate" : "cumDeaths28DaysByPublishDateRate",
    "cum_male_cases" : "maleCases",
    "cum_female_cases" : "femaleCases"
}

# Total Current Hospital Data including total admissions and current admisions
hospital_data_structure = {

    "date" : "date",
    "area" : "areaName",
    "patients_in_hospital" : "hospitalCases",
    "people_on_vents" : "covidOccupiedMVBeds",
    "total_hospitalisations" : "cumAdmissions",
    "hos_by_age" : "cumAdmissionsByAge", 

}

# Due to this data only producing 1 Dates worth of breakdown - Ive put it on its own structure - makes it easier to break down later
demographic_sex_death_data_structure = {

    "date" : "date", 
    "area" : "areaName",
    "female_28_days" : "femaleDeaths28Days",
    "male_deaths_28_days" : "maleDeaths28Days"
}

# New Cases From day of request
new_case_data_structure = {
    "date" : "date",
    "area" : "areaName",
    "new_cases" : "newCasesByPublishDate",
    "new_change" : "newCasesByPublishDateChange",
    "new_percentage_change" : "newCasesByPublishDateChangePercentage",
    "new_cases_rolling_rate" : "newCasesByPublishDateRollingRate",
    "new_deaths" : "newDeaths28DaysByPublishDate",
    "new_d_change" : "newDeaths28DaysByPublishDateChange",
    "new_d_percentage_change" : "newDeaths28DaysByPublishDateChangePercentage" ,
    "new_d_rolling_rate" : "newDeaths28DaysByDeathDateRollingRate",
    "new_d_by_age" : "newDeaths28DaysByDeathDateAgeDemographics"
}
# Set API_params via calls to functions. 
api_params = {
    
}
# Formats we are getting in the request - sticking with json as it seems easiest
formats = {
    "json"
}

# Dict_dict to loop through the url structures and pass them into the get() request
data_structures = {"total_case_data" : total_case_data_structure, "hospital_data" : hospital_data_structure, "demographic_sex_death_data" : demographic_sex_death_data_structure, "new_case_data" : new_case_data_structure}

In [139]:
def change_area():
    """Download and save every metric group for every configured area.

    For each name in AREA_NAME a two-part filter (area type + area name) is
    built, and each entry of `data_structures` is serialised to compact JSON
    and handed to save_to_file(). Output files are named
    ``<structure name>_<area>.json``.
    """
    for nation in AREA_NAME:
        area_filter = [f"areaType={AREA_TYPE}", f"areaName={nation}"]
        suffix = f'_{nation}.json'
        for label, structure in data_structures.items():
            # Compact separators keep the serialised structure free of spaces.
            compact = dumps(structure, separators=(",", ":"))
            save_to_file(compact, area_filter, label, suffix)

In [133]:
def fetch_data(dmp, fltr):
    """Request one structure for one filter set from ENDPOINT.

    Args:
        dmp: JSON string describing the requested structure; sent as the
            ``structure`` request parameter.
        fltr: iterable of filter strings; joined with ``;`` and sent as the
            ``filters`` request parameter.

    Returns:
        The decoded JSON body of the last format requested, or ``None`` when
        a response body cannot be decoded as JSON or when ``formats`` is empty.

    Raises:
        RuntimeError: if the server answers with a status code of 400 or above.

    Note:
        The shared module-level ``api_params`` dict is updated in place.
    """
    api_params["structure"] = dmp
    api_params["filters"] = ";".join(fltr)

    # Initialise up front so an empty `formats` returns None instead of
    # raising UnboundLocalError at the final return.
    data = None
    for fmt in formats:
        api_params["format"] = fmt
        response = get(ENDPOINT, params=api_params, timeout=10)

        if response.status_code >= 400:
            raise RuntimeError(f'Request Failed: {response.text}')

        try:
            data = response.json()
        except ValueError:
            # Body is not valid JSON (for example, it is empty): report "no data".
            return None
    return data

In [7]:
def save_to_file(dmp, fltr, name='', fmat=''):
    """Fetch one data set and write it to ``filepath + name + fmat`` as JSON.

    Args:
        dmp: JSON string describing the requested structure (passed to fetch_data).
        fltr: list of filter strings (passed to fetch_data).
        name: file name stem.
        fmat: file name suffix, including the extension.

    The file is overwritten on every call. Appending would place a second JSON
    document in the same file on a re-run, which json.load() cannot parse.
    """
    # Fetch before opening so a failed request does not create or touch the file.
    payload = fetch_data(dmp, fltr)
    with open(filepath + name + fmat, 'w') as file:
        json.dump(payload, file, indent=4)

In [134]:
def combine_files():
    """Merge the per-area files in ``filepath`` into one file per category.

    Files are grouped by file-name prefix and each group is passed to
    merge_json():

    * ``total*``    -> ``combined_total_cases_data``
    * ``hospital*`` -> ``combined_hospital_data``
    * ``new*``      -> ``combined_new_cases_data``

    Files with any other prefix are ignored.
    """
    # File-name prefix -> name of the combined output (without extension).
    targets = {
        "total": "combined_total_cases_data",
        "hospital": "combined_hospital_data",
        "new": "combined_new_cases_data",
    }
    grouped = {prefix: [] for prefix in targets}

    # os.listdir() gives no ordering guarantee; sort so the merged output is
    # the same from run to run.
    for entry in sorted(os.listdir(filepath)):
        for prefix in targets:
            if entry.startswith(prefix):
                grouped[prefix].append(entry)

    # Every group is merged as a whole list of file names.
    for prefix, output_name in targets.items():
        merge_json(grouped[prefix], output_name)
    

In [135]:
def merge_json(filename, resultingName, folder=None):
    """Concatenate the records of several JSON files into one JSON list.

    Args:
        filename: list of file names inside ``folder``. A single name given
            as a string is treated as a one-element list.
        resultingName: name of the output file, without the ``.json`` extension.
        folder: directory holding the inputs and receiving the output.
            Defaults to the module-level ``filepath``.

    How each input payload is handled:
        * a list is appended element by element;
        * a dict with a ``"data"`` key contributes the value of that key.
          NOTE(review): assumes the API response wraps its rows under
          ``"data"`` -- confirm against the API documentation;
        * any other dict is kept as a single record;
        * ``null`` (or ``"data": null``) contributes nothing.
    """
    if folder is None:
        folder = filepath
    if isinstance(filename, str):
        # Guard against iterating a lone file name character by character.
        filename = [filename]

    result = []
    for f1 in filename:
        with open(os.path.join(folder, f1), 'r') as infile:
            payload = json.load(infile)

        # list.extend() on a dict would add its keys, not its records.
        if isinstance(payload, dict) and "data" in payload:
            payload = payload["data"]

        if payload is None:
            continue
        if isinstance(payload, list):
            result.extend(payload)
        else:
            result.append(payload)

    with open(os.path.join(folder, resultingName + '.json'), 'w') as output_file:
        json.dump(result, output_file, indent=4)

In [136]:
def download_data():
    """Download every metric group for every configured area.

    This only delegates to change_area(). Combining the downloaded files is a
    separate step (combine_files()) and is not run from here.
    """
    change_area()

In [141]:
# Merge the files already present in the raw-data folder into combined output.
# NOTE(review): download_data() is not called in any of the visible cells, so on
# a fresh kernel there may be nothing to merge -- confirm the intended run order.
combine_files()