# Getting data from opendata

In [91]:
import os, sys, json
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)
    
import pandas as pd
from app.utils.naming import component_to_csv_file, format_component_name

## Scotland total

In [57]:
def save_scotland_total(path):
    """Fetch the "Scotland total" open-data resource and store it as CSV.

    The resource is downloaded from opendata.nhs.scot, the ``Country`` column
    is removed, and the remaining columns are written without the DataFrame
    index.

    Args:
        path: Destination of the CSV file.
    """
    source_url = 'https://www.opendata.nhs.scot/dataset/6dbdd466-45e3-4348-9ee3-1eac72b5a592/resource/42f17a3c-a4db-4965-ba68-3dffe6bca13a/download'
    totals = pd.read_csv(source_url)
    trimmed = totals.drop(columns=['Country'])
    trimmed.to_csv(path, index=False)
    
# Run the export: downloads the resource and writes the CSV under the live data tree.
save_scotland_total('../../data/live/opendata/scotland/vaccination/daily_total.csv')

## Scotland sex & age group

In [58]:
def save_scotland_sex_agegroup(path):
    """Fetch the sex / age-group open-data resource and store it as CSV.

    Besides ``Country``, every column whose name ends in ``QF`` is removed
    (presumably qualifier-flag columns; confirm against the dataset schema).
    The DataFrame index is not written.

    Args:
        path: Destination of the CSV file.
    """
    source_url = 'https://www.opendata.nhs.scot/dataset/6dbdd466-45e3-4348-9ee3-1eac72b5a592/resource/9b99e278-b8d8-47df-8d7a-a8cf98519ac1/download'
    raw = pd.read_csv(source_url)

    # Collect everything to discard in one list: Country first, then the *QF columns.
    unwanted = ['Country']
    unwanted.extend(col for col in raw.columns if col.endswith('QF'))

    raw.drop(columns=unwanted).to_csv(path, index=False)
    
# Run the export: downloads the resource and writes the CSV under the live data tree.
save_scotland_sex_agegroup('../../data/live/opendata/scotland/vaccination/daily_sex_agegroup.csv')

## Scotland local authority

In [None]:
def generate_component(name, description, prefix, keyword):
    """Build a manifest entry describing one timeseries component.

    Args:
        name: Human-readable name; it is passed through
            ``format_component_name`` to obtain the key used in the component
            name and keywords.
        description: Text appended to ``name`` (separated by ``' - '``) to
            form the component description.
        prefix: Leading part of the component name, joined to the key with
            an underscore.
        keyword: Extra keyword inserted before the key in the keyword list.

    Returns:
        A dict with the fixed product ``'opendata/scotland/vaccination'`` and
        a one-element ``'components'`` list.
    """
    key = format_component_name(name)

    component = {
        'name': prefix + '_' + key,
        'dataType': 'timeseries',
        'keywords': ['opendata', 'scotland', 'vaccination', 'daily', keyword, key],
        'description': name + ' - ' + description,
    }

    return {
        'product': 'opendata/scotland/vaccination',
        'components': [component],
    }

In [90]:
def save_scotland_local_authority(path, print_manifest=False):
    """Write one CSV per local authority from the local-authority resource.

    Rows without a ``CA`` value are discarded, then the ``CA`` column and all
    columns ending in ``QF`` are removed. The remaining rows are grouped by
    ``CAName`` and each group is saved to ``<path>_<key>.csv``, where ``key``
    comes from ``format_component_name``.

    Args:
        path: File path prefix (no extension) for the per-authority files.
        print_manifest: When true, also print a JSON list containing one
            manifest entry per local authority.
    """
    source_url = 'https://www.opendata.nhs.scot/dataset/6dbdd466-45e3-4348-9ee3-1eac72b5a592/resource/d5ffffc0-f6f3-4b76-8f38-71ccfd7747a4/download'
    frame = pd.read_csv(source_url)

    flag_columns = [col for col in frame.columns if col.endswith('QF')]
    frame = frame.dropna(subset=['CA']).drop(columns=['CA'] + flag_columns)

    manifest = []
    for authority, rows in frame.groupby('CAName'):
        slug = format_component_name(authority)
        rows.to_csv(path + '_' + slug + '.csv', index=False)

        if not print_manifest:
            continue

        manifest.append(
            generate_component(
                name=authority,
                description='Daily vaccination',
                prefix='daily_local_authority',
                keyword='local_authority',
            )
        )

    if print_manifest:
        print(json.dumps(manifest, indent=4))
    
# Run the export: writes one CSV per local authority and prints the manifest JSON.
save_scotland_local_authority('../../data/live/opendata/scotland/vaccination/daily_local_authority', print_manifest=True)

  if (await self.run_code(code, result,  async_=asy)):


[
    {
        "product": "opendata/scotland/vaccination",
        "components": [
            {
                "name": "daily_local_authority_aberdeen_city",
                "dataType": "timeseries",
                "keywords": [
                    "opendata",
                    "scotland",
                    "vaccination",
                    "daily",
                    "local_authority",
                    "aberdeen_city"
                ],
                "description": "Aberdeen City - Daily vaccination"
            }
        ]
    },
    {
        "product": "opendata/scotland/vaccination",
        "components": [
            {
                "name": "daily_local_authority_aberdeenshire",
                "dataType": "timeseries",
                "keywords": [
                    "opendata",
                    "scotland",
                    "vaccination",
                    "daily",
                    "local_authority",
                    "aberdeenshire"
             

## Scotland health board

In [88]:
def save_scotland_health_board(path, print_manifest=False):
    """Write one CSV per health board from the health-board resource.

    Rows without an ``HB`` value and rows whose ``HBName`` is ``"Scotland"``
    are discarded, then the ``HB`` column and all columns ending in ``QF``
    are removed. The first four characters of every ``HBName`` are stripped
    before grouping, and each group is saved to ``<path>_<key>.csv``, where
    ``key`` comes from ``format_component_name``.

    Args:
        path: File path prefix (no extension) for the per-board files.
        print_manifest: When true, also print a JSON list containing one
            manifest entry per health board.
    """
    source_url = 'https://www.opendata.nhs.scot/dataset/6dbdd466-45e3-4348-9ee3-1eac72b5a592/resource/758f72d6-7371-4eee-9e6b-0b0798470d7e/download'
    frame = pd.read_csv(source_url)

    flag_columns = [col for col in frame.columns if col.endswith('QF')]
    frame = frame.dropna(subset=['HB'])
    frame = frame.query('HBName != "Scotland"')
    frame = frame.drop(columns=['HB'] + flag_columns)

    def strip_prefix(board_name):
        # Drops the leading four characters (presumably an "NHS " prefix;
        # confirm against the dataset).
        return board_name[4:]

    frame['HBName'] = frame['HBName'].apply(strip_prefix)

    # Split into one file per health board
    manifest = []
    for board, rows in frame.groupby('HBName'):
        slug = format_component_name(board)
        rows.to_csv(path + '_' + slug + '.csv', index=False)

        if not print_manifest:
            continue

        manifest.append(
            generate_component(
                name=board,
                description='Daily vaccination',
                prefix='daily_health_board',
                keyword='health_board',
            )
        )

    if print_manifest:
        print(json.dumps(manifest, indent=4))
        
# Run the export: writes one CSV per health board and prints the manifest JSON.
save_scotland_health_board('../../data/live/opendata/scotland/vaccination/daily_health_board', print_manifest=True)    

[
    {
        "product": "opendata/scotland/vaccination",
        "components": [
            {
                "name": "daily_health_board_ayrshire_and_arran",
                "dataType": "timeseries",
                "keywords": [
                    "opendata",
                    "scotland",
                    "vaccination",
                    "daily",
                    "health_board",
                    "ayrshire_and_arran"
                ],
                "description": "Ayrshire and Arran - Daily vaccination"
            }
        ]
    },
    {
        "product": "opendata/scotland/vaccination",
        "components": [
            {
                "name": "daily_health_board_borders",
                "dataType": "timeseries",
                "keywords": [
                    "opendata",
                    "scotland",
                    "vaccination",
                    "daily",
                    "health_board",
                    "borders"
                ],
   

## Composite files

In [118]:
def save_scotland_all_local_authority(path, latest_days=14):
    """Save the most recent days of local-authority data as one composite CSV.

    The local-authority resource is downloaded and restricted to rows whose
    ``Date`` is among the ``latest_days`` largest distinct ``Date`` values.
    Rows without a ``CA`` value are then discarded and the ``CA`` column and
    all columns ending in ``QF`` are removed.

    Args:
        path: Destination of the CSV file.
        latest_days: Number of most recent distinct dates to keep. This
            argument used to be ignored in favour of a hard-coded 14.

    Raises:
        ValueError: If ``latest_days`` is smaller than 1.
    """
    if latest_days < 1:
        # A slice of [-0:] would silently keep every date, so reject it early.
        raise ValueError('latest_days must be at least 1')

    df = pd.read_csv('https://www.opendata.nhs.scot/dataset/6dbdd466-45e3-4348-9ee3-1eac72b5a592/resource/d5ffffc0-f6f3-4b76-8f38-71ccfd7747a4/download')

    # The window is computed on the full download, before any rows are dropped.
    latest_dates = sorted(df['Date'].unique())[-latest_days:]
    df = df[df['Date'].isin(latest_dates)]

    qf_columns = [c for c in df.columns if c.endswith('QF')]
    df = df.dropna(subset=['CA'])
    df = df.drop(columns=['CA'] + qf_columns)

    # NOTE(review): the DataFrame index is written as the first column here,
    # unlike the per-authority files; kept so the output format is unchanged.
    df.to_csv(path)
        
# Run the export with the default latest_days value.
save_scotland_all_local_authority('../../data/live/opendata/scotland/vaccination/daily_local_authorities.csv')

In [115]:
def save_scotland_all_health_board(path, latest_days=14):
    """Save the most recent days of health-board data as one composite CSV.

    The health-board resource is downloaded and restricted to rows whose
    ``Date`` is among the ``latest_days`` largest distinct ``Date`` values.
    Rows without an ``HB`` value and rows whose ``HBName`` is ``"Scotland"``
    are then discarded, the ``HB`` column and all columns ending in ``QF``
    are removed, and the first four characters of every ``HBName`` are
    stripped.

    Args:
        path: Destination of the CSV file.
        latest_days: Number of most recent distinct dates to keep. This
            argument used to be ignored in favour of a hard-coded 14.

    Raises:
        ValueError: If ``latest_days`` is smaller than 1.
    """
    if latest_days < 1:
        # A slice of [-0:] would silently keep every date, so reject it early.
        raise ValueError('latest_days must be at least 1')

    df = pd.read_csv('https://www.opendata.nhs.scot/dataset/6dbdd466-45e3-4348-9ee3-1eac72b5a592/resource/758f72d6-7371-4eee-9e6b-0b0798470d7e/download')

    # The window is computed on the full download, before any rows are dropped.
    latest_dates = sorted(df['Date'].unique())[-latest_days:]
    df = df[df['Date'].isin(latest_dates)]

    qf_columns = [c for c in df.columns if c.endswith('QF')]
    df = df.dropna(subset=['HB'])
    df = df.query('HBName != "Scotland"')
    df = df.drop(columns=['HB'] + qf_columns)

    # Drops the leading four characters of each name (presumably an "NHS "
    # prefix; confirm against the dataset).
    df['HBName'] = df['HBName'].apply(lambda t: t[4:])

    # NOTE(review): the DataFrame index is written as the first column here,
    # unlike the per-board files; kept so the output format is unchanged.
    df.to_csv(path)
        
# Run the export with the default latest_days value.
save_scotland_all_health_board('../../data/live/opendata/scotland/vaccination/daily_health_boards.csv')