# Exploring some ideas

In [1]:
from pathlib import Path
import json
import pandas as pd
import numpy as np
import shutil
import re

In [58]:
# Lookup from health-board code (the 'S080000NN' identifiers used as the index
# of the population CSVs) to a display name.
# Fife, Tayside, Greater Glasgow and Clyde and Lanarkshire each appear under
# two codes -- presumably an older and a newer code for the same board;
# TODO confirm against the data source.
nhs_code_to_name = dict((
    ('S08000015', 'Ayrshire and Arran'),
    ('S08000016', 'Borders'),
    ('S08000017', 'Dumfries and Galloway'),
    ('S08000018', 'Fife'),
    ('S08000019', 'Forth Valley'),
    ('S08000020', 'Grampian'),
    ('S08000021', 'Greater Glasgow and Clyde'),
    ('S08000022', 'Highland'),
    ('S08000023', 'Lanarkshire'),
    ('S08000024', 'Lothian'),
    ('S08000025', 'Orkney'),
    ('S08000026', 'Shetland'),
    ('S08000027', 'Tayside'),
    ('S08000028', 'Western Isles'),
    ('S08000029', 'Fife'),
    ('S08000030', 'Tayside'),
    ('S08000031', 'Greater Glasgow and Clyde'),
    ('S08000032', 'Lanarkshire'),
))

In [71]:
def process_health_board(in_file, group_file, all_file, is_gender):
    """Collapse single-year-of-age population columns into age bands and totals.

    Reads ``in_file`` as a CSV indexed by its 'health board' column, sums the
    per-age columns into fixed age bands and into one overall total per
    prefix, replaces the index values via ``nhs_code_to_name``, sorts the rows
    by the new index and writes two CSV files.

    Parameters
    ----------
    in_file : str or path-like
        Input CSV. Must contain a 'health board' column plus one column per
        year of age 0..90 for each prefix; the age is taken from the first run
        of digits in the column name.
    group_file : str or path-like
        Output CSV receiving the age-band columns.
    all_file : str or path-like
        Output CSV receiving the per-prefix total columns.
    is_gender : bool
        When true, columns prefixed 'males___' and 'females___' are processed
        separately; otherwise every input column is treated as an age column.

    Raises
    ------
    AssertionError
        If an age band does not match exactly one input column per year of age.
    """
    df = pd.read_csv(in_file, index_col='health board')

    # Snapshot the input columns *before* any derived column is added. All
    # sums below select from this snapshot, so band columns are never fed back
    # into the totals (with an empty prefix they used to be, doubling 'all').
    source_cols = list(df.columns)

    # 1. Grouping
    # 91: the last group is 90+
    age_groups = [range(0, 1), range(1, 15), range(15, 45), range(45, 65),
                  range(65, 75), range(75, 85), range(85, 91)]
    prefixes = ['males___', 'females___'] if is_gender else ['']
    grouped_cols = []
    all_cols = []
    for prefix in prefixes:
        prefix_cols = [c for c in source_cols if c.startswith(prefix)]
        # Parse each column's age once instead of once per age band.
        age_of = {c: int(re.search(r'(\d+)', c).group()) for c in prefix_cols}

        for group in age_groups:
            cols = [c for c in prefix_cols if age_of[c] in group]
            assert len(cols) == len(group), (
                f'expected {len(group)} column(s) for ages {group[0]}-{group[-1]} '
                f'with prefix {prefix!r}, found {len(cols)}'
            )
            m1, m2 = group[0], group[-1]
            # The band starting at 85 is open-ended because the last input
            # column (90) already stands for "90 and over".
            suffix = f'{m1} years and over' if m1 == 85 else f'{m1} years' if m1 == m2 else f'{m1}-{m2} years'
            new_col = (prefix + suffix).replace('males', 'male')
            df[new_col] = df[cols].sum(axis=1)
            grouped_cols.append(new_col)

        all_col = prefix + 'all'
        df[all_col] = df[prefix_cols].sum(axis=1)
        all_cols.append(all_col)

    # 2. Rename index
    # NOTE: codes absent from nhs_code_to_name map to NaN, and two codes that
    # share a name produce duplicate index labels; neither case is handled here.
    df.index = df.index.map(nhs_code_to_name)
    df = df.sort_index()
    df[grouped_cols].to_csv(group_file)
    df[all_cols].to_csv(all_file)
    
# Build the banded and total population tables for the gendered
# Scottish health-board file.
process_health_board(
    in_file='../data/static/human/demographics/population/scotland/health board_age_genders.csv',
    group_file='../data/static/human/demographics/population/scotland/health board_age_genders_grouped.csv',
    all_file='../data/static/human/demographics/population/scotland/health board_age_genders_all.csv',
    is_gender=True,
)