# Data Streams

In [36]:
from pathlib import Path
import json
import pandas as pd
import numpy as np
import shutil
import re
import requests
import h5py

import sys
import os

module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [37]:
from app.utils.naming import component_to_csv_file, format_component_name

In [38]:
def create_stream(p, c, col=None):
    """Build the registration record for one data stream.

    Args:
        p: Product entry from the manifest; only ``p['product']`` is read.
        c: Component entry of that product; ``c['name']``, ``c['dataType']``
            and ``c['keywords']`` are read.
        col: Optional column name. When truthy, the endpoint is narrowed to
            that single field through an extra ``&field=`` query parameter.

    Returns:
        A dict with the keys ``urlCode``, ``endpoint``, ``dataType``,
        ``keywords`` and ``description``. ``keywords`` is a fresh list, so
        modifying it never changes the manifest entry it was built from.
    """
    # NOTE: values are interpolated into the query string as-is (no URL-encoding).
    endpoint = f'/data?product={p["product"]}&component={c["name"]}'
    if col:
        endpoint += f'&field={col}'
    return {
        'urlCode': 'API_PY',
        'endpoint': endpoint,
        'dataType': c['dataType'],
        # Copy the list: several streams are built from the same component and
        # must not share (or leak changes into) the manifest's keyword list.
        'keywords': list(c['keywords']),
        'description': '',
    }

In [39]:
def generate_streams(manifest, folder='../../data/live/'):
    """Build the stream records for every component listed in a manifest.

    For each component the CSV located by ``component_to_csv_file`` is loaded
    with its first column as the index, and one stream is emitted for the
    component as a whole. When that CSV has more than one data column, one
    further stream is emitted per column; its keywords are the component's
    keywords plus the ``'___'``-separated parts of the formatted column name.

    Args:
        manifest: Iterable of product dicts, each with ``'product'`` and
            ``'components'`` entries.
        folder: Directory handed to ``component_to_csv_file``.

    Returns:
        List of stream dicts as produced by ``create_stream``.
    """
    data_dir = Path(folder)
    collected = []
    for product in manifest:
        for component in product['components']:
            csv_path = component_to_csv_file(data_dir, product['product'], component['name'])
            frame = pd.read_csv(csv_path, index_col=0)

            # The component itself is always registered as one stream.
            collected.append(create_stream(product, component))

            # A single-column component needs no per-field streams.
            if len(frame.columns) <= 1:
                continue

            for column in frame.columns:
                field_stream = create_stream(product, component, column)
                # A composite name such as "female___1-14 years" contributes
                # two separate keywords (split leaves a plain name untouched).
                tags = format_component_name(column).split('___')
                field_stream['keywords'] = field_stream['keywords'] + tags
                collected.append(field_stream)
    return collected

In [40]:
def test_endpoints(streams, base_url='http://localhost:3000'):
    """Check that every stream endpoint is reachable and returns data.

    Args:
        streams: Iterable of stream dicts; only ``s['endpoint']`` is read.
        base_url: Prefix prepended to each endpoint.

    Raises:
        requests.HTTPError: if an endpoint answers with a 4xx/5xx status.
        AssertionError: if an endpoint answers with an empty JSON payload.
    """
    for s in streams:
        url = base_url + s['endpoint']
        response = requests.get(url)
        # An error response can still carry a non-empty JSON body, so the
        # status must be checked before the payload is inspected.
        response.raise_for_status()
        assert len(response.json()), f'empty response from {url}'

In [41]:
def test_streams(streams):
    """Sanity-check the Scottish covid-deaths age-group/gender streams.

    Among the streams tagged with all of 'scotland', 'covid_deaths',
    'age_group' and 'gender' there must be 15 in total: one overall stream
    plus 14 age-group x gender combinations (7 tagged 'male', 7 tagged
    'female'), of which 2 carry the '1_14_years' keyword.

    Raises:
        AssertionError: if any of those counts differs.
    """
    required = {'scotland', 'covid_deaths', 'age_group', 'gender'}

    def has_all_required(stream):
        return all(tag in stream['keywords'] for tag in required)

    subset = list(filter(has_all_required, streams))

    def count_tagged(tag):
        # Exact keyword match on the list, so 'male' does not count 'female'.
        return sum(1 for stream in subset if tag in stream['keywords'])

    assert len(subset) == 15
    assert count_tagged('male') == 7
    assert count_tagged('female') == 7
    assert count_tagged('1_14_years') == 2

## Create streams

In [6]:
# JSON is UTF-8 by specification; decode it explicitly instead of relying on
# the platform's default text encoding.
with open('../manifest/manifest.json', encoding='utf-8') as f:
    manifest = json.load(f)

In [7]:
# Build every stream described by the manifest, then validate the result:
# test_endpoints requests each endpoint from the default base URL
# (http://localhost:3000) and test_streams asserts the expected counts of
# Scottish covid_deaths age_group/gender streams.
streams = generate_streams(manifest)
test_endpoints(streams)
test_streams(streams)

## Register

In [42]:
def get_token():
    """Log in to the API at localhost:2000 and return its auth token.

    Returns:
        The ``token`` field of the login response, or ``None`` when the
        request fails, the response has an error status, or the token is
        missing or empty. Failures are printed rather than raised.
    """
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a secrets store instead.
    credentials = {'password': "zCEEwRSZGaSG2uL2", 'email': "phong@admin.com"}
    try:
        res = requests.post('http://localhost:2000/api/v1/auth/login', credentials)
        # A Response is falsy for 4xx/5xx statuses, so error replies skip
        # JSON decoding. The body is parsed once instead of three times.
        payload = res.json() if res else None
        if payload and payload['token']:
            return payload['token']

    # requests raises its own ConnectionError, which does not derive from the
    # builtin of the same name; catch both so this handler is reachable.
    except (requests.exceptions.ConnectionError, ConnectionError) as e:
        print("token request: error = ", e)

    except Exception as e:
        print("Something went wrong", e)

    return None

def register(data, token):
    """POST one stream record to the ontology data endpoint and print the reply.

    Args:
        data: Payload handed to ``requests.post`` as the request body.
        token: Token string sent as ``Authorization: Bearer <token>``.

    Any exception raised while sending the request or decoding the JSON
    reply is printed instead of propagated. Nothing is returned.
    """
    auth_headers = {'Authorization': 'Bearer ' + token}
    endpoint = 'http://localhost:2000/api/v1/ontology/data'
    try:
        # Decoding stays inside the try so a non-JSON reply is reported too.
        print(requests.post(endpoint, data, headers=auth_headers).json())
    except Exception as err:
        print(err)

In [43]:
# Log in once and reuse the token for every registration below. get_token()
# prints the error and yields None if the login request fails.
token = get_token()

### Register agegroup/gender first

In [63]:
# Tag every stream with the extra 'phong' keyword. A new list is built for
# each stream instead of appending in place.
for s in streams:
    s['keywords'] = [*s['keywords'], 'phong']

In [64]:
# Keep only the streams whose keywords include all four of the tags below.
age_gen_streams = [
    stream
    for stream in streams
    if all(tag in stream['keywords'] for tag in {'scotland', 'covid_deaths', 'age_group', 'gender'})
]

In [None]:
# Register the filtered age-group/gender streams one by one; register()
# prints each reply (or the error) rather than raising on request failures.
for s in age_gen_streams:
    register(s, token)

### Mock Wales agegroup/gender

In [None]:
# Clone the Scottish streams as mock Welsh ones: round-trip them through JSON
# text and replace every occurrence of 'scotland' with 'wales'. The replace is
# textual, so it applies to any string value containing 'scotland' (keywords
# and endpoint alike), not just to the keyword list.
wales_streams = json.loads(json.dumps(age_gen_streams).replace('scotland', 'wales'))
for s in wales_streams:
    register(s, token)

## ONS

In [44]:
# JSON is UTF-8 by specification; decode it explicitly instead of relying on
# the platform's default text encoding.
with open('../manifest/ons-manifest.json', encoding='utf-8') as f:
    ons_manifest = json.load(f)
ons_streams = generate_streams(ons_manifest)
# Tag each ONS stream with the extra 'phong' keyword (builds a new list per
# stream rather than appending in place).
for s in ons_streams:
    s['keywords'] = s['keywords'] + ['phong']

In [45]:
# Check that every ONS stream endpoint answers on the default base URL
# (http://localhost:3000) before anything is registered.
test_endpoints(ons_streams)

In [None]:
# Register every ONS stream; register() prints each reply (or the error)
# rather than raising on request failures.
for s in ons_streams:
    register(s, token)