# Data Streams

In [2]:
from pathlib import Path
import json
import pandas as pd
import numpy as np
import shutil
import re
import requests
import h5py

import sys
import os

module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

In [4]:
from app.utils.naming import component_to_csv_file, format_component_name

In [5]:
with open('../manifest/manifest.json') as f:
    manifest = json.load(f)

In [8]:
def create_stream(p, c, col=None):
    stream = {
        'urlCode': 'API_PY',
        'endpoint': f'/stat/v1/data?product={p["product"]}&component={c["name"]}',
        'dataType': c['dataType'],
        'keywords': c['keywords'],
        'description': ''
    }
    if col:
        stream['endpoint'] += f'&field={col}'
    return stream

In [11]:
def generate_streams(manifest, folder='../../data/live/'):
    folder = Path(folder)
    streams = []
    for p in manifest:
        for c in p['components']:
            # Register the component
            filepath = component_to_csv_file(folder, p['product'], c['name'])
            df = pd.read_csv(filepath, index_col=0)
            streams.append(create_stream(p, c))
            
            # Each field in a component csv file should be registered separately as individual data streams
            if len(df.columns) > 1:
                for col in df.columns:
                    stream = create_stream(p, c, col)
                    col = format_component_name(col)
                    # female___1-14 years: 2 separate keywords
                    extra_keywords = col.split('___') if '___' in col else [col]
                    stream['keywords'] = stream['keywords'] + extra_keywords
                    streams.append(stream)
    return streams

streams = generate_streams(manifest)

In [31]:
def test_streams(streams, base_url='http://localhost:3000'):
    # Can the endpoints be accessed?
    for s in streams:
        response = requests.get(base_url + s['endpoint'])
        assert len(response.json())
        
    # There should be 15 age_group/gender/scotland stream, 1 overall and 14 age_group x gender for covid_deaths
    subset = [s for s in streams if all(k in s['keywords'] for k in {'scotland', 'covid_deaths', 'age_group', 'gender'})]
    assert len(subset) == 15
    assert len([s for s in subset if 'male' in s['keywords']]) == 7
    assert len([s for s in subset if 'female' in s['keywords']]) == 7
    assert len([s for s in subset if '1_14_years' in s['keywords']]) == 2
        
test_streams(streams)

## Register

In [38]:
def get_token():
    token = None
    try:
        res = requests.post('http://localhost:2000/api/v1/auth/login', {'password': "zCEEwRSZGaSG2uL2", 'email': "phong@admin.com"})
        if res and res.json() and res.json()['token']:
            token = res.json()['token']

    except ConnectionError as e:
        print("token request: error = ", e)

    except Exception as e:
        print("Something went wrong", e)

    else:
        return token

token = get_token()

In [56]:
data_to_register = {"urlCode":"API_PY","endpoint":"e3","dataType":"timeseries","description":"abc","keywords":["xl"]}

In [49]:
token

'eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjYwMzZkMDJkYzg2ZTkzZGZmY2Y0NWE5YSIsInJvbGUiOiJhZG1pbiIsInBlcm1pc3Npb25zIjp7InVzZXIiOnsicmVhZDpvd24iOlsiaWQiXSwidXBkYXRlOmFueSI6WyJwYXNzd29yZCIsInJvbGUiXSwicmVhZDphbnkiOlsiKiJdLCJjcmVhdGU6YW55IjpbIioiXSwiZGVsZXRlOmFueSI6WyIqIl19fSwiaWF0IjoxNjE0ODk0NjE5LCJleHAiOjI1NjE1Nzk0MTl9.M-B5vktEnevmgTOH5IDP8mZoYWV4Lw0Qh99KtQxIMPqk3y6Sctnn1C20e83_n5hTkLev7XxkUztDuSBB9wXJoAV8zImZWNO-x9y6Hf0cRx9nqG8BUklDdz2y5n4IVwfOsuAKToaXOs6Hy3h8mkjLA48Bu_BHIGeQj1-feESwOQE1jWT1C4ZxqCOmtanOQrth4iiYN5MsmO8ujlIoRDRB5REHfoNzndpE4xtD05-vyA3rdNhgRrtkT8cc2hYfaEkw1Z6MH9bDAPW8iIbhGqKOn2SwFQiU4i9W9JlERykeQv2sSrRdt7laXe8zUTorot35DOnwZllWjyf1F95aZHc67EHqdfeIEgTFbh8SEWR-XZj841UUVh29uGWtoXqk87TmfrKkC6l_qa7TgPzRRDMB6lwzWoZxsi7DvM4MbWEo_38ThpuxT-yQp1e-_h5e8jFPeR3Lgt_BSlZxt_i0cj1Ozv9xHEFQkRPu8XhT4h9DnxzVHVP6wcXGaQuEGrrKahplAwpmMqcCu-PZqTdWanOyBZqnLpoXc4Me6dnz_hJDbViFizfkIhkX6dr7SyCvTNTQ1kldPdA2zCuf0VP3FlJC7GDtRoLlY821kW8jCE91_zWck4sE5toyx6DEAZ5grPZteYaowSasllqyU_bMFnHq-3St5HswzxNZKac6c

In [58]:
def register(data, token):
    url = 'http://localhost:2000/api/v1/ontology/data'
    headers = {'Authorization': 'Bearer ' + token}
    try:
        response = requests.post(url, data, headers=headers)
        print(response.json())
    except Exception as e:
        print(e)

register(data_to_register, token)

{'message': 'keywords must be an array', 'status': 400, 'code': 'INVALID_QUERY_PARAMETERS'}
