In [1]:
from pathlib import Path
import json
import os
from tinydb import TinyDB
import pandas as pd

# All outputs live under <working directory>/results; the key-value JSON
# files used below are kept in its "kvdb" sub-folder.
current_dir = Path.cwd()
results_dir = current_dir / 'results'
kv_data_dir = results_dir / 'kvdb'

# Create results/kvdb (including the parent) if it is missing; a no-op otherwise.
kv_data_dir.mkdir(exist_ok=True, parents=True)

    
class DocumentDB(object):
    """Build a TinyDB file holding one denormalized document per person.

    On construction the four JSON lookups ``people.json``, ``visited.json``,
    ``sites.json`` and ``measurements.json`` are read from the module-level
    ``kv_data_dir`` directory, joined in memory, inserted into the TinyDB
    file at ``db_path`` and the database handle is closed again.

    Args:
        db_path: Location of the TinyDB JSON file to write (anything
            accepted by ``pathlib.Path``).
    """

    def __init__(self, db_path):
        people_json = kv_data_dir.joinpath('people.json')
        visited_json = kv_data_dir.joinpath('visited.json')
        sites_json = kv_data_dir.joinpath('sites.json')
        measurements_json = kv_data_dir.joinpath('measurements.json')

        self._db_path = Path(db_path)
        self._db = None

        # Loading data
        self._measurements_lookup = self._read_json(measurements_json)
        self._visit_lookup = self._read_json(visited_json)
        self._site_lookup = self._read_json(sites_json)
        self._person_lookup = self._read_json(people_json)

        self._load_db()

    @staticmethod
    def _read_json(json_path):
        """Parse one JSON file and close its handle before returning."""
        with open(json_path, encoding='utf-8') as handle:
            return json.load(handle)

    def _load_db(self):
        """Insert every person, with nested visits, into the TinyDB file.

        Each person record gets a ``visits`` list; every visit carries its
        ``site`` record and the ``measurements`` that person took there.
        The database is closed when loading finishes, including on error.

        Raises:
            KeyError: If a measurement refers to a visit id without a visit
                record, or a visit refers to a site id without a site record.
        """
        # Index the visits once by the id in front of the first ":" of each
        # lookup key. If several keys share an id the last one wins, exactly
        # as it would in a linear scan over all entries.
        visits_by_id = {
            key.split(":")[0]: visit
            for key, visit in self._visit_lookup.items()
        }

        self._db = TinyDB(self._db_path, indent=4)
        try:
            for person_id, record in self._person_lookup.items():
                # Finally, assign the visits to the person record
                record['visits'] = self._visits_for(person_id, visits_by_id)
                self._db.insert(record)
        finally:
            # Release the file even when a record could not be assembled.
            self._db.close()

    def _visits_for(self, person_id, visits_by_id):
        """Return the visit documents (site + own measurements) of one person."""
        # Group the person's measurements per visit. Ids are compared as
        # strings because the visit lookup keys are text; the dict keeps the
        # order in which visits first appear in the measurements.
        grouped = {}
        for measurement in self._measurements_lookup.values():
            if measurement['person_id'] == person_id:
                visit_key = str(measurement['visit_id'])
                grouped.setdefault(visit_key, []).append(measurement)

        visits = []
        for visit_key, measurements in grouped.items():
            if visit_key not in visits_by_id:
                raise KeyError(
                    'no visit record found for visit_id {!r} (person {!r})'
                    .format(visit_key, person_id)
                )
            # Work on a copy: the lookup entry is shared between all people
            # who measured during this visit, so writing into it would let a
            # later person overwrite an earlier person's measurements.
            visit = dict(visits_by_id[visit_key])
            visit['site'] = self._site_lookup[str(visit['site_id'])]
            visit['measurements'] = measurements
            visits.append(visit)
        return visits

In [2]:
# Remove any database file left by an earlier run, then build a new one.
db_path = results_dir / 'patient-info.json'
if db_path.exists():
    db_path.unlink()

db = DocumentDB(db_path)

In [3]:
# Walk through for group discussion

# Load all the data into memory
people_json = kv_data_dir.joinpath('people.json')
visited_json = kv_data_dir.joinpath('visited.json')
sites_json = kv_data_dir.joinpath('sites.json')
measurements_json = kv_data_dir.joinpath('measurements.json')

# Context managers close each file as soon as it has been parsed
with open(measurements_json, encoding='utf-8') as handle:
    measurements_lookup = json.load(handle)
with open(visited_json, encoding='utf-8') as handle:
    visit_lookup = json.load(handle)
with open(sites_json, encoding='utf-8') as handle:
    site_lookup = json.load(handle)
with open(people_json, encoding='utf-8') as handle:
    person_lookup = json.load(handle)

# Index the visits by the id in front of the first ":" of each lookup key
# (if several keys share an id, the last one wins)
visits_by_id = {key.split(":")[0]: value for key, value in visit_lookup.items()}

# Get all the entries in persons
persons = person_lookup.items()

# Iterate over each person in the persons list and pull their measurement info
for person_id, record in persons:
    # Group the measurements by visit because there can be several per visit.
    # Ids are compared as strings (the lookup keys are text) and the dict
    # keeps the order in which the visits first appear.
    measurements_by_visit = {}
    for values in measurements_lookup.values():
        if values['person_id'] == person_id:
            measurements_by_visit.setdefault(str(values['visit_id']), []).append(values)

    # Build one visit document per visit id found in the measurements
    visits = []
    for visit_key, visit_measurements in measurements_by_visit.items():
        if visit_key not in visits_by_id:
            # Without this check an unknown id would reuse the previous visit
            raise KeyError(
                'no visit record found for visit_id {!r} (person {!r})'
                .format(visit_key, person_id)
            )
        # Copy the visit: the lookup entry is shared by everyone who measured
        # during it, so writing into it would overwrite other people's data
        visit = dict(visits_by_id[visit_key])
        visit['site'] = site_lookup[str(visit['site_id'])]
        visit['measurements'] = visit_measurements
        visits.append(visit)

    # Finally, assign the visits to the person record
    record['visits'] = visits
    print(json.dumps(record, indent=4))

{
    "person_id": "danforth",
    "personal_name": "Frank",
    "family_name": "Danforth",
    "visits": []
}
{
    "person_id": "dyer",
    "personal_name": "William",
    "family_name": "Dyer",
    "visits": [
        {
            "visit_id": 619,
            "site_id": "DR-1",
            "visit_date": "1927-02-08",
            "site": {
                "site_id": "DR-1",
                "latitude": -49.85,
                "longitude": -128.57
            },
            "measurements": [
                {
                    "visit_id": 619,
                    "person_id": "dyer",
                    "quantity": "rad",
                    "reading": 9.82
                },
                {
                    "visit_id": 619,
                    "person_id": "dyer",
                    "quantity": "sal",
                    "reading": 0.13
                }
            ]
        },
        {
            "visit_id": 622,
            "site_id": "DR-1",
            "visit_date": "1