# Analysis

In [None]:
# Names collected under `tables` for the allergy data set.
# NOTE(review): a later cell rebinds `tables` (with an additional "disease"
# entry) before the list is used — confirm whether this copy is still needed.
tables = [
    "careplans", "conditions", "devices", "encounters", "imaging_studies",
    "immunizations", "medications", "observations", "patients", "procedures",
]

In [None]:
!mkdir -p data/allergy

In [None]:
from urllib.request import urlopen
import os

# Repository-relative paths of the CSV files to fetch. Each path is appended to
# the raw.githubusercontent.com base URL and stored locally under "../".
files = [
    f"data/allergy/{stem}.csv"
    for stem in (
        "careplans",
        "conditions",
        "devices",
        "disease",
        "encounters",
        "imaging_studies",
        "immunizations",
        "medications",
        "observations",
        "patients",
        "payers",
        "procedures",
        "providers",
        "supplies",
    )
]

def ensure_file_has_been_downloaded(filename):
    """Download *filename* from the project repository unless it exists locally.

    Args:
        filename: Repository-relative path such as
            ``data/allergy/patients.csv``. The local copy is stored at
            ``"../" + filename``.

    Raises:
        Whatever ``download_file`` or ``os.makedirs`` raises when the file has
        to be fetched and the request or the write fails.
    """
    full_filename = "../" + filename

    if os.path.isfile(full_filename):
        # Nothing to do: previously the download ran even on this path.
        print("File {} already exists, skipping download".format(filename))
        return

    print("Downloading {}".format(filename))
    # The target directory may not exist yet on a fresh runtime.
    os.makedirs(os.path.dirname(full_filename), exist_ok=True)
    url = "https://raw.githubusercontent.com/Fuenfgeld/DMA2022DataProjectC/main/" + filename
    download_file(url, full_filename)

def download_file(url, filename):
    """Fetch *url* and store the response body at *filename*.

    The response is read completely before the destination is opened, so a
    failed request does not leave an empty file behind that a later
    ``os.path.isfile`` check would mistake for a finished download.

    Args:
        url: Address to fetch with ``urlopen``.
        filename: Path of the file to (over)write with the downloaded bytes.

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the destination cannot be written.
    """
    with urlopen(url) as response:
        payload = response.read()

    with open(filename, 'wb') as out_file:
        out_file.write(payload)

# Fetch the project's extract.py into the working directory unless a local
# copy is already present; a later cell imports it as `extract`.
if not os.path.isfile("extract.py"):
    download_file(
        "https://raw.githubusercontent.com/Fuenfgeld/DMA2022DataProjectC/main/src/extract.py",
        "extract.py"
    )

# Run the download check for every CSV path listed in `files`.
for file in files:
    ensure_file_has_been_downloaded(file)

In [None]:
import json
import time

def timestampInMilliseconds():
    """Return the current ``time.time()`` value as whole milliseconds."""
    seconds_now = time.time()
    milliseconds_now = round(seconds_now * 1000)
    return int(milliseconds_now)

class Logger:
    """Collects JSON-encoded log entries and named timestamps for one run.

    Every entry is printed when it is logged and also kept in ``logs`` so the
    whole run can be written out with ``writeToFile``.
    """

    def __init__(self):
        # Per-instance state. As class attributes these containers were shared
        # by every Logger, so a fresh Logger() started with the entries and
        # timings of all earlier ones.
        self.timings = {}  # timing name -> timestamp in milliseconds
        self.logs = []  # JSON-encoded entries, in logging order

    def print(self, message):
        """Output hook used by ``log``; writes *message* to standard output."""
        print(message)

    def addTiming(self, name):
        """Store the current timestamp (in milliseconds) under *name*."""
        self.timings[name] = timestampInMilliseconds()

    def log(self, message, params = None, type = "info"):
        """Print and remember one log entry.

        Args:
            message: Text of the entry.
            params: Optional JSON-serialisable extra data.
            type: Entry category stored in the ``"type"`` field.
        """
        jsonMessage = json.dumps(
            {"type": type, "time": timestampInMilliseconds(), "message": message, "params": params},
            # Keep non-ASCII characters (e.g. the hourglass emoji) readable.
            ensure_ascii=False
        )
        self.print(jsonMessage)
        self.logs.append(jsonMessage)

    def logError(self, message, params = None):
        """Log *message* as an entry of type ``"error"``."""
        self.log(message, params, "error")

    def logTimings(self):
        """Log how long connecting to the db and loading the data took.

        Raises:
            KeyError: If one of the ``"start"``, ``"connected-to-db"`` or
                ``"data-loaded"`` timings has not been recorded.
        """
        dbConnectionTime = self.timings["connected-to-db"] - self.timings["start"]
        self.logWithTiming(f"⏳ Connected to db in {dbConnectionTime}ms", dbConnectionTime)

        dataLoadingTime = self.timings["data-loaded"] - self.timings["connected-to-db"]
        self.logWithTiming(f"⏳ Data loaded in {dataLoadingTime}ms", dataLoadingTime)

    def logWithTiming(self, message, timingInMilliseconds):
        """Log *message* with the duration attached as ``timingInMilliseconds``."""
        self.log(message, {"timingInMilliseconds": timingInMilliseconds})

    def writeToFile(self, fileName):
        """Write all collected entries to *fileName* as one JSON array."""
        with open(fileName, 'w', encoding='utf-8') as fileHandle:
            # Each entry is already a JSON document; join them into an array.
            messages = ",\n".join(self.logs)
            fileHandle.write(f"[\n{messages}\n]")


In [None]:
import extract
import time

# Tables to populate: each name is handed to extract.insert_values_to_table
# together with its CSV at ../data/allergy/<name>.csv.
tables = [
    "careplans", "conditions", "devices", "disease", "encounters",
    "imaging_studies", "immunizations", "medications", "observations",
    "patients", "procedures",
]

# Connect to allergy.sqlite and load every table's CSV, recording the
# 'start', 'connected-to-db' and 'data-loaded' marks that logTimings reads.
logger = Logger()

logger.addTiming('start')
connection = extract.connect_to_db(logger, "allergy.sqlite")
logger.addTiming('connected-to-db')

for table in tables:
    extract.insert_values_to_table(
        logger,
        connection.cursor(),
        table,
        f"../data/allergy/{table}.csv",
    )
    # Commit after each table.
    connection.commit()
logger.addTiming('data-loaded')

In [None]:
# Close the connection opened by extract.connect_to_db, log the connection and
# loading durations, then write all collected log entries to a JSON file.
connection.close()
logger.logTimings()
logger.writeToFile("../artefacts-for-release/analysis-log.json")