# Analysis

## Setup

Zuerst laden wir die benötigten Daten herunter und initialisieren die genutzten Python-Objekte.

In [None]:
# Names of the tables that are expected in the database. For each name the CSV
# file "<name>.csv" is loaded into the table of the same name further down.
tables = [
    "careplans", "conditions", "devices", "encounters", "imaging_studies",
    "immunizations", "medications", "observations", "patients", "procedures",
]

In [None]:
!mkdir -p data/allergy

In [None]:
from urllib.request import urlopen
import os

# Repository-relative paths of all CSV files that have to be available locally.
files = [
    "data/allergy/careplans.csv", "data/allergy/conditions.csv",
    "data/allergy/devices.csv", "data/allergy/disease.csv",
    "data/allergy/encounters.csv", "data/allergy/imaging_studies.csv",
    "data/allergy/immunizations.csv", "data/allergy/medications.csv",
    "data/allergy/observations.csv", "data/allergy/patients.csv",
    "data/allergy/payers.csv", "data/allergy/procedures.csv",
    "data/allergy/providers.csv", "data/allergy/supplies.csv",
]

def ensure_file_has_been_downloaded(filename):
    """Download ``filename`` from the project repository unless it already exists.

    Args:
        filename: Path of the file relative to the repository root, for
            example ``"data/allergy/patients.csv"``. The local copy is stored
            under the same relative path one directory above the current
            working directory.

    Raises:
        OSError: If the target directory cannot be created or the download
            fails (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    full_filename = "../" + filename

    # An existing file is kept as it is; nothing is fetched in that case.
    if os.path.isfile(full_filename):
        print("File {} already exists, skipping download".format(filename))
        return

    print("Downloading {}".format(filename))

    # Opening the output file fails when its directory is missing, so create it first.
    os.makedirs(os.path.dirname(full_filename), exist_ok=True)

    url = "https://raw.githubusercontent.com/Fuenfgeld/DMA2022DataProjectC/main/" + filename
    download_file(url, full_filename)

def download_file(url, filename):
    """Fetch ``url`` and store the response body in the local file ``filename``.

    Args:
        url: Address that is opened with ``urlopen``.
        filename: Path of the file the response body is written to. An
            existing file is overwritten.

    Raises:
        OSError: If the request fails (``urllib.error.URLError`` is a subclass
            of ``OSError``) or the target file cannot be written.
    """
    # Read the complete response before touching the target file, so that a
    # failed request does not leave an empty or truncated file behind.
    with urlopen(url) as response:
        payload = response.read()

    with open(filename, 'wb') as out_file:
        out_file.write(payload)

# extract.py is imported further down in this notebook; fetch it from the
# repository only when it is not present in the working directory yet.
if not os.path.isfile("extract.py"):
    download_file(
        "https://raw.githubusercontent.com/Fuenfgeld/DMA2022DataProjectC/main/src/extract.py",
        "extract.py"
    )

# Make every listed CSV file available locally.
for file in files:
    ensure_file_has_been_downloaded(file)

In [None]:
import json
import time

def timestampInMilliseconds():
    """Return the current time since the epoch, rounded to whole milliseconds."""
    milliseconds = time.time() * 1000
    return int(round(milliseconds))

class Logger:
    """Collects JSON log messages and time measurements.

    Every message is printed when it is logged and additionally kept in
    ``logs`` so that all messages can be written to a file afterwards.

    Attributes:
        timings: List of dicts with the keys ``identifier``, ``description``,
            ``start`` and ``end`` (``None`` while the measurement is running).
        logs: List of the JSON-encoded messages logged so far.
    """

    def __init__(self):
        # Create the lists per instance: class-level lists would be shared by
        # every Logger, mixing the entries of independent loggers.
        self.timings = []
        self.logs = []

    def print(self, message):
        """Write ``message`` to standard output."""
        print(message)

    def startTimeMeasurement(self, identifier, description):
        """Start a time measurement.

        Args:
            identifier: Key that is passed to ``endTimeMeasurement`` later on.
            description: Text used by ``logTimings`` to describe the measurement.
        """
        self.timings.append({
            "identifier": identifier,
            "description": description,
            "start": timestampInMilliseconds(),
            "end": None,
        })

    def endTimeMeasurement(self, identifier):
        """Set the end time of the measurement started with ``identifier``.

        Does nothing when no measurement with that identifier exists.
        """
        matching = [timing for timing in self.timings if timing["identifier"] == identifier]
        if not matching:
            return

        # Prefer a measurement that is still running, so that a re-used
        # identifier ends the open measurement. Without an open one, the first
        # match gets a new end time.
        target = next((timing for timing in matching if timing["end"] is None), matching[0])
        target["end"] = timestampInMilliseconds()

    def log(self, message, params = None, type = "info"):
        """Print a message as JSON and remember it for ``writeToFile``.

        Args:
            message: Text of the log entry.
            params: Optional additional data; has to be JSON serializable.
            type: Kind of the entry, ``"info"`` by default.
        """
        jsonMessage = json.dumps(
            {"type": type, "time": timestampInMilliseconds(), "message": message, "params": params},
            ensure_ascii=False
        )
        self.print(jsonMessage)
        self.logs.append(jsonMessage)

    def logError(self, message, params = None):
        """Log ``message`` with the type ``"error"``."""
        self.log(message, params, "error")

    def logTimings(self):
        """Log the duration of every finished time measurement."""
        for timing in self.timings:
            # A measurement that was never ended has no duration to report.
            if timing['end'] is None:
                continue

            usedTime = timing['end'] - timing['start']
            message = f"⏳ {timing['description']} in {usedTime}ms"
            self.logWithTiming(message, usedTime)

    def logWithTiming(self, message, timingInMilliseconds):
        """Log ``message`` with the duration attached as ``timingInMilliseconds``."""
        self.log(message, {"timingInMilliseconds": timingInMilliseconds})

    def writeToFile(self, fileName):
        """Write all messages logged so far to ``fileName`` as one JSON array.

        Missing parent directories of ``fileName`` are created.
        """
        import os

        directory = os.path.dirname(fileName)
        if directory:
            os.makedirs(directory, exist_ok=True)

        with open(fileName, 'w', encoding='utf-8') as fileHandle:
            # Each entry already is a JSON document, so joining them with
            # commas inside brackets yields a JSON array.
            messages = ",\n".join(self.logs)
            fileHandle.write(f"[\n{messages}\n]")


In [None]:
# Logger instance used for the messages and time measurements of this notebook.
logger = Logger()

In [None]:
class TestExecutor:
    """Runs test callables and logs whether each of them succeeded."""

    # Logger handed to the tests; its log output is discarded.
    testLogger = None
    # Logger that receives the outcome of each test.
    resultLogger = None

    def __init__(self, logger):
        """Create an executor that reports test outcomes to ``logger``."""
        # The result logger is used to log whether or not a test was successful.
        self.resultLogger = logger

        # The test logger is handed to the unit tests so that they do not log any output.
        # We therefore override the log method so that it does not print anything.
        # The replacement is assigned on the instance and is therefore called
        # without ``self``; it accepts any arguments so that every call to
        # ``log`` (including the one made by ``logError``) is silently dropped.
        self.testLogger = Logger()
        self.testLogger.log = lambda *_args, **_kwargs: None

    def execute(self, description, test):
        """Run ``test`` and log its outcome.

        Args:
            description: Text naming the test in the log message.
            test: Callable that takes a logger and raises an exception on failure.
        """
        try:
            test(self.testLogger)
        except Exception as exception:
            self.resultLogger.log(f"❌ Test failed: {description}", exception.args)
        else:
            self.resultLogger.log(f"✅ Test ran successfully: {description}")

In [None]:
# Executor that reports the outcome of each test to the notebook's logger.
testExecutor = TestExecutor(logger)

## Mit Datenbank verbinden

In [None]:
import extract
import time

# File name handed to extract.connect_to_db.
databaseFile = "allergy.sqlite"

# Time the connection step. Going by the description text, connect_to_db
# presumably also creates the tables — confirm in extract.py.
logger.startTimeMeasurement('open-db', 'Connected to db and created tables')
connection = extract.connect_to_db(logger, databaseFile)
logger.endTimeMeasurement('open-db')

In [None]:
def test_sqliteConnection(_logger):
    """Check that every table listed in ``tables`` exists in the connected database.

    Args:
        _logger: Logger handed in by the test executor; not used by this test.

    Raises:
        Exception: If at least one expected table is missing. The list of the
            missing table names is passed as the second argument.
    """
    cursor = connection.cursor()
    # sqlite_master is understood by every SQLite version, whereas the
    # sqlite_schema alias is only available since SQLite 3.33.
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tablesInDb = {row[0] for row in cursor.fetchall()}

    missingTables = [table for table in tables if table not in tablesInDb]
    if missingTables:
        raise Exception('Table not found', missingTables)

# Run the check through the executor so that its outcome is written to the log.
testExecutor.execute('Test connection to database', test_sqliteConnection)

## Daten in Datenbank laden

In [None]:
# Process one CSV file per table and commit after each table; the whole loop
# is timed as 'load-data'.
logger.startTimeMeasurement('load-data', 'Loading data into db')
for table in tables:
    # presumably inserts the rows of the CSV file into the table — confirm in extract.py
    extract.insert_values_to_table(logger, connection.cursor(), table, "../data/allergy/" + table + ".csv")
    connection.commit()
logger.endTimeMeasurement('load-data')

## Aufräumen & Logs speichern

In [None]:
# Close the database connection, then log the recorded time measurements and
# write all collected log messages to the release artefacts.
connection.close()
logger.logTimings()
logger.writeToFile("../artefacts-for-release/analysis-log.json")