# Analysis

## Setup

Zuerst laden wir die benötigten Daten herunter und initialisieren die genutzten Python-Objekte.

In [1]:
# Base names of the CSV files fetched for every dataset; the same names are
# used as the database table names further down in the notebook.
tables = ["careplans", "conditions", "observations", "patients"]

# Repository-relative dataset directories (directories, despite the name);
# each one is expected to provide one CSV per entry in `tables`.
files = ["data/others/", "data/asthma/", "data/gallstones/", "data/hypertension/"]

In [2]:
# Shell escape: create data/allergy (and missing parents) relative to the working directory.
# NOTE(review): none of the dataset directories in `files` is named "allergy", and the
# downloads in this notebook are written below ../data/ — confirm this directory is still needed.
!mkdir -p data/allergy

In [2]:
from urllib.request import urlopen
import os

def ensure_file_has_been_downloaded(filename):
    """Download ``filename`` from the project's GitHub repository unless it exists locally.

    ``filename`` is a repository-relative path. The local copy is looked up,
    and stored, at ``"../" + filename``. One status line is printed in either
    case. Returns ``None``.
    """
    local_path = "../" + filename

    # Guard clause: nothing to fetch when the file is already on disk.
    if os.path.isfile(local_path):
        print("File {} already exists, skipping download".format(filename))
        return

    print("Downloading {}".format(filename))
    remote_url = "https://raw.githubusercontent.com/Fuenfgeld/DMA2022DataProjectC/main/" + filename
    download_file(remote_url, local_path)

def download_file(url, filename):
    """Download ``url`` and store the response body at ``filename``.

    Parameters:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        filename: Destination path. Missing parent directories are created.

    Raises:
        Whatever ``urlopen`` raises for a failed request (e.g.
        ``urllib.error.URLError``) and ``OSError`` if the destination cannot
        be written. On a failed request no destination file is created.
    """
    # Fetch first: opening the destination before the request would leave an
    # empty file behind on failure, which a later "does the file exist?" check
    # would mistake for a completed download.
    with urlopen(url) as response:
        payload = response.read()

    # The dataset directories may not exist on a fresh checkout.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'wb') as out_file:
        out_file.write(payload)

# Fetch the project's extract module next to the notebook on first run; it is
# imported further down.
if not os.path.isfile("extract.py"):
    download_file(
        url="https://raw.githubusercontent.com/Fuenfgeld/DMA2022DataProjectC/main/src/extract.py",
        filename="extract.py",
    )

# Every <dataset directory>/<table>.csv combination, dataset by dataset.
csv_paths = [file + table + ".csv" for file in files for table in tables]
for csv_path in csv_paths:
    ensure_file_has_been_downloaded(csv_path)

File data/others/careplans.csv already exists, skipping download
File data/others/conditions.csv already exists, skipping download
File data/others/observations.csv already exists, skipping download
File data/others/patients.csv already exists, skipping download
File data/asthma/careplans.csv already exists, skipping download
File data/asthma/conditions.csv already exists, skipping download
File data/asthma/observations.csv already exists, skipping download
File data/asthma/patients.csv already exists, skipping download
File data/gallstones/careplans.csv already exists, skipping download
File data/gallstones/conditions.csv already exists, skipping download
File data/gallstones/observations.csv already exists, skipping download
File data/gallstones/patients.csv already exists, skipping download
File data/hypertension/careplans.csv already exists, skipping download
File data/hypertension/conditions.csv already exists, skipping download
File data/hypertension/observations.csv already exis

In [3]:
from logger import Logger
from test_executer import TestExecutor

# Single logger instance shared by the whole notebook: it is handed to the test
# executor here and to the extract helpers in the cells below.
logger = Logger()
testExecutor = TestExecutor(logger)

## Mit Datenbank verbinden

In [4]:
import extract
import time

# Database file name handed to extract.connect_to_db (a relative path, so it
# resolves against the notebook's working directory).
databaseFile = "data.sqlite"

# Time the connection step under the key 'open-db'; the second argument is the
# label that shows up when the timings are logged at the end of the notebook.
logger.startTimeMeasurement('open-db', 'Connected to db and created tables')
# NOTE(review): judging by the label above and the table check in the next cell,
# connect_to_db presumably also creates the tables — confirm in extract.py.
connection = extract.connect_to_db(logger, databaseFile)
logger.endTimeMeasurement('open-db')

In [5]:
def test_sqliteConnection(_logger):
    cursor = connection.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tablesInDb = list(map(lambda tableResult: tableResult[0], cursor.fetchall()))
    tablesInDb.sort()

    for table in tables:
        if not(table in tablesInDb):
            raise Exception('Table not found:', table)

# Run the check through the shared executor; on success it logs
# "✅ Test ran successfully: <test name>" (see the output below).
testExecutor.execute('Test connection to database', test_sqliteConnection)

{"type": "info", "time": 1656009903598, "message": "✅ Test ran successfully: Test connection to database", "params": null}


## Daten in Datenbank laden

In [6]:
# Load every <dataset directory>/<table>.csv into the database table of the same
# name, committing after each file. The whole step is timed under 'load-data'.
logger.startTimeMeasurement('load-data', 'Loading data into db')
for file in files:
    for table in tables:
        extract.insert_values_to_table(logger, connection.cursor(), table, "../"+file+ table + ".csv")
        connection.commit()
# The measurement is started once, so it is stopped exactly once, after the
# last dataset has been loaded, and covers the complete loop.
logger.endTimeMeasurement('load-data')

{"type": "info", "time": 1656009905704, "message": "🏗 Extracting data from ../data/others/careplans.csv", "params": null}
{"type": "info", "time": 1656009905731, "message": "🏗 Extracting data from ../data/others/conditions.csv", "params": null}
{"type": "info", "time": 1656009905847, "message": "🏗 Extracting data from ../data/others/observations.csv", "params": null}
{"type": "info", "time": 1656009909273, "message": "🏗 Extracting data from ../data/others/patients.csv", "params": null}
{"type": "info", "time": 1656009909290, "message": "🏗 Extracting data from ../data/asthma/careplans.csv", "params": null}
{"type": "info", "time": 1656009909305, "message": "🏗 Extracting data from ../data/asthma/conditions.csv", "params": null}
{"type": "info", "time": 1656009909501, "message": "🏗 Extracting data from ../data/asthma/observations.csv", "params": null}
{"type": "info", "time": 1656009911434, "message": "🏗 Extracting data from ../data/asthma/patients.csv", "params": null}
{"type": "info", "

## Aufräumen & Logs speichern

In [7]:
# Release the database connection now that all data has been loaded.
connection.close()
# Emit one log entry per recorded time measurement (see the output below).
logger.logTimings()
# NOTE(review): presumably persists the collected log entries as JSON; the target
# directory ../artefacts-for-release/ is not created by this notebook — confirm it exists.
logger.writeToFile("../artefacts-for-release/analysis-log.json")

{"type": "info", "time": 1656009916010, "message": "⏳ Connected to db and created tables in 24ms", "params": {"timingInMilliseconds": 24}}
{"type": "info", "time": 1656009916010, "message": "⏳ Loading data into db in 8233ms", "params": {"timingInMilliseconds": 8233}}
