<a href="https://colab.research.google.com/github/Fuenfgeld/TeamDataScDatenmanagementUndArchivierung/blob/main/AbelHodelinHernandezWoche2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from functools import reduce
import sqlite3
from sqlite3 import Error
import csv

from google.colab import drive
drive.mount("/content/drive", force_remount=True)


Mounted at /content/drive


In [2]:
# list elements in directory
!ls /content/drive/MyDrive/csv_files/allergy/
material_path = '/content/drive/MyDrive/csv_files/allergy/'
patient_type = "allergy"

 allergy_test.db	 encounters.gsheet	    patients.csv
'careplans (1).gsheet'	 imaging_studies.csv	    payers.csv
 careplans.csv		 imaging_studies.gsheet     payer_transitions.csv
 careplans.gsheet	 immunizations.csv	    procedures.csv
 conditions.csv		 medications.csv	    procedures.gsheet
 devices.csv		'observations (1).gsheet'   providers.csv
 disease.csv		 observations.csv	    source_allergy_test.db
 disease.gsheet		 observations.gsheet	    supplies.csv
 encounters.csv		 organizations.csv


In [3]:
# source database path
DB_FILE_PATH = f"{material_path}{patient_type}_test.db"
# folder contains csv files
csv_path = f"{material_path}"
# print(csv_path)

# DB_FILE_PATH, csv_path

In [4]:
# dictionary key: name of table, values: sql query to create table
sql_table = {}
sql_index = {}

In [5]:
sql_table["disease"] = """CREATE TABLE IF NOT EXISTS disease (
                           START DATE,
                           STOP DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id)
                           
                       );"""
# kein Index für id, da die db erzeugt ein autematisch für die Primary Keys
# Die andere Indizes sind je nach praktischer Nutzung/Interese/Projekt
sql_index["disease_patient"] = """CREATE INDEX ix_disease_patient on disease(PATIENT);"""
sql_index["disease_encounter"] = """CREATE INDEX ix_disease_encounter on disease(ENCOUNTER);"""
sql_index["disease_code"] = """CREATE INDEX ix_disease_code on disease(CODE);"""
sql_index["disease_description"] = """CREATE INDEX ix_disease_description on disease(DESCRIPTION);"""

In [6]:
sql_table["patients"] = """CREATE TABLE IF NOT EXISTS patients (
                           Id STRING PRIMARY KEY,
                           BIRTHDATE DATE,
                           DEATHDATE DATE,
                           SSN STRING,
                           DRIVERS STRING,
                           PASSPORT STRING,
                           PREFIX STRING,
                           FIRST STRING,
                           LAST STRING,
                           SUFFIX STRING,
                           MAIDEN STRING,
                           MARITAL STRING,
                           RACE STRING,
                           ETHNICITY STRING,
                           GENDER STRING,
                           BIRTHPLACE STRING,
                           ADDRESS STRING,
                           CITY STRING,
                           STATE STRING,
                           COUNTRY STRING,
                           ZIP STRING,
                           LAT INTEGER,
                           LON INTEGER,
                           HEALTHCARE_EXPENSES INTEGER,
                           HEALTHCARE_COVERAGE INTEGER
                       );"""
# kein Index für id, da die db erzeugt ein autematisch für die Primary Keys
# Die andere Indizes sind je nach praktischer Nutzung/Interese/Projekt
sql_index["patients_race"] = """CREATE INDEX ix_patients_race on patients(RACE);""" 
sql_index["patients_ethnicity"] = """CREATE INDEX ix_patients_ethnicity on patients(ETHNICITY);"""
sql_index["patients_city"] = """CREATE INDEX ixpatients_city on patients(CITY);"""
sql_index["patients_gender"] = """CREATE INDEX ix_patients_gender on patients(GENDER);"""
sql_index["patients_country"] = """CREATE INDEX ix_patients_country on patients(COUNTRY);"""
sql_index["patients_zip"] = """CREATE INDEX ix_patients_zip on patients(ZIP);"""

In [7]:
sql_table["encounters"] = """CREATE TABLE IF NOT EXISTS encounters (
                           Id STRING PRIMARY KEY,
                           START DATE,
                           STOP DATE,
                           PATIENT STRING,
                           ORGANIZATIONS STRING,
                           PROVIDER STRING,
                           PAYER STRING,
                           ENCOUNTERCLASS STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           BASE_ENCOUNTER_COST INTEGER,
                           TOTAL_CLAIM_COST INTEGER,
                           PAYER_COVERAGE INTEGER,
                           REASONCODE STRING,
                           REASONDESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                       );"""
sql_index["encounters_patient"] = """CREATE INDEX ix_encounters_patients on encounters(PATIENT);""" 
sql_index["encounters_description"] = """CREATE INDEX ix_encounters_description on encounters(DESCRIPTION);"""
sql_index["encounters_code"] = """CREATE INDEX ix_encounters_code on encounters(CODE);"""

In [8]:
sql_table["careplans"] = """CREATE TABLE IF NOT EXISTS careplans (
                           Id STRING PRIMARY KEY,
                           START DATE,
                           STOP DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           REASONCODE STRING,
                           REASONDESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       );"""
sql_index["careplans_patient"] = """CREATE INDEX ix_careplans_patients on careplans(PATIENT);"""
sql_index["careplans_encounters"] = """CREATE INDEX ix_careplans_encounters on careplans(ENCOUNTER);"""
sql_index["careplans_description"] = """CREATE INDEX ix_careplans_description on careplans(DESCRIPTION);"""
sql_index["careplans_code"] = """CREATE INDEX ix_careplans_code on careplans(CODE);"""
sql_index["careplans_reasondescription"] = """CREATE INDEX ix_careplans_reasondescription on careplans(REASONDESCRIPTION);"""
sql_index["careplans_reasoncode"] = """CREATE INDEX ix_careplans_reasoncode on careplans(REASONCODE);"""

In [9]:
sql_table["conditions"] = """CREATE TABLE IF NOT EXISTS conditions (
                           START DATE,
                           STOP DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       );"""
sql_index["conditions_patient"] = """CREATE INDEX ix_conditions_patient on conditions(PATIENT);"""
sql_index["conditions_encounter"] = """CREATE INDEX ix_conditions_encounter on conditions(ENCOUNTER);"""
sql_index["conditions_code"] = """CREATE INDEX ix_conditions_code on conditions(CODE);"""
sql_index["conditions_description"] = """CREATE INDEX ix_conditions_description on conditions(DESCRIPTION);"""

In [10]:
sql_table["procedures"]= """CREATE TABLE IF NOT EXISTS procedures (
                           DATE DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           BASE_COST INTEGER,
                           REASONCODE STRING,
                           REASONDESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       );"""
sql_index["procedures_patient"] = """CREATE INDEX ix_procedures_patient on procedures(PATIENT);"""
sql_index["procedures_encounter"] = """CREATE INDEX ix_procedures_encounter on procedures(ENCOUNTER);"""
sql_index["procedures_code"] = """CREATE INDEX ix_procedures_code on procedures(CODE);"""
sql_index["procedures_description"] = """CREATE INDEX ix_procedures_description on procedures(DESCRIPTION);"""

In [11]:
sql_table["observations"]= """CREATE TABLE IF NOT EXISTS observations (
                           DATE DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           VALUE STRING,
                           UNITS STRING,
                           TYPE STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       );"""
sql_index["observations_patient"] = """CREATE INDEX ix_observations_patient on observations(PATIENT);"""
sql_index["observations_encounter"] = """CREATE INDEX ix_observations_encounter on observations(ENCOUNTER);"""
sql_index["observations_code"] = """CREATE INDEX ix_observations_code on observations(CODE);"""
sql_index["observations_description"] = """CREATE INDEX ix_observations_description on observations(DESCRIPTION);"""

In [12]:
sql_table["devices"] = """CREATE TABLE IF NOT EXISTS devices (
                           START DATE,
                           STOP DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           UDI STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id)
                           
                       );"""
sql_index["devices_patient"] = """CREATE INDEX ix_devices_patient on devices(PATIENT);"""
sql_index["devices_encounter"] = """CREATE INDEX ix_devices_encounter on devices(ENCOUNTER);"""
sql_index["devices_code"] = """CREATE INDEX ix_devices_code on devices(CODE);"""
sql_index["devices_description"] = """CREATE INDEX ix_devices_description on devices(DESCRIPTION);"""

In [13]:
sql_table["imaging_studies"] = """CREATE TABLE IF NOT EXISTS imaging_studies (
                           Id STRING PRIMARY KEY,
                           DATE DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           BODYSITE_CODE STRING,
                           BODYSITE_DESCRIPTION STRING,
                           MODALITY_CODE STRING,
                           MODALITY_DESCRIPTION STRING,
                           SOP_CODE STRING,
                           SOP_DESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       );"""
sql_index["imaging_studies_patient"] = """CREATE INDEX ix_imaging_studies_patient on imaging_studies(PATIENT);"""
sql_index["imaging_studies_encounter"] = """CREATE INDEX ix_imaging_studies_encounter on imaging_studies(ENCOUNTER);"""
sql_index["imaging_studies_code"] = """CREATE INDEX ix_imaging_studies_code on imaging_studies(SOP_CODE);"""
sql_index["imaging_studies_description"] = """CREATE INDEX ix_imaging_studies_description on imaging_studies(SOP_DESCRIPTION);"""

In [14]:
sql_table["immunizations"]= """CREATE TABLE IF NOT EXISTS immunizations(
                           DATE DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           BASE_COST INTEGER,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       );"""
sql_index["immunizations_patient"] = """CREATE INDEX ix_immunizations_patient on immunizations(PATIENT);"""
sql_index["immunizations_encounter"] = """CREATE INDEX ix_immunizations_encounter on immunizations(ENCOUNTER);"""
sql_index["immunizations_code"] = """CREATE INDEX ix_immunizations_code on immunizations(CODE);"""
sql_index["immunizations_description"] = """CREATE INDEX ix_immunizations_description on immunizations(DESCRIPTION);"""

# Import csv files into database tables

In [15]:
def connect_to_db(db_file):
    sqlite3_conn = None
    try:
        sqlite3_conn = sqlite3.connect(db_file)
        return sqlite3_conn

    except Error as err:
        print(err)

        if sqlite3_conn is not None:
            sqlite3_conn.close()


In [16]:
def insert_values_to_table(cursor, table_name, csv_file_path):
    # Read CSV file content
    values_to_insert = open_csv_file(csv_file_path)

    # Insert to table
    if len(values_to_insert) > 0:
        column_names, column_numbers = get_column_names_from_db_table(cursor, table_name)

        values_str = '?,' * column_numbers
        # print("*****", values_str, column_names, column_numbers)
        values_str = values_str[:-1]

        sql_query = 'INSERT OR REPLACE INTO ' + table_name + '(' + column_names + ') VALUES (' + values_str + ')'


        cursor.executemany(sql_query, values_to_insert)
        # conn.commit()

        print('SQL insert process finished')
    else:
        print('Nothing to insert')

        # conn.close()

    # else:
        # print('Connection to database failed')

In [17]:
def open_csv_file(csv_file_path):

    with open(csv_file_path, 'r', encoding='utf-8') as csv_file:
        reader = csv.reader(csv_file)
        next(reader)

        data = list()
        for row in reader:
            data.append(row)
        return data

In [18]:
def get_column_names_from_db_table(sql_cursor, table_name):
    table_column_names = 'PRAGMA table_info(' + table_name + ');'
    sql_cursor.execute(table_column_names)
    table_column_names = sql_cursor.fetchall()

    column_count = len(table_column_names)

    column_names = list()

    for name in table_column_names:
        column_names.append(name[1])

    return ', '.join(column_names), column_count

In [19]:
print(sql_table.keys())
print(sql_index.keys())

dict_keys(['disease', 'patients', 'encounters', 'careplans', 'conditions', 'procedures', 'observations', 'devices', 'imaging_studies', 'immunizations'])
dict_keys(['disease_patient', 'disease_encounter', 'disease_code', 'disease_description', 'patients_race', 'patients_ethnicity', 'patients_city', 'patients_gender', 'patients_country', 'patients_zip', 'encounters_patient', 'encounters_description', 'encounters_code', 'careplans_patient', 'careplans_encounters', 'careplans_description', 'careplans_code', 'careplans_reasondescription', 'careplans_reasoncode', 'conditions_patient', 'conditions_encounter', 'conditions_code', 'conditions_description', 'procedures_patient', 'procedures_encounter', 'procedures_code', 'procedures_description', 'observations_patient', 'observations_encounter', 'observations_code', 'observations_description', 'devices_patient', 'devices_encounter', 'devices_code', 'devices_description', 'imaging_studies_patient', 'imaging_studies_encounter', 'imaging_studies_cod

In [20]:
# import csv into db
conn = connect_to_db(DB_FILE_PATH)
if conn is not None:
        cursor = conn.cursor()
        for name in sql_table.keys():
          csv_file = csv_path + name + ".csv"
          print(name, csv_file)
          
          cursor.execute(sql_table[name])

          insert_values_to_table(cursor, name, csv_file)
        # conn.commit()

        #for ix_name in sql_index.keys():
         # cursor.execute(sql_index[ix_name])
else:
        print('Connection to database failed')

disease /content/drive/MyDrive/csv_files/allergy/disease.csv
SQL insert process finished
patients /content/drive/MyDrive/csv_files/allergy/patients.csv
SQL insert process finished
encounters /content/drive/MyDrive/csv_files/allergy/encounters.csv
SQL insert process finished
careplans /content/drive/MyDrive/csv_files/allergy/careplans.csv
SQL insert process finished
conditions /content/drive/MyDrive/csv_files/allergy/conditions.csv
SQL insert process finished
procedures /content/drive/MyDrive/csv_files/allergy/procedures.csv
SQL insert process finished
observations /content/drive/MyDrive/csv_files/allergy/observations.csv
SQL insert process finished
devices /content/drive/MyDrive/csv_files/allergy/devices.csv
SQL insert process finished
imaging_studies /content/drive/MyDrive/csv_files/allergy/imaging_studies.csv
SQL insert process finished
immunizations /content/drive/MyDrive/csv_files/allergy/immunizations.csv
SQL insert process finished


In [21]:
pd.read_csv(f"{csv_path}disease.csv").head()

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION
0,1943-02-26,,3575b903-dbd0-1d55-6146-9e8aa4ed52a5,de8159bf-90ae-c9a6-dd8d-f1de7b7972a7,419474003,Allergy to mould
1,1943-02-26,,3575b903-dbd0-1d55-6146-9e8aa4ed52a5,de8159bf-90ae-c9a6-dd8d-f1de7b7972a7,232350006,House dust mite allergy
2,1943-02-26,,3575b903-dbd0-1d55-6146-9e8aa4ed52a5,de8159bf-90ae-c9a6-dd8d-f1de7b7972a7,232347008,Dander (animal) allergy
3,1943-02-26,,3575b903-dbd0-1d55-6146-9e8aa4ed52a5,de8159bf-90ae-c9a6-dd8d-f1de7b7972a7,418689008,Allergy to grass pollen
4,1943-02-26,,3575b903-dbd0-1d55-6146-9e8aa4ed52a5,de8159bf-90ae-c9a6-dd8d-f1de7b7972a7,419263009,Allergy to tree pollen


In [22]:
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())

[('disease',), ('patients',), ('encounters',), ('careplans',), ('conditions',), ('procedures',), ('observations',), ('devices',), ('imaging_studies',), ('immunizations',)]


In [23]:
cursor.execute("SELECT * FROM immunizations limit 10;")
rows = cursor.fetchall()
for row in rows:
  print(row)

('2011-04-11T11:40:19Z', '3575b903-dbd0-1d55-6146-9e8aa4ed52a5', '905865c3-d0fd-aa37-b2dd-8f297e43a089', 140, 'Influenza  seasonal  injectable  preservative free', 140.52)
('2012-04-16T11:40:19Z', '3575b903-dbd0-1d55-6146-9e8aa4ed52a5', 'a30d46d7-823d-f721-60d5-58c3f9b4e519', 140, 'Influenza  seasonal  injectable  preservative free', 140.52)
('2013-04-22T11:40:19Z', '3575b903-dbd0-1d55-6146-9e8aa4ed52a5', 'd37cc838-ee0b-2e4d-8c96-db68d01e0b29', 140, 'Influenza  seasonal  injectable  preservative free', 140.52)
('2013-04-22T11:40:19Z', '3575b903-dbd0-1d55-6146-9e8aa4ed52a5', 'd37cc838-ee0b-2e4d-8c96-db68d01e0b29', 113, 'Td (adult) preservative free', 140.52)
('2014-04-28T11:40:19Z', '3575b903-dbd0-1d55-6146-9e8aa4ed52a5', 'e7defffa-fc95-542e-03a3-485e20469db4', 140, 'Influenza  seasonal  injectable  preservative free', 140.52)
('2015-05-04T11:40:19Z', '3575b903-dbd0-1d55-6146-9e8aa4ed52a5', 'c57b93a2-0302-4965-9e2c-713b84e7c24b', 140, 'Influenza  seasonal  injectable  preservative free'

In [24]:
# schow created indices
cursor.execute("select name, tbl_name FROM sqlite_master WHERE type='index' and name like 'ix%'")
for row in cursor.fetchall():
  print(row)

('ix_disease_patient', 'disease')
('ix_disease_encounter', 'disease')
('ix_disease_code', 'disease')
('ix_disease_description', 'disease')
('ix_patients_race', 'patients')
('ix_patients_ethnicity', 'patients')
('ixpatients_city', 'patients')
('ix_patients_gender', 'patients')
('ix_patients_country', 'patients')
('ix_patients_zip', 'patients')
('ix_encounters_patients', 'encounters')
('ix_encounters_description', 'encounters')
('ix_encounters_code', 'encounters')
('ix_careplans_patients', 'careplans')
('ix_careplans_encounters', 'careplans')
('ix_careplans_description', 'careplans')
('ix_careplans_code', 'careplans')
('ix_careplans_reasondescription', 'careplans')
('ix_careplans_reasoncode', 'careplans')
('ix_conditions_patient', 'conditions')
('ix_conditions_encounter', 'conditions')
('ix_conditions_code', 'conditions')
('ix_conditions_description', 'conditions')
('ix_procedures_patient', 'procedures')
('ix_procedures_encounter', 'procedures')
('ix_procedures_code', 'procedures')
('ix_

In [25]:
conn.commit()
conn.close()