In [38]:
import sqlite3
import pandas as pd

In [39]:
#connect to DB
conn = sqlite3.connect('sqlite_example.db')
cursor = conn.cursor()
print("Opened database successfully");

Opened database successfully


In [40]:
#load CSVs
careplans = pd.read_csv('Daten/careplans.csv', sep=",")
conditions = pd.read_csv('Daten/conditions.csv', sep=",")
disease = pd.read_csv('Daten/disease.csv', sep=",")
immunizations = pd.read_csv('Daten/immunizations.csv', sep=",")
medications = pd.read_csv('Daten/medications.csv', sep=",")
observations = pd.read_csv('Daten/observations.csv', sep=",")
patients = pd.read_csv('Daten/patients.csv', sep=",")

In [41]:
#Delete Tables in case they exist
cursor.execute("DROP TABLE IF EXISTS careplans")
cursor.execute("DROP TABLE IF EXISTS conditions")
cursor.execute("DROP TABLE IF EXISTS disease")
cursor.execute("DROP TABLE IF EXISTS immunizations")
cursor.execute("DROP TABLE IF EXISTS medications")
cursor.execute("DROP TABLE IF EXISTS observations")
cursor.execute("DROP TABLE IF EXISTS patients")
cursor.execute("DROP TABLE IF EXISTS facts_table")


<sqlite3.Cursor at 0x2c40ee79c70>

In [42]:
#Create Tables
cursor.execute('''CREATE TABLE IF NOT EXISTS careplans (
                           Id STRING PRIMARY KEY,
                           START DATE,
                           STOP DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           REASONCODE STRING,
                           REASONDESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 
                       )
                       ''')


cursor.execute('''CREATE TABLE IF NOT EXISTS conditions (
                           condition_code INTEGER PRIMARY KEY AUTOINCREMENT,
                           START DATE,
                           STOP DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 
                        )
                       ''')


# cursor.execute('''CREATE TABLE IF NOT EXISTS disease (
#                            disease_code INTEGER PRIMARY KEY AUTOINCREMENT,
#                            START DATE,
#                            STOP DATE,
#                            PATIENT STRING,
#                            ENCOUNTER STRING,
#                            CODE STRING,
#                            DESCRIPTION STRING,
#                            FOREIGN KEY (PATIENT)
#                               REFERENCES patients (Id) 
#                            FOREIGN KEY (Encounter)
#                               REFERENCES encounters (Id)
#                        )
#                        ''')


cursor.execute('''CREATE TABLE IF NOT EXISTS observations (
                           observation_code INTEGER PRIMARY KEY AUTOINCREMENT,
                           DATE DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           VALUE STRING,
                           UNITS STRING,
                           TYPE STRING,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       )
                       ''')



cursor.execute('''CREATE TABLE IF NOT EXISTS patients (
                           Id STRING PRIMARY KEY,
                           BIRTHDATE DATE,
                           DEATHDATE DATE,
                           SSN STRING,
                           DRIVERS STRING,
                           PASSPORT STRING,
                           PREFIX STRING,
                           FIRST STRING,
                           LAST STRING,
                           SUFFIX STRING,
                           MAIDEN STRING,
                           MARITAL STRING,
                           RACE STRING,
                           ETHNICITY STRING,
                           GENDER STRING,
                           BIRTHPLACE STRING,
                           ADDRESS STRING,
                           CITY STRING,
                           STATE STRING,
                           COUNTY STRING,
                           ZIP STRING,
                           LAT INTEGER,
                           LON INTEGER,
                           HEALTHCARE_EXPENSES INTEGER,
                           HEALTHCARE_COVERAGE INTEGER
                       )
                       ''')

cursor.execute('''CREATE TABLE IF NOT EXISTS immunizations(
                           immunization_code INTEGER PRIMARY KEY AUTOINCREMENT,
                           DATE DATE,
                           PATIENT STRING,
                           ENCOUNTER STRING,
                           CODE STRING,
                           DESCRIPTION STRING,
                           BASE_COST INTEGER,
                           FOREIGN KEY (PATIENT)
                              REFERENCES patients (Id) 
                           FOREIGN KEY (Encounter)
                              REFERENCES encounters (Id) 

                       )
                       ''')






<sqlite3.Cursor at 0x2c40ee79c70>

In [43]:
print(pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", conn))

              name
0  sqlite_sequence
1        careplans
2       conditions
3     observations
4         patients
5    immunizations


In [44]:
print(pd.read_sql_query("PRAGMA table_info('careplans')", conn))

   cid               name    type  notnull dflt_value  pk
0    0                 Id  STRING        0       None   1
1    1              START    DATE        0       None   0
2    2               STOP    DATE        0       None   0
3    3            PATIENT  STRING        0       None   0
4    4          ENCOUNTER  STRING        0       None   0
5    5               CODE  STRING        0       None   0
6    6        DESCRIPTION  STRING        0       None   0
7    7         REASONCODE  STRING        0       None   0
8    8  REASONDESCRIPTION  STRING        0       None   0


In [45]:
#Insert into table
careplans.to_sql('careplans', conn, if_exists='append', index=False)
conditions.to_sql('conditions', conn, if_exists='append', index=False)
#disease.to_sql('disease', conn, if_exists='append', index=False)
immunizations.to_sql('immunizations', conn, if_exists='append', index=False)
medications.to_sql('medications', conn, if_exists='append', index=False)
observations.to_sql('observations', conn, if_exists='append', index=False)
patients.to_sql('patients', conn, if_exists='append', index=False)


1050

In [46]:
print(pd.read_sql_query("SELECT * FROM careplans", conn))

                                        Id       START        STOP  \
0     bbd91bf4-ca41-8fcf-fe03-562677e1cd09  2020-03-16  2020-03-16   
1     8fbb54fb-fdcd-6c3c-c38b-993c36a0271d  2020-03-16  2020-04-03   
2     3258da38-50e6-b40e-889c-dc95d2323c2f  2020-02-17  2020-03-02   
3     ebc94d26-a633-69e8-da33-a42b8e03fbbe  2020-02-24  2020-02-24   
4     1f282245-ffef-b0c6-1002-fdef343e45d8  2020-02-24  2020-03-11   
...                                    ...         ...         ...   
1638  b1d4bc75-5201-f101-4dab-9bc89dab80b8  2020-03-06  2020-03-07   
1639  b52bc20e-da2a-85b6-599e-15263ae8a85d  2020-03-07  2020-04-08   
1640  8a1226b8-0441-2d08-58f9-a4789c63a2fd  2020-03-10  2020-03-10   
1641  20fc258b-f765-47f2-fa93-077bc038ec20  2020-03-10  2020-04-07   
1642  47fbbc28-64f5-5712-81e7-0597da8cf8ba  2020-03-17  2020-03-17   

                                   PATIENT  \
0     52a250a9-24e4-ec9b-91d6-c38cf1b10fc0   
1     52a250a9-24e4-ec9b-91d6-c38cf1b10fc0   
2     ad5525c0-fdab-9

In [47]:
print(pd.read_sql_query("SELECT * FROM conditions", conn))

      condition_code       START        STOP  \
0                  1  2020-02-17  2020-02-27   
1                  2  2020-02-24  2020-03-11   
2                  3  2020-02-17  2020-02-27   
3                  4  2020-02-17  2020-02-27   
4                  5  2020-02-17  2020-02-17   
...              ...         ...         ...   
6370            6371  2020-03-25        None   
6371            6372  2020-03-25        None   
6372            6373  2020-03-25        None   
6373            6374  1991-09-20        None   
6374            6375  2016-11-18        None   

                                   PATIENT  \
0     ad5525c0-fdab-94dc-018f-d7209c72bdef   
1     7f30313f-98e9-2cc2-68a5-6f9973f9ad5a   
2     ad5525c0-fdab-94dc-018f-d7209c72bdef   
3     ad5525c0-fdab-94dc-018f-d7209c72bdef   
4     ad5525c0-fdab-94dc-018f-d7209c72bdef   
...                                    ...   
6370  8d49cb05-b1c3-b8fe-dc75-e43ad861f074   
6371  8d49cb05-b1c3-b8fe-dc75-e43ad861f074   
6372  8d4

### Datenverarbeitungs Schema (Sternschema)


Der Grund für die Verwendung eines Sternschemas liegt darin, die Anzahl der Join-Bedingungnen für die Auswertung der einzelnen Tabellen zu reduzieren. bei dem Sternschema wird eine Zentrale Datenbank erzeugt, welche die relevanten Daten enthält. Um den Kern des Sternschemas werden weitere Dimensions Tabellen angeordnet. Wichtig dabei kann es zu duplizierungen der Daten in kommen!

## Create Facts table

In [48]:
cursor.execute('''CREATE TABLE IF NOT EXISTS facts_table (
                          patient_ID STRING,
                          careplan_ID INT,
                          condition_id INT,
                          immunization_code INT,
                          VALUE STRING,
                          START_DATE DATE,
                          STOP_DATE DATE,
                          observation_code INT, 
                          FOREIGN KEY (patient_ID)
                            REFERENCES patients (Id) 
                          FOREIGN KEY (careplan_ID)
                            REFERENCES careplans (Id) 
                          FOREIGN KEY (condition_id)
                            REFERENCES conditions (condition_code)
                          FOREIGN KEY (immunization_code)
                            REFERENCES immunizations (immunization_code) 
                          FOREIGN KEY (observation_code)
                            REFERENCES observations (observation_code)                             

                       )
                       ''')

<sqlite3.Cursor at 0x2c40ee79c70>

In [49]:
print(pd.read_sql_query("PRAGMA table_info('facts_table')", conn))

   cid               name    type  notnull dflt_value  pk
0    0         patient_ID  STRING        0       None   0
1    1        careplan_ID     INT        0       None   0
2    2       condition_id     INT        0       None   0
3    3  immunization_code     INT        0       None   0
4    4              VALUE  STRING        0       None   0
5    5         START_DATE    DATE        0       None   0
6    6          STOP_DATE    DATE        0       None   0
7    7   observation_code     INT        0       None   0


### Transfer Data to new Table

In [50]:
cursor.execute('''INSERT INTO facts_table        
                    (patient_ID, careplan_ID, VALUE, START_DATE, STOP_DATE) 
                    SELECT PATIENT, Id, CODE, START, STOP
                    FROM careplans
                    ;''')



cursor.execute('''INSERT INTO facts_table        
                    (patient_ID, observation_code, VALUE, START_DATE) 
                    SELECT PATIENT, CODE, VALUE, DATE 
                    FROM OBSERVATIONS
                    ;''')


cursor.execute('''INSERT INTO facts_table        
                    (patient_ID, condition_Id, VALUE, START_DATE, STOP_DATE) 
                    SELECT PATIENT, condition_code, CODE, START, STOP
                    FROM conditions
                    ;''')

cursor.execute('''INSERT INTO facts_table        
                    (patient_ID, immunization_code, VALUE, START_DATE) 
                    SELECT PATIENT, immunization_code, CODE, DATE
                    FROM immunizations
                    ;''')



<sqlite3.Cursor at 0x2c40ee79c70>

In [54]:
print(pd.read_sql_query("SELECT * FROM facts_table", conn))

                                  patient_ID  \
0       52a250a9-24e4-ec9b-91d6-c38cf1b10fc0   
1       52a250a9-24e4-ec9b-91d6-c38cf1b10fc0   
2       ad5525c0-fdab-94dc-018f-d7209c72bdef   
3       7f30313f-98e9-2cc2-68a5-6f9973f9ad5a   
4       7f30313f-98e9-2cc2-68a5-6f9973f9ad5a   
...                                      ...   
141075  fce5ed5c-e218-4839-a0c5-6ef906e5ae73   
141076  fce5ed5c-e218-4839-a0c5-6ef906e5ae73   
141077  fce5ed5c-e218-4839-a0c5-6ef906e5ae73   
141078  fce5ed5c-e218-4839-a0c5-6ef906e5ae73   
141079  fce5ed5c-e218-4839-a0c5-6ef906e5ae73   

                                 careplan_ID  condition_id  immunization_code  \
0       bbd91bf4-ca41-8fcf-fe03-562677e1cd09           NaN                NaN   
1       8fbb54fb-fdcd-6c3c-c38b-993c36a0271d           NaN                NaN   
2       3258da38-50e6-b40e-889c-dc95d2323c2f           NaN                NaN   
3       ebc94d26-a633-69e8-da33-a42b8e03fbbe           NaN                NaN   
4       1f282245-f

In [55]:
print(pd.read_sql_query("SELECT * FROM facts_table WHERE immunization_code == 3", conn))

                             patient_ID careplan_ID condition_id  \
0  2f114e21-8b95-e9eb-2e9e-4b950b059cfa        None         None   

   immunization_code  VALUE            START_DATE STOP_DATE observation_code  
0                  3    140  2011-05-08T11:01:53Z      None             None  


In [None]:
# conn.close()