# Laden der Brustkrebsdaten in die SQLite-Datenbank

In [1]:
#Laden Libraries
import sqlite3
import pandas as pd

In [2]:
# Open a connection to the SQLite database file and obtain a cursor for the
# SQL statements issued in the cells below.
conn = sqlite3.connect(database='BreastCancerDB.db')
cursor = conn.cursor()
print("Datenbank wurde erfolgreich geöffnet")

Datenbank wurde erfolgreich geöffnet


In [3]:
# Drop every table this notebook creates so that a rerun starts from a clean slate.
# The *_new dimension tables are created further down with CREATE TABLE IF NOT EXISTS,
# so they have to be dropped here as well; otherwise a rerun would keep their old
# schema and append to whatever rows an earlier run left behind.
# Referencing tables are listed before the tables they reference.
# patient_conditions is not created in the visible cells; it is kept from the
# original drop list.
tables_to_drop = (
    'patient_careplans',
    'patient_conditions',
    'conditions_new',
    'observations_new',
    'procedures_new',
    'medications_new',
    'conditions',
    'medications',
    'observations',
    'procedures',
    'patients',
)
for table_name in tables_to_drop:
    cursor.execute(f"DROP TABLE IF EXISTS {table_name}")

<sqlite3.Cursor at 0x7fe2dccb3b90>

In [4]:
# Read the five CSV exports into DataFrames. The files are comma-separated,
# which is the pandas default delimiter, so no explicit separator is passed.
patients = pd.read_csv('BreastCancerData/patients.csv')
conditions = pd.read_csv('BreastCancerData/conditions.csv')
medications = pd.read_csv('BreastCancerData/medications.csv')
observations = pd.read_csv('BreastCancerData/observations.csv')
procedures = pd.read_csv('BreastCancerData/procedures.csv')

In [5]:
# List the distinct condition descriptions present in the loaded data.
conditions.loc[:, 'DESCRIPTION'].unique()

array(['Coronary Heart Disease', 'Atrial Fibrillation', 'Cardiac Arrest',
       'History of cardiac arrest (situation)', 'Stroke',
       'Myocardial Infarction',
       'History of myocardial infarction (situation)',
       'Malignant neoplasm of breast (disorder)'], dtype=object)

In [6]:
# Create the tables.
# patients is the parent table: Id is its primary key and the PATIENT column of
# every other table created in this cell references it.
# LAT and LON are declared as nvarchar here, not as a numeric type.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS patients (
			Id nvarchar(36) primary key,
            BIRTHDATE Date,
            DEATHDATE Date,
            SSN nvarchar(50),
            DRIVERS nvarchar(50),
            PASSPORT nvarchar(10),
            PREFIX nvarchar(4),
            FIRST nvarchar(50),
            LAST nvarchar(50),
            SUFFIX nvarchar(10),
            MAIDEN nvarchar(50),
            MARITAL nvarchar(1),
            RACE nvarchar(10),
            ETHNICITY nvarchar(50),
            GENDER nvarchar(1),
            BIRTHPLACE nvarchar(50),
            ADDRESS nvarchar(50),
            CITY nvarchar(50),
            STATE nvarchar(50),
            COUNTY nvarchar(50),
            ZIP nvarchar(4),
            LAT nvarchar(10),
            LON nvarchar(10),
            HEALTHCARE_EXPENSES float,
            HEALTHCARE_COVERAGE float
          
			)
             ''')

# conditions: one row per recorded condition; PATIENT references patients(Id).
# The table has no primary key of its own.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS conditions (
            START Date,
            STOP Date,
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            DESCRIPTION nvarchar(256),
            CODE int,
            foreign key(PATIENT) references patients(Id) 
			)
               ''')

# medications: PATIENT references patients(Id).
# START/STOP are declared as nvarchar(20) here, whereas conditions declares
# its START/STOP columns as Date.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS medications(
            START nvarchar(20),
            STOP nvarchar(20),
            PATIENT nvarchar(36),
            PAYER nvarchar(50),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(256),
            BASE_COST float,
            PAYER_COVERAGE float,
            DISPENSES int,
            TOTALCOST float,
            REASONCODE float,
            REASONDESCRIPTION nvarchar(50),
            foreign key(PATIENT) references patients(Id) 
			)
               ''')

# observations: PATIENT references patients(Id).
# VALUE is declared as float and TYPE is a separate nvarchar column.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS observations (
            DATE nvarchar(20),
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(256),
            VALUE float,
            UNITS nvarchar(10),
            TYPE nvarchar(10),
            foreign key(PATIENT) references patients(Id) 
			)
               ''')

# procedures: PATIENT references patients(Id).
# REASONCODE is declared as float, matching the medications table.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS procedures(
            DATE nvarchar(20),
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(50),
            BASE_COST float,
            REASONCODE float,
            REASONDESCRIPTION nvarchar(256),
            foreign key(PATIENT) references patients(Id) 
			)
               ''')

<sqlite3.Cursor at 0x7fe2dccb3b90>

In [7]:
# Append each DataFrame to its matching, already-created table.
# patients goes first because the other tables reference patients(Id).
frames_by_table = (
    ('patients', patients),
    ('conditions', conditions),
    ('medications', medications),
    ('observations', observations),
    ('procedures', procedures),
)
for table_name, frame in frames_by_table:
    frame.to_sql(table_name, conn, if_exists='append', index=False)

# Sternschema erstellen

Wir benötigen eine Faktentabelle für unser Sternschema. Die Tabellen Patients und Conditions werden als Dimensionstabellen verwendet.

In [8]:
# Distinct condition descriptions, shown again before the star schema is built.
conditions.loc[:, 'DESCRIPTION'].unique()

array(['Coronary Heart Disease', 'Atrial Fibrillation', 'Cardiac Arrest',
       'History of cardiac arrest (situation)', 'Stroke',
       'Myocardial Infarction',
       'History of myocardial infarction (situation)',
       'Malignant neoplasm of breast (disorder)'], dtype=object)

In [9]:


# conditions_new: same columns as conditions plus the surrogate key
# CONDITIONS_ID (INTEGER PRIMARY KEY AUTOINCREMENT), which the fact table
# patient_careplans references.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS conditions_new (
            CONDITIONS_ID INTEGER PRIMARY KEY AUTOINCREMENT,
            START Date,
            STOP Date,
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            DESCRIPTION nvarchar(256),
            CODE int,
            foreign key(PATIENT) references patients(Id) 
			)
               ''')             

# observations_new: observations with the surrogate key OBSERVATIONS_ID added.
# Unlike observations, this table has no TYPE column.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS observations_new (
            OBSERVATIONS_ID INTEGER PRIMARY KEY AUTOINCREMENT,
            DATE nvarchar(20),
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(256),
            VALUE float,
            UNITS nvarchar(10),
            foreign key(PATIENT) references patients(Id) 
			)
               ''')  

# procedures_new: same columns as procedures plus the surrogate key PROCEDURES_ID.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS procedures_new(
            PROCEDURES_ID INTEGER PRIMARY KEY AUTOINCREMENT,
            DATE nvarchar(20),
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(50),
            BASE_COST float,
            REASONCODE float,
            REASONDESCRIPTION nvarchar(256),
            foreign key(PATIENT) references patients(Id) 
			)
               ''') 

# medications_new: same columns as medications plus the surrogate key MEDICATIONS_ID.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS medications_new (
            MEDICATIONS_ID INTEGER PRIMARY KEY AUTOINCREMENT,
            START nvarchar(20),
            STOP nvarchar(20),
            PATIENT nvarchar(36),
            PAYER nvarchar(50),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(256),
            BASE_COST float,
            PAYER_COVERAGE float,
            DISPENSES int,
            TOTALCOST float,
            REASONCODE float,
            REASONDESCRIPTION nvarchar(50),
            foreign key(PATIENT) references patients(Id) 
			)
               ''')                        

# Fact table of the star schema.
# Each row links a patient (PATIENT_ID) and one of that patient's conditions
# (CONDITIONS_ID) to a care record: CARE_TYPE names the source table and
# TABLE_ID holds that table's surrogate key.
# The patient foreign key now points at the existing `patients` table (it
# previously named a non-existent table `patient`), and the two foreign-key
# clauses are separated by a comma.
cursor.execute('''
        CREATE TABLE IF NOT EXISTS patient_careplans (
            CAREPLANT_ID INTEGER PRIMARY KEY AUTOINCREMENT,
            PATIENT_ID nvarchar(36),
            CONDITIONS_ID int,
            CARE_TYPE nvarchar(20),
            TABLE_ID int,
            foreign key(PATIENT_ID) references patients(Id),
            foreign key(CONDITIONS_ID) references conditions_new(CONDITIONS_ID)
            )
             ''')

<sqlite3.Cursor at 0x7fe2dccb3b90>

In [10]:
# Copy all rows from conditions into conditions_new. CONDITIONS_ID is not
# listed, so it is assigned by the INTEGER PRIMARY KEY AUTOINCREMENT column.
cursor.execute('''
    INSERT INTO conditions_new(START,STOP, PATIENT, ENCOUNTER,DESCRIPTION,CODE)
    SELECT START,STOP, PATIENT, ENCOUNTER,DESCRIPTION,CODE
    FROM conditions
    ''') 

<sqlite3.Cursor at 0x7fe2dccb3b90>

In [11]:
# Copy all rows from observations into observations_new. OBSERVATIONS_ID is not
# listed, so it is assigned by the INTEGER PRIMARY KEY AUTOINCREMENT column.
# VALUE is copied as well: observations_new declares the column, and leaving it
# out of the INSERT stored NULL for every measurement.
# TYPE is not copied because observations_new has no TYPE column.
cursor.execute('''
    INSERT INTO observations_new(DATE, PATIENT, ENCOUNTER, CODE, DESCRIPTION, VALUE, UNITS)
    SELECT DATE, PATIENT, ENCOUNTER, CODE, DESCRIPTION, VALUE, UNITS
    FROM observations
    ''')

<sqlite3.Cursor at 0x7fe2dccb3b90>

In [12]:
# Copy all rows from procedures into procedures_new. PROCEDURES_ID is not
# listed, so it is assigned by the INTEGER PRIMARY KEY AUTOINCREMENT column.
cursor.execute('''
    INSERT INTO procedures_new(DATE, PATIENT, ENCOUNTER,CODE, DESCRIPTION, BASE_COST, REASONCODE,REASONDESCRIPTION )
    SELECT DATE, PATIENT, ENCOUNTER,CODE, DESCRIPTION, BASE_COST, REASONCODE,REASONDESCRIPTION
    FROM procedures
    ''')

<sqlite3.Cursor at 0x7fe2dccb3b90>

In [13]:
# Copy all rows from medications into medications_new. MEDICATIONS_ID is not
# listed, so it is assigned by the INTEGER PRIMARY KEY AUTOINCREMENT column.
cursor.execute('''
    INSERT INTO medications_new(START, STOP, PATIENT, PAYER, ENCOUNTER, CODE, DESCRIPTION, BASE_COST, PAYER_COVERAGE, DISPENSES, TOTALCOST, REASONCODE,REASONDESCRIPTION )
    SELECT START, STOP, PATIENT, PAYER, ENCOUNTER, CODE, DESCRIPTION, BASE_COST, PAYER_COVERAGE, DISPENSES, TOTALCOST, REASONCODE,REASONDESCRIPTION 
    FROM medications
    ''')


<sqlite3.Cursor at 0x7fe2dccb3b90>

In [14]:
## Fact table
# Populate patient_careplans from three SELECTs, one per care table
# (medications_new, observations_new, procedures_new).
# Each SELECT joins patients to conditions_new and to the care table on the
# patient id only, so every condition of a patient is paired with every care
# record of that same patient.
# CARE_TYPE is a literal naming the source table; the care table's surrogate key
# lands in TABLE_ID.
# The branches are combined with UNION (not UNION ALL), which removes duplicate rows.

cursor.execute('''
    INSERT INTO patient_careplans(PATIENT_ID, CONDITIONS_ID, CARE_TYPE, TABLE_ID)
    SELECT 
    patients.Id, CONDITIONS_ID,  'Medications' as CARE_TYPE, medications_new.MEDICATIONS_ID
    FROM patients
    INNER JOIN 
    conditions_new on patients.Id= conditions_new.PATIENT
    INNER JOIN
    medications_new ON patients.Id = medications_new.PATIENT
    
    UNION

    SELECT 
    patients.Id, CONDITIONS_ID,  'Observations' as CARE_TYPE, observations_new.OBSERVATIONS_ID
    FROM patients
    INNER JOIN 
    conditions_new on patients.Id= conditions_new.PATIENT
    INNER JOIN
    observations_new ON patients.Id =  observations_new.PATIENT

    UNION

    SELECT 
    patients.Id, CONDITIONS_ID,  'Procedures' as CARE_TYPE, procedures_new.PROCEDURES_ID
    FROM patients
    INNER JOIN 
    conditions_new on patients.Id= conditions_new.PATIENT
    INNER JOIN
    procedures_new ON patients.Id =  procedures_new.PATIENT

    ''')

<sqlite3.Cursor at 0x7fe2dccb3b90>

In [15]:
# Read the entire fact table into a DataFrame and print it.
# NOTE(review): this selects every row; consider a LIMIT if only a preview is needed.
print(pd.read_sql_query('''
     SELECT *
     FROM patient_careplans
     ''', conn))

      CAREPLANT_ID                            PATIENT_ID  CONDITIONS_ID  \
0                1  009121bf-a672-8942-443e-85e18a33f766             51   
1                2  009121bf-a672-8942-443e-85e18a33f766             51   
2                3  009121bf-a672-8942-443e-85e18a33f766             51   
3                4  009121bf-a672-8942-443e-85e18a33f766             51   
4                5  009121bf-a672-8942-443e-85e18a33f766             51   
...            ...                                   ...            ...   
8265          8266  fd6d5aae-f012-0e00-99bc-6668b73bf4f3             68   
8266          8267  fd6d5aae-f012-0e00-99bc-6668b73bf4f3             68   
8267          8268  fd6d5aae-f012-0e00-99bc-6668b73bf4f3             68   
8268          8269  fd6d5aae-f012-0e00-99bc-6668b73bf4f3             68   
8269          8270  fd6d5aae-f012-0e00-99bc-6668b73bf4f3             68   

         CARE_TYPE  TABLE_ID  
0     Observations      7190  
1     Observations      7191  
2     

In [16]:
# Print the fact-table rows whose surrogate key CAREPLANT_ID is below 10.
print(pd.read_sql_query('''
     SELECT *
     FROM patient_careplans
     WHERE CAREPLANT_ID<10
     ''', conn))

   CAREPLANT_ID                            PATIENT_ID  CONDITIONS_ID  \
0             1  009121bf-a672-8942-443e-85e18a33f766             51   
1             2  009121bf-a672-8942-443e-85e18a33f766             51   
2             3  009121bf-a672-8942-443e-85e18a33f766             51   
3             4  009121bf-a672-8942-443e-85e18a33f766             51   
4             5  009121bf-a672-8942-443e-85e18a33f766             51   
5             6  009121bf-a672-8942-443e-85e18a33f766             51   
6             7  009121bf-a672-8942-443e-85e18a33f766             51   
7             8  009121bf-a672-8942-443e-85e18a33f766             51   
8             9  009121bf-a672-8942-443e-85e18a33f766             51   

      CARE_TYPE  TABLE_ID  
0  Observations      7190  
1  Observations      7191  
2  Observations      7192  
3  Observations      7193  
4  Observations      7194  
5  Observations      7195  
6  Observations      7196  
7  Observations      7197  
8  Observations      7198  

In [17]:
# Print the distinct CARE_TYPE values in the fact table (one row per group).
print(pd.read_sql_query('''
     SELECT CARE_TYPE
     FROM patient_careplans
     GROUP BY CARE_TYPE
     ''', conn))

      CARE_TYPE
0   Medications
1  Observations
2    Procedures


In [18]:
# Print PATIENT_ID and CARE_TYPE for the fact rows that come from medications.
print(pd.read_sql_query('''
     SELECT PATIENT_ID, CARE_TYPE
     FROM patient_careplans
     WHERE CARE_TYPE = 'Medications'
     ''', conn))

                               PATIENT_ID    CARE_TYPE
0    02db7f22-8617-0cf7-fa10-d820d596a81a  Medications
1    02db7f22-8617-0cf7-fa10-d820d596a81a  Medications
2    02db7f22-8617-0cf7-fa10-d820d596a81a  Medications
3    02db7f22-8617-0cf7-fa10-d820d596a81a  Medications
4    037404e1-0c87-534d-0fe2-e21ef20640f6  Medications
..                                    ...          ...
798  f4b7d173-38fd-e173-9242-aca2651c6b10  Medications
799  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medications
800  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medications
801  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medications
802  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medications

[803 rows x 2 columns]


In [19]:
# Persist the rows inserted through `cursor` before closing. sqlite3 opens a
# transaction implicitly for INSERT statements and close() does not commit it,
# so without this commit the rows written to the *_new tables and to
# patient_careplans would be discarded when the connection closes.
conn.commit()
conn.close()