# Laden der Brustkrebsdaten in die SQLite-Datenbank

In [481]:
#Laden Libraries
import sqlite3
import pandas as pd

In [482]:
# Connect to the project database and create the cursor used by the cells below.
DB_FILE = 'BreastCancerDB.db'
conn = sqlite3.connect(DB_FILE)
cursor = conn.cursor()
print("Datenbank wurde erfolgreich geöffnet")

Datenbank wurde erfolgreich geöffnet


In [483]:
# Drop every table this notebook creates so that a re-run starts from a clean
# database. The names must match the CREATE TABLE / to_sql names used further
# down exactly; otherwise a table survives the reset and the later
# `to_sql(..., if_exists='append')` calls add the same rows a second time.
# Tables that reference `patient` are listed before `patient` itself.
TABLES_TO_RESET = (
    "patient_careplan",
    "patient_condition",
    "patient_cond",
    "condition",
    "medication",
    "observation",
    "procedure",
    "procedures",  # name dropped by the previous version of this cell; kept for old database files
    "patient",
)
for table_name in TABLES_TO_RESET:
    # Table names are fixed constants from the tuple above, not user input.
    cursor.execute(f"DROP TABLE IF EXISTS {table_name}")

<sqlite3.Cursor at 0x7fd56e03ba40>

In [484]:
# Load the five CSV exports into DataFrames; each one feeds the table of the
# same name later in the notebook.
read_opts = {"sep": ","}
patient = pd.read_csv('BreastCancerData/patients.csv', **read_opts)
condition = pd.read_csv('BreastCancerData/conditions.csv', **read_opts)
medication = pd.read_csv('BreastCancerData/medications.csv', **read_opts)
observation = pd.read_csv('BreastCancerData/observations.csv', **read_opts)
procedure = pd.read_csv('BreastCancerData/procedures.csv', **read_opts)

In [485]:
# Create the tables
# `patient` is keyed by `Id` (primary key); the other tables created in this
# cell point back to it through a foreign key on their PATIENT column.
# IF NOT EXISTS means an already existing `patient` table is left as it is.
# NOTE(review): the column list presumably mirrors patients.csv, because the
# `patient` DataFrame is appended to this table unchanged — confirm against the file.
# NOTE(review): LAT and LON are declared nvarchar while the expense/coverage
# columns are float — confirm that storing coordinates as text is intended.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS patient (
			Id nvarchar(36) primary key,
            BIRTHDATE Date,
            DEATHDATE Date,
            SSN nvarchar(50),
            DRIVERS nvarchar(50),
            PASSPORT nvarchar(10),
            PREFIX nvarchar(3),
            FIRST nvarchar(50),
            LAST nvarchar(50),
            SUFFIX nvarchar(50),
            MAIDEN nvarchar(50),
            MARITAL nvarchar(1),
            RACE nvarchar(10),
            ETHNICITY nvarchar(20),
            GENDER nvarchar(2),
            BIRTHPLACE nvarchar(50),
            ADDRESS nvarchar(50),
            CITY nvarchar(50),
            STATE nvarchar(50),
            COUNTY nvarchar(50),
            ZIP nvarchar(4),
            LAT nvarchar(10),
            LON nvarchar(10),
            HEALTHCARE_EXPENSES float,
            HEALTHCARE_COVERAGE float
          
			)
             ''')

# `condition`: diagnoses per patient. PATIENT references patient(Id).
# DESCRIPTION is declared nvarchar(50) like the DESCRIPTION columns of the
# other tables; the previous declaration used `object`, which is a pandas
# dtype name rather than an SQL type.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS condition (
        START Date,
        STOP Date,
        PATIENT nvarchar(36),
        ENCOUNTER nvarchar(50),
        DESCRIPTION nvarchar(50),
        CODE int,
        foreign key(PATIENT) references patient(Id)
    )
''')

# `medication`: one row per medication entry; PATIENT references patient(Id).
# START/STOP are declared nvarchar(20) here, whereas `condition` declares its
# START/STOP as Date.
# NOTE(review): columns presumably mirror medications.csv, since the
# `medication` DataFrame is appended to this table unchanged — confirm.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS medication (
            START nvarchar(20),
            STOP nvarchar(20),
            PATIENT nvarchar(36),
            PAYER nvarchar(50),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(50),
            BASE_COST float,
            PAYER_COVERAGE float,
            DISPENSES int,
            TOTALCOST float,
            REASONCODE float,
            REASONDESCRIPTION nvarchar(50),
            foreign key(PATIENT) references patient(Id) 
			)
               ''')

# `observation`: one row per recorded observation; PATIENT references patient(Id).
# NOTE(review): VALUE is declared float — confirm that observations.csv never
# carries non-numeric values in that column.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS observation (
            DATE nvarchar(20),
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(50),
            VALUE float,
            UNITS nvarchar(10),
            TYPE nvarchar(10),
            foreign key(PATIENT) references patient(Id) 
			)
               ''')

# `procedure` (singular): one row per procedure; PATIENT references patient(Id).
# The same table name is used when the `procedure` DataFrame is appended and
# when the fact table is filled, so it must not be renamed in isolation.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS procedure (
            DATE nvarchar(20),
            PATIENT nvarchar(36),
            ENCOUNTER nvarchar(50),
            CODE int,
            DESCRIPTION nvarchar(50),
            BASE_COST float,
            REASONCODE float,
            REASONDESCRIPTION nvarchar(50),
            foreign key(PATIENT) references patient(Id) 
			)
               ''')


           


<sqlite3.Cursor at 0x7fd56e03ba40>

In [486]:
# Append each DataFrame to the table of the same name. The tables already
# exist, so rows are added to them; the pandas index is not written.
append_opts = {"if_exists": "append", "index": False}
patient.to_sql('patient', conn, **append_opts)
condition.to_sql('condition', conn, **append_opts)
medication.to_sql('medication', conn, **append_opts)
observation.to_sql('observation', conn, **append_opts)
procedure.to_sql('procedure', conn, **append_opts)

# Sternschema erstellen

Wir benötigen eine Faktentabelle für unser Sternschema. Die Tabellen Patient und Condition werden als Dimensionstabellen verwendet.

In [487]:
# List the distinct condition descriptions found in the loaded conditions data.
description_column = condition['DESCRIPTION']
description_column.unique()

array(['Coronary Heart Disease', 'Atrial Fibrillation', 'Cardiac Arrest',
       'History of cardiac arrest (situation)', 'Stroke',
       'Myocardial Infarction',
       'History of myocardial infarction (situation)',
       'Malignant neoplasm of breast (disorder)'], dtype=object)

In [488]:
# Creates `patient_cond` with a foreign key from patient_Id to patient(Id).
# NOTE(review): no other cell in this notebook inserts into or reads from
# `patient_cond`, and its columns are identical to those of
# `patient_condition` created in the next cell — it looks like a leftover
# from an earlier draft; confirm before relying on it.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS patient_cond(
			patient_Id nvarchar(36),
            START Date,
            STOP Date,
            ENCOUNTER nvarchar(50),
            DESCRIPTION nvarchar(50),
            CODE int,
            foreign key(patient_Id) references patient(Id) 
			)
             ''')

<sqlite3.Cursor at 0x7fd56e03ba40>

In [489]:
# `patient_condition`: patient id plus the condition columns
# (START, STOP, ENCOUNTER, DESCRIPTION, CODE). It is filled further down from
# a join of `patient` and `condition`; patient_Id references patient(Id).
cursor.execute('''
		CREATE TABLE IF NOT EXISTS patient_condition(
			patient_Id nvarchar(36),
            START Date,
            STOP Date,
            ENCOUNTER nvarchar(50),
            DESCRIPTION nvarchar(50),
            CODE int,
            foreign key(patient_Id) references patient(Id) 
			)
             ''')

# Fact table of the star schema.
# Each row carries the patient id, the condition_* columns, and one care item
# described by CARE_TYPE, DESCRIPTION, RESON, ENCOUNTER and DATE.
# CAREPLANT_ID is an auto-incrementing surrogate key; PATIENT_ID references patient(Id).
# NOTE(review): `CAREPLANT_ID` and `RESON` look like misspellings of
# CAREPLAN_ID / REASON. They are part of the schema and are used by the INSERT
# that fills this table, so rename only together with that statement.
cursor.execute('''
		CREATE TABLE IF NOT EXISTS patient_careplan (
            CAREPLANT_ID INTEGER PRIMARY KEY AUTOINCREMENT,
			PATIENT_ID nvarchar(36),
            condition_START Date,
            condition_STOP Date,
            condition_ENCOUNTER nvarchar(50),
            condition_DESCRIPTION nvarchar(50),
            condition_CODE int,
            CARE_TYPE nvarchar(20),
            DESCRIPTION nvarchar(50),
            RESON nvarchar(50),
            ENCOUNTER nvarchar(50),
            DATE Date,
            foreign key(PATIENT_ID) references patient(Id) 
			)
             ''')  

<sqlite3.Cursor at 0x7fd56e03ba40>

In [490]:
# Fill `patient_condition` with one row per (patient, condition) pair.
# The LEFT JOIN keeps patients that have no condition at all; for those rows
# every condition column is NULL.
# Columns are qualified with their table so the statement keeps working if a
# same-named column is ever added to the other table.
cursor.execute('''
    INSERT INTO patient_condition(patient_Id, START, STOP, ENCOUNTER, DESCRIPTION, CODE)
    SELECT patient.Id, condition.START, condition.STOP, condition.ENCOUNTER,
           condition.DESCRIPTION, condition.CODE
    FROM patient
    LEFT JOIN condition ON patient.Id = condition.PATIENT
    ''')
# The sqlite3 module opens a transaction implicitly for INSERT statements and
# discards it if the connection is closed without commit(); commit here so the
# inserted rows are actually persisted in the database file.
conn.commit()

<sqlite3.Cursor at 0x7fd56e03ba40>

In [491]:
# Read `patient_condition` back from the database and print it.
patient_condition_rows = pd.read_sql_query('''
     SELECT *
     FROM patient_condition
     ''', conn)
print(patient_condition_rows)

                                patient_Id       START  STOP  \
0     009121bf-a672-8942-443e-85e18a33f766  2000-12-05  None   
1     0103a559-910a-03df-6117-ec429eeb4ac9        None  None   
2     01739e74-c257-d750-9c7f-3cff9513511e        None  None   
3     01a280b3-04f1-419c-d2f7-0aebc4126365        None  None   
4     01b18347-90d9-e2b1-b142-8252c92cef8c        None  None   
...                                    ...         ...   ...   
1101  fefe51c3-dd11-9321-7e1a-a98603713461        None  None   
1102  ff464775-7c93-27c7-ac95-6bad65259f5f        None  None   
1103  ffa3fb1e-a8a6-6558-9f2a-e4e9fc09447e        None  None   
1104  ffef1762-984b-72e5-f3c3-c5f76fda79df        None  None   
1105  fffc77ee-b38c-41bd-2b07-ebaca114a78e        None  None   

                                 ENCOUNTER DESCRIPTION         CODE  
0     b824148c-1547-1097-7981-77b09c7ec478      Stroke  230690007.0  
1                                     None        None          NaN  
2                    

In [492]:
# Populate the fact table `patient_careplan`.
# For every row of `patient_condition`, each medication, observation and
# procedure of the same patient is attached and labelled through CARE_TYPE
# ('Medication' / 'Observation' / 'Procedure'). CAREPLANT_ID is not listed, so
# SQLite assigns it automatically.
# NOTE(review): RESON is filled from a different kind of column in each branch
# (medication.REASONCODE, observation.CODE, procedure.REASONDESCRIPTION) —
# confirm that this mix of codes and descriptions is intended.
# NOTE(review): UNION (rather than UNION ALL) collapses rows that are identical
# in every selected column — confirm that such duplicates should be dropped.
cursor.execute('''
    INSERT INTO patient_careplan(
        PATIENT_ID, condition_START, condition_STOP, condition_ENCOUNTER,
        condition_DESCRIPTION, condition_CODE,
        CARE_TYPE, DESCRIPTION, RESON, ENCOUNTER, DATE)

    SELECT
        pc.patient_Id  AS PATIENT_ID,
        pc.START       AS condition_START,
        pc.STOP        AS condition_STOP,
        pc.ENCOUNTER   AS condition_ENCOUNTER,
        pc.DESCRIPTION AS condition_DESCRIPTION,
        pc.CODE        AS condition_CODE,
        'Medication'   AS CARE_TYPE,
        m.DESCRIPTION,
        m.REASONCODE   AS RESON,
        m.ENCOUNTER,
        m.START        AS DATE
    FROM patient_condition AS pc
    INNER JOIN medication AS m ON pc.patient_Id = m.PATIENT

    UNION

    SELECT
        pc.patient_Id, pc.START, pc.STOP, pc.ENCOUNTER, pc.DESCRIPTION, pc.CODE,
        'Observation', o.DESCRIPTION, o.CODE, o.ENCOUNTER, o.DATE
    FROM patient_condition AS pc
    INNER JOIN observation AS o ON pc.patient_Id = o.PATIENT

    UNION

    SELECT
        pc.patient_Id, pc.START, pc.STOP, pc.ENCOUNTER, pc.DESCRIPTION, pc.CODE,
        'Procedure', p.DESCRIPTION, p.REASONDESCRIPTION, p.ENCOUNTER, p.DATE
    FROM patient_condition AS pc
    INNER JOIN procedure AS p ON pc.patient_Id = p.PATIENT
    ''')
# INSERT runs inside an implicitly opened sqlite3 transaction; without this
# commit the fact table would be empty again once the connection is closed.
conn.commit()

<sqlite3.Cursor at 0x7fd56e03ba40>

In [493]:
# Read the fact table back from the database and print it.
careplan_rows = pd.read_sql_query('''
     SELECT *
     FROM patient_careplan
     ''', conn)
print(careplan_rows)

       CAREPLANT_ID                            PATIENT_ID condition_START  \
0                 1  009121bf-a672-8942-443e-85e18a33f766      2000-12-05   
1                 2  009121bf-a672-8942-443e-85e18a33f766      2000-12-05   
2                 3  009121bf-a672-8942-443e-85e18a33f766      2000-12-05   
3                 4  009121bf-a672-8942-443e-85e18a33f766      2000-12-05   
4                 5  009121bf-a672-8942-443e-85e18a33f766      2000-12-05   
...             ...                                   ...             ...   
35773         35774  fffc77ee-b38c-41bd-2b07-ebaca114a78e            None   
35774         35775  fffc77ee-b38c-41bd-2b07-ebaca114a78e            None   
35775         35776  fffc77ee-b38c-41bd-2b07-ebaca114a78e            None   
35776         35777  fffc77ee-b38c-41bd-2b07-ebaca114a78e            None   
35777         35778  fffc77ee-b38c-41bd-2b07-ebaca114a78e            None   

      condition_STOP                   condition_ENCOUNTER  \
0            

In [494]:
# Print patient id and care type for the fact-table rows tagged 'Medication'.
medication_careplan_rows = pd.read_sql_query('''
     SELECT PATIENT_ID, CARE_TYPE
     FROM patient_careplan
     WHERE CARE_TYPE = 'Medication'
     ''', conn)
print(medication_careplan_rows)

                               PATIENT_ID   CARE_TYPE
0    02db7f22-8617-0cf7-fa10-d820d596a81a  Medication
1    02db7f22-8617-0cf7-fa10-d820d596a81a  Medication
2    02db7f22-8617-0cf7-fa10-d820d596a81a  Medication
3    02db7f22-8617-0cf7-fa10-d820d596a81a  Medication
4    037404e1-0c87-534d-0fe2-e21ef20640f6  Medication
..                                    ...         ...
798  f4b7d173-38fd-e173-9242-aca2651c6b10  Medication
799  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medication
800  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medication
801  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medication
802  f5a6e3b3-781d-d800-d4c3-1782e39258e8  Medication

[803 rows x 2 columns]


In [None]:
#cursor.execute("DROP TABLE IF EXISTS patient_condition")

In [495]:
# Commit before closing: the INSERT statements executed through `cursor` run
# in an implicitly opened transaction, and sqlite3 rolls that transaction back
# when the connection is closed without commit().
conn.commit()
conn.close()