In [1]:
import pandas as pd
import sqlite3
import glob
import os
import os.path as op

# Let pandas use up to 100 characters per line when printing frames as text.
pd.options.display.width = 100

# Single shared connection/cursor to the SQLite file used by every cell below.
# The path is relative, so it is resolved against the kernel's working directory.
conn = sqlite3.connect("../../data/sepsis.db")
cursor = conn.cursor()

In [2]:
# Load data from csv into sqlite database.
# Each "<name>.csv" in the data directory becomes (or replaces) table "<name>".
csv_path = op.join(os.getcwd(), '..', '..', 'data', 'fake_sepsis_data')

# sorted(): glob yields files in arbitrary order; sorting keeps reruns
# (and the log below) reproducible.
for csv_file in sorted(glob.glob(op.join(csv_path, "*.csv"))):
    # Log only the base name: the absolute path would embed the local
    # user/directory layout in the notebook's saved output.
    print('Reading', op.basename(csv_file))
    df = pd.read_csv(csv_file)
    # First column should be unnamed... if not, there's a problem
    # (pandas labels a header-less first column 'Unnamed: 0').
    if df.columns[0] == 'Unnamed: 0':
        # Reassign rather than mutate in place.
        df = df.drop('Unnamed: 0', axis=1)
    else:
        print('   --- Check Data File')
    # Table name = file name without directory or extension.
    filename = op.splitext(op.basename(csv_file))[0]
    # if_exists='replace' drops any previous copy of the table, so rerunning
    # this cell rebuilds the tables from the CSVs.
    df.to_sql(filename, conn, if_exists='replace', index=False)
    print('Saved', filename, 'to database')

# create test record to see if outlier can be detected
# Column names and values live side by side in one mapping and are bound
# through "?" placeholders, so a column can never drift out of step with its
# value (the pairing was previously positional across two 20-item lists).
test_admission = {
    "SubjectId": 123456789,
    "EncounterId": 123456789,
    "DOB": '1800-01-01',
    "Sex": 'M',
    "Race": 'White',
    "Ethnicity": 'Not Hispanic or Latino',
    "FirstHeightInInches": 216,       # 18 ft
    "FirstWeightInOunces": 44800,     # 2,800 lb
    "AdmittingDepartmentName": 'MHC SURG TRAUMA UNIT',
    "AdmissionDateTime": '2017-12-01',
    "FinancialClass": 'insurance',
    "AdmissionType": 'Emergency',
    "AdmissionSource": 'Unspecified',
    "PatientClass": 'inpatient',
    "AdmittingService": 'surgery',
    "PrincipalProblem": 'patient reported text',
    "PrimaryCodedDx": 'E220',
    "DischargeDateTime": '2018-01-01',
    "DischargeDisposition": 'home',
    "HospitalService": 'surgery',
}

# Keys and values are read from the same unmodified dict, so the generated
# column list and the bound value tuple are guaranteed to line up.
column_list = ", ".join('"{}"'.format(name) for name in test_admission)
placeholders = ", ".join("?" for _ in test_admission)
cursor.execute(
    "INSERT INTO admission ({}) VALUES ({})".format(column_list, placeholders),
    tuple(test_admission.values())
)
conn.commit()

# Sanity check: number of rows now in the admission table
# (shown as the cell's output, as a list holding one 1-tuple).
cursor.execute("SELECT count(1) FROM admission").fetchall()

Reading /Users/seth/OneDrive - The University of Colorado Denver/Documents/ICML_2018_Paper/faker-prototype/sourcecode/python/../../data/fake_sepsis_data/admission.csv
Saved admission to database
Reading /Users/seth/OneDrive - The University of Colorado Denver/Documents/ICML_2018_Paper/faker-prototype/sourcecode/python/../../data/fake_sepsis_data/admission_diagnoses.csv
Saved admission_diagnoses to database
Reading /Users/seth/OneDrive - The University of Colorado Denver/Documents/ICML_2018_Paper/faker-prototype/sourcecode/python/../../data/fake_sepsis_data/adt_events.csv
Saved adt_events to database
Reading /Users/seth/OneDrive - The University of Colorado Denver/Documents/ICML_2018_Paper/faker-prototype/sourcecode/python/../../data/fake_sepsis_data/diagnoses.csv
Saved diagnoses to database
Reading /Users/seth/OneDrive - The University of Colorado Denver/Documents/ICML_2018_Paper/faker-prototype/sourcecode/python/../../data/fake_sepsis_data/flowsheet.csv
Saved flowsheet to database
Rea

[(1001,)]

In [3]:
# Print the CREATE TABLE statement SQLite stored for every table in the database.
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()  # list of 1-tuples: [(table_name,), ...]
for (table_name,) in tables:
    # Bind the name as a parameter instead of splicing it into the SQL text,
    # so a table name containing a quote cannot break (or alter) the query.
    cursor.execute(
        "SELECT sql FROM sqlite_master WHERE type='table' AND name=?",
        (table_name,)
    )
    print(cursor.fetchone()[0])

CREATE TABLE "admission" (
"SubjectId" INTEGER,
  "EncounterId" INTEGER,
  "DOB" TEXT,
  "Sex" TEXT,
  "Race" TEXT,
  "Ethnicity" TEXT,
  "FirstHeightInInches" INTEGER,
  "FirstWeightInOunces" INTEGER,
  "AdmittingDepartmentName" TEXT,
  "AdmissionDateTime" TEXT,
  "FinancialClass" TEXT,
  "AdmissionType" TEXT,
  "AdmissionSource" TEXT,
  "PatientClass" TEXT,
  "AdmittingService" TEXT,
  "PrincipalProblem" TEXT,
  "PrimaryCodedDx" TEXT,
  "DischargeDateTime" TEXT,
  "DischargeDisposition" TEXT,
  "HospitalService" TEXT
)
CREATE TABLE "admission_diagnoses" (
"SubjectId" INTEGER,
  "EncounterId" INTEGER,
  "AdmittingDiagnosis" TEXT
)
CREATE TABLE "adt_events" (
"SubjectId" INTEGER,
  "EncounterId" INTEGER,
  "DepartmentName" TEXT,
  "TransferInDateTime" TEXT,
  "TransferOutDateTime" TEXT
)
CREATE TABLE "diagnoses" (
"SubjectId" INTEGER,
  "EncounterId" INTEGER,
  "Source" TEXT,
  "StartDate" TEXT,
  "Code" TEXT,
  "Type" TEXT
)
CREATE TABLE "flowsheet" (
"SubjectId" INTEGER,
  "Encounter

In [4]:
def _oversized_admissions(min_height_in, min_weight_oz=8000):
    """Return admissions whose first recorded height and weight exceed the given bounds.

    Parameters
    ----------
    min_height_in : int or float
        Exclusive lower bound applied to FirstHeightInInches.
    min_weight_oz : int or float, default 8000
        Exclusive lower bound applied to FirstWeightInOunces (8000 oz = 500 lb).

    Returns
    -------
    pandas.DataFrame
        Subject/encounter ids, sex, race, height and weight of the matching
        rows, tallest first and, within equal heights, heaviest first.
    """
    # Thresholds are bound as parameters so the same statement serves every
    # cut-off instead of being copy-pasted with a different literal.
    return pd.read_sql("""
select SubjectId, encounterid, sex, race, FirstHeightInInches, FirstWeightInOunces from
admission
where FirstHeightInInches > ? and
      FirstWeightInOunces > ?
order by FirstHeightInInches desc, FirstWeightInOunces desc
""", conn, params=(min_height_in, min_weight_oz))


# Two height cut-offs at the same 8000 oz weight floor:
# 80 in = 6 ft 8 in, 107 in = 8 ft 11 in.
# NOTE(review): presumably "unusually tall" vs. "implausibly tall" -- confirm intent.
df1 = _oversized_admissions(80)
df2 = _oversized_admissions(107)

# Distinct primary diagnosis codes starting with "E". SQLite's LIKE is
# case-insensitive for ASCII by default, so 'e%' matches the upper-case codes.
df3 = pd.read_sql("""
select distinct PrimaryCodedDx from admission
where PrimaryCodedDx like 'e%'
order by PrimaryCodedDx
""", conn)


df3

Unnamed: 0,PrimaryCodedDx
0,E070
1,E088
2,E113592
3,E133293
4,E1337X2
5,E220
6,E2689
7,E368
8,E40
9,E610
