In [1]:
import psycopg2
from configparser import ConfigParser
import pandas as pd

In [2]:
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this blanket filter hides warnings from all libraries, not just one;
# presumably it exists to quiet pandas' warning about reading SQL through a raw
# DBAPI connection — TODO confirm and narrow the filter if so.
import warnings
warnings.filterwarnings("ignore")

In [3]:
def postgresql_config(filename='config.ini', section='postgresql'):
    """Load one section of an INI file as a plain dict of settings.

    Args:
        filename: Path of the INI file to parse.
        section: Name of the section whose options are returned.

    Returns:
        dict mapping each option name in ``section`` to its string value.

    Raises:
        Exception: If ``section`` is not present after parsing ``filename``.
            ``ConfigParser.read`` skips files it cannot open, so a missing
            file is reported through this same error.
    """
    ini = ConfigParser()
    ini.read(filename)

    # Guard clause: bail out early when the requested section is absent.
    if not ini.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    # items() yields (option, value) pairs, which dict() consumes directly.
    return dict(ini.items(section))

In [4]:
def test_postgresql_connect():
    """Smoke-test the PostgreSQL connection and print the server version.

    Connection settings come from ``postgresql_config()``. Any exception is
    caught and printed rather than re-raised, and the connection (if one was
    opened) is always closed before returning. Returns None.
    """
    connection = None
    try:
        settings = postgresql_config()

        print('Connecting to the PostgreSQL database...')
        connection = psycopg2.connect(**settings)
        cursor = connection.cursor()

        # Ask the server to report its own version string.
        print('PostgreSQL database version:')
        cursor.execute('SELECT version()')
        print(cursor.fetchone())

        cursor.close()
    except (Exception, psycopg2.DatabaseError) as error:
        # Best-effort check: report the problem instead of propagating it.
        print(error)
    finally:
        if connection is not None:
            connection.close()
            print('Database connection closed.')

In [5]:
# Run the connectivity smoke test; it prints its progress and the server version.
test_postgresql_connect()

Connecting to the PostgreSQL database...
PostgreSQL database version:
('PostgreSQL 14.2, compiled by Visual C++ build 1914, 64-bit',)
Database connection closed.


---

In [6]:
def get_patients():
    """Print every row of the ``patients`` table to stdout.

    Connection settings come from ``postgresql_config()``. The row count
    reported by the cursor is printed first, followed by one line per row.
    Any exception is caught and printed rather than re-raised, and the
    connection (if one was opened) is always closed. Returns None.
    """
    conn = None
    try:
        params = postgresql_config()
        conn = psycopg2.connect(**params)

        # The cursor context manager closes the cursor even if iteration fails.
        with conn.cursor() as cur:
            cur.execute("SELECT * from patients")
            # Label corrected: the query reads patients, not "parts".
            print("The number of patients: ", cur.rowcount)

            # psycopg2 cursors are iterable and yield one row at a time.
            for row in cur:
                print(row)
    except (Exception, psycopg2.DatabaseError) as error:
        # Best-effort helper: report the problem instead of propagating it.
        print(error)
    finally:
        if conn is not None:
            conn.close()

In [7]:
# Draw 1000 patients in random order to serve as the sample cohort.
# NOTE(review): random() is not seeded in this query, so each run is expected
# to pick a different cohort — confirm whether reproducibility matters here.
sql = "SELECT * FROM patients p  order by random() limit 1000;"
# NOTE(review): this notebook-level connection is not closed in the cells shown.
conn = psycopg2.connect(**postgresql_config())

In [8]:
# Run the sampling query over the open connection and load the result as a DataFrame.
random_patients_df = pd.read_sql(sql, conn)

In [9]:
random_patients_df

Unnamed: 0,row_id,subject_id,gender,dob,dod,dod_hosp,dod_ssn,expire_flag
0,10673,11290,M,2173-11-19,NaT,NaT,NaT,0
1,33056,48637,F,2044-10-27,NaT,NaT,NaT,0
2,8721,9216,M,2170-04-19,NaT,NaT,NaT,0
3,18513,19611,M,2170-03-15,NaT,NaT,NaT,0
4,28235,30020,M,2046-06-27,2126-03-02,NaT,2126-03-02,1
...,...,...,...,...,...,...,...,...
995,46011,97990,M,2020-08-22,NaT,NaT,NaT,0
996,12718,13445,M,2071-09-26,2146-08-12,NaT,2146-08-12,1
997,11297,11936,M,2108-07-22,NaT,NaT,NaT,0
998,18552,19654,M,2085-03-12,2173-05-10,NaT,2173-05-10,1


In [10]:
# Keep only the subject_id column of the sample; these ids are what the
# per-table extraction cells further down filter on.
patients_subject_ids = random_patients_df['subject_id']
patients_subject_ids

0      11290
1      48637
2       9216
3      19611
4      30020
       ...  
995    97990
996    13445
997    11936
998    19654
999     7059
Name: subject_id, Length: 1000, dtype: int64

---

In [11]:
# create query scripts

def create_query_scripts(table_name):
    """Return the SELECT prefix for ``table_name``, ending at ``SUBJECT_ID = ``.

    The returned text is an incomplete statement: the caller is expected to
    append the subject id value. ``table_name`` is inserted verbatim.
    """
    pieces = ["SELECT * FROM ", table_name, " WHERE SUBJECT_ID = "]
    return "".join(pieces)

In [12]:
def select_data(subject_ids, sql):
    """Run ``sql`` once per subject id and stack the results into one DataFrame.

    Args:
        subject_ids: Iterable of subject id values (e.g. a pandas Series).
        sql: Query text that ends right where the id value belongs, such as
            the string produced by ``create_query_scripts``. The id is bound
            as a driver parameter, so any literal ``%`` in ``sql`` must be
            written as ``%%``.

    Returns:
        DataFrame holding the rows of every per-id result, each keeping its
        own index; an empty DataFrame when ``subject_ids`` is empty; or None
        when an error occurred (the error is printed, not raised).
    """
    conn = None

    try:
        params = postgresql_config()
        conn = psycopg2.connect(**params)

        # Bind the id as a query parameter instead of pasting it into the SQL text.
        parameterized_sql = sql + "%s"

        # Collect the per-id frames and concatenate once at the end; growing a
        # DataFrame with concat inside the loop re-copies all earlier rows each time.
        frames = []
        for subject_id in subject_ids:
            # Unwrap NumPy scalars to plain Python values: psycopg2 does not
            # adapt NumPy scalar types out of the box.
            value = subject_id.item() if hasattr(subject_id, "item") else subject_id
            frames.append(pd.read_sql(parameterized_sql, conn, params=(value,)))

        non_empty = [frame for frame in frames if not frame.empty]
        if non_empty:
            return pd.concat(non_empty, axis=0)

        # No rows at all: hand back the last (empty) result so its columns are
        # preserved, or a bare DataFrame when there were no ids to query.
        return frames[-1] if frames else pd.DataFrame()
    except (Exception, psycopg2.DatabaseError) as error:
        # Existing contract: report the failure and fall through, returning None.
        print(error)
    finally:
        if conn is not None:
            conn.close()

### Patients and transfer information

In [15]:
# Pull the admissions rows for every sampled subject_id. The cells below repeat
# this extract-then-export pattern for each table, changing only the table name.
admissions_sample_df = select_data(patients_subject_ids, create_query_scripts('admissions'))

In [16]:
# Persist the extract as CSV. NOTE(review): to_csv is called with its defaults, so the
# DataFrame index is written as the first column, and ../temp_sets is assumed to exist.
admissions_sample_df.to_csv("../temp_sets/admissions.csv")

In [17]:
callout_sample_df = select_data(patients_subject_ids, create_query_scripts('callout'))

In [18]:
callout_sample_df.to_csv("../temp_sets/callout.csv")

In [19]:
icustays_sample_df = select_data(patients_subject_ids, create_query_scripts('icustays'))

In [20]:
icustays_sample_df.to_csv("../temp_sets/icustays.csv")

In [21]:
services_sample_df = select_data(patients_subject_ids, create_query_scripts('services'))

In [22]:
services_sample_df.to_csv("../temp_sets/services.csv")

In [23]:
transfers_sample_df = select_data(patients_subject_ids, create_query_scripts('transfers'))

In [24]:
transfers_sample_df.to_csv("../temp_sets/transfers.csv")

### ICU Information

In [30]:
# NOTE(review): placeholder only — no caregivers extract is performed in the cells shown.
# Presumably the table cannot be filtered by SUBJECT_ID like the others — TODO confirm.
caregivers_sample_df = None

In [33]:
chartevents_sample_df = select_data(patients_subject_ids, create_query_scripts('chartevents'))

In [34]:
chartevents_sample_df.to_csv("../temp_sets/chartevents.csv")

In [35]:
datetimeevents_sample_df = select_data(patients_subject_ids, create_query_scripts('datetimeevents'))

In [36]:
datetimeevents_sample_df.to_csv("../temp_sets/datetimeevents.csv")

In [37]:
inputevents_cv_sample_df = select_data(patients_subject_ids, create_query_scripts('inputevents_cv'))

In [38]:
inputevents_cv_sample_df.to_csv("../temp_sets/inputevents_cv.csv")

In [39]:
inputevents_mv_sample_df = select_data(patients_subject_ids, create_query_scripts('inputevents_mv'))

In [40]:
inputevents_mv_sample_df.to_csv("../temp_sets/inputevents_mv.csv")

In [41]:
outputevents_sample_df = select_data(patients_subject_ids, create_query_scripts('outputevents'))

In [42]:
outputevents_sample_df.to_csv("../temp_sets/outputevents.csv")

In [43]:
procedureevents_mv_sample_df = select_data(patients_subject_ids, create_query_scripts('procedureevents_mv'))

In [44]:
procedureevents_mv_sample_df.to_csv("../temp_sets/procedureevents_mv.csv")

### Hospital records

In [45]:
cptevents_sample_df = select_data(patients_subject_ids, create_query_scripts('cptevents'))

In [46]:
cptevents_sample_df.to_csv("../temp_sets/cptevents.csv")

In [47]:
diagnoses_icd_sample_df = select_data(patients_subject_ids, create_query_scripts('diagnoses_icd'))

In [48]:
diagnoses_icd_sample_df.to_csv("../temp_sets/diagnoses_icd.csv")

In [49]:
drgcodes_sample_df = select_data(patients_subject_ids, create_query_scripts('drgcodes'))

In [50]:
drgcodes_sample_df.to_csv("../temp_sets/drgcodes.csv")

In [51]:
labevents_sample_df = select_data(patients_subject_ids, create_query_scripts('labevents'))

In [52]:
labevents_sample_df.to_csv("../temp_sets/labevents.csv")

In [53]:
microbiologyevents_sample_df = select_data(patients_subject_ids, create_query_scripts('microbiologyevents'))

In [54]:
microbiologyevents_sample_df.to_csv("../temp_sets/microbiologyevents.csv")

In [55]:
prescriptions_sample_df = select_data(patients_subject_ids, create_query_scripts('prescriptions'))

In [56]:
prescriptions_sample_df.to_csv("../temp_sets/prescriptions.csv")

In [57]:
procedures_icd_sample_df = select_data(patients_subject_ids, create_query_scripts('procedures_icd'))

In [58]:
procedures_icd_sample_df.to_csv("../temp_sets/procedures_icd.csv")

---