In [1]:
import psycopg2
from configparser import ConfigParser
import pandas as pd

In [2]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): presumably added to hide pandas' warning about reading SQL through a raw
# DBAPI connection -- confirm; a blanket "ignore" also hides unrelated warnings.
warnings.filterwarnings("ignore")

In [3]:
# Output directory for every CSV written by this notebook. The value is joined to file
# names by plain string concatenation, so it must end with a path separator.
# NOTE(review): absolute, machine-specific Windows path -- adjust before running elsewhere.
csv_save_path = "F:\\MSc Project\\temp_sets_100\\"

---

In [4]:
def postgresql_config(filename='config.ini', section='postgresql'):
    """Load one section of an INI file as a dict of connection parameters.

    Args:
        filename: Path of the INI file to read.
        section: Name of the section to return.

    Returns:
        dict mapping every option name found in ``section`` to its string value.

    Raises:
        Exception: If ``section`` is not present after reading ``filename``.
            ``ConfigParser.read`` skips files it cannot open, so a missing
            file is reported through this same error.
    """
    config = ConfigParser()
    config.read(filename)

    # Guard clause: without the requested section there is nothing to return.
    if not config.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return {option: value for option, value in config.items(section)}

In [5]:
def test_postgresql_connect():
    """Connect to the PostgreSQL server and print its version string.

    Connection parameters are read with ``postgresql_config()``. Any error is
    printed instead of being raised, so the function always returns ``None``.
    If a connection was opened it is closed before returning.
    """
    conn = None
    try:
        # read connection parameters
        params = postgresql_config()

        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)

        # The cursor context manager closes the cursor even when execute()
        # or fetchone() raises.
        with conn.cursor() as cur:
            print('PostgreSQL database version:')
            cur.execute('SELECT version()')

            # display the PostgreSQL database server version
            db_version = cur.fetchone()
            print(db_version)
    except Exception as error:
        # psycopg2.DatabaseError is itself an Exception subclass, so one clause suffices.
        print(error)
    finally:
        if conn is not None:
            conn.close()
            print('Database connection closed.')

In [6]:
# Smoke test: check that the config file can be read and the server answers a query.
test_postgresql_connect()

Connecting to the PostgreSQL database...
PostgreSQL database version:
('PostgreSQL 14.2, compiled by Visual C++ build 1914, 64-bit',)
Database connection closed.


---

In [7]:
def get_patients():
    """Print the row count and every row of the ``patients`` table.

    Connection parameters are read with ``postgresql_config()``. Errors are
    printed instead of being raised, so the function always returns ``None``.
    If a connection was opened it is closed before returning.
    """
    conn = None
    try:
        params = postgresql_config()
        conn = psycopg2.connect(**params)

        # The cursor context manager closes the cursor even when the query fails.
        with conn.cursor() as cur:
            cur.execute("SELECT * from patients")
            print("The number of patients: ", cur.rowcount)

            # Iterating the cursor yields one row at a time until the result is exhausted.
            for row in cur:
                print(row)
    except Exception as error:
        # psycopg2.DatabaseError is itself an Exception subclass, so one clause suffices.
        print(error)
    finally:
        if conn is not None:
            conn.close()

In [8]:
# Draw 100 patients at random.
# NOTE: random() is not seeded here, so every execution selects a different sample.
sql = "SELECT * FROM patients p order by random() limit 100;"
# NOTE(review): this notebook-level connection is not closed in any of the cells shown.
conn = psycopg2.connect(**postgresql_config())

In [9]:
# Run the sampling query and load the result into a DataFrame.
random_patients_df = pd.read_sql(sql, conn)

In [10]:
# Persist the sample. to_csv is called without index=False, so the DataFrame index is
# written as an extra leading column.
random_patients_df.to_csv(csv_save_path + "patients.csv")

In [11]:
# Pick the subject_id column of the sample; it is the filter for every per-table
# extraction further down.
patients_subject_ids = random_patients_df['subject_id']
patients_subject_ids

0       569
1     26282
2      1762
3     14481
4     21470
      ...  
95    23647
96    26485
97    26884
98    49024
99    15440
Name: subject_id, Length: 100, dtype: int64

---

In [12]:
# create query scripts

def create_query_scripts(table_name):
    """Build the query prefix that selects one subject's rows from a table.

    Args:
        table_name: Name of the table; inserted into the SQL text as-is.

    Returns:
        The string ``"SELECT * FROM <table_name> WHERE subject_id = "``. The
        caller appends the subject id to complete the statement.
    """
    pieces = ("SELECT * FROM", table_name, "WHERE subject_id = ")
    return " ".join(pieces)

In [13]:
def select_data(subject_ids, sql):
    """Run ``sql`` once per subject id and stack the results into one DataFrame.

    Args:
        subject_ids: Iterable of subject ids. Each id is converted with
            ``str()`` and appended to ``sql`` verbatim, so the ids must come
            from a trusted source.
        sql: Query prefix that ends where the id belongs, e.g. the value
            returned by ``create_query_scripts``.

    Returns:
        A DataFrame holding the rows of every per-subject query, in the order
        of ``subject_ids``, with each query's own row index preserved (index
        values therefore repeat). If no query returns rows, a copy of the last
        (empty) result is returned; if ``subject_ids`` is empty, an empty
        DataFrame. Returns ``None`` when any step raises: the error is
        printed, not re-raised, so callers must be prepared for ``None``.
    """
    conn = None

    try:
        params = postgresql_config()
        conn = psycopg2.connect(**params)

        # Collect the per-subject results and concatenate once at the end;
        # concatenating inside the loop re-copies all accumulated rows on
        # every iteration.
        frames = [pd.read_sql(sql + str(subject_id), conn) for subject_id in subject_ids]

        # Empty results carry no rows, so they are left out of the concat and
        # cannot influence the dtypes of the combined frame.
        non_empty = [frame for frame in frames if not frame.empty]
        if non_empty:
            return pd.concat(non_empty, axis=0)
        if frames:
            # Every query came back empty: keep the column layout of the last one.
            return frames[-1].copy()
        return pd.DataFrame()
    except Exception as error:
        # psycopg2.DatabaseError is itself an Exception subclass, so one clause suffices.
        print(error)
        return None
    finally:
        if conn is not None:
            conn.close()

### Patients and transfer information

In [14]:
# Each table in this section is extracted with select_data() (one query per sampled
# subject_id) and then written to csv_save_path + "<table>.csv".
# select_data() prints the error and returns None when a step fails; in that case the
# to_csv cell that follows raises AttributeError.
admissions_sample_df = select_data(patients_subject_ids, create_query_scripts('admissions'))

In [15]:
# to_csv is called without index=False throughout, so the DataFrame index is written
# as an extra leading column in every file.
admissions_sample_df.to_csv(csv_save_path + "admissions.csv")

In [16]:
callout_sample_df = select_data(patients_subject_ids, create_query_scripts('callout'))

In [17]:
callout_sample_df.to_csv(csv_save_path + "callout.csv")

In [18]:
icustays_sample_df = select_data(patients_subject_ids, create_query_scripts('icustays'))

In [19]:
icustays_sample_df.to_csv(csv_save_path + "icustays.csv")

In [20]:
services_sample_df = select_data(patients_subject_ids, create_query_scripts('services'))

In [21]:
services_sample_df.to_csv(csv_save_path + "services.csv")

In [22]:
transfers_sample_df = select_data(patients_subject_ids, create_query_scripts('transfers'))

In [23]:
transfers_sample_df.to_csv(csv_save_path + "transfers.csv")

### ICU Information

In [None]:
# No extraction is performed for caregivers; the name is only bound to None.
# NOTE(review): presumably because that table cannot be filtered by subject_id -- confirm.
caregivers_sample_df = None

In [None]:
# NOTE(review): the two chartevents cells carry no execution count, so they appear not
# to have been run in the saved notebook -- confirm whether chartevents.csv exists.
chartevents_sample_df = select_data(patients_subject_ids, create_query_scripts('chartevents'))

In [None]:
chartevents_sample_df.to_csv(csv_save_path + "chartevents.csv")

In [24]:
# Same extract-then-save pattern as the cells above: one query per sampled subject_id,
# then a CSV (including the DataFrame index column) under csv_save_path.
datetimeevents_sample_df = select_data(patients_subject_ids, create_query_scripts('datetimeevents'))

In [25]:
datetimeevents_sample_df.to_csv(csv_save_path + "datetimeevents.csv")

In [26]:
outputevents_sample_df = select_data(patients_subject_ids, create_query_scripts('outputevents'))

In [27]:
outputevents_sample_df.to_csv(csv_save_path + "outputevents.csv")

In [30]:
outputevents_mv_sample_df = select_data(patients_subject_ids, create_query_scripts('inputevents_mv'))

In [31]:
outputevents_mv_sample_df.to_csv(csv_save_path + "inputevents_mv.csv")

In [32]:
outputevents_sample_df = select_data(patients_subject_ids, create_query_scripts('inputevents_cv'))

In [33]:
outputevents_sample_df.to_csv(csv_save_path + "inputevents_cv.csv")

In [34]:
# Extract the procedureevents_mv rows of the sampled patients (one query per subject_id).
procedureevents_mv_sample_df = select_data(patients_subject_ids, create_query_scripts('procedureevents_mv'))

In [35]:
# Written without index=False, so the DataFrame index becomes the first CSV column.
procedureevents_mv_sample_df.to_csv(csv_save_path + "procedureevents_mv.csv")

### Hospital records

In [36]:
# Each table in this section is extracted with select_data() (one query per sampled
# subject_id) and then written to csv_save_path + "<table>.csv".
# select_data() prints the error and returns None when a step fails; in that case the
# to_csv cell that follows raises AttributeError.
cptevents_sample_df = select_data(patients_subject_ids, create_query_scripts('cptevents'))

In [37]:
# to_csv is called without index=False throughout, so the DataFrame index is written
# as an extra leading column in every file.
cptevents_sample_df.to_csv(csv_save_path + "cptevents.csv")

In [38]:
diagnoses_icd_sample_df = select_data(patients_subject_ids, create_query_scripts('diagnoses_icd'))

In [None]:
# NOTE(review): from this cell onward there are no execution counts, so the remaining
# cells appear not to have been run in the saved notebook -- confirm which CSVs exist.
diagnoses_icd_sample_df.to_csv(csv_save_path + "diagnoses_icd.csv")

In [None]:
drgcodes_sample_df = select_data(patients_subject_ids, create_query_scripts('drgcodes'))

In [None]:
drgcodes_sample_df.to_csv(csv_save_path + "drgcodes.csv")

In [None]:
labevents_sample_df = select_data(patients_subject_ids, create_query_scripts('labevents'))

In [None]:
labevents_sample_df.to_csv(csv_save_path + "labevents.csv")

In [None]:
microbiologyevents_sample_df = select_data(patients_subject_ids, create_query_scripts('microbiologyevents'))

In [None]:
microbiologyevents_sample_df.to_csv(csv_save_path + "microbiologyevents.csv")

In [None]:
prescriptions_sample_df = select_data(patients_subject_ids, create_query_scripts('prescriptions'))

In [None]:
prescriptions_sample_df.to_csv(csv_save_path + "prescriptions.csv")

In [None]:
procedures_icd_sample_df = select_data(patients_subject_ids, create_query_scripts('procedures_icd'))

In [None]:
procedures_icd_sample_df.to_csv(csv_save_path + "procedures_icd.csv")

---