In [22]:
import psycopg2
from configparser import ConfigParser
import pandas as pd

import common

In [23]:
# Folder that receives the sampled CSV files. The trailing backslash lets
# file names be appended with plain string concatenation.
# NOTE(review): absolute, machine-specific Windows path — adjust before running elsewhere.
csv_save_path = (
    "C:\\Users\\shrus\\Documents"
    "\\Synthetic-data-generation"
    "\\sampled_data_csv_100\\"
)

## Connect to PostgreSQL

In [24]:
def postgresql_config(filename='config.ini', section='postgresql'):
    """Load one section of an INI file as a dict of connection parameters.

    Args:
        filename: Path of the INI file to parse (default ``config.ini``).
        section: Name of the section to read (default ``postgresql``).

    Returns:
        dict: Each option name found in the section mapped to its string value.

    Raises:
        Exception: If ``section`` is not present after parsing ``filename``.
            ``ConfigParser.read`` skips files it cannot open, so a missing
            file surfaces as this error as well.
    """
    ini = ConfigParser()
    ini.read(filename)

    # Guard clause: fail fast when the requested section is absent.
    if not ini.has_section(section):
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    # items() yields (option, value) pairs, which dict() consumes directly.
    return dict(ini.items(section))

In [25]:
def test_postgresql_connect():
    """Check that a PostgreSQL connection can be opened and queried.

    Reads the connection parameters with ``postgresql_config()``, connects,
    runs ``SELECT version()`` and prints the result together with progress
    messages.

    Returns:
        bool: True when every step succeeded; False when any step raised.
        The error is printed rather than re-raised.
    """
    conn = None
    try:
        # read connection parameters
        params = postgresql_config()

        print('------Test Connection------')

        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)

        # The cursor context manager closes the cursor even if the query raises.
        with conn.cursor() as cur:
            print('PostgreSQL database version:')
            cur.execute('SELECT version()')

            # display the PostgreSQL database server version
            db_version = cur.fetchone()
            print(db_version)

        # Only the cursor is closed at this point; the connection itself is
        # closed in the finally block below.
        print('Database connection closed.')
        print('Successfully connected!')
        return True
    except Exception as error:
        # psycopg2.DatabaseError is an Exception subclass, so this single
        # clause covers driver errors too.
        print(error)
        # Explicit failure result instead of falling through to an implicit None.
        return False
    finally:
        if conn is not None:
            conn.close()
        print('------Test Finished------')

In [26]:
# Smoke-test the database connection before running any queries.
test_postgresql_connect()


------Test Connection------
Connecting to the PostgreSQL database...
PostgreSQL database version:
('PostgreSQL 15.3, compiled by Visual C++ build 1914, 64-bit',)
Database connection closed.
Successfully connected!
------Test Finished------


True

### Querying Patient Data from PostgreSQL

In [27]:
def get_patients():
    """Print the row count and every row of the ``patients`` table.

    Connection parameters come from ``postgresql_config()``. Any error is
    printed rather than raised, and the connection is always closed.

    Returns:
        None. All results are written to stdout.
    """
    conn = None
    try:
        params = postgresql_config()
        conn = psycopg2.connect(**params)

        # The cursor context manager closes the cursor even if the query raises.
        with conn.cursor() as cur:
            # NOTE(review): the table is unqualified here, while the sampling
            # query in this notebook uses mimiciii.patients — confirm that the
            # connection's search_path resolves to the same table.
            cur.execute("SELECT * from patients")
            print("The number of patients: ", cur.rowcount)

            # A psycopg2 cursor is iterable and yields one row tuple at a time.
            for row in cur:
                print(row)
    except Exception as error:
        # psycopg2.DatabaseError is an Exception subclass, so it is covered here.
        print(error)
    finally:
        if conn is not None:
            conn.close()

In [28]:
# Number of patients to draw for the sample.
patient_num = 100

# ORDER BY random() is not seeded here, so each execution can return a
# different set of patients.
sql = f"SELECT * FROM mimiciii.patients p order by random() limit {patient_num}"

# This connection is left open; the following cell passes it to pandas.
conn = psycopg2.connect(**postgresql_config())

In [29]:
# Run the sampling query on the open connection and load the result set
# into a DataFrame.
random_patients_df = pd.read_sql(sql=sql, con=conn)

  random_patients_df = pd.read_sql(sql, conn)


In [30]:
# Preview the sampled patients (pandas elides the middle rows in the rendered table).
random_patients_df

Unnamed: 0,row_id,subject_id,gender,dob,dod,dod_hosp,dod_ssn,expire_flag
0,39119,71382,M,2135-06-30,NaT,NaT,NaT,0
1,28999,30853,M,2125-09-19,NaT,NaT,NaT,0
2,13496,14272,M,2122-07-13,NaT,NaT,NaT,0
3,16421,17359,M,2105-10-03,NaT,NaT,NaT,0
4,40804,78012,M,2114-01-08,NaT,NaT,NaT,0
...,...,...,...,...,...,...,...,...
95,31889,44303,M,2137-05-08,2167-02-13,2167-02-13,NaT,1
96,36859,62941,F,1872-02-14,NaT,NaT,NaT,0
97,44792,93391,M,2072-10-10,NaT,NaT,NaT,0
98,154,164,M,2034-04-26,2117-01-16,2117-01-16,NaT,1


In [31]:
# Save the patient sample; index=False (the documented boolean form) keeps
# the RangeIndex out of the CSV file.
random_patients_df.to_csv(csv_save_path + "patients.csv", index=False)

In [32]:
# Keep only the subject_id column of the sample as a Series.
patients_subject_ids = random_patients_df["subject_id"]

# Bare last expression so the notebook renders the Series.
patients_subject_ids

0     71382
1     30853
2     14272
3     17359
4     78012
      ...  
95    44303
96    62941
97    93391
98      164
99    75514
Name: subject_id, Length: 100, dtype: int64

In [33]:
# Summarise column dtypes and non-null counts of the sampled patients.
random_patients_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   row_id       100 non-null    int64         
 1   subject_id   100 non-null    int64         
 2   gender       100 non-null    object        
 3   dob          100 non-null    datetime64[ns]
 4   dod          41 non-null     datetime64[ns]
 5   dod_hosp     24 non-null     datetime64[ns]
 6   dod_ssn      32 non-null     datetime64[ns]
 7   expire_flag  100 non-null    int64         
dtypes: datetime64[ns](4), int64(3), object(1)
memory usage: 6.4+ KB
