# Select patients matching certain criteria from the gaitbase
Read all patients from the gaitbase database, select those matching certain criteria (e.g. age, diagnosis), and save them to an Excel file.

In [1]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
import sqlite3

In [2]:
# --- Input / output locations ---
# SQLite database file that the patients table is read from
DB_FILE_NAME = 'Z:/gaitbase/patients.db'
# Excel file that the selected patients are written to
OUT_FNAME = 'C:/Temp/patients.xlsx'

# --- Selection criteria ---
# A patient is kept only if the first character of patient_code is listed here
CODE_TYPES = ['H', 'C']
# Date on which the age is evaluated, written as DDMMYY
REF_DATE = '110823'
# Accepted age range in full years; both bounds are inclusive
AGE_MIN, AGE_MAX = 0, 25

In [3]:
# Read the whole patients table from the SQLite database into a pandas DataFrame
con = sqlite3.connect(DB_FILE_NAME)
try:
    df = pd.read_sql_query("SELECT * from patients", con)
finally:
    # Always release the database file, even when the query fails
    con.close()

In [4]:
def selector(row):
    """Decide whether a patient row matches the selection criteria.

    A row is selected when the first character of its ``patient_code`` is
    listed in ``CODE_TYPES`` and the patient's age in full years on
    ``REF_DATE`` lies within ``[AGE_MIN, AGE_MAX]`` (both bounds inclusive).
    The birth date is taken from the first six characters of ``row.ssn``,
    read as DDMMYY.

    Args:
        row: One row of the patients table, exposing ``patient_code`` and
            ``ssn`` as attributes.

    Returns:
        True if the row passes both checks, otherwise False.

    Raises:
        ValueError: If the first six characters of ``ssn`` (or ``REF_DATE``)
            do not form a valid DDMMYY date.
    """
    if row.patient_code[0] not in CODE_TYPES:
        return False

    ref_date = datetime.strptime(REF_DATE, '%d%m%y')
    b_date = datetime.strptime(row.ssn[:6], '%d%m%y')

    # '%y' maps 00-68 to 2000-2068, so anyone born before 1969 would be parsed
    # as born in the future and end up with a negative age. A birth date
    # cannot be later than today, so such dates are moved back one century.
    # NOTE(review): patients aged 100 or more still cannot be resolved from
    # the six date digits alone; if ssn carries a century marker, confirm
    # whether it should be used instead.
    if b_date > datetime.now():
        b_date -= relativedelta(years=100)

    age = relativedelta(ref_date, b_date).years
    return AGE_MIN <= age <= AGE_MAX

In [5]:
# Evaluate the selection criteria on every row to obtain a boolean mask
is_selected = df.apply(selector, axis=1)
df_filtered = df.loc[is_selected]

# Write only the listed columns of the matching patients, without the index
export_columns = ['firstname', 'lastname', 'ssn', 'patient_code', 'diagnosis']
df_filtered[export_columns].to_excel(OUT_FNAME, index=False)