In [65]:
import json
import os
import pandas as pd
import time

from datetime import date as d
import argparse
import csv
import itertools as it
from collections import OrderedDict

import sys
import yaml
import pandas.io.sql as sqlio
import psycopg2

import hcc_risk_models as hrm

In [63]:
def small_test():
    """Score two hand-built patients with the HCC risk model.

    Builds a two-row demographics frame and a 39-row diagnoses frame
    (30 codes for the first patient, 9 for the second) and passes both to
    ``hrm.evaluate_model``.

    Returns:
        Whatever ``hrm.evaluate_model`` returns for model 'V2216_79_O2'.
    """
    # Alternative model 'V2217_79_O1' is not used here; per the original
    # author's note it accepts icd10 only.
    model_name = 'V2216_79_O2'

    first_pt = '07358519'
    second_pt = '07098439'

    demographics = pd.DataFrame({
        'pt_id': [first_pt, second_pt],
        'sex': [1, 1],
        'dob': ['1944-6-6', '1946-1-1'],
        'ltimcaid': [0, 0],
        'nemcaid': [0, 0],
        'orec': [0, 0],
    })

    # Diagnosis codes for the first patient; every one carries diag_type 0.
    first_codes = [
        'S0990XA', 'M1990', 'M79642', 'I8310', 'M79674', 'I480',
        'M79606', 'M1711', 'G629', 'I872', 'I2510', 'M25432',
        'W19XXXA', 'S5002XA', 'M79675', 'I10', 'G92', 'J449',
        'S61213A', 'Z5181', 'R55', 'B351', 'M79672', 'M7989',
        'Z6828', 'I4891', 'M25422', 'M25532', 'Z01810', 'Z86718',
    ]
    first_types = [0] * len(first_codes)

    # Diagnosis codes for the second patient. 'V722' is the only entry with
    # diag_type 9 (presumably marking an ICD-9 code -- TODO confirm against
    # hcc_risk_models).
    second_codes = [
        'I10', 'V722', 'E785', 'Z0120', 'Z01818',
        'M542', 'E119', 'M4712', 'M4722',
    ]
    second_types = [9 if code == 'V722' else 0 for code in second_codes]

    diagnoses = pd.DataFrame({
        'pt_id': [first_pt] * len(first_codes) + [second_pt] * len(second_codes),
        'diag_code': first_codes + second_codes,
        'diag_type': first_types + second_types,
    })

    return hrm.evaluate_model(model_name, demographics, diagnoses)

In [115]:
def big_test(model='V2216_79_O2', redshift_schema='data_science',
             long_form_dx_table='bcop_dx_episode'):
    """Score every episode in the long-form diagnosis table with the HCC model.

    Reads distinct (episode_id, diagnosis_code, icd_version) rows from
    ``<redshift_schema>.<long_form_dx_table>``, builds the demographics and
    diagnoses frames, runs ``hrm.evaluate_model`` and prints the episode count,
    the diagnosis-row count and the elapsed time.

    The query fills sex ('1') and date of birth ('1944-06-06') with constant
    placeholders for every episode, and the Medicaid/OREC flags are all zero.

    Args:
        model: Model name passed to ``hrm.evaluate_model``.
        redshift_schema: Schema holding the diagnosis table. Interpolated into
            the SQL text, so it must be a trusted identifier.
        long_form_dx_table: Name of the diagnosis table. Interpolated into the
            SQL text, so it must be a trusted identifier.

    Returns:
        Whatever ``hrm.evaluate_model`` returns.

    Raises:
        RuntimeError: If the DW_DEV_CONN environment variable is not set.
    """
    conn_string = os.environ.get("DW_DEV_CONN")
    if not conn_string:
        # Fail with a clear message instead of an opaque connect() error.
        raise RuntimeError("DW_DEV_CONN environment variable is not set")

    conn = psycopg2.connect(conn_string)
    try:
        cur = conn.cursor()
        cur.execute("SET search_path TO {}".format(redshift_schema))
        # Identifiers cannot be bound as query parameters, hence format().
        sql = """ SELECT DISTINCT episode_id, diagnosis_code, icd_version, '1' as sex_cd, 
    '1944-06-06' as date_of_birth,  '0' as mcaid_cd FROM {}.{}""".format(
            redshift_schema, long_form_dx_table)
        episode_dx = sqlio.read_sql_query(sql, conn)
    finally:
        # The frame is fully materialised, so the connection can be released
        # even when the query fails.
        conn.close()

    print ("Number of episodes: {}".format(len(episode_dx['episode_id'].unique())))
    print ("Number of dx codes: {}".format(len(episode_dx)))

    # One demographics row per episode. The original code assigned `demog`
    # but then read an undefined `dem`, which only worked with leaked kernel
    # state; a single name is used throughout now.
    demog = episode_dx[['episode_id', 'sex_cd', 'date_of_birth']].drop_duplicates()
    zerolist = [0] * len(demog)
    demographics = pd.DataFrame({
        'pt_id': demog['episode_id'],
        'sex': demog['sex_cd'],
        'dob': demog['date_of_birth'],
        'ltimcaid': zerolist,
        'nemcaid': zerolist,
        'orec': zerolist,
    })
    diagnoses = pd.DataFrame({
        'pt_id': episode_dx['episode_id'],
        'diag_code': episode_dx['diagnosis_code'],
        'diag_type': episode_dx['icd_version'].astype(int),
    })

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    result = hrm.evaluate_model(model, demographics, diagnoses)
    print ("Execution time: {}".format(time.perf_counter() - start))
    return result
    

In [116]:
# Pre-bind `results` so the name exists (as None) even when the guarded call
# below is skipped or raises; later cells index into it.
results = None
# Only hit the database and score episodes when run as the main module
# (always true inside a notebook kernel), not when imported.
if __name__ == '__main__':
    results = big_test()

Number of episodes: 3128
Number of dx codes: 65722
Execution time: 33.16753


In [122]:
# Build one summary row per scored patient: the episode id and the distinct
# HCC values found in that patient's 'diagnoses_to_hccs' entries.
# Rows are collected in a list and the frame is built once, because
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every iteration.
summary_records = []
for p in results['patients']:
    diags = p['diagnoses_to_hccs']
    diags_df = pd.DataFrame(diags)
    hcc_list = []
    # No 'hcc' column means no HCC values for this patient (presumably none
    # of the diagnoses mapped to an HCC -- TODO confirm), so keep the empty list.
    if 'hcc' in diags_df.columns:
        hcc_list = list(diags_df['hcc'].unique())
    summary_records.append({'episode_id': p['pt_id'], 'diagnoses': hcc_list})

# Passing `columns` keeps the expected two columns even when there are no patients.
summary = pd.DataFrame(summary_records, columns=['episode_id', 'diagnoses'])

In [123]:
# Display the per-episode HCC summary frame (columns: episode_id, diagnoses).
summary

Unnamed: 0,episode_id,diagnoses
0,07457905,[]
1,08481284,"[96, 40]"
2,09809617,"[19, 135, 84, 40]"
3,07053432,[11]
4,09591544,[55]
5,24516974,[]
6,12766367,[40]
7,12617300,[48]
8,24681517,"[19, 58]"
9,10419486,[]


In [124]:
# Load the bcop_episode_hcc_risk table into `clarify_hcc`.
# The connection string comes from the DW_DEV_CONN environment variable.
conn_string = os.environ.get("DW_DEV_CONN")
conn = psycopg2.connect(conn_string)
try:
    cur = conn.cursor()
    cur.execute("SET search_path TO data_science")
    sql = """SELECT * FROM data_science.bcop_episode_hcc_risk"""
    clarify_hcc = sqlio.read_sql_query(sql, conn)
finally:
    # The frame is fully loaded into memory; release the connection instead of
    # leaving it open for the life of the kernel.
    conn.close()

In [125]:
# Empty long-form frame with the same two columns as the per-episode summary;
# nothing in this cell fills it yet.
cfy_hcc = pd.DataFrame(columns=['episode_id','diagnoses'])
# Preview the wide reference table. The previous `for r in clarify_hcc:` loop
# iterated over the column labels and discarded head(2) on every pass, so it
# displayed nothing; the preview is now the cell's final expression.
clarify_hcc.head(2)

Unnamed: 0,episode_id,hcc1,hcc2,hcc6,hcc8,hcc9,hcc10,hcc11,hcc12,hcc17,...,hcc162,hcc166,hcc167,hcc169,hcc170,hcc173,hcc176,hcc186,hcc188,hcc189
0,7011570,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,7117254,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
