In [1]:
from hccpy.hcc import HCCEngine
import os
import sys
import time
from dateutil.relativedelta import relativedelta
from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd

# Add ../src to the import path before importing cb_utils
# (presumably where the project-local cb_utils module lives — confirm).
sys.path.append('../src')
import cb_utils

# Allow up to 500 columns to be shown when a DataFrame is displayed.
pd.options.display.max_columns = 500

# IPython autoreload in mode 2: modules are re-imported before each cell runs,
# so edits to local helper modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Load the sample members whose ttype is '3. cca_year' from the analytics
# source. use_cache=False is passed explicitly; the helper then reports
# that it is pulling the query from the database.
query = (
    "select * from junk.v24_sample_members vs "
    "where ttype = '3. cca_year'"
)
df = cb_utils.sql_query_to_df(
    query,
    source='msh_analytics',
    use_cache=False,
)

Pulling query from db


In [3]:
# Show the (rows, columns) tuple first, then preview the first five rows.
display(df.shape)
df.head(n=5)

(4201, 9)

Unnamed: 0,ttype,patient_id,patient_age,is_medicaid,is_disabled,gender,unformatted_hcc_icd10s,elig,orec
0,3. cca_year,13472,69.0,False,False,m,[F331],CNA,0
1,3. cca_year,56746,68.0,False,False,f,[],CNA,0
2,3. cca_year,13478,66.0,False,False,f,[],CNA,0
3,3. cca_year,56758,90.0,True,False,f,[F324],CFA,0
4,3. cca_year,57991,73.0,True,False,f,[],CFA,0


In [4]:
# Upper-case the gender codes (the preview above shows 'm'/'f'); this column
# is what later gets passed to the engine as `sex`.
df["gender"] = df["gender"].str.upper()
df.head(n=5)

Unnamed: 0,ttype,patient_id,patient_age,is_medicaid,is_disabled,gender,unformatted_hcc_icd10s,elig,orec
0,3. cca_year,13472,69.0,False,False,M,[F331],CNA,0
1,3. cca_year,56746,68.0,False,False,F,[],CNA,0
2,3. cca_year,13478,66.0,False,False,F,[],CNA,0
3,3. cca_year,56758,90.0,True,False,F,[F324],CFA,0
4,3. cca_year,57991,73.0,True,False,F,[],CFA,0


In [5]:
# Build the hccpy engine with its default settings (no model version or
# other option is passed here).
# NOTE(review): the input table is named v24_* — confirm that the installed
# hccpy default corresponds to the model version this QA is meant to check.
he = HCCEngine()

In [6]:
# Take the row labelled 0 as a single sample member and show its age.
s = df.loc[0]
s["patient_age"]

69.0

In [7]:
# Profile the single sample member to inspect the result structure; the
# displayed dict carries 'risk_score', 'details', 'hcc_lst', 'hcc_map' and
# 'parameters'.
he.profile(
    dx_lst=s["unformatted_hcc_icd10s"],
    age=s["patient_age"],
    sex=s["gender"],
    elig=s["elig"],
    orec=s["orec"],
    medicaid=s["is_medicaid"],
)

{'risk_score': 0.617,
 'details': {'CNA_M65_69': 0.308, 'CNA_HCC59': 0.309, 'CNA_D1': 0.0},
 'hcc_lst': ['HCC59', 'D1'],
 'hcc_map': {'F331': ['HCC59']},
 'parameters': {'age': 69.0,
  'sex': 'M',
  'elig': 'CNA',
  'medicaid': False,
  'disabled': 0,
  'origds': 0}}

In [8]:
# Profile every member with the HCC engine.
#
# The per-row call lives inside a comprehension so its iteration variables
# stay local to it. A plain `for i, s in df.iterrows()` loop would rebind the
# notebook-level `s` (the sample row selected in an earlier cell) to the last
# row of `df`, so re-running the single-member profiling cell afterwards
# would silently score a different member.
# iterrows() is kept so the engine receives the same per-row values as the
# single-member call does.
profiles = [
    he.profile(dx_lst=row.unformatted_hcc_icd10s,
               age=row.patient_age,
               sex=row.gender,
               elig=row.elig,
               orec=row.orec,
               medicaid=row.is_medicaid)
    for _, row in df.iterrows()
]

# One list per output column, aligned with df's row order. The dict/list
# parts of each result are stringified (presumably so they can be stored as
# plain text columns — confirm against the target table).
risk_scores = [profile['risk_score'] for profile in profiles]
details = [str(profile['details']) for profile in profiles]
hcc_lsts = [str(profile['hcc_lst']) for profile in profiles]
hcc_maps = [str(profile['hcc_map']) for profile in profiles]

In [9]:
# Attach the four per-member result lists as new columns and preview the
# scored frame. The lists were built in df's row order, so they line up
# positionally.
df = df.assign(**{
    "risk_score": risk_scores,
    "details": details,
    "hcc_lst": hcc_lsts,
    "hcc_map": hcc_maps,
})
df.head(n=5)

Unnamed: 0,ttype,patient_id,patient_age,is_medicaid,is_disabled,gender,unformatted_hcc_icd10s,elig,orec,risk_score,details,hcc_lst,hcc_map
0,3. cca_year,13472,69.0,False,False,M,[F331],CNA,0,0.617,"{'CNA_M65_69': 0.308, 'CNA_HCC59': 0.309, 'CNA...","['HCC59', 'D1']",{'F331': ['HCC59']}
1,3. cca_year,56746,68.0,False,False,F,[],CNA,0,0.323,{'CNA_F65_69': 0.323},[],{}
2,3. cca_year,13478,66.0,False,False,F,[],CNA,0,0.323,{'CNA_F65_69': 0.323},[],{}
3,3. cca_year,56758,90.0,True,False,F,[F324],CFA,0,1.286,"{'CFA_F90_94': 0.987, 'CFA_LTIMCAID': 0.0, 'CF...","['HCC59', 'D1']",{'F324': ['HCC59']}
4,3. cca_year,57991,73.0,True,False,F,[],CFA,0,0.519,"{'CFA_F70_74': 0.519, 'CFA_LTIMCAID': 0.0}",[],{}


In [None]:
# Write the scored frame to the junk.qa_hcc_py table.
#
# if_exists is left at the pandas default ('fail'), so this cell raises if
# the table already exists; pass if_exists='replace' deliberately when the
# table should be overwritten.
# chunksize bounds the size of each multi-row INSERT: with method='multi'
# and no chunksize, pandas writes all rows in a single statement.
engine = cb_utils.get_engine(source='msh_analytics')
df.to_sql(
    'qa_hcc_py',
    con=engine,
    schema='junk',
    index=False,
    method='multi',
    chunksize=1000,
)