In [None]:
import numpy as np
import _pickle as pickle
import math
from scipy import stats
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [None]:
# Load the patient -> claims and claim -> claim-line lookups.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# `with` guarantees the file handles are closed once the objects are loaded.
with open('../PMCA/pid_claim_map', 'rb') as fh:
    pid_claim_map = pickle.load(fh)
with open('../PMCA/claim_info_map', 'rb') as fh:
    claim_info_map = pickle.load(fh)

In [None]:
# ICD code table: read every cell as a string; header=None means the columns
# are addressed by integer position (0, 1, ...).
# The file is opened explicitly (same default text encoding as before) and
# closed as soon as parsing finishes.
with open('./PFK_ICD_DATA_TABLE - Copy.csv') as fh:
    PFK_ICD_DATA_TABLE = pd.read_csv(fh, dtype=str, header=None)

# ICD-10 -> ICD-9 translation table.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('./icd10toicd9', 'rb') as fh:
    icd10toicd9 = pickle.load(fh)

In [None]:
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open('../table/nonzero_index', 'rb') as fh:
    nonzero_index = pickle.load(fh)
with open('../table/group_id', 'rb') as fh:
    group_id = pickle.load(fh)

# zero_index = every patient position (0 .. len(pid_claim_map)-1) that is NOT
# listed in nonzero_index. A set makes each membership test O(1) instead of a
# linear scan of nonzero_index per patient.
_nonzero_lookup = set(nonzero_index)
zero_index = [i for i in range(len(pid_claim_map)) if i not in _nonzero_lookup]

In [None]:
# Lookup from ICD code (dots removed) to its category, built from column 0
# (code) and column 1 (category) of the ICD table. If a code appears more than
# once, the last row wins.
code_cat_map = {
    raw_code.replace('.', ''): category
    for raw_code, category in zip(PFK_ICD_DATA_TABLE[0], PFK_ICD_DATA_TABLE[1])
}

In [None]:
def type_flag(p_codes):
    """Return the highest category number found among ``p_codes``.

    Each code is looked up in the module-level ``code_cat_map``; codes that are
    not in the map are ignored. The category values are converted with ``int``.

    Args:
        p_codes: iterable of code strings (already in the same dot-less form
            used as keys of ``code_cat_map``).

    Returns:
        int: the maximum category over all recognised codes, with a floor of 0
        (so 0 is returned when no code is recognised).
    """
    known_categories = [
        int(code_cat_map[code]) for code in p_codes if code in code_cat_map
    ]
    # The leading 0 is both the "nothing matched" result and a lower bound.
    return max([0] + known_categories)

In [None]:
def PT_check(pid_claim_map, claim_info_map, early_age):
    """Collect each patient's codes within an age window and flag their type.

    For every patient in ``pid_claim_map`` the claim lines of all their claims
    are scanned. A claim line is kept only if the age at service,
    ``(info[4] - info[6]).days``, lies inside the selected window. Kept codes
    (``info[1]``) have dots removed, empty strings dropped, are translated
    through the module-level ``icd10toicd9`` table when present there, and are
    de-duplicated. The result is passed to ``type_flag``.

    Args:
        pid_claim_map: mapping patient id -> iterable of claim ids.
        claim_info_map: mapping claim id -> iterable of claim-line records.
            Each record is indexed positionally: ``[1]`` is an iterable of code
            strings, ``[4]`` and ``[6]`` are dates whose difference has a
            ``.days`` attribute (service date and date of birth, judging by the
            original variable names -- confirm against the data producer).
        early_age: ``1`` keeps lines with age in [0.5*365, 2*365] days;
            ``0`` keeps lines with age in [2*365, 5*365] days;
            any other value applies no age filter.

    Returns:
        tuple: ``(pid_codes_list, pid_type_list)``, both in the iteration order
        of ``pid_claim_map``. ``pid_codes_list[k]`` is the de-duplicated code
        list of patient k (element order is arbitrary because it comes from a
        ``set``); ``pid_type_list[k]`` is ``type_flag`` of that list.
    """
    # Resolve the age window once instead of re-testing early_age per claim line.
    if early_age == 1:
        min_days, max_days = 0.5 * 365, 2 * 365
    elif early_age == 0:
        min_days, max_days = 2 * 365, 5 * 365
    else:
        min_days = max_days = None  # no age filtering

    pid_codes_list = []
    pid_type_list = []
    for p in pid_claim_map:
        p_codes = []
        for claim in pid_claim_map[p]:
            for info in claim_info_map[claim]:
                if min_days is not None:
                    age_days = (info[4] - info[6]).days
                    if age_days > max_days or age_days < min_days:
                        continue
                p_codes.extend(info[1])
        # Normalise: drop empty codes, strip dots, map ICD-10 -> ICD-9 where known.
        p_codes = [x.replace('.', '') for x in p_codes if len(x) > 0]
        p_codes = [icd10toicd9[c] if c in icd10toicd9 else c for c in p_codes]
        p_codes = list(set(p_codes))
        pid_codes_list.append(p_codes)
        pid_type_list.append(type_flag(p_codes))
    return pid_codes_list, pid_type_list

In [None]:
# Per-patient type flags for the two age windows (early_age=1 and early_age=0).
# Only the second return value is needed; indexing with [1] avoids binding the
# large per-patient code list to the notebook-level name `_`, which would keep
# it alive and stop IPython from updating its own last-output `_`.
pid_type_list_early = PT_check(pid_claim_map, claim_info_map, early_age=1)[1]
pid_type_list_late = PT_check(pid_claim_map, claim_info_map, early_age=0)[1]

In [None]:
def bar_list(values, names):
    """Return the fraction of ``values`` falling in each plotted category.

    Args:
        values: sized iterable of integer flags (list or 1-D array).
        names: accepted for interface compatibility; not used.

    Returns:
        list of 5 floats:
        ``[frac(0) + frac(1), frac(0), frac(1), frac(2), frac(3)]`` where
        ``frac(k)`` is the share of ``values`` equal to ``k``. For empty
        ``values`` all five fractions are ``0.0`` (instead of dividing by zero).
    """
    total = len(values)
    if total == 0:
        # An empty group has no patients in any category.
        return [0.0] * 5
    count = Counter(values)
    count_names = [0, 1, 2, 3]
    return [(count[0] + count[1]) / total] + [count[i] / total for i in count_names]

def analysis_plot(patient_condition, zero_index, group_id):
    """Draw a grouped bar chart of category fractions for each patient group.

    Six series are plotted side by side for every category in ``names``:
    all patients, the patients at ``zero_index``, and the four groups
    ``group_id[0]`` .. ``group_id[3]``. The group indices are positions into
    the notebook-level ``nonzero_index`` (read as a global, not a parameter),
    which in turn holds positions into ``patient_condition``.

    Args:
        patient_condition: sequence of per-patient integer flags.
        zero_index: positions (into ``patient_condition``) of the "group 0"
            patients.
        group_id: sequence whose first four items are index arrays into
            ``nonzero_index``.

    Returns:
        None. The figure is shown with ``plt.show()``.

    NOTE(review): ``bar_list`` returns
    ``[frac(0)+frac(1), frac(0), frac(1), frac(2), frac(3)]``; confirm that this
    order is what the labels in ``names`` are meant to describe.
    """
    barWidth = 0.9
    slots = 7  # x positions reserved per category: 6 series + 1 empty spacer
    patient_condition = np.array(patient_condition)
    names = ['1A+1B', '1A', '1B', 'TYPE-2', 'NONE']
    colors = cm.rainbow(np.linspace(0, 1, 6))

    nonzero = np.array(nonzero_index)
    series = [
        ('bar_overall', patient_condition),
        ('bar_group_0', patient_condition[zero_index]),
    ]
    for k in range(4):
        series.append(('bar_group_' + str(k + 1), patient_condition[nonzero[group_id[k]]]))

    # Fix: label each category with its full name. The previous `x[2:15]`
    # slice cut off the first two characters ('1A' -> '', 'TYPE-2' -> 'PE-2').
    plt.xticks([3 + x*slots for x in range(len(names))], names)

    for slot, (label, members) in enumerate(series):
        positions = [slot + 1 + x*slots for x in range(len(names))]
        plt.bar(positions, bar_list(members, names), alpha = 0.5, width = barWidth,
                color = colors[slot], label=label + ', n=' + str(len(members)))

    # Zero-height bars in the 7th slot keep a visual gap between categories.
    gap = [0]*len(names)
    plt.bar([slots + x*slots for x in range(len(names))], gap, width = barWidth)

    plt.legend()
    plt.show()

In [None]:
# Early age window (flags computed with early_age=1). This cell previously
# plotted pid_type_list_late, leaving pid_type_list_early computed but never shown.
analysis_plot(pid_type_list_early, zero_index, group_id)

In [None]:
# Category breakdown per group for the flags computed with early_age=0.
analysis_plot(
    patient_condition=pid_type_list_late,
    zero_index=zero_index,
    group_id=group_id,
)