In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the claims table and replace NaN reject_code with 0.
# Only `reject_code` is filled: a blanket DataFrame.fillna(0) would also
# overwrite missing values in every other column of the claims table.
claims_df = pd.read_csv('../data/processed/dim_claims_train.csv').fillna({'reject_code': 0})
# Type cast reject_code as int for nicer printing.
# Assign the column directly: on pandas >= 2.0, `df.loc[:, col] = values`
# writes into the existing column in place and keeps its old dtype, so the
# int cast would be silently discarded.
claims_df['reject_code'] = claims_df['reject_code'].astype(int)

# The remaining tables are loaded as-is.
date_df = pd.read_csv('../data/processed/dim_date_train.csv')
pa_df = pd.read_csv('../data/processed/dim_pa_train.csv')
bridge_df = pd.read_csv('../data/processed/bridge_train.csv')

## What is the average rate of approved prior authorizations?

In [3]:
# Overall approval rate: mean of `pa_approved`, reported as a percentage
# rounded to one decimal, alongside the number of non-null records.
pa_outcomes = pa_df['pa_approved']
overall_pct = round(100 * pa_outcomes.mean(), 1)
print(f"{overall_pct}% of PAs ({pa_outcomes.count()} records) are approved.")

73.5% of PAs (444682 records) are approved.


## What is the aggregate rate of approved prior authorizations, segmented by `correct_diagnosis`, `tried_and_failed`, and `contraindication`?
- A correct diagnosis for the drug prescribed **increases** the approval rate by 3.9 percentage points.
- Trying and failing a generic alternative **increases** the approval rate by 11.1 percentage points.
- A drug with a contraindication **decreases** the approval rate by 24.5 percentage points.

In [4]:
# For each PA flag, report the approval rate and record count per flag value.
for flag in ('correct_diagnosis', 'tried_and_failed', 'contraindication'):
    # One aggregation per flag yields both the mean and the non-null count.
    flag_stats = pa_df.groupby(flag)['pa_approved'].agg(['mean', 'count'])
    for flag_value, rate, n_records in zip(flag_stats.index, flag_stats['mean'], flag_stats['count']):
        print(f"{round(100 * rate, 1)}% of PAs ({n_records} records) are approved if the {flag} is {bool(flag_value)}.")
    # Blank line between flags.
    print('')

70.4% of PAs (89040 records) are approved if the correct_diagnosis is False.
74.3% of PAs (355642 records) are approved if the correct_diagnosis is True.

67.9% of PAs (221940 records) are approved if the tried_and_failed is False.
79.0% of PAs (222742 records) are approved if the tried_and_failed is True.

78.4% of PAs (355368 records) are approved if the contraindication is False.
53.9% of PAs (89314 records) are approved if the contraindication is True.



## How does the payer (`bin`) influence the average rate of prior authorization approval?
* Payer `999001` approves 90.7% of PAs, but has the fewest PAs (51344).

In [5]:
# Join claims to their PA outcomes through the bridge table
# (default inner joins on the shared id columns).
merged = (
    bridge_df
    .merge(claims_df, on='dim_claim_id')
    .merge(pa_df, on='dim_pa_id')
)

# Approval rate and PA count for each payer (`bin`).
payer_stats = merged.groupby('bin')['pa_approved'].agg(['mean', 'count'])
for payer, rate, n_pas in zip(payer_stats.index, payer_stats['mean'], payer_stats['count']):
    print(f"{round(100 * rate, 1)}% of PAs ({n_pas}) are approved with payer of {payer}.")

78.7% of PAs (106801) are approved with payer of 417380.
71.1% of PAs (193722) are approved with payer of 417614.
62.9% of PAs (92815) are approved with payer of 417740.
90.7% of PAs (51344) are approved with payer of 999001.


## How does the drug (`drug`) influence the average rate of prior authorization approval?

In [6]:
# Approval rate and PA count for each drug.
drug_stats = merged.groupby('drug')['pa_approved'].agg(['mean', 'count'])
for drug, rate, n_pas in zip(drug_stats.index, drug_stats['mean'], drug_stats['count']):
    print(f"{round(100 * rate, 1)}% of PAs ({n_pas}) are approved with drug of {drug}.")

76.3% of PAs (230732) are approved with drug of A.
75.9% of PAs (123482) are approved with drug of B.
63.1% of PAs (90468) are approved with drug of C.


## How does the reject code (`reject_code`) influence the average rate of prior authorization approval?
- 50% of PAs with reject code 70 (drug not covered by plan and not on formulary) are approved.
- 94.8% of PAs with reject code 75 (drug is in formulary but does not have preferred status) are approved.
- 88.4% of PAs with reject code 76 (drug is covered, but plan limits have been exceeded) are approved.

In [7]:
# Approval rate and PA count for each reject code.
code_stats = merged.groupby('reject_code')['pa_approved'].agg(['mean', 'count'])
for reject_code, rate, n_pas in zip(code_stats.index, code_stats['mean'], code_stats['count']):
    print(f"{round(100 * rate, 1)}% of PAs ({n_pas}) are approved with reject code of {reject_code}.")

50.0% of PAs (201599) are approved with reject code of 70.
94.8% of PAs (173935) are approved with reject code of 75.
88.4% of PAs (69148) are approved with reject code of 76.


## How does the combination of drug (`drug`), payer (`bin`), and reject code (`reject_code`) influence the average rate of prior authorization approval?
- Reject code 70 has the lowest rate of approval for each drug.
- Reject code 75 has the highest rate of approval for each drug.
- Payer `999001` always uses reject code 76 and has a higher rate of approval compared to the other payer with reject code 76 for that specific drug.
- For each drug, every payer uses exactly one reject code. Every payer *except* `999001` uses a different code for each drug, so each of its codes appears exactly once.

In [8]:
# Mean approval and number of PAs per (drug, payer, reject code), computed in
# a single aggregation.
# Note: `pa_approved_percent` is the raw mean of `pa_approved`; it is not
# scaled to 100.
split = (
    merged
    .groupby(['drug', 'bin', 'reject_code'])['pa_approved']
    .agg(pa_approved_percent='mean', pa_approved_count='count')
)
display(split.sort_index(level=[0, 1, 2]))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
drug,bin,reject_code,Unnamed: 3_level_1,Unnamed: 4_level_1
A,417380,75,0.990274,70844
A,417614,70,0.582805,125179
A,417740,76,0.902317,8630
A,999001,76,0.963265,26079
B,417380,76,0.903797,3503
B,417614,75,0.97374,62872
B,417740,70,0.389096,43966
B,999001,76,0.929305,13141
C,417380,70,0.330683,32454
C,417614,76,0.636396,5671


## How does the combination of contraindication (`contraindication`), generic failure (`tried_and_failed`), and correct diagnosis (`correct_diagnosis`) influence the average rate of prior authorization approval?
- The magnitude of influence of each feature is consistent with the averages above.

In [9]:
# Mean approval and number of PAs for every combination of the three PA flags,
# computed in a single aggregation.
# Note: `pa_approved_percent` is the raw mean of `pa_approved`; it is not
# scaled to 100.
split = (
    merged
    .groupby(['contraindication', 'tried_and_failed', 'correct_diagnosis'])['pa_approved']
    .agg(pa_approved_percent='mean', pa_approved_count='count')
)

# Highest approval rate first.
display(split.sort_values('pa_approved_percent', ascending=False))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.844382,142304
0,1,0,0.810968,35687
0,0,1,0.738509,141959
0,0,0,0.69891,35418
1,1,1,0.611386,35799
1,1,0,0.567806,8952
1,0,1,0.482996,35580
1,0,0,0.43894,8983


## For each drug (`drug`) and payer (`bin`), how does the contraindication (`contraindication`), generic failure (`tried_and_failed`), and correct diagnosis (`correct_diagnosis`) influence the average rate of prior authorization approval?
- The relative magnitude of each effect is the same as seen in the previous result. However, the absolute magnitude is different for each payer/drug combination.

In [10]:
# For every drug/payer pair, show approval rate and PA count for each
# combination of the three PA flags, ordered from highest to lowest rate.
flag_columns = ['contraindication', 'tried_and_failed', 'correct_diagnosis']

for drug in merged['drug'].unique():
    print(f'-- Drug {drug} --')
    for payer in merged['bin'].unique():
        section = merged.loc[(merged['drug'] == drug) & (merged['bin'] == payer)]
        if section.empty:
            # A payer with no PAs for this drug has no reject code to report;
            # indexing `.unique()[0]` on it would raise IndexError.
            continue

        # Single aggregation for both the mean and the non-null count.
        # `pa_approved_percent` is the raw mean of `pa_approved` (not scaled to 100).
        split = section.groupby(flag_columns)['pa_approved'].agg(
            pa_approved_percent='mean',
            pa_approved_count='count',
        )

        # List every reject code present in the section rather than silently
        # reporting only the first; with a single code the text is unchanged.
        reject_code = ', '.join(str(code) for code in section['reject_code'].unique())
        print(f'PA approval rate for payer {payer} (reject_code {reject_code}) and drug {drug}')
        display(split.sort_values('pa_approved_percent', ascending=False))

-- Drug A --
PA approval rate for payer 417380 (reject_code 75) and drug A


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.999067,22500
0,1,0,0.998938,5650
0,0,1,0.996427,22667
0,0,0,0.994802,5771
1,1,1,0.98193,5700
1,1,0,0.971034,1450
1,0,1,0.946662,5662
1,0,0,0.927978,1444


PA approval rate for payer 417740 (reject_code 76) and drug A


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.972973,2738
0,1,0,0.962798,672
0,0,1,0.930772,2759
0,0,0,0.898827,682
1,1,1,0.828854,707
1,1,0,0.787565,193
1,0,1,0.656904,717
1,0,0,0.530864,162


PA approval rate for payer 999001 (reject_code 76) and drug A


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.9942,8276
0,1,0,0.991889,2096
0,0,1,0.977947,8389
0,0,0,0.976329,2070
1,1,1,0.929672,2076
1,1,0,0.901354,517
1,0,1,0.829926,2152
1,0,0,0.809145,503


PA approval rate for payer 417614 (reject_code 70) and drug A


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.770531,40049
0,1,0,0.714129,10008
0,0,1,0.578814,39923
0,0,0,0.505073,9955
1,1,1,0.37617,10147
1,1,0,0.307169,2497
1,0,1,0.186338,10057
1,0,0,0.139599,2543


-- Drug B --
PA approval rate for payer 417380 (reject_code 76) and drug B


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.980716,1089
0,1,0,0.961538,260
0,0,1,0.927928,1110
0,0,0,0.905724,297
1,1,1,0.823333,300
1,1,0,0.740741,81
1,0,0,0.662791,86
1,0,1,0.660714,280


PA approval rate for payer 417740 (reject_code 70) and drug B


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.568442,14063
0,1,0,0.505501,3636
0,0,1,0.369595,14037
0,0,0,0.291154,3414
1,1,1,0.194678,3570
1,1,0,0.139908,872
1,0,1,0.06765,3459
1,0,0,0.045902,915


PA approval rate for payer 999001 (reject_code 76) and drug B


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.985312,4221
0,1,0,0.978957,1093
0,0,1,0.954115,4228
0,0,0,0.951389,1008
1,1,1,0.852262,1083
1,1,0,0.803636,275
1,0,1,0.700306,981
1,0,0,0.630952,252


PA approval rate for payer 417614 (reject_code 75) and drug B


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.997282,20237
0,1,0,0.994058,5049
0,0,1,0.988599,19999
0,0,0,0.980727,5033
1,1,1,0.944411,4965
1,1,0,0.921801,1266
1,0,1,0.871414,5055
1,0,0,0.829653,1268


-- Drug C --
PA approval rate for payer 417380 (reject_code 70) and drug C


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.506289,10494
0,1,0,0.410991,2584
0,0,1,0.306833,10201
0,0,0,0.229976,2522
1,1,1,0.153584,2637
1,1,0,0.103348,687
1,0,1,0.056831,2657
1,0,0,0.029762,672


PA approval rate for payer 417740 (reject_code 75) and drug C


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.942748,12838
0,1,0,0.922303,3282
0,0,1,0.852971,12977
0,0,0,0.818462,3239
1,1,1,0.700189,3182
1,1,0,0.647668,772
1,0,1,0.503315,3167
1,0,0,0.430446,762


PA approval rate for payer 999001 (reject_code 76) and drug C


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.900074,4033
0,1,0,0.881466,928
0,0,1,0.775867,3837
0,0,0,0.718191,951
1,1,1,0.58156,987
1,1,0,0.488889,225
1,0,1,0.380849,919
1,0,0,0.340164,244


PA approval rate for payer 417614 (reject_code 76) and drug C


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pa_approved_percent,pa_approved_count
contraindication,tried_and_failed,correct_diagnosis,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,1,0.838052,1766
0,1,0,0.792541,429
0,0,1,0.640284,1832
0,0,0,0.586134,476
1,1,1,0.440449,445
1,1,0,0.333333,117
1,0,1,0.185654,474
1,0,0,0.106061,132
