In [1]:
import os
import sys
import re
import time
import random
import warnings
import collections
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

sys.path.append('../../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500

%load_ext autoreload
%autoreload 2

# Milliman MANI File ETL

### Config

In [2]:
data_dir = '/Users/bp/workspace/msh/milliman_data'
current_folder = '20240812'
# pro_file_name = 'ACO Builder MSSP Prospective Explorer v2025.3.txt'
# retro_file_name = 'ACO Builder MSSP Retrospective Explorer v2025.3.txt'
table_prefix = 'milliman_mani'

### DataTables

In [6]:
file_name = 'Medicare Advantage Network Intelligence (MANI) CY2020 Data Tables - v2024.0 - Nationwide.csv'
year = 2020
mani_data_tables_2020_df = pd.read_csv(f'{data_dir}/{current_folder}/{file_name}',  encoding='latin1', low_memory=False)
# display(mani_data_tables_2020_df.head())
mani_data_tables_2020_df = cb_utils.df_format_columns(mani_data_tables_2020_df)
mani_data_tables_2020_df.head()

Unnamed: 0,Year,ACO_Builder_ID,TIN_CCN_Type,TIN_CCN_ID,Provider_Name,PAC_ID,St_MSA,Carrier,PY,Risk_Score_MMWtd,Risk_Score_MMWtd_Region,MA_Benchmark_Rate,MA_Bid_Amount,Paid_atFFSReimb,Paid_atFFSReimb_Region,IP_Admits,IP_Admits_Imputed,IP_Admits_Region,IP_Days,IP_Days_Imputed,IP_Days_Region,SNF_Admits,SNF_Admits_Imputed,SNF_Admits_Region,SNF_Days,SNF_Days_Imputed,SNF_Days_Region,ED,ED_Imputed,ED_Region,Phy_Office,Phy_Office_Imputed,Phy_Office_Region,Rad_Adv_Imag_OP,Rad_Adv_Imag_OP_Imputed,Rad_Adv_Imag_OP_Region,Rad_Adv_Imag_NonHosp,Rad_Adv_Imag_NonHosp_Imputed,Rad_Adv_Imag_NonHosp_Region,Rad_Gen_OP,Rad_Gen_OP_Imputed,Rad_Gen_OP_Region,Rad_Gen_NonHosp,Rad_Gen_NonHosp_Imputed,Rad_Gen_NonHosp_Region,Rad_Therap_OP,Rad_Therap_OP_Imputed,Rad_Therap_OP_Region,Rad_Therap_NonHosp,Rad_Therap_NonHosp_Imputed,Rad_Therap_NonHosp_Region,Prev_Physicals,Prev_Physicals_Imputed,Prev_Physicals_Region,Surgery_ASC,Surgery_ASC_Imputed,Surgery_ASC_Region,Surgery_OP,Surgery_OP_Imputed,Surgery_OP_Region,Part_B_Rx_NonHosp,Part_B_Rx_NonHosp_Imputed,Part_B_Rx_NonHosp_Region,Part_B_Rx_OP,Part_B_Rx_OP_Imputed,Part_B_Rx_OP_Region,Lab_NonHosp,Lab_NonHosp_Imputed,Lab_NonHosp_Region,Lab_OP,Lab_OP_Imputed,Lab_OP_Region
0,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,"Blue Cross of Idaho Health Services, Inc.",70.41667,978.40233,940.96367,910201.5,813222.5967,848763.2,784222.4,17.18238,17.18238,16.17895,95.0,0.0,91.40048,0.0,0.0,2.90345,0.0,0.0,78.62068,66.0,0.0,38.43124,672.25134,2.25134,531.47002,41.0,0.0,49.06599,7.44854,7.44854,11.09374,101.0,0.0,78.88074,67.0,0.0,66.09894,23.63697,5.63697,6.92966,0.0,0.0,2.13013,23.53506,4.53506,37.11633,5.58672,5.58672,7.48148,26.0,0.0,35.62701,102.0,2.0,67.61381,44.0,1.0,79.75242,1065.0,0.0,778.46228,280.0,0.0,535.58037
1,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,FFS,139.0,1738.72311,1806.87182,1617316.0,0.0,1544586.0,1593759.0,44.55842,13.55842,0.0,223.42427,5.42427,0.0,4.10977,4.10977,0.0,148.0,0.0,0.0,62.0,0.0,0.0,1098.0,0.0,0.0,71.0,0.0,0.0,11.04066,11.04066,0.0,103.0,0.0,0.0,91.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.97964,12.97964,0.0,18.05664,1.05664,0.0,38.48064,7.48064,0.0,163.0,0.0,0.0,49.0,1.0,0.0,2715.0,0.0,0.0,286.49336,5.49336,0.0
2,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,Humana Inc.,3.33333,37.09248,34.95186,34536.04,30321.35652,49884.03,27422.71,0.0,0.0,0.53436,0.0,0.0,2.2532,0.0,0.0,0.06985,0.0,0.0,1.51062,2.93185,2.93185,1.1261,39.0,0.0,20.82886,1.17593,1.17593,1.91406,1.0,1.0,0.50789,1.0,1.0,2.79236,3.17279,3.17279,2.83264,0.0,0.0,0.32776,0.0,0.0,0.05884,0.0,0.0,1.81274,0.0,0.0,0.30693,11.0,11.0,1.58325,1.0,1.0,3.0275,0.0,0.0,2.70668,57.0,0.0,31.71387,2.4518,2.4518,19.91043
3,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,ID Other Carriers,1.0,8.23925,7.80952,7656.489,6593.75279,3704.882,5740.482,0.0,0.0,0.10484,0.0,0.0,0.46342,0.0,0.0,0.01048,0.0,0.0,0.30821,0.0,0.0,0.26422,12.46735,12.46735,5.23755,1.12693,1.12693,0.46967,0.0,0.0,0.1279,1.49576,1.49576,0.59546,1.0,1.0,0.79047,0.0,0.0,0.02307,0.0,0.0,0.04823,1.0,1.0,0.59125,0.0,0.0,0.13,0.0,0.0,0.40677,1.0,1.0,0.6184,1.0,1.0,0.4047,9.98463,9.98463,8.83154,0.0,0.0,5.51019
4,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,"Molina Healthcare, Inc.",5.0,57.99166,61.51546,53889.91,49180.45071,98154.72,72043.74,1.0,1.0,1.41137,3.85948,3.85948,7.5874,1.0,1.0,0.23034,13.0,0.0,6.15417,2.66142,2.66142,5.6543,66.0,0.0,56.60889,2.29306,2.29306,4.44031,1.85177,1.85177,0.89916,9.44196,9.44196,8.51257,10.40537,10.40537,5.98663,0.0,0.0,0.4553,0.0,0.0,0.0,1.0,1.0,2.95395,2.03786,2.03786,0.49885,5.01633,5.01633,2.40967,28.0,0.0,6.76558,1.0,1.0,7.77077,20.0,0.0,73.59375,15.86381,2.86381,38.70712


Unnamed: 0,year,aco_builder_id,tin_ccn_type,tin_ccn_id,provider_name,pac_id,st_msa,carrier,py,risk_score_mmwtd,risk_score_mmwtd_region,ma_benchmark_rate,ma_bid_amount,paid_atffsreimb,paid_atffsreimb_region,ip_admits,ip_admits_imputed,ip_admits_region,ip_days,ip_days_imputed,ip_days_region,snf_admits,snf_admits_imputed,snf_admits_region,snf_days,snf_days_imputed,snf_days_region,ed,ed_imputed,ed_region,phy_office,phy_office_imputed,phy_office_region,rad_adv_imag_op,rad_adv_imag_op_imputed,rad_adv_imag_op_region,rad_adv_imag_nonhosp,rad_adv_imag_nonhosp_imputed,rad_adv_imag_nonhosp_region,rad_gen_op,rad_gen_op_imputed,rad_gen_op_region,rad_gen_nonhosp,rad_gen_nonhosp_imputed,rad_gen_nonhosp_region,rad_therap_op,rad_therap_op_imputed,rad_therap_op_region,rad_therap_nonhosp,rad_therap_nonhosp_imputed,rad_therap_nonhosp_region,prev_physicals,prev_physicals_imputed,prev_physicals_region,surgery_asc,surgery_asc_imputed,surgery_asc_region,surgery_op,surgery_op_imputed,surgery_op_region,part_b_rx_nonhosp,part_b_rx_nonhosp_imputed,part_b_rx_nonhosp_region,part_b_rx_op,part_b_rx_op_imputed,part_b_rx_op_region,lab_nonhosp,lab_nonhosp_imputed,lab_nonhosp_region,lab_op,lab_op_imputed,lab_op_region
0,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,"Blue Cross of Idaho Health Services, Inc.",70.41667,978.40233,940.96367,910201.5,813222.5967,848763.2,784222.4,17.18238,17.18238,16.17895,95.0,0.0,91.40048,0.0,0.0,2.90345,0.0,0.0,78.62068,66.0,0.0,38.43124,672.25134,2.25134,531.47002,41.0,0.0,49.06599,7.44854,7.44854,11.09374,101.0,0.0,78.88074,67.0,0.0,66.09894,23.63697,5.63697,6.92966,0.0,0.0,2.13013,23.53506,4.53506,37.11633,5.58672,5.58672,7.48148,26.0,0.0,35.62701,102.0,2.0,67.61381,44.0,1.0,79.75242,1065.0,0.0,778.46228,280.0,0.0,535.58037
1,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,FFS,139.0,1738.72311,1806.87182,1617316.0,0.0,1544586.0,1593759.0,44.55842,13.55842,0.0,223.42427,5.42427,0.0,4.10977,4.10977,0.0,148.0,0.0,0.0,62.0,0.0,0.0,1098.0,0.0,0.0,71.0,0.0,0.0,11.04066,11.04066,0.0,103.0,0.0,0.0,91.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.97964,12.97964,0.0,18.05664,1.05664,0.0,38.48064,7.48064,0.0,163.0,0.0,0.0,49.0,1.0,0.0,2715.0,0.0,0.0,286.49336,5.49336,0.0
2,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,Humana Inc.,3.33333,37.09248,34.95186,34536.04,30321.35652,49884.03,27422.71,0.0,0.0,0.53436,0.0,0.0,2.2532,0.0,0.0,0.06985,0.0,0.0,1.51062,2.93185,2.93185,1.1261,39.0,0.0,20.82886,1.17593,1.17593,1.91406,1.0,1.0,0.50789,1.0,1.0,2.79236,3.17279,3.17279,2.83264,0.0,0.0,0.32776,0.0,0.0,0.05884,0.0,0.0,1.81274,0.0,0.0,0.30693,11.0,11.0,1.58325,1.0,1.0,3.0275,0.0,0.0,2.70668,57.0,0.0,31.71387,2.4518,2.4518,19.91043
3,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,ID Other Carriers,1.0,8.23925,7.80952,7656.489,6593.75279,3704.882,5740.482,0.0,0.0,0.10484,0.0,0.0,0.46342,0.0,0.0,0.01048,0.0,0.0,0.30821,0.0,0.0,0.26422,12.46735,12.46735,5.23755,1.12693,1.12693,0.46967,0.0,0.0,0.1279,1.49576,1.49576,0.59546,1.0,1.0,0.79047,0.0,0.0,0.02307,0.0,0.0,0.04823,1.0,1.0,0.59125,0.0,0.0,0.13,0.0,0.0,0.40677,1.0,1.0,0.6184,1.0,1.0,0.4047,9.98463,9.98463,8.83154,0.0,0.0,5.51019
4,2020,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,"Molina Healthcare, Inc.",5.0,57.99166,61.51546,53889.91,49180.45071,98154.72,72043.74,1.0,1.0,1.41137,3.85948,3.85948,7.5874,1.0,1.0,0.23034,13.0,0.0,6.15417,2.66142,2.66142,5.6543,66.0,0.0,56.60889,2.29306,2.29306,4.44031,1.85177,1.85177,0.89916,9.44196,9.44196,8.51257,10.40537,10.40537,5.98663,0.0,0.0,0.4553,0.0,0.0,0.0,1.0,1.0,2.95395,2.03786,2.03786,0.49885,5.01633,5.01633,2.40967,28.0,0.0,6.76558,1.0,1.0,7.77077,20.0,0.0,73.59375,15.86381,2.86381,38.70712


In [7]:
conn = cb_utils.get_engine(source='msh_analytics')
table_name = f'{table_prefix}_data_tables_{year}_{current_folder}'
schema = 'raw'
print(f'select * from {schema}.{table_name}')
# pro_df.to_sql(pro_table_name, conn, schema=pro_schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'

select * from raw.milliman_mani_data_tables_2020_20240812


In [9]:
mani_data_tables_2020_df.to_sql(table_name, conn, schema=schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'

334212

In [10]:
file_name = 'Medicare Advantage Network Intelligence (MANI) CY2021 Data Tables - v2024.0 - Nationwide.csv'
year = 2021
mani_data_tables_2021_df = pd.read_csv(f'{data_dir}/{current_folder}/{file_name}',  encoding='latin1', low_memory=False)
# display(mani_data_tables_2020_df.head())
mani_data_tables_2021_df = cb_utils.df_format_columns(mani_data_tables_2021_df)
mani_data_tables_2021_df.head()

Unnamed: 0,ï»¿year,aco_builder_id,tin_ccn_type,tin_ccn_id,provider_name,pac_id,st_msa,carrier,py,risk_score_mmwtd,risk_score_mmwtd_region,ma_benchmark_rate,ma_bid_amount,paid_atffsreimb,paid_atffsreimb_region,ip_admits,ip_admits_imputed,ip_admits_region,ip_days,ip_days_imputed,ip_days_region,snf_admits,snf_admits_imputed,snf_admits_region,snf_days,snf_days_imputed,snf_days_region,ed,ed_imputed,ed_region,phy_office,phy_office_imputed,phy_office_region,rad_adv_imag_op,rad_adv_imag_op_imputed,rad_adv_imag_op_region,rad_adv_imag_nonhosp,rad_adv_imag_nonhosp_imputed,rad_adv_imag_nonhosp_region,rad_gen_op,rad_gen_op_imputed,rad_gen_op_region,rad_gen_nonhosp,rad_gen_nonhosp_imputed,rad_gen_nonhosp_region,rad_therap_op,rad_therap_op_imputed,rad_therap_op_region,rad_therap_nonhosp,rad_therap_nonhosp_imputed,rad_therap_nonhosp_region,prev_physicals,prev_physicals_imputed,prev_physicals_region,surgery_asc,surgery_asc_imputed,surgery_asc_region,surgery_op,surgery_op_imputed,surgery_op_region,part_b_rx_nonhosp,part_b_rx_nonhosp_imputed,part_b_rx_nonhosp_region,part_b_rx_op,part_b_rx_op_imputed,part_b_rx_op_region,lab_nonhosp,lab_nonhosp_imputed,lab_nonhosp_region,lab_op,lab_op_imputed,lab_op_region
0,2021,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,"Blue Cross of Idaho Health Services, Inc.",57.25,668.87944,661.49104,654402.9,574624.7115,672526.3,681895.9,17.84242,17.84242,11.64326,90.06221,35.06221,69.55973,0.0,0.0,1.88079,0.0,0.0,43.92307,90.0,0.0,31.76057,458.59408,5.59408,431.79256,47.0,0.0,43.25978,9.97145,9.97145,8.01355,72.0,0.0,65.61929,45.0,0.0,56.11757,0.0,0.0,4.74798,0.0,0.0,1.40504,20.30391,6.30391,30.12698,12.0,0.0,4.65597,13.32008,13.32008,31.10315,60.0,1.0,54.90814,2.0,2.0,69.9102,406.0,0.0,632.24219,87.0,0.0,461.80923
1,2021,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,CVS Health Corporation,2.0,21.77081,22.40499,22266.02,18505.3011,24177.59,22710.2,0.0,0.0,0.40084,0.0,0.0,1.39238,0.0,0.0,0.13186,0.0,0.0,3.42773,0.0,0.0,1.81421,3.24653,3.24653,10.13086,0.0,0.0,1.53442,0.0,0.0,0.2742,0.0,0.0,1.49231,0.0,0.0,0.59592,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.86499,0.0,0.0,0.20563,0.0,0.0,1.62964,0.0,0.0,1.13379,0.0,0.0,1.47675,135.0,0.0,26.33203,0.0,0.0,14.41173
2,2021,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,FFS,134.33333,1615.36737,1571.66835,1588136.0,0.0,1326359.0,1559343.0,12.0,0.0,0.0,126.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,48.0,0.0,0.0,874.0,0.0,0.0,104.0,0.0,0.0,0.0,0.0,0.0,77.0,0.0,0.0,44.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,45.0,0.0,0.0,0.0,0.0,0.0,27.0,0.0,0.0,96.0,0.0,0.0,18.0,0.0,0.0,3083.0,0.0,0.0,277.0,0.0,0.0
3,2021,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,Humana Inc.,4.08333,64.41775,62.56212,62425.65,56951.49747,66822.03,50424.6,1.0,1.0,0.94727,3.63629,3.63629,5.31696,0.0,0.0,0.20159,0.0,0.0,5.00647,1.13549,1.13549,1.96416,54.0,0.0,31.62614,1.0,1.0,3.25092,3.12112,3.12112,0.95516,8.01766,8.01766,4.32982,1.94954,1.94954,3.95523,0.0,0.0,0.39076,0.0,0.0,0.24575,2.77182,2.77182,2.13201,0.0,0.0,0.35959,4.70198,4.70198,2.11033,1.0,1.0,3.91682,41.0,1.0,4.17636,115.0,0.0,40.2312,34.0,0.0,44.50382
4,2021,00004B7C4023,TIN,474454561.0,RINDFLEISCH FAMILY PRACTICE PLLC,9335446202,ID-26820,ID Other Carriers,2.0,6.49522,7.23591,6367.185,5406.79273,9681.091,7715.731,0.0,0.0,0.08188,0.0,0.0,0.38103,0.0,0.0,0.00529,0.0,0.0,0.10307,0.0,0.0,0.20421,9.22153,9.22153,7.53461,0.0,0.0,0.50977,0.0,0.0,0.15916,0.0,0.0,0.8432,1.0,1.0,1.27258,0.0,0.0,0.07919,0.0,0.0,0.06078,1.88283,1.88283,1.28943,0.0,0.0,0.1956,1.0,1.0,0.65497,1.0,1.0,0.87757,6.4399,6.4399,0.62889,26.0,0.0,13.86328,1.29253,1.29253,7.2995


In [11]:
conn = cb_utils.get_engine(source='msh_analytics')
table_name = f'{table_prefix}_data_tables_{year}_{current_folder}'
schema = 'raw'
print(f'select * from {schema}.{table_name}')
# pro_df.to_sql(pro_table_name, conn, schema=pro_schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'

select * from raw.milliman_mani_data_tables_2021_20240812


In [12]:
mani_data_tables_2021_df.to_sql(table_name, conn, schema=schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'

354510

### Scorecard

In [13]:
file_name = 'Medicare Advantage Network Intelligence (MANI) CY2020 Scorecard - v2024.0 - Nationwide.xlsb'
year = 2020
score_card_2020_df = pd.read_excel(f'{data_dir}/{current_folder}/{file_name}')
score_card_2020_df = cb_utils.df_format_columns(score_card_2020_df)
print(f'{score_card_2020_df.shape[0]} rows, {score_card_2020_df.shape[1]} columns')
score_card_2020_df.head()

NameError: name 'retro_file_name' is not defined

In [None]:
conn = cb_utils.get_engine(source='msh_analytics')
table_name = f'{table_prefix}_scorecard_{year}_{current_folder}'
schema = 'raw'
print(f'select * from {schema}.{table_name}')

In [None]:
score_card_2020_df.to_sql(table_name, conn, schema=schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'

In [None]:
file_name = 'Medicare Advantage Network Intelligence (MANI) CY2021 Scorecard - v2024.0 - Nationwide.xlsb'
year = 2021
score_card_2021_df = pd.read_excel(f'{data_dir}/{current_folder}/{file_name}')
score_card_2021_df = cb_utils.df_format_columns(score_card_2021_df)
print(f'{score_card_2021_df.shape[0]} rows, {score_card_2021_df.shape[1]} columns')
score_card_2021_df.head()

In [None]:
conn = cb_utils.get_engine(source='msh_analytics')
table_name = f'{table_prefix}_scorecard_{year}_{current_folder}'
schema = 'raw'
print(f'select * from {schema}.{table_name}')

In [None]:
score_card_2020_df.to_sql(table_name, conn, schema=schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'