In [1]:
import os
import sys
import re
import time
import random
import warnings
import collections
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

sys.path.append('../../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500

%load_ext autoreload
%autoreload 2

# Milliman PAC File ETL
1) log into SFTP site: https://secureftp.milliman.com  
2) In the "from milliman" folder, go to the latest directory  
     - In this case, the latest directory is "20221216"  
3) Make sure you have access to the following folder: \\Main Street Health\MSH_Strategy_Group - ACO Analyses\Raw MIlliman Flat FIles\  
     - create folder with same datestamp as file from milliman, in this case "20221216"  
4) Download the files from the SFTP site  
     - in this case there are 3  
5) copy the data disctionary to the folder on the network drive.  
6) download the prospective and retrospective CSV files into database tables in both the MSH and CB datagrip environments (Alan has a place he puts them)  
7) delete the last 30 columns from these database tables (we will create a "pivot file" with these 30 columns):  
8) with these 30 columns, run Alan's "pivot code" to create these files:  
     - strategic_milliman_prosp_20221128_pivot
     -  strategic_milliman_retro_20221128_pivot
9) copy 4 files to the "20221216" folder  
     - prospective flat file (without the 30 MA columns)
     - retrospective flat file (without the 30 MA columns)
     - prospective MA pivot file
     - retrospective MA pivor file
10) email Ali and Ben that process is complete with the following summaries that you can pull via queries and paste in excel into the same folder.  

### Config

In [2]:
data_dir = '/Users/bp/workspace/msh/milliman_data'
current_folder = '20241211'
pro_file_name = 'ACO Builder MSSP Prospective Explorer v2026.1c.txt'
retro_file_name = 'ACO Builder MSSP Retrospective Explorer v2026.1c.txt'

### Pro load

In [3]:
pro_df = pd.read_csv(f'{data_dir}/{current_folder}/{pro_file_name}',  encoding='latin1', low_memory=False, delimiter='\t')
pro_df = cb_utils.df_format_columns(pro_df)
print(f'{pro_df.shape[0]} rows, {pro_df.shape[1]} columns')
pro_df.head()

764603 rows, 68 columns


Unnamed: 0,pac_id,deid_tin,tin_public,year,program_type,provider_name,mssp_aco_name,reach_aco_name,practice_state,practice_msa,practice_st_msa,practice_msa_name,practice_zip,spec_count,np_count,cns_count,pcp_count,pa_count,other_count,pys_esrd,pys_dis,pys_ad,pys_and,pys_total,rs_esrd_legacy,rs_dis_legacy,rs_ad_legacy,rs_and_legacy,risk_composite_legacy,rs_esrd_v24,rs_dis_v28,rs_ad_v28,rs_and_v28,risk_composite_v28,rs_esrd_demog,rs_dis_demog,rs_ad_demog,rs_and_demog,risk_composite_demog,paid_esrd,paid_dis,paid_ad,paid_and,paid_composite,reg_ret_exp_esrd,reg_ret_exp_dis,reg_ret_exp_ad,reg_ret_exp_and,reg_ret_exp_composite,regional_efficiency_ret,reg_pro_exp_esrd,reg_pro_exp_dis,reg_pro_exp_ad,reg_pro_exp_and,reg_pro_exp_composite,regional_efficiency_pro,reg_ret_exp_esrd_v24,reg_ret_exp_dis_v28,reg_ret_exp_ad_v28,reg_ret_exp_and_v28,reg_ret_exp_composite_v28,regional_efficiency_ret_v28,reg_pro_exp_esrd_v24,reg_pro_exp_dis_v28,reg_pro_exp_ad_v28,reg_pro_exp_and_v28,reg_pro_exp_composite_v28,regional_efficiency_pro_v28
0,9335446202,00004B7C4023,474454561,2017,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,2.0,10.58,6.67,65.58,84.83,1.244208,1.061727,0.751925,1.193961,1.15278,1.146884,1.112388,0.787483,1.293654,1.229794,1.155315,1.075276,0.964009,1.118293,1.105547,53840.41,18498.227788,16336.562219,19126.599573,19647.286809,76338.88,11330.44,17127.78,10161.92,12354.265065,1.390269,74223.62,10991.45,17066.18,9935.12,12074.588412,1.422471,76733.99,11215.9,17113.63,9918.72,11899.616264,1.344765,74791.48,10944.84,17195.08,9740.72,11685.32509,1.369426
1,9335446202,00004B7C4023,474454561,2018,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,1.0,13.17,13.0,104.83,132.0,0.773985,1.738585,1.014378,1.100139,1.133178,0.773045,1.639107,1.002196,1.126503,1.142245,1.220548,0.989357,0.946056,1.126156,1.088725,57672.29,15824.800304,29511.023077,17289.571497,18652.984773,80862.56,11365.42,17777.53,10545.83,11653.386743,1.388342,77564.53,10586.18,16878.87,10335.98,11282.477484,1.433984,81309.18,11406.9,17745.62,10324.4,11464.158286,1.399357,78061.78,10754.75,16875.06,10160.29,11155.906529,1.438023
2,9335446202,00004B7C4023,474454561,2019,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,0.0,17.58,10.0,94.25,121.83,,1.320301,1.530419,1.200863,1.25135,,1.25542,1.29019,1.12773,1.163027,,0.930975,1.041779,1.088247,1.059888,,12862.443117,14708.418,11902.543236,12271.366905,0.0,11914.29,16810.94,11141.54,11831.74624,0.832958,0.0,11316.74,16788.3,10822.41,11499.92751,0.856992,0.0,11731.85,16680.81,10872.77,11537.461827,0.917308,0.0,11252.13,16638.04,10598.61,11252.319332,0.940554
3,9335446202,00004B7C4023,474454561,2020,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,1.0,16.42,10.5,109.25,137.17,0.702473,1.473083,0.840481,1.032591,1.089407,0.756026,1.261862,0.785298,0.996563,1.023647,1.22294,0.93813,1.086159,1.015967,1.012062,43532.44,18077.850792,10185.451429,11345.480458,12297.235037,78121.15,11356.82,18207.51,10543.68,11463.470052,1.004235,81498.61,10486.4,17147.72,10168.87,10983.580201,1.048112,78713.73,11242.29,18037.18,10281.38,11259.748353,1.080905,81127.9,10487.82,17006.7,9938.61,10829.554326,1.123843
4,9335446202,00004B7C4023,474454561,2021,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,0.48,18.08,9.17,101.75,129.48,0.929688,1.33418,1.000295,1.012963,1.068312,0.932337,1.406079,0.934563,0.997688,1.065217,1.215215,0.927941,0.931138,1.00083,0.985259,33827.291667,15441.997235,11878.762268,11464.973464,12132.512434,79995.45,11408.62,15921.34,11010.86,11635.24092,0.986871,76820.49,11056.68,15980.17,10753.62,11372.97812,1.009628,80891.23,11529.13,15706.95,10721.95,11418.093,1.01197,77682.82,11233.85,15898.48,10515.61,11210.320207,1.030726


In [5]:
conn = cb_utils.get_engine(source='msh_analytics')
pro_table_name = f'milliman_pro_{current_folder}'
pro_schema = 'raw'
print(f'select * from {pro_schema}.{pro_table_name}')
# pro_df.to_sql(pro_table_name, conn, schema=pro_schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'

select * from raw.milliman_pro_20241211


In [6]:
pro_df.to_sql(pro_table_name, conn, schema=pro_schema, index=False, method='multi', chunksize=1000) # , if_exists='replace'

764603

In [4]:
ret_df = pd.read_csv(f'{data_dir}/{current_folder}/{retro_file_name}',  encoding='latin1', low_memory=False, delimiter='\t')
ret_df = cb_utils.df_format_columns(ret_df)
print(f'{ret_df.shape[0]} rows, {ret_df.shape[1]} columns')
ret_df.head()

739873 rows, 68 columns


Unnamed: 0,pac_id,deid_tin,tin_public,year,program_type,provider_name,mssp_aco_name,reach_aco_name,practice_state,practice_msa,practice_st_msa,practice_msa_name,practice_zip,spec_count,np_count,cns_count,pcp_count,pa_count,other_count,pys_esrd,pys_dis,pys_ad,pys_and,pys_total,rs_esrd_legacy,rs_dis_legacy,rs_ad_legacy,rs_and_legacy,risk_composite_legacy,rs_esrd_v24,rs_dis_v28,rs_ad_v28,rs_and_v28,risk_composite_v28,rs_esrd_demog,rs_dis_demog,rs_ad_demog,rs_and_demog,risk_composite_demog,paid_esrd,paid_dis,paid_ad,paid_and,paid_composite,reg_ret_exp_esrd,reg_ret_exp_dis,reg_ret_exp_ad,reg_ret_exp_and,reg_ret_exp_composite,regional_efficiency_ret,reg_pro_exp_esrd,reg_pro_exp_dis,reg_pro_exp_ad,reg_pro_exp_and,reg_pro_exp_composite,regional_efficiency_pro,reg_ret_exp_esrd_v24,reg_ret_exp_dis_v28,reg_ret_exp_ad_v28,reg_ret_exp_and_v28,reg_ret_exp_composite_v28,regional_efficiency_ret_v28,reg_pro_exp_esrd_v24,reg_pro_exp_dis_v28,reg_pro_exp_ad_v28,reg_pro_exp_and_v28,reg_pro_exp_composite_v28,regional_efficiency_pro_v28
0,9335446202,00004B7C4023,474454561,2016,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,2.0,16.42,13.67,109.08,141.17,1.089615,0.97082,0.861217,1.083807,1.049655,1.089615,0.97082,0.861217,1.083807,1.049655,1.190661,0.961259,1.030802,1.155459,1.124941,56585.55,17678.937272,23318.405999,21053.628438,21383.804279,69922.7,10567.91,16376.94,10180.71,11593.879822,1.757927,0,0,0,0,0,0,69922.7,10567.91,16376.94,10180.71,11593.879822,1.757927,0,0,0,0,0,0
1,9335446202,00004B7C4023,474454561,2017,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,1.0,20.92,13.58,149.08,184.58,0.858231,1.420286,0.960267,1.233865,1.234039,0.833114,1.478871,0.883379,1.303391,1.290912,1.221797,1.005492,1.054501,1.18215,1.151525,35210.18,22750.608509,20140.586156,20493.971224,20803.463539,72850.09,11511.68,17346.41,10122.6,10954.524468,1.54042,0,0,0,0,0,0,73326.81,11298.79,17243.72,9889.32,10665.045312,1.512308,0,0,0,0,0,0
2,9335446202,00004B7C4023,474454561,2018,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,1.0,23.75,12.92,124.58,162.25,0.773985,1.281183,1.479154,1.170615,1.214754,0.773045,1.148105,1.394784,1.177084,1.190648,1.220548,0.976608,1.070689,1.155073,1.120966,57672.29,19383.458105,28502.956656,18645.947744,19779.351556,80862.56,11252.42,17291.01,10638.5,11658.988193,1.403305,0,0,0,0,0,0,81309.18,11257.7,17280.15,10416.95,11462.121807,1.45293,0,0,0,0,0,0
3,9335446202,00004B7C4023,474454561,2019,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,0.0,26.17,10.75,124.17,161.09,,1.305999,1.198887,0.997281,1.05322,,1.232323,1.039844,0.965182,1.007119,,1.010109,0.992896,1.007123,1.006615,,11737.527703,13338.053953,14185.610614,13731.345521,0.0,12075.4,17199.55,11162.64,11800.447101,1.096845,0,0,0,0,0,0,0.0,11884.07,17036.56,10901.99,11515.961782,1.176419,0,0,0,0,0,0
4,9335446202,00004B7C4023,474454561,2020,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,3.0,0.0,1.0,19.92,8.83,109.17,138.92,0.702473,1.168583,0.672584,0.944149,0.980846,0.756026,1.073387,0.568586,0.928852,0.947744,1.22294,0.985699,0.878545,1.018304,1.013487,43532.44,17625.769076,4146.578709,10611.156453,11443.075151,78121.15,11512.33,18074.76,10574.73,11430.547412,1.045716,0,0,0,0,0,0,78713.73,11353.03,17955.49,10300.53,11176.836745,1.106314,0,0,0,0,0,0


In [7]:
ret_table_name = f'milliman_ret_{current_folder}'
ret_schema = 'raw'
print(f'select * from {ret_schema}.{ret_table_name}')

select * from raw.milliman_ret_20241211


In [8]:
ret_df.to_sql(f'milliman_ret_{current_folder}', conn, schema='raw', index=False, method='multi', chunksize=1000) # if_exists='replace', 

739873

In [9]:
ret_df.year.value_counts()

2016    93034
2017    90049
2018    87024
2019    84347
2020    82240
2021    79126
2022    76878
2023    74184
2024    72991
Name: year, dtype: int64

In [10]:
for yr in [2016, 2017, 2018, 2019, 2020, 2021, 2022]:
    ret_df.loc[ret_df.year == yr].to_csv(f'{data_dir}/{current_folder}/ret_{yr}.csv',index=False)

# Did not use below

In [6]:
conn = cb_utils.get_engine()
pro_df.to_sql(f'milliman_pro_{current_folder}', conn, schema='strategic', index=False, method='multi', chunksize=1000) # , if_exists='replace'

613556

### Retro load

In [33]:

ret_df.to_csv('ret.csv',index=False)

In [10]:
conn = cb_utils.get_engine()
ret_df.to_sql(f'milliman_ret_{current_folder}', conn, schema='strategic', index=False, method='multi', chunksize=1000) # if_exists='replace', 

594838

### Pivots

In [11]:
def pivot_ma_plan_enrollment(df):
    dfs = []
    for i in range(1, 16):
        name, enrollment = f'ma_plan_{i}_name', f'ma_plan_{i}_enrollment'
        plan = df[['deid_tin', 'pac_id', 'year', name, enrollment]]
        plan = plan.loc[~(plan[name].isna()) & ~(plan[enrollment].isna())]
        plan.columns = ['deid_tin', 'pac_id', 'year', 'ma_plan_name', 'ma_enrollment']
        dfs.append(plan)
    return pd.concat(dfs)

In [12]:
ret_pivot_df = pivot_ma_plan_enrollment(ret_df)

KeyError: "['ma_plan_1_name', 'ma_plan_1_enrollment'] not in index"

In [None]:
ret_pivot_df.head()

In [13]:
pro_pivot_df = pivot_ma_plan_enrollment(pro_df)

KeyError: "['ma_plan_1_name', 'ma_plan_1_enrollment'] not in index"

In [None]:
pro_pivot_df.head()

### Summaries 
### Prospective

In [None]:
s1 = ret_df[['year', 'unique_members']].groupby('year', as_index=False).sum()
s1.head()

### Retro

In [None]:
s2 = pro_df[['year', 'unique_members']].groupby('year', as_index=False).sum()
s2.head()

### Prospective

In [None]:
s3 = pro_pivot_df[['year', 'ma_plan_name', 'ma_enrollment']].groupby(['year', 'ma_plan_name'], as_index=False).sum().sort_values('ma_enrollment', ascending=False)
s3.head()

### Retro

In [None]:
s4 = ret_pivot_df[['year', 'ma_plan_name', 'ma_enrollment']].groupby(['year', 'ma_plan_name'], as_index=False).sum().sort_values('ma_enrollment', ascending=False)
s4.head()

### Outputs

In [None]:
ret_df.loc[:, ret_df.columns[:-30]].to_csv(f'{data_dir}/{current_folder}/ours_milliman_ret_{current_folder}.csv', index=False)

In [None]:
pro_df.loc[:, pro_df.columns[:-30]].to_csv(f'{data_dir}/{current_folder}/ours_milliman_pro_{current_folder}.csv', index=False)

In [None]:
pro_pivot_df.to_csv(f'{data_dir}/{current_folder}/pro_pivot.csv', index=False)
ret_pivot_df.to_csv(f'{data_dir}/{current_folder}/ret_pivot.csv', index=False)

In [None]:
with pd.ExcelWriter(f'{data_dir}/{current_folder}/summary.xlsx') as writer:
    s1.to_excel(writer, sheet_name='retrospective', index=False)
    s2.to_excel(writer, sheet_name='prospective', index=False)
    s3.to_excel(writer, sheet_name='prospective_pivot', index=False)
    s4.to_excel(writer, sheet_name='retrospective_pivot', index=False)