In [11]:
import os
import sys
import re
import time
import random
import warnings
import collections
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

# Make the repository's shared source directory importable; cb_utils is
# imported right after this, so it presumably lives there (path is relative
# to the notebook's working directory).
sys.path.append('../../src')
import cb_utils

# Plot theme and a wide column limit so frames with many columns display in full.
sns.set(style="darkgrid")
pd.options.display.max_columns = 500

# Reload edited modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Milliman PAC File ETL
1) log into SFTP site: https://secureftp.milliman.com  
2) In the "from milliman" folder, go to the latest directory  
     - In this case, the latest directory is "20221216"  
3) Make sure you have access to the following folder: \\Main Street Health\MSH_Strategy_Group - ACO Analyses\Raw MIlliman Flat FIles\  
     - create folder with same datestamp as file from milliman, in this case "20221216"  
4) Download the files from the SFTP site  
     - in this case there are 3  
5) copy the data dictionary to the folder on the network drive.  
6) download the prospective and retrospective CSV files into database tables in both the MSH and CB datagrip environments (Alan has a place he puts them)  
7) delete the last 30 columns from these database tables (we will create a "pivot file" with these 30 columns):  
8) with these 30 columns, run Alan's "pivot code" to create these files:  
     - strategic_milliman_prosp_20221128_pivot
     -  strategic_milliman_retro_20221128_pivot
9) copy 4 files to the "20221216" folder  
     - prospective flat file (without the 30 MA columns)
     - retrospective flat file (without the 30 MA columns)
     - prospective MA pivot file
     - retrospective MA pivot file
10) email Ali and Ben that the process is complete, and include the following summaries (pull them via queries, paste them into Excel, and save the workbook in the same folder).  

### Config

In [12]:
# Root folder holding the downloaded Milliman deliveries. The historical
# default is kept, but it can be overridden per machine with the
# MILLIMAN_DATA_DIR environment variable instead of editing the notebook.
data_dir = os.environ.get('MILLIMAN_DATA_DIR', '/Users/bp/workspace/msh/milliman_data')

# Datestamped sub-folder of the delivery being loaded; also used as the
# suffix of the database table names created further down.
current_folder = '20250217'

# Tab-delimited flat files inside `{data_dir}/{current_folder}`.
pro_file_name = 'ACO Builder MSSP Prospective Explorer v2026.2c.txt'
retro_file_name = 'ACO Builder MSSP Retrospective Explorer v2026.2c.txt'

### Pro load

In [13]:
# Read the prospective flat file (tab-delimited, latin1) and pass it through
# the project's column formatter (presumably header normalisation - see cb_utils).
pro_df = pd.read_csv(
    f'{data_dir}/{current_folder}/{pro_file_name}',
    sep='\t',
    encoding='latin1',
    low_memory=False,
).pipe(cb_utils.df_format_columns)

# Quick size check, then preview the first rows.
print(f'{pro_df.shape[0]} rows, {pro_df.shape[1]} columns')
pro_df.head()

764493 rows, 68 columns


Unnamed: 0,pac_id,deid_tin,tin_public,year,program_type,provider_name,mssp_aco_name,reach_aco_name,practice_state,practice_msa,practice_st_msa,practice_msa_name,practice_zip,spec_count,np_count,cns_count,pcp_count,pa_count,other_count,pys_esrd,pys_dis,pys_ad,pys_and,pys_total,rs_esrd_legacy,rs_dis_legacy,rs_ad_legacy,rs_and_legacy,risk_composite_legacy,rs_esrd_v24,rs_dis_v28,rs_ad_v28,rs_and_v28,risk_composite_v28,rs_esrd_demog,rs_dis_demog,rs_ad_demog,rs_and_demog,risk_composite_demog,paid_esrd,paid_dis,paid_ad,paid_and,paid_composite,reg_ret_exp_esrd,reg_ret_exp_dis,reg_ret_exp_ad,reg_ret_exp_and,reg_ret_exp_composite,regional_efficiency_ret,reg_pro_exp_esrd,reg_pro_exp_dis,reg_pro_exp_ad,reg_pro_exp_and,reg_pro_exp_composite,regional_efficiency_pro,reg_ret_exp_esrd_v24,reg_ret_exp_dis_v28,reg_ret_exp_ad_v28,reg_ret_exp_and_v28,reg_ret_exp_composite_v28,regional_efficiency_ret_v28,reg_pro_exp_esrd_v24,reg_pro_exp_dis_v28,reg_pro_exp_ad_v28,reg_pro_exp_and_v28,reg_pro_exp_composite_v28,regional_efficiency_pro_v28
0,9335446202,00004B7C4023,474454561,2017,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,2.0,10.58,6.67,65.58,84.83,1.244208,1.061706,0.752036,1.193966,1.152788,1.146884,1.112354,0.787568,1.293661,1.2298,1.155315,1.075276,0.964009,1.118293,1.105547,53840.41,18498.227788,16336.562219,19126.599573,19647.286809,76282.96,11368.42,17082.04,10199.91,12385.548987,1.386745,74223.62,11031.39,17023.71,9985.78,12117.927741,1.417371,76677.7,11253.33,17068.57,9955.79,11930.550796,1.34127,74791.48,10984.59,17152.91,9790.37,11728.156163,1.364417
1,9335446202,00004B7C4023,474454561,2018,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,1.0,13.17,13.0,104.83,132.0,0.773985,1.738556,1.014493,1.100143,1.1332,0.773045,1.639064,1.002278,1.126509,1.14226,1.220548,0.989357,0.946056,1.126156,1.088721,57672.29,15824.800304,29511.023077,17287.00477,18650.946364,80743.54,11361.88,17783.52,10550.74,11656.546456,1.3878,77439.5,10584.89,16886.85,10329.85,11277.747755,1.434414,81189.5,11403.46,17754.26,10329.24,11467.568974,1.398778,77935.95,10753.56,16885.7,10154.32,11151.452873,1.43843
2,9335446202,00004B7C4023,474454561,2019,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,0.0,17.58,10.0,94.25,121.83,,1.320284,1.530579,1.200865,1.251366,,1.255396,1.290303,1.127734,1.163037,,0.930975,1.041779,1.088247,1.059888,,12862.443117,14708.418,11902.543236,12271.366905,0.0,11912.55,16812.73,11146.73,11835.583937,0.832679,0.0,11313.36,16791.46,10827.98,11503.939814,0.856684,0.0,11730.18,16682.83,10877.75,11541.171311,0.917007,0.0,11248.8,16641.43,10603.96,11256.175524,0.940224
3,9335446202,00004B7C4023,474454561,2020,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,1.0,16.42,10.5,109.25,137.17,0.702473,1.473064,0.840561,1.032594,1.089411,0.756026,1.26184,0.785348,0.996565,1.023648,1.22294,0.93813,1.086159,1.015967,1.012062,43532.44,18077.850792,10185.451429,11345.480458,12297.235037,78107.01,11345.62,18169.59,10540.44,11456.811918,1.004813,81482.42,10478.21,17108.14,10167.85,10979.016008,1.048541,78699.48,11231.37,18000.09,10278.27,11253.41229,1.08151,81111.78,10479.64,16967.7,9937.67,10825.207434,1.124291
4,9335446202,00004B7C4023,474454561,2021,Prospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,0.48,18.08,9.17,101.75,129.48,0.929688,1.334167,1.00036,1.012966,1.068344,0.932337,1.406059,0.934601,0.99769,1.065251,1.215216,0.927941,0.931138,1.00083,0.985251,33827.291667,15441.997235,11878.762268,11457.180934,12126.388786,80045.68,11470.33,15906.82,11068.37,11688.655909,0.981861,76820.51,11122.92,15970.48,10813.09,11428.830077,1.004183,80942.02,11592.69,15693.34,10777.97,11471.12609,1.006782,77682.84,11300.43,15889.5,10573.76,11265.634434,1.025146


In [15]:
# Database handle for the 'msh_analytics' source; the upload cells reuse it.
conn = cb_utils.get_engine(source='msh_analytics')

# Destination of the prospective load: <schema>.<table>, where the table
# name carries the delivery datestamp.
pro_schema = 'raw'
pro_table_name = f'milliman_pro_{current_folder}'

# Echo a ready-to-paste query for spot-checking the table after the upload.
print(f'select * from {pro_schema}.{pro_table_name}')

select * from raw.milliman_pro_20250217


In [16]:
# Upload the prospective frame in multi-row INSERT batches of 1000 rows.
# `if_exists` is left at the pandas default ('fail'), so re-running this cell
# against an existing table raises; pass if_exists='replace' to overwrite.
pro_df.to_sql(
    name=pro_table_name,
    con=conn,
    schema=pro_schema,
    index=False,
    chunksize=1000,
    method='multi',
)

764493

In [17]:
# Read the retrospective flat file (tab-delimited, latin1) and pass it through
# the project's column formatter (presumably header normalisation - see cb_utils).
ret_df = pd.read_csv(
    f'{data_dir}/{current_folder}/{retro_file_name}',
    sep='\t',
    encoding='latin1',
    low_memory=False,
).pipe(cb_utils.df_format_columns)

# Quick size check, then preview the first rows.
print(f'{ret_df.shape[0]} rows, {ret_df.shape[1]} columns')
ret_df.head()

739405 rows, 68 columns


Unnamed: 0,pac_id,deid_tin,tin_public,year,program_type,provider_name,mssp_aco_name,reach_aco_name,practice_state,practice_msa,practice_st_msa,practice_msa_name,practice_zip,spec_count,np_count,cns_count,pcp_count,pa_count,other_count,pys_esrd,pys_dis,pys_ad,pys_and,pys_total,rs_esrd_legacy,rs_dis_legacy,rs_ad_legacy,rs_and_legacy,risk_composite_legacy,rs_esrd_v24,rs_dis_v28,rs_ad_v28,rs_and_v28,risk_composite_v28,rs_esrd_demog,rs_dis_demog,rs_ad_demog,rs_and_demog,risk_composite_demog,paid_esrd,paid_dis,paid_ad,paid_and,paid_composite,reg_ret_exp_esrd,reg_ret_exp_dis,reg_ret_exp_ad,reg_ret_exp_and,reg_ret_exp_composite,regional_efficiency_ret,reg_pro_exp_esrd,reg_pro_exp_dis,reg_pro_exp_ad,reg_pro_exp_and,reg_pro_exp_composite,regional_efficiency_pro,reg_ret_exp_esrd_v24,reg_ret_exp_dis_v28,reg_ret_exp_ad_v28,reg_ret_exp_and_v28,reg_ret_exp_composite_v28,regional_efficiency_ret_v28,reg_pro_exp_esrd_v24,reg_pro_exp_dis_v28,reg_pro_exp_ad_v28,reg_pro_exp_and_v28,reg_pro_exp_composite_v28,regional_efficiency_pro_v28
0,9335446202,00004B7C4023,474454561,2016,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,2.0,16.42,13.67,109.08,141.17,1.089615,0.970784,0.861216,1.083805,1.049647,1.089615,0.970784,0.861216,1.083805,1.049647,1.190661,0.961259,1.030802,1.155459,1.124938,56585.55,17678.937272,23318.405999,21050.820315,21381.634483,69821.11,10583.22,16527.54,10214.8,11633.218649,1.751814,0,0,0,0,0,0,69821.11,10583.22,16527.54,10214.8,11633.218649,1.751814,0,0,0,0,0,0
1,9335446202,00004B7C4023,474454561,2017,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,1.0,20.92,13.58,149.08,184.58,0.858231,1.420259,0.960409,1.23387,1.234005,0.833114,1.478826,0.883474,1.303398,1.290873,1.221797,1.005492,1.054501,1.18215,1.15156,35210.18,22706.029159,20140.586156,20493.971224,20798.410987,72738.24,11557.63,17310.98,10159.01,10987.553466,1.535402,0,0,0,0,0,0,73214.23,11344.16,17212.35,9924.83,10697.977808,1.507278,0,0,0,0,0,0
2,9335446202,00004B7C4023,474454561,2018,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,1.0,23.75,12.92,124.58,162.25,0.773985,1.281162,1.479323,1.17062,1.214762,0.773045,1.148075,1.394898,1.17709,1.190678,1.220548,0.976608,1.070689,1.155073,1.121007,57672.29,19337.142316,28502.956656,18639.468293,19767.596795,80743.54,11248.65,17298.72,10643.12,11662.180339,1.402071,0,0,0,0,0,0,81189.5,11254.04,17290.87,10421.49,11465.622821,1.451612,0,0,0,0,0,0
3,9335446202,00004B7C4023,474454561,2019,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,0.0,26.17,10.75,124.17,161.09,,1.305982,1.199012,0.997283,1.053264,,1.2323,1.039935,0.965186,1.007152,,1.010109,0.992896,1.007123,1.006615,,11737.527703,13338.053953,14173.885238,13722.307468,0.0,12073.28,17255.52,11166.4,11807.009627,1.095506,0,0,0,0,0,0,0.0,11882.06,17091.02,10905.66,11522.017606,1.175021,0,0,0,0,0,0
4,9335446202,00004B7C4023,474454561,2020,Retrospective,RINDFLEISCH FAMILY PRACTICE PLLC,,"PEARL PRIMARY CARE NETWORK, LLC",Idaho,26820.0,ID-26820,"Idaho Falls, ID",83404.0,,1.0,,1.0,1.0,0.0,1.0,19.92,8.83,109.17,138.92,0.702473,1.168569,0.672648,0.944152,0.980847,0.756026,1.073368,0.568622,0.928854,0.947742,1.22294,0.985699,0.878545,1.018304,1.013487,43532.44,17625.769076,4146.578709,10611.156453,11443.075151,78107.01,11498.68,18036.27,10572.53,11424.685791,1.046248,0,0,0,0,0,0,78699.48,11339.67,17917.79,10298.47,11171.448611,1.106846,0,0,0,0,0,0


In [18]:
# Destination of the retrospective load: <schema>.<table>, where the table
# name carries the delivery datestamp.
ret_schema = 'raw'
ret_table_name = f'milliman_ret_{current_folder}'

# Echo a ready-to-paste query for spot-checking the table after the upload.
print(f'select * from {ret_schema}.{ret_table_name}')

select * from raw.milliman_ret_20250217


In [19]:
# Upload the retrospective frame in multi-row INSERT batches of 1000 rows.
# Uses ret_table_name / ret_schema from the previous cell (the same values
# that were printed there) instead of rebuilding them inline, matching the
# prospective upload. `if_exists` stays at the pandas default ('fail'), so a
# re-run against an existing table raises; pass if_exists='replace' to overwrite.
ret_df.to_sql(ret_table_name, conn, schema=ret_schema, index=False, method='multi', chunksize=1000)

739405

In [None]:
# Row count per year in the retrospective extract.
ret_df['year'].value_counts()

In [10]:
# Write one retrospective CSV per year (ret_<year>.csv) into the delivery folder.
# NOTE(review): the year range is hard-coded to 2016-2022; rows with any other
# year are silently left out of these files - confirm against the year counts.
for yr in range(2016, 2023):
    year_mask = ret_df['year'] == yr
    ret_df[year_mask].to_csv(f'{data_dir}/{current_folder}/ret_{yr}.csv', index=False)

# Did not use below

In [None]:
# Not used for this delivery (see the heading above).
# Rebinds `conn` to the engine cb_utils returns when no source is given, then
# uploads the prospective frame into the `strategic` schema. `if_exists` is
# left at the pandas default ('fail').
conn = cb_utils.get_engine()
pro_df.to_sql(
    name=f'milliman_pro_{current_folder}',
    con=conn,
    schema='strategic',
    index=False,
    chunksize=1000,
    method='multi',
)

### Retro load

In [33]:

# Dump the full retrospective frame, without the index, to ret.csv in the
# notebook's current working directory.
ret_df.to_csv(path_or_buf='ret.csv', index=False)

In [None]:
# Not used for this delivery.
# Rebinds `conn` to the engine cb_utils returns when no source is given, then
# uploads the retrospective frame into the `strategic` schema. `if_exists` is
# left at the pandas default ('fail').
conn = cb_utils.get_engine()
ret_df.to_sql(
    name=f'milliman_ret_{current_folder}',
    con=conn,
    schema='strategic',
    index=False,
    chunksize=1000,
    method='multi',
)

### Pivots

In [11]:
def pivot_ma_plan_enrollment(df, n_plans=15):
    """Reshape the wide MA plan columns into one row per (provider, year, plan).

    The input must carry numbered column pairs ``ma_plan_<i>_name`` /
    ``ma_plan_<i>_enrollment`` for ``i = 1..n_plans`` next to the key columns
    ``deid_tin``, ``pac_id`` and ``year``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame holding the key columns and the numbered plan columns.
    n_plans : int, default 15
        Number of numbered plan slots to unpivot. The default matches the
        previously hard-coded 15 slots (30 columns).

    Returns
    -------
    pandas.DataFrame
        Columns ``deid_tin, pac_id, year, ma_plan_name, ma_enrollment``.
        A plan slot is kept for a row only when both its name and its
        enrollment are present. Slots are stacked in slot order and the
        original index labels are retained, so labels can repeat.
        With ``n_plans <= 0`` an empty frame with those columns is returned.

    Raises
    ------
    KeyError
        If a key column or one of the expected plan columns is missing.
    """
    key_cols = ['deid_tin', 'pac_id', 'year']
    out_cols = key_cols + ['ma_plan_name', 'ma_enrollment']

    frames = []
    for i in range(1, n_plans + 1):
        name, enrollment = f'ma_plan_{i}_name', f'ma_plan_{i}_enrollment'
        # Keep only rows where this slot is fully populated.
        plan = df[key_cols + [name, enrollment]].dropna(subset=[name, enrollment])
        # Give every slot the same column names so the pieces stack cleanly.
        frames.append(plan.rename(columns={name: 'ma_plan_name', enrollment: 'ma_enrollment'}))

    if not frames:
        # pd.concat refuses an empty list; return a well-formed empty result.
        return pd.DataFrame(columns=out_cols)
    return pd.concat(frames)

In [None]:
# Long-format MA plan enrollment for the retrospective extract.
# NOTE(review): the column listing displayed for the 20250217 load contains no
# ma_plan_* columns, so this call would raise KeyError on that file - confirm.
ret_pivot_df = pivot_ma_plan_enrollment(ret_df)

In [None]:
# Preview the first rows of the retrospective pivot.
ret_pivot_df.head()

In [None]:
# Long-format MA plan enrollment for the prospective extract.
# NOTE(review): the column listing displayed for the 20250217 load contains no
# ma_plan_* columns, so this call would raise KeyError on that file - confirm.
pro_pivot_df = pivot_ma_plan_enrollment(pro_df)

In [None]:
# Preview the first rows of the prospective pivot.
pro_pivot_df.head()

### Summaries 
### Retrospective

In [None]:
# Total unique_members per year for the retrospective extract
# (written to the 'retrospective' sheet of the summary workbook).
# NOTE(review): `unique_members` is not in the column listing displayed for
# the 20250217 load - confirm the column exists before running.
s1 = ret_df.groupby('year', as_index=False)['unique_members'].sum()
s1.head()

### Prospective

In [None]:
# Total unique_members per year for the prospective extract
# (written to the 'prospective' sheet of the summary workbook).
# NOTE(review): `unique_members` is not in the column listing displayed for
# the 20250217 load - confirm the column exists before running.
s2 = pro_df.groupby('year', as_index=False)['unique_members'].sum()
s2.head()

### Prospective

In [None]:
# Prospective MA enrollment summed per (year, plan), largest totals first.
s3 = (
    pro_pivot_df
    .groupby(['year', 'ma_plan_name'], as_index=False)['ma_enrollment']
    .sum()
    .sort_values('ma_enrollment', ascending=False)
)
s3.head()

### Retro

In [None]:
# Retrospective MA enrollment summed per (year, plan), largest totals first.
s4 = (
    ret_pivot_df
    .groupby(['year', 'ma_plan_name'], as_index=False)['ma_enrollment']
    .sum()
    .sort_values('ma_enrollment', ascending=False)
)
s4.head()

### Outputs

In [None]:
# Retrospective flat file without its trailing 30 columns (the MA plan
# columns, per the process notes at the top of the notebook).
# NOTE(review): the 68-column layout displayed for the 20250217 load has no
# MA plan columns, so [:-30] would strip other fields there - confirm.
ret_df[ret_df.columns[:-30]].to_csv(
    f'{data_dir}/{current_folder}/ours_milliman_ret_{current_folder}.csv',
    index=False,
)

In [None]:
# Prospective flat file without its trailing 30 columns (the MA plan
# columns, per the process notes at the top of the notebook).
# NOTE(review): the 68-column layout displayed for the 20250217 load has no
# MA plan columns, so [:-30] would strip other fields there - confirm.
pro_df[pro_df.columns[:-30]].to_csv(
    f'{data_dir}/{current_folder}/ours_milliman_pro_{current_folder}.csv',
    index=False,
)

In [None]:
# Save both long-format MA pivot frames next to the flat files in the
# delivery folder, without the index column.
pro_pivot_df.to_csv(f'{data_dir}/{current_folder}/pro_pivot.csv', index=False)
ret_pivot_df.to_csv(f'{data_dir}/{current_folder}/ret_pivot.csv', index=False)

In [None]:
# One workbook with a sheet per summary frame. Sheet order follows the dict:
# s1/s4 come from the retrospective data, s2/s3 from the prospective data.
summary_sheets = {
    'retrospective': s1,
    'prospective': s2,
    'prospective_pivot': s3,
    'retrospective_pivot': s4,
}
with pd.ExcelWriter(f'{data_dir}/{current_folder}/summary.xlsx') as writer:
    for sheet_name, frame in summary_sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)