In [1]:
import pandas as pd
import numpy as np
from data.get_data import FetchPlansPricings
from data.clean_data import HMOEPOCleaner, PPOPOSCleaner
from features.feature_building import FeatureEngineer
from models.v1_model import MedicalPlanSimilarityModel
from sklearn.neighbors import NearestNeighbors
# Lift pandas' display truncation so wide/long plan tables are shown in full.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

### Fetch Data

In [59]:
# Market slice to pull; the fetcher writes its results to disk (see the
# "Saved dataframe ..." messages printed below this cell).
fetch_params = {"state": "CA", "zip_code": 93401, "year": 2024, "quarter": "Q1"}
data_fetcher = FetchPlansPricings(**fetch_params)
data_fetcher.fetch_and_save_data()

# Load the raw plans extract for ad-hoc inspection.
raw_plans_path = "~/like-plans/data/raw/raw_plans.csv"
raw_plans_df = pd.read_csv(raw_plans_path)



Saved dataframe from CA, 93401, 2024, Q1 to /Users/kieranshaw/like-plans/data/raw/raw_plans.csv
Saved dataframe from CA, 93401, 2024, Q1 to /Users/kieranshaw/like-plans/data/raw/raw_pricings.csv


### Clean Data

In [60]:
# Create one cleaner per plan family: HMO/EPO and PPO/POS.
hmo_epo_cleaner = HMOEPOCleaner()
ppo_pos_cleaner = PPOPOSCleaner()

# Read the raw data.
# NOTE(review): read_data() is called without arguments, so each cleaner
# presumably knows where its own raw file lives -- confirm in data/clean_data.py.
hmo_epo_raw_data = hmo_epo_cleaner.read_data()
ppo_pos_raw_data = ppo_pos_cleaner.read_data()

# Clean each family's raw frame with its own cleaner.
hmo_epo_cleaned_data = hmo_epo_cleaner.clean(df=hmo_epo_raw_data)
ppo_pos_cleaned_data = ppo_pos_cleaner.clean(df=ppo_pos_raw_data)

# Save the cleaned data for inspection (side effect only; the in-memory
# *_cleaned_data frames are what the feature-building cell consumes).
hmo_epo_cleaner.save_data(df=hmo_epo_cleaned_data)
ppo_pos_cleaner.save_data(df=ppo_pos_cleaned_data)

### Feature Building

In [61]:
def add_pcp_cost_column(plans_df, engineer, network):
    """Add a ``primary_care_physician_<network>`` column to ``plans_df`` in place.

    The value for each row is whatever
    ``engineer.calculate_primary_care_physician`` returns for that row's
    ``pcp_cleaned_dollar_values_<network>``, ``pcp_cleaned_percentages_<network>``
    and ``pcp_initial_visits_<network>`` columns.

    Args:
        plans_df: Cleaned plans frame containing the three ``pcp_*`` input columns.
        engineer: The FeatureEngineer instance that owns this frame's pipeline.
        network: Column suffix, ``"in_network"`` or ``"out_of_network"``.
    """
    plans_df[f"primary_care_physician_{network}"] = plans_df.apply(
        lambda row: engineer.calculate_primary_care_physician(
            row[f"pcp_cleaned_dollar_values_{network}"],
            row[f"pcp_cleaned_percentages_{network}"],
            row[f"pcp_initial_visits_{network}"],
        ),
        axis=1,
    )


# Identifier columns that both engineers are told to exclude.
NON_FEATURE_COLS = ("carrier_name", "name", "id")

# --- HMO / EPO plans (in-network only) ---
fe_hmo = FeatureEngineer(exclude_cols=list(NON_FEATURE_COLS))
add_pcp_cost_column(hmo_epo_cleaned_data, fe_hmo, "in_network")
# Reassign rather than reset_index(inplace=True) so the frame returned by the
# drop helper is never mutated behind the caller's back.
hmo_epo_df = fe_hmo.drop_hmo_columns(df=hmo_epo_cleaned_data).reset_index(drop=True)
hmo_epo_transformed_array = fe_hmo.fit_transform(df=hmo_epo_df)

# --- PPO / POS plans (in- and out-of-network) ---
# Every step uses fe_ppo: the original cell called fe_hmo for the PCP
# calculation and the column drop here, which only worked as long as those
# methods never touched instance state.
fe_ppo = FeatureEngineer(exclude_cols=list(NON_FEATURE_COLS))
add_pcp_cost_column(ppo_pos_cleaned_data, fe_ppo, "in_network")
add_pcp_cost_column(ppo_pos_cleaned_data, fe_ppo, "out_of_network")
ppo_pos_df = fe_ppo.drop_ppo_columns(df=ppo_pos_cleaned_data).reset_index(drop=True)

ppo_pos_transformed_array = fe_ppo.fit_transform(df=ppo_pos_df)

### Run Model

In [96]:
# Similarity search over the HMO/EPO plans for one specific query plan.
# NOTE(review): the neighbours shown below are all from `carrier_name`, so it
# presumably restricts the candidate set to that carrier -- confirm in v1_model.
similarity_options = {
    "metric": "manhattan",
    "n_neighbors": 200,
    "plan_id": "49116CA0070222",
    "carrier_name": "BlueShield of California",
}
model = MedicalPlanSimilarityModel(
    hmo_epo_df,
    hmo_epo_transformed_array,
    **similarity_options,
)
random_plan, similar_plans = model.fit()

In [97]:
# Show the query plan. Despite the variable name, the displayed id matches the
# plan_id passed to the model, so this does not look like a random draw here.
random_plan

Unnamed: 0,id,carrier_name,name,level,plan_type,hsa_eligible,infertility_treatment_rider,network_size,individual_medical_deductible_in_network,family_medical_deductible_in_network,individual_medical_moop_in_network,family_medical_moop_in_network,coinsurance_in_network,individual_drug_deductible_in_network,family_drug_deductible_in_network,pcp_after_deductible_in_network,primary_care_physician_in_network
230,49116CA0070222,UnitedHealthcare,SignatureValue Alliance HMO Silver - DIPC,silver,HMO,0,0,28367,2400,4800,9400,18800,40,400,800,0,0.0


In [98]:
# First ten matches. In the output below similarity_score increases down the
# table, so lower presumably means closer (a distance, not a similarity).
similar_plans.head(10)

Unnamed: 0,id,carrier_name,name,level,plan_type,hsa_eligible,infertility_treatment_rider,network_size,individual_medical_deductible_in_network,family_medical_deductible_in_network,individual_medical_moop_in_network,family_medical_moop_in_network,coinsurance_in_network,individual_drug_deductible_in_network,family_drug_deductible_in_network,pcp_after_deductible_in_network,primary_care_physician_in_network,similarity_score
245,70285CA8170079,BlueShield of California,Silver Local Access+ HMO 2300/70 OffEx,silver,HMO,0,0,65306,2300,4600,8750,17500,40,450,900,0,70.0,6.750992
241,70285CA8170063,BlueShield of California,Silver Access+ HMO 2300/70 OffEx,silver,HMO,0,0,65318,2300,4600,8750,17500,40,450,900,0,70.0,6.751504
249,70285CA8170096,BlueShield of California,Silver Trio HMO 2300/70 OffEx,silver,HMO,0,0,66486,2300,4600,8750,17500,40,450,900,0,70.0,6.801382
246,70285CA8170395,BlueShield of California,Silver Local Access+ HMO 2300/70 OffEx INF,silver,HMO,0,1,65306,2300,4600,8750,17500,40,450,900,0,70.0,9.092949
242,70285CA8170379,BlueShield of California,Silver Access+ HMO 2300/70 OffEx INF,silver,HMO,0,1,65318,2300,4600,8750,17500,40,450,900,0,70.0,9.093462
250,70285CA8170412,BlueShield of California,Silver Trio HMO 2300/70 OffEx INF,silver,HMO,0,1,66486,2300,4600,8750,17500,40,450,900,0,70.0,9.14334
247,70285CA8170982,BlueShield of California,Silver Local Access+ HMO 2750/70 OffEx,silver,HMO,0,0,65306,2750,5500,8750,17500,45,0,0,0,70.0,11.841768
243,70285CA8170966,BlueShield of California,Silver Access+ HMO 2750/70 OffEx,silver,HMO,0,0,65318,2750,5500,8750,17500,45,0,0,0,70.0,11.842281
251,70285CA8171000,BlueShield of California,Silver Trio HMO 2750/70 OffEx,silver,HMO,0,0,66486,2750,5500,8750,17500,45,0,0,0,70.0,11.892158
248,70285CA8171046,BlueShield of California,Silver Local Access+ HMO 2750/70 OffEx INF,silver,HMO,0,1,65306,2750,5500,8750,17500,45,0,0,0,70.0,14.183726


### Ad-hoc checks on the raw plans data

In [85]:
# Which provider network does this plan id belong to?
raw_plans_df = pd.read_csv("~/like-plans/data/raw/raw_plans.csv")
is_target_plan = raw_plans_df["id"] == "27603CA1220881"
raw_plans_df.loc[is_target_plan, "network_name"]

144    PRUDENT BUYER PPO
Name: network_name, dtype: object

In [22]:
# Confirm that both plan ids exist in the raw extract.
ids_to_check = ["20523CA0031474", "20523CA0031072"]
raw_plans_df.loc[raw_plans_df["id"].isin(ids_to_check), "id"]

5     20523CA0031072
19    20523CA0031474
Name: id, dtype: object

In [5]:
# Name of the plan at index label 802, among rows with no adult_dental value.
missing_adult_dental = raw_plans_df["adult_dental"].isna()
raw_plans_df.loc[missing_adult_dental, "name"].loc[802]

'Silver Tandem PPO Savings 2600/35% HDHP PrevRx OffEx'

In [59]:
# Find the plan id for a plan known only by its marketing name.
is_named_plan = raw_plans_df["name"] == "Anthem Bronze PPO 75/7300/40%"
raw_plans_df.loc[is_named_plan, "id"]

232    27603CA1221529
Name: id, dtype: object

In [25]:
# Plans with no chiropractic_services value, with identifying columns only.
missing_chiropractic = raw_plans_df["chiropractic_services"].isna()
identifying_cols = ["id", "name", "carrier_name", "benefits_summary_url"]
raw_plans_df.loc[missing_chiropractic, identifying_cols]

Unnamed: 0,id,name,carrier_name,benefits_summary_url
100,27603CA1210266-13,Anthem Gold Select HMO 35 w/ T&L,Anthem,
102,27603CA1210290-13,Anthem Gold HMO 35 w/ T&L,Anthem,
104,27603CA1210475-13,Anthem Platinum Select HMO 0/25 w/ T&L,Anthem,
106,27603CA1210510-13,Anthem Platinum HMO 0/25 w/ T&L,Anthem,
108,27603CA1210534-13,Anthem Gold Select HMO 30 w/ T&L,Anthem,
110,27603CA1210551-13,Anthem Gold HMO 30 w/ T&L,Anthem,
112,27603CA1210773-13,Anthem Silver HMO 55 w/ T&L,Anthem,
114,27603CA1210792-13,Anthem Silver Select HMO 55 w/ T&L,Anthem,
116,27603CA1210865-13,Anthem Platinum HMO 0/30 w/ T&L,Anthem,
118,27603CA1210884-13,Anthem Platinum Select HMO 0/30 w/ T&L,Anthem,


In [30]:
# Number of rows in the raw plans extract.
raw_plans_df.shape[0]

902