In [1]:
import pandas as pd
import numpy as np
from data.get_data import FetchPlansPricings
from data.clean_data import HMOEPOCleaner, PPOPOSCleaner
from features.feature_building import FeatureEngineer
from models.v1_model import MedicalPlanSimilarityModel
from sklearn.neighbors import NearestNeighbors
# Disable truncation when DataFrames are rendered: a limit of None shows every
# column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

### Fetch Data

In [7]:
# Fetch plans and pricings for a single market/quarter. The fetcher writes its
# results to disk (raw_plans.csv and raw_pricings.csv, per the log output below).
data_fetcher = FetchPlansPricings(
    state="CA",
    zip_code=92262,
    year=2024,
    quarter="Q1",
)
data_fetcher.fetch_and_save_data()

# Load the plans file back into memory for inspection.
raw_plans_df = pd.read_csv("~/like-plans/data/raw/raw_plans.csv")



Saved dataframe from CA, 92262, 2024, Q1 to /Users/kieranshaw/like-plans/data/raw/raw_plans.csv
Saved dataframe from CA, 92262, 2024, Q1 to /Users/kieranshaw/like-plans/data/raw/raw_pricings.csv


### Clean Data

In [2]:
# Create one cleaner per plan-type family (HMO/EPO and PPO/POS)
hmo_epo_cleaner = HMOEPOCleaner()
ppo_pos_cleaner = PPOPOSCleaner()

# Read the raw data through each cleaner
# NOTE(review): read_data() takes no path here, so the input location is
# presumably configured inside the cleaner classes — confirm.
hmo_epo_raw_data = hmo_epo_cleaner.read_data()
ppo_pos_raw_data = ppo_pos_cleaner.read_data()

# Clean the data; each cleaner is applied only to the frame it read
hmo_epo_cleaned_data = hmo_epo_cleaner.clean(df=hmo_epo_raw_data)
ppo_pos_cleaned_data = ppo_pos_cleaner.clean(df=ppo_pos_raw_data)

# Save the cleaned data for inspection
hmo_epo_cleaner.save_data(df=hmo_epo_cleaned_data)
ppo_pos_cleaner.save_data(df=ppo_pos_cleaned_data)

### Feature Building

In [3]:
def _add_pcp_cost(df, engineer, network):
    """Assign the ``primary_care_physician_<network>`` column on ``df`` in place.

    Each row's value is ``engineer.calculate_primary_care_physician`` applied to
    that row's three ``pcp_*_<network>`` inputs.

    Args:
        df: Cleaned plans frame containing ``pcp_cleaned_dollar_values_<network>``,
            ``pcp_cleaned_percentages_<network>`` and
            ``pcp_initial_visits_<network>``.
        engineer: FeatureEngineer whose ``calculate_primary_care_physician``
            is called for every row.
        network: Column suffix, ``"in_network"`` or ``"out_of_network"``.

    Returns:
        The same ``df`` object, with the new column assigned.
    """
    df[f"primary_care_physician_{network}"] = df.apply(
        lambda row: engineer.calculate_primary_care_physician(
            row[f"pcp_cleaned_dollar_values_{network}"],
            row[f"pcp_cleaned_percentages_{network}"],
            row[f"pcp_initial_visits_{network}"],
        ),
        axis=1,
    )
    return df


# --- HMO / EPO -------------------------------------------------------------
fe_hmo = FeatureEngineer(exclude_cols=["carrier_name", "name", "id"])
_add_pcp_cost(hmo_epo_cleaned_data, fe_hmo, "in_network")
# Reset the index so frame rows line up positionally with the transformed array.
hmo_epo_df = fe_hmo.drop_hmo_columns(df=hmo_epo_cleaned_data).reset_index(drop=True)
hmo_epo_transformed_array = fe_hmo.fit_transform(df=hmo_epo_df)

# --- PPO / POS -------------------------------------------------------------
# Use the PPO engineer for every PPO step. The original called fe_hmo here, which
# tied this pipeline to an engineer already fitted on the HMO/EPO data.
# NOTE(review): if calculate_primary_care_physician / drop_ppo_columns are
# stateless the numbers are unchanged — confirm against FeatureEngineer.
fe_ppo = FeatureEngineer(exclude_cols=["carrier_name", "name", "id"])
_add_pcp_cost(ppo_pos_cleaned_data, fe_ppo, "in_network")
_add_pcp_cost(ppo_pos_cleaned_data, fe_ppo, "out_of_network")
ppo_pos_df = fe_ppo.drop_ppo_columns(df=ppo_pos_cleaned_data).reset_index(drop=True)

ppo_pos_transformed_array = fe_ppo.fit_transform(df=ppo_pos_df)

### Run Model

In [34]:
# Similarity search over the PPO/POS plans. To run it on HMO/EPO plans instead,
# pass hmo_epo_df and hmo_epo_transformed_array as the first two arguments.
model = MedicalPlanSimilarityModel(
    ppo_pos_df,
    ppo_pos_transformed_array,
    # Query plan and carrier. NOTE(review): judging by the outputs below,
    # carrier_name looks like a filter on the returned plans rather than the
    # carrier of the query plan — confirm.
    plan_id="37873CA0090079",
    carrier_name="Anthem",
    # Neighbour-search settings.
    n_neighbors=200,
    metric="manhattan",
)

# fit() hands back the query plan and the table of plans similar to it.
random_plan, similar_plans = model.fit()

In [35]:
# Display the plan the neighbours were computed for. Despite the name, this is
# presumably the plan selected via plan_id in the model cell rather than a
# random draw (the displayed id matches) — TODO confirm.
random_plan

Unnamed: 0,id,carrier_name,name,level,plan_type,hsa_eligible,infertility_treatment_rider,chiropractic_services,individual_medical_deductible_in_network,individual_medical_deductible_out_of_network,family_medical_deductible_in_network,family_medical_deductible_out_of_network,individual_medical_moop_in_network,individual_medical_moop_out_of_network,family_medical_moop_in_network,family_medical_moop_out_of_network,coinsurance_in_network,coinsurance_out_of_network,individual_drug_deductible_in_network,family_drug_deductible_in_network,pcp_after_deductible_in_network,pcp_after_deductible_out_of_network,anthem_travel_and_lodging,primary_care_physician_in_network,primary_care_physician_out_of_network
348,37873CA0090079,UnitedHealthcare,Select Plus PPO Silver DIAG /L41S,silver,PPO,0,0,1,1950,3900,3900,7800,9450,18900,18900,37800,40,50,350,700,0,1,0,55.0,85.5


In [36]:
# Show the first ten rows of the similar-plans table.
# NOTE(review): similarity_score looks like a distance (smaller = closer) given
# metric='manhattan' and the ascending values shown — confirm against the model.
similar_plans.head(10)

Unnamed: 0,id,carrier_name,name,level,plan_type,hsa_eligible,infertility_treatment_rider,chiropractic_services,individual_medical_deductible_in_network,individual_medical_deductible_out_of_network,family_medical_deductible_in_network,family_medical_deductible_out_of_network,individual_medical_moop_in_network,individual_medical_moop_out_of_network,family_medical_moop_in_network,family_medical_moop_out_of_network,coinsurance_in_network,coinsurance_out_of_network,individual_drug_deductible_in_network,family_drug_deductible_in_network,pcp_after_deductible_in_network,pcp_after_deductible_out_of_network,anthem_travel_and_lodging,primary_care_physician_in_network,primary_care_physician_out_of_network,similarity_score
64,27603CA1220884,Anthem,Anthem Silver PPO 55/1950/35%,silver,PPO,0,0,1,1950,3900,3900,7800,9100,18200,18200,36400,35,50,300,600,0,1,0,55.0,85.5,4.895978
72,27603CA1220869,Anthem,Anthem Silver Select PPO 55/1950/35%,silver,PPO,0,0,1,1950,3900,3900,7800,9100,18200,18200,36400,35,50,300,600,0,1,0,55.0,85.5,4.895978
62,27603CA1221462,Anthem,Anthem Silver PPO 45/1750/40% WH,silver,PPO,0,0,1,1750,3500,3500,7000,9100,18200,18200,36400,40,50,300,600,0,1,0,45.0,85.5,9.078209
70,27603CA1221564,Anthem,Anthem Silver Select PPO 45/1750/40% WH,silver,PPO,0,0,1,1750,3500,3500,7000,9100,18200,18200,36400,40,50,300,600,0,1,0,45.0,85.5,9.078209
61,27603CA1221129,Anthem,Anthem Silver PPO 45/1750/40%,silver,PPO,0,0,1,1750,3500,3500,7000,9100,18200,18200,36400,40,50,300,600,0,1,0,45.0,85.5,9.078209
69,27603CA1221132,Anthem,Anthem Silver Select PPO 45/1750/40%,silver,PPO,0,0,1,1750,3500,3500,7000,9100,18200,18200,36400,40,50,300,600,0,1,0,45.0,85.5,9.078209
63,27603CA1220845,Anthem,Anthem Silver PPO 50/2200/40%,silver,PPO,0,0,1,2200,4400,4400,8800,8600,17200,17200,34400,40,50,300,600,0,1,0,50.0,85.5,9.461427
71,27603CA1220896,Anthem,Anthem Silver Select PPO 50/2200/40%,silver,PPO,0,0,1,2200,4400,4400,8800,8600,17200,17200,34400,40,50,300,600,0,1,0,50.0,85.5,9.461427
74,27603CA1221134,Anthem,Anthem Silver Select PPO 55/2500/45%,silver,PPO,0,0,1,2500,5000,5000,10000,8700,17400,17400,34800,45,50,200,400,0,1,0,55.0,85.5,10.055879
65,27603CA1221171,Anthem,Anthem Silver PPO 55/2500/45%,silver,PPO,0,0,1,2500,5000,5000,10000,8700,17400,17400,34800,45,50,200,400,0,1,0,55.0,85.5,10.055879


### Scratch: Reload Raw Plans

In [37]:
# Reload the raw plans CSV. This is the same path the Fetch Data cell reads, so
# it rebinds raw_plans_df to a fresh copy from disk.
raw_plans_df = pd.read_csv("~/like-plans/data/raw/raw_plans.csv")