In [None]:
# Normalise the column names of the raw frame.
df = data.clean_names()

# Code -> label lookups, one per categorical column that is relabelled below.
# NOTE(review): the clinic lookup is keyed by ints while the other three are
# keyed by strings; Series.map yields NaN for any value that is not a key, so
# each key type has to match the dtype of its column -- confirm against the data.
value_map = {1: 'Inpatient', 2: 'Outpatient'}
value_map_2 = {
    '1': 'northeast',
    '2': 'northcentral',
    '3': 'south',
    '4': 'west',
    '5': 'unknown',
}
value_map_3 = {'1': 'male', '2': 'female'}
value_map_4 = {
    '1': 'Plan Holder',
    '2': 'Spouse',
    '3': 'Child/Other Dependent',
    '4': 'Unknown',
}

# Replace the raw codes with readable labels, column by column.
for column_name, lookup in (
    ('clinic', value_map),
    ('region_mod', value_map_2),
    ('gender', value_map_3),
    ('relationship_to_primary_beneficiary', value_map_4),
):
    df[column_name] = df[column_name].map(lookup)
def encode_and_bind(original_dataframe, features_to_encode):
    """One-hot encode columns of a dataframe and swap them in for the originals.

    The dataframe is modified in place: the encoded columns (named by the
    encoder's ``get_feature_names_out``) are added and the source columns are
    dropped. The same dataframe object is also returned.

    Args:
        original_dataframe: Dataframe holding the columns to encode.
        features_to_encode: A column name, or a list of column names.

    Returns:
        ``original_dataframe`` itself, after modification.

    Raises:
        ValueError: If a requested column is not in the dataframe. Raised for
            the first missing column, before anything is modified.
    """
    # A lone column name is handled as a one-element list.
    if isinstance(features_to_encode, str):
        feature_names = [features_to_encode]
    else:
        feature_names = features_to_encode

    # Validate every requested column up front.
    for name in feature_names:
        if name not in original_dataframe.columns:
            raise ValueError(f"Feature '{name}' not found in the dataframe")

    # Fit and apply the encoder in one step; the output is a dense array.
    one_hot = OneHotEncoder(sparse_output=False)
    dummy_values = one_hot.fit_transform(original_dataframe[feature_names])
    dummy_names = one_hot.get_feature_names_out(feature_names)

    # Attach the encoded columns, then remove the columns they were built from.
    original_dataframe[dummy_names] = dummy_values
    original_dataframe.drop(columns=feature_names, inplace=True)

    return original_dataframe

# One-hot encode the relabelled categorical columns. encode_and_bind modifies
# df in place and returns the same object.
features_to_encode = ['clinic', 'gender']
df = encode_and_bind(df, features_to_encode)

# DRG codes that set the `trauma` flag.
# NOTE(review): the original list read "533, 534, 545, 536"; 545 broke the
# otherwise consecutive 533-536 run and looked like a typo, so it is 535 here.
# Confirm against the DRG table in use.
value_to_encode = [183, 184, 185, 521, 522, 533, 534, 535, 536, 542, 543, 544, 562, 563]
df['trauma'] = df['drg'].isin(value_to_encode).astype(int)

# Rows whose DRG is in 280..283 (inclusive) add one to the existing
# myocardial_infarction column.
df.loc[df['drg'].between(280, 283), 'myocardial_infarction'] += 1

# These columns are not used after this point.
df = df.drop(columns=['health_plan_type', 'plan_typ', 'drg', 'patient_zipcode'])

# Collapse the data to one row per (patient, gender_male, relationship):
# every other column is summed, except age, which is averaged.
# NOTE(review): groupby drops rows whose key columns are NaN by default, so an
# unmapped relationship value removes the row -- confirm this is intended.
group_columns = ['patient_id', 'gender_male', 'relationship_to_primary_beneficiary']
agg_dict = {col: 'sum' for col in df.columns if col not in group_columns}
agg_dict['age_years'] = 'mean'
df = df.groupby(group_columns).agg(agg_dict).reset_index()

# Store patient_id as text without its last two characters.
# NOTE(review): presumably this strips a trailing ".0" left by a float id;
# if the ids are integers it would merge distinct patients -- confirm.
df['patient_id'] = df['patient_id'].astype(str).str.slice(0, -2)

# Keep adults only (mean age above 17) whose total pay lies in [2, 1_000_000];
# this also removes zero and negative pay.
df = df[(df['age_years'] > 17) & (df['pay'].between(2, 1000000))]

df = df.clean_names()

# Rows sharing a patient_id, kept sorted for inspection only.
duplicates = df.duplicated(subset='patient_id', keep=False)
duplicate_rows = df[duplicates].sort_values(by='patient_id')

# Only rows identical in every column are removed here.
# NOTE(review): patients listed in duplicate_rows with differing rows remain.
df = df.drop_duplicates()

# The clinic_* columns hold per-patient sums at this point, so a patient with
# several inpatient or outpatient rows has a value above 1. Testing "> 0"
# rather than "== 1" flags every patient seen in both settings.
df['both_clinic'] = np.where(
    (df['clinic_inpatient'] > 0) & (df['clinic_outpatient'] > 0), 1, 0
)


# Columns that must end up as 0/1 indicators after the per-patient summation.
# A missing comma after 'cardiovascular_d' used to fuse it with 'respiratory_d'
# into one non-existent name, so neither column was ever binarised.
columns_to_replace = [
    'myocardial_infarction', 'trauma', 'chf', 'pvd', 'cardiovascular_d',
    'respiratory_d', 'hypertension', 'diabetes_melitus', 'dementia',
    'kidney_disease', 'liver_disease', 'diarrheal_disease', 'cancer',
    'metastasis', 'puc', 'hemiplegia', 'lymphoma', 'aids',
    'connective_tissue_disease',
    # 'region_mod_northcentral',
    # 'region_mod_northeast',
    # 'region_mod_south',
    # 'region_mod_west',
    # 'region_mod_unknown',
    'clinic_outpatient', 'clinic_inpatient', 'both_clinic', 'gender_female',
    'gender_male',
]
for column in columns_to_replace:
    # Columns absent from df are skipped silently.
    if column in df.columns:
        # Any positive count becomes 1; zeros and missing values are untouched.
        df.loc[df[column] > 0, column] = 1


# CCI score: age points plus a weight for each comorbidity flag.
#
# Age points are 0 below 50, then 1 / 2 / 3 for the 50s / 60s / 70s and 4 from
# 80 upwards. The bins are left-closed (right=False), so the edges must be
# 50/60/70/80; the earlier 49/59/69/79 edges scored ages 49, 59, 69 and 79 one
# band too high.
df['cci'] = pd.cut(
    df['age_years'],
    bins=[0, 50, 60, 70, 80, float('inf')],
    labels=[0, 1, 2, 3, 4],
    right=False,
).astype(int)

# Points added per comorbidity flag.
# NOTE(review): weights are carried over unchanged; check them against the
# Charlson variant in use (e.g. `dementia` is binarised with the other flags
# but contributes no points here).
cci_weights = {
    'myocardial_infarction': 1,
    'chf': 1,
    'pvd': 1,
    'respiratory_d': 1,
    'connective_tissue_disease': 1,
    'liver_disease': 1,
    'diabetes_melitus': 1,
    'kidney_disease': 1,
    'puc': 1,
    'hemiplegia': 2,
    'lymphoma': 2,
    'aids': 6,
    'cancer': 2,
    'metastasis': 6,
}
for column, weight in cci_weights.items():
    # Flags are expected to be 0/1 here; "> 0" also counts a column that still
    # holds a summed number of records instead of silently skipping it.
    df.loc[df[column] > 0, 'cci'] += weight

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
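# NOTE: X_train, y_train, X_test and y_test are not created in this cell; they must already be defined before it runs.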



# Network: two hidden ReLU layers of 32 units each, then one linear output unit.
# (Dropout(0.3) layers and an L1-regularised Dense(3) layer were left disabled
# in the original and are not part of the model.)
model = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='linear'),
])

# Early stopping: halt once val_loss has gone 5 epochs without improving and
# restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    restore_best_weights=True,
)

# Compile with mean absolute error as both the loss and the reported metric.
# NOTE(review): the Adam learning rate is 0.1, well above the Keras default of
# 0.001 -- confirm this value is intended.
model.compile(
    optimizer=Adam(0.1),
    loss='mean_absolute_error',
    metrics=['mae'],
)

# Train for at most 59 epochs, holding out 30% of the training data for validation.
model.fit(
    X_train,
    y_train,
    epochs=59,
    batch_size=32,
    validation_split=0.3,
    callbacks=[early_stopping],
)

# Evaluate on the held-out test data; `results` holds the loss and the metric.
results = model.evaluate(X_test, y_test)

In [None]:
# Non-disease input columns. Only the keys are used below; every value is 0.
init_dict = {
    'age_years': 0,
    'relationship_to_primary_beneficiary': 0,
    'clinic': 0,
    'region_mod': 0,
    'gender_female': 0,
    'gender_male': 0,
    'cci': 0,
}

# Disease flag column -> display label.
disease_name_mapping = {
    'respiratory_d': 'Respiratory Disease',
    'hypertension': 'Hypertension',
    'diabetes_melitus': 'Diabetes Melitus',
    'dementia': 'Dementia',
    'kidney_disease': 'Kidney Disease',
    'liver_disease': 'Liver Disease',
    'diarrheal_disease': 'Diarrheal Disease',
    'myocardial_infarction': 'Myocardial Infarction',
    'cardiovascular_d': 'Cardiovascular Disease',
    'chf': 'Heart Failure',
    'pvd': 'Peripherial Vascular Disease',
    'cancer': 'Non-Metastatic Cancer',
    'metastasis': 'Metastatic Cancer',
    'connective_tissue_disease': 'Autoimmune Disease',
    'puc': 'Peptic Ulcer',
    'hemiplegia': 'Stroke',
    'lymphoma': 'Lymphoma',
    'aids': 'AIDS',
    'trauma': 'Previous Fracture',
}

# Column order for X: the non-disease columns first, then the disease flags,
# each group in the order declared above.
init_lst = [*init_dict, *disease_name_mapping]
column_order = init_lst

# Columns of df that are cast to int.
columns_to_convert = [
    'clinic',
    'region_mod',
    'gender',
    'cci',
    'respiratory_d',
    'hypertension',
    'diabetes_melitus',
    'dementia',
    'kidney_disease',
    'liver_disease',
    'diarrheal_disease',
    'myocardial_infarction',
    'cardiovascular_d',
    'chf',
    'pvd',
    'cancer',
    'metastasis',
]
for column in columns_to_convert:
    df[column] = df[column].astype(int)

# Reorder the columns of X and display it.
X = X[column_order]
X