In [33]:
import pandas as pd
import joblib

In [41]:
# Load the trained model from its serialized artifact
# NOTE(review): joblib.load unpickles the file, so it should only ever be pointed
# at an artifact from a trusted source.
MODEL_PATH = "finalModel_fromGridSearchCV_RF.joblib"
model = joblib.load(MODEL_PATH)

In [43]:
# Define a function that encodes the raw categorical inputs for the model
def preprocess_input(input_data):
    """Convert raw insurance records into the feature frame the model expects.

    The categorical columns ``sex`` and ``smoker`` are encoded as the binary
    indicators ``sex_male`` and ``smoker_yes`` by comparing each value with
    its positive category (``'male'`` / ``'yes'``). Any other value is
    encoded as 0.

    This deliberately avoids ``pd.get_dummies(..., drop_first=True)``: with
    ``drop_first`` the result depends on which categories happen to be
    present in the batch, so a frame holding only ``'male'`` or only ``'yes'``
    rows (for example a single-row prediction) loses its indicator column and
    ends up scored as female / non-smoker.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Raw records. Must contain the ``sex`` and ``smoker`` columns. Any of
        ``age``, ``bmi`` and ``children`` that are absent are filled with 0.

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly the columns ``age``, ``bmi``, ``children``,
        ``sex_male``, ``smoker_yes``, in that order. ``input_data`` itself is
        not modified.

    Raises
    ------
    KeyError
        If ``sex`` or ``smoker`` is missing from ``input_data``.
    """
    required_columns = ['age', 'bmi', 'children', 'sex_male', 'smoker_yes']
    # raw column -> (indicator column, category that is encoded as 1)
    binary_encodings = {'sex': ('sex_male', 'male'), 'smoker': ('smoker_yes', 'yes')}

    missing_raw = [col for col in binary_encodings if col not in input_data.columns]
    if missing_raw:
        raise KeyError(f"input_data is missing categorical column(s): {missing_raw}")

    # Work on a copy so the caller's frame is left untouched
    encoded = input_data.copy()
    for raw_col, (indicator_col, positive) in binary_encodings.items():
        encoded[indicator_col] = (encoded[raw_col] == positive).astype(int)

    # Ensure all required columns are present (add missing columns with default value 0)
    for col in required_columns:
        if col not in encoded.columns:
            encoded[col] = 0

    # Keep only the model's features, in the model's expected order; the explicit
    # copy lets callers add columns to the result without pandas copy warnings
    return encoded[required_columns].copy()

In [47]:
# Define the inverse_dummy function to turn indicator columns back into labels
def inverse_dummy(dummy_df):
    """Rebuild the categorical columns from their binary indicator columns.

    ``sex_male`` is decoded into ``sex`` (``'male'`` where the indicator
    equals 1, ``'female'`` otherwise) and ``smoker_yes`` into ``smoker``
    (``'yes'`` where the indicator equals 1, ``'no'`` otherwise). Indicator
    columns that are absent from ``dummy_df`` are skipped, and every other
    column of ``dummy_df`` is ignored.

    Parameters
    ----------
    dummy_df : pandas.DataFrame
        Frame that may contain ``sex_male`` and/or ``smoker_yes``.

    Returns
    -------
    pandas.DataFrame
        A frame holding only the decoded columns that could be rebuilt.
    """
    # (indicator column, decoded column, label when indicator == 1, label otherwise)
    decoders = (
        ('sex_male', 'sex', 'male', 'female'),
        ('smoker_yes', 'smoker', 'yes', 'no'),
    )

    decoded = {}
    for indicator_col, category_col, on_label, off_label in decoders:
        if indicator_col not in dummy_df.columns:
            continue
        # Labels are bound as defaults so each lambda keeps its own pair
        decoded[category_col] = dummy_df[indicator_col].apply(
            lambda flag, on=on_label, off=off_label: on if flag == 1 else off
        )

    return pd.DataFrame(decoded)

In [61]:
# Example input data (original format with categorical columns), one record per row
example_records = [
    {'age': 19, 'bmi': 27.9, 'children': 0, 'sex': 'female', 'smoker': 'yes'},
    {'age': 18, 'bmi': 33.77, 'children': 1, 'sex': 'male', 'smoker': 'no'},
    {'age': 28, 'bmi': 33.0, 'children': 3, 'sex': 'female', 'smoker': 'no'},
]
# `sex` and `smoker` are the original categorical columns, still unencoded
input_data = pd.DataFrame(
    example_records,
    columns=['age', 'bmi', 'children', 'sex', 'smoker'],
)

In [63]:
# Preprocess the input data: encode `sex` / `smoker` as indicator columns and
# arrange the columns as age, bmi, children, sex_male, smoker_yes
processed_input = preprocess_input(input_data)

In [65]:
# Inspect the encoded feature frame before it is passed to the model
processed_input

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.9,0,0,1
1,18,33.77,1,1,0
2,28,33.0,3,0,0


In [67]:
# Make predictions
# The feature columns are selected explicitly so this cell can be re-run safely:
# a later cell adds `charges_pred` to `processed_input`, and without the selection
# a re-run would hand that extra column to the model as a sixth feature.
feature_columns = ['age', 'bmi', 'children', 'sex_male', 'smoker_yes']
predictions = model.predict(processed_input[feature_columns])

In [69]:
# Attach the predictions to the processed input as a `charges_pred` column
processed_input = processed_input.assign(charges_pred=predictions)

In [75]:
# Reverse one-hot encoding to get original categorical values.
# inverse_dummy only reads `sex_male` and `smoker_yes`; the remaining columns of
# `processed_input` are ignored, so the result holds just `sex` and `smoker`.
original_categories = inverse_dummy(processed_input)
original_categories

Unnamed: 0,sex,smoker
0,female,yes
1,male,no
2,female,no


In [73]:
# Assemble the report: decoded categories, then the numeric inputs, then the
# predicted charges, placed side by side as columns
numeric_features = input_data[['age', 'bmi', 'children']]
predicted_charges = processed_input['charges_pred']
output = pd.concat(
    [original_categories, numeric_features, predicted_charges],
    axis=1,
)

# Display the final output
print(output)

      sex smoker  age    bmi  children  charges_pred
0  female    yes   19  27.90         0  18343.188935
1    male     no   18  33.77         1   4639.004567
2  female     no   28  33.00         3   7395.703839
