In [3]:
import pandas as pd
import joblib

In [5]:
# Restore the persisted estimator and the fitted one-hot encoder, in that order.
# NOTE: joblib artifacts are pickle-based, so only load files from a trusted source.
model, encoder = (
    joblib.load(artifact_path)
    for artifact_path in (
        "finalModel_fromGridSearchCV_RF.joblib",
        "one_hot_encoder.joblib",
    )
)

In [7]:
# Define a function to perform one-hot encoding
def preprocess_input(input_data):
    """Turn raw records into the feature frame used for prediction.

    The categorical columns 'sex' and 'smoker' are replaced by the 0/1
    indicator columns 'sex_male' and 'smoker_yes'.

    The indicators are computed by comparing each value against its positive
    category instead of calling ``pd.get_dummies(..., drop_first=True)``.
    ``get_dummies`` derives the categories from the batch it is given, so a
    batch holding a single category (for example one row, or only males) would
    have its only dummy column dropped and then back-filled with 0, silently
    producing wrong features.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Frame containing 'sex' and 'smoker' columns; 'age', 'bmi' and
        'children' are used when present. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with exactly the columns
        ['age', 'bmi', 'children', 'sex_male', 'smoker_yes'], in that order.

    Raises
    ------
    KeyError
        If 'sex' or 'smoker' is missing from ``input_data``.
    """
    required_columns = ['age', 'bmi', 'children', 'sex_male', 'smoker_yes']
    # Category that maps to 1 for each categorical column; anything else maps to 0.
    positive_category = {'sex': 'male', 'smoker': 'yes'}

    indicators = {
        f'{column}_{category}': (input_data[column] == category).astype(int)
        for column, category in positive_category.items()
    }
    # drop() returns a new frame, so the caller's DataFrame is left untouched.
    processed = input_data.drop(columns=list(positive_category)).assign(**indicators)

    # Ensure all required columns are present (missing ones default to 0).
    for col in required_columns:
        if col not in processed.columns:
            processed[col] = 0

    # Reorder to the expected layout and hand back an independent frame so
    # callers can add columns to the result freely.
    return processed[required_columns].copy()

In [13]:
# Sample records in their raw form: 'sex' and 'smoker' are still plain strings.
input_data = pd.DataFrame(
    [
        {'age': 19, 'bmi': 27.9, 'children': 0, 'sex': 'female', 'smoker': 'yes'},
        {'age': 18, 'bmi': 33.77, 'children': 1, 'sex': 'male', 'smoker': 'no'},
        {'age': 28, 'bmi': 33.0, 'children': 3, 'sex': 'female', 'smoker': 'no'},
    ],
    columns=['age', 'bmi', 'children', 'sex', 'smoker'],
)

In [15]:
# Preprocess the input data: preprocess_input one-hot encodes 'sex' and 'smoker'
# and returns the columns age, bmi, children, sex_male, smoker_yes in that order.
processed_input = preprocess_input(input_data)

In [19]:
# Show the encoded feature frame that is handed to model.predict below.
processed_input

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.9,0,0,1
1,18,33.77,1,1,0
2,28,33.0,3,0,0


In [47]:
# Make predictions: run the loaded model on the encoded feature frame.
predictions = model.predict(processed_input)

In [17]:
# Make predictions
# NOTE(review): this cell repeats the prediction cell directly above and carries an
# earlier execution count, so the notebook was run out of order. Consider deleting
# one of the two so Restart & Run All performs a single prediction step.
predictions = model.predict(processed_input)

In [49]:
# Add predictions to the processed input data.
# NOTE(review): this mutates processed_input in place. Once it has run, the frame
# also contains 'charges_pred', so re-running model.predict(processed_input)
# without re-running the preprocessing cell would pass that extra column to the
# model. Presumably the model only expects the five feature columns; verify.
processed_input['charges_pred'] = predictions

In [59]:
# Recover the original 'sex' / 'smoker' labels from the one-hot indicator columns.
dummydf = pd.DataFrame(processed_input)
one_hot_encoded_data = dummydf[['sex_male', 'smoker_yes']]
original_categories = encoder.inverse_transform(one_hot_encoded_data)

# Convert the decoded array to a DataFrame. inverse_transform returns a bare array,
# so the row labels of processed_input are re-attached explicitly; otherwise the
# frame would get a fresh 0..n-1 index and the index-aligned pd.concat(axis=1) in
# the next cell would misalign rows whenever the input has a non-default index.
categorical_cols = ['sex', 'smoker']
encoded_df = pd.DataFrame(
    original_categories,
    columns=categorical_cols,
    index=processed_input.index,
)
encoded_df

Unnamed: 0,sex,smoker
0,female,yes
1,male,no
2,female,no


In [61]:
# Assemble the report frame column-wise: decoded categorical labels first, then the
# untouched numeric features from the raw input, then the predicted charges.
output = pd.concat(
    [
        encoded_df,
        input_data[['age', 'bmi', 'children']],
        processed_input['charges_pred'],
    ],
    axis=1,
)

# Print the combined table as plain text
print(output)

      sex smoker  age    bmi  children  charges_pred
0  female    yes   19  27.90         0  18343.188935
1    male     no   18  33.77         1   4639.004567
2  female     no   28  33.00         3   7395.703839
