In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [4]:
# Read the raw healthcare records from CSV into a DataFrame.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
data = pd.read_csv(
    filepath_or_buffer="C:/Shalinii/ml/archive (5)/healthcare_dataset.csv"
)

In [5]:
# Name of the column the model is trained to predict.
target = 'Billing Amount'

# Columns used as model inputs, one per line.
features = [
    'Age',
    'Gender',
    'Blood Type',
    'Medical Condition',
    'Admission Type',
    'Insurance Provider',
    'Room Number',
]

In [6]:
# Discard every row whose target value is missing; such rows cannot be used for fitting.
data = data.dropna(axis="index", how="any", subset=[target])

In [7]:
# Forward-fill the remaining gaps: each missing cell takes the value from the
# nearest preceding row that has one.
# `fillna(method='ffill')` is deprecated in recent pandas releases; `ffill()` is
# the supported spelling with the same result.
# NOTE(review): this carries values over from whatever row happens to precede the
# gap — confirm that is acceptable for these records.
data = data.ffill()

In [8]:
# Separate the model inputs (X) from the value to be predicted (y).
X, y = data[features], data[target]

In [9]:
# Columns that are standardized as numbers.
numerical_features = [
    'Age',
    'Room Number',
]

# Columns that are one-hot encoded.
categorical_features = [
    'Gender',
    'Blood Type',
    'Medical Condition',
    'Admission Type',
    'Insurance Provider',
]

In [10]:
# One-hot encode the categorical columns into a dense array, dropping the first
# level of each feature.
# NOTE(review): the encoder is fitted on every row, before any train/test split —
# confirm that is intended.
try:
    # scikit-learn >= 1.2 names the dense-output switch `sparse_output`; the old
    # `sparse` keyword was removed in 1.4 and raises TypeError there.
    encoder = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    # Older scikit-learn releases only understand the legacy keyword.
    encoder = OneHotEncoder(drop='first', sparse=False)
X_cat = encoder.fit_transform(X[categorical_features])

In [11]:
# Standardize the numeric columns with a scaler that is kept for reuse on new inputs.
# NOTE(review): the scaler is fitted on every row, before any train/test split —
# confirm that is intended.
scaler = StandardScaler()
X_num = scaler.fit(X[numerical_features]).transform(X[numerical_features])

In [12]:
import numpy as np

# Join the scaled numeric columns (first) and the one-hot columns (after) side by side.
X_processed = np.concatenate((X_num, X_cat), axis=1)

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_processed,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Random forest regressor with library-default hyperparameters and a fixed seed.
model = RandomForestRegressor(
    random_state=42,
)

In [15]:
# Train on the training partition; left as a bare expression so the fitted
# estimator is still shown as the cell output.
model.fit(X=X_train, y=y_train)

RandomForestRegressor(random_state=42)

In [16]:
# Predict billing amounts for the held-out rows.
y_pred = model.predict(X=X_test)

In [17]:
# Score the held-out predictions: mean squared error and coefficient of determination.
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

In [18]:
# Report both metrics, one per line.
print(
    f"Mean Squared Error: {mse}",
    f"R² Score: {r2}",
    sep="\n",
)

Mean Squared Error: 195452087.29836842
R² Score: 0.01738132022215777


In [19]:
# A single hand-written record to run through the fitted pipeline.
# Keys are the same column names the model inputs were selected by.
new_input = {
    'Age': 45,
    'Gender': 'Male',
    'Blood Type': 'A+',
    'Medical Condition': 'Diabetes',
    'Admission Type': 'Urgent',
    'Insurance Provider': 'Medicare',
    'Room Number': 320,
}

In [20]:
# Wrap the record in a one-row DataFrame so it can be indexed by column name.
new_input_df = pd.DataFrame(data=[new_input])

In [21]:
# Reuse the already-fitted scaler and encoder (transform only, no refitting) so the
# new record is prepared the same way as the training data.
new_input_num = scaler.transform(X=new_input_df[numerical_features])
new_input_cat = encoder.transform(X=new_input_df[categorical_features])

In [22]:
# Same column layout as the training matrix: numeric columns first, one-hot columns after.
new_input_processed = np.concatenate((new_input_num, new_input_cat), axis=1)

In [23]:
# Predict for the single prepared row; the result is an array, so print its only element.
new_prediction = model.predict(X=new_input_processed)
print(f"Predicted Billing Amount: {new_prediction[0]}")

Predicted Billing Amount: 26557.872707931918
