In [1]:
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# ---------------------------------------------------------------------------
# Train an AdaBoost classifier to predict "Claim_Status" from the remaining
# columns of the claims CSV, report hold-out accuracy, and score one new claim.
# ---------------------------------------------------------------------------

# Load dataset
df = pd.read_csv("health_insurance_claims.csv")  # Update with actual dataset path

# Define features and labels: every column except the target is a feature.
X = df.drop(columns=["Claim_Status"])  # 'Claim_Status' should be the target variable
y = df["Claim_Status"]

# Remember the raw (pre-encoding) feature names so the new claim can be
# validated against them further down.
raw_feature_columns = X.columns

# One-hot encode categorical features; numeric columns pass through unchanged.
X = pd.get_dummies(X)

# Train-test split (80/20, fixed seed for reproducibility).
# NOTE(review): the split is not stratified, so class proportions in the train
# and test parts may differ from the full dataset - confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train AdaBoost model: 100 boosted depth-2 decision trees.
model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=2),
    n_estimators=100,
    learning_rate=0.1,
    random_state=42
)
model.fit(X_train, y_train)

# Predictions on test set
y_pred = model.predict(X_test)

# Accuracy on the held-out 20%
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Prediction for new claim
new_claim = pd.DataFrame([{
    "Age": 45,
    "BMI": 28.5,
    "Smoker": "Yes",
    "Claim_Amount": 5000,
    "Hospital_Visits": 2
}])  # Modify based on actual dataset features

# Training features that the new claim does not supply would silently become 0
# after the reindex below; surface that instead of predicting on made-up values.
absent_features = raw_feature_columns.difference(new_claim.columns)
if len(absent_features) > 0:
    warnings.warn(
        "New claim is missing feature(s) present in the training data; "
        f"their encoded columns will be filled with 0: {list(absent_features)}"
    )

# Convert categorical features
new_claim = pd.get_dummies(new_claim)

# Encoded columns the model never saw (unseen category, misspelt feature name,
# or a feature that is numeric in the training data but a string here) are
# dropped by the reindex below; warn so the prediction is not trusted blindly.
unmatched_columns = new_claim.columns.difference(X.columns)
if len(unmatched_columns) > 0:
    warnings.warn(
        "Encoded new-claim column(s) not seen during training will be ignored: "
        f"{list(unmatched_columns)}"
    )

# Align new data with training columns (same columns, same order; gaps -> 0)
new_claim = new_claim.reindex(columns=X.columns, fill_value=0)

# Predict claim status
predicted_status = model.predict(new_claim)[0]
print(f"Predicted Claim Status: {predicted_status}")



Model Accuracy: 0.75
Predicted Claim Status: Rejected
