  
# *final model*

# Please upload `test_2021.csv` and `training data.csv` from GitHub (https://github.com/Ahad-2004/fraud_detection) before running the cells below.

In [None]:
# Install required packages
!pip install flask flask-cors scikit-learn pandas numpy xgboost joblib imbalanced-learn lightgbm

# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
import xgboost as xgb
import lightgbm as lgb
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as ImbPipeline
import joblib
import warnings
warnings.filterwarnings('ignore')

def load_and_preprocess_data():
    """
    Read the training CSV and report basic facts about its target column.

    The file is read from '/content/training data.csv'. The last column of
    the frame is treated as the target; its value counts and its mean
    (printed as the fraud rate) are echoed to stdout.

    Returns:
        A ``(DataFrame, target_column_name)`` pair on success, or
        ``(None, None)`` when the CSV file does not exist.
    """
    print("Loading training data...")

    # Guard clause: a missing file is reported and signalled with (None, None).
    try:
        frame = pd.read_csv('/content/training data.csv')
    except FileNotFoundError:
        print("Training data file not found at /content/training data.csv")
        return None, None

    print(f"Loaded training dataset with shape: {frame.shape}")
    print(f"Columns: {list(frame.columns)}")

    # The right-most column is taken to be the label.
    target_column = frame.columns[-1]
    print(f"Identified target column: {target_column}")

    labels = frame[target_column]
    print(f"Target column unique values:\n{labels.value_counts()}")
    print(f"Fraud rate: {labels.mean():.3f}")

    return frame, target_column

def preprocess_data(df, target_column):
    """
    Convert the raw claims frame into a numeric feature matrix and a target.

    Processing steps, in order:
      1. Drop the 'claim_number' column when present.
      2. Replace 'claim_date' (when present) with numeric 'claim_year',
         'claim_month' and 'claim_day' columns; unparseable dates become NaT.
      3. Split off ``target_column`` as the target vector.
      4. Label-encode every object-dtype feature column (values are cast to
         ``str`` first, so missing values become the category 'nan').
      5. Coerce any remaining non-numeric column with ``pd.to_numeric``.
      6. Fill remaining NaN values with the column mean.

    Side effect: the fitted encoders are written to 'label_encoders.pkl' in
    the current working directory.

    Args:
        df: Raw claims DataFrame. It is not modified.
        target_column: Name of the column in ``df`` holding the label.

    Returns:
        Tuple ``(X, y, label_encoders)`` where ``X`` is the feature
        DataFrame, ``y`` the target Series and ``label_encoders`` a dict
        mapping each encoded column name to its fitted ``LabelEncoder``.
    """
    # Work on a copy: the date handling below assigns columns in place, which
    # would otherwise modify the caller's frame whenever no earlier drop()
    # had already produced a new object.
    df = df.copy()

    # Remove claim_number as it's likely an identifier
    if 'claim_number' in df.columns:
        df = df.drop('claim_number', axis=1)

    # Convert date column to numerical features
    if 'claim_date' in df.columns:
        df['claim_date'] = pd.to_datetime(df['claim_date'], errors='coerce')
        df['claim_year'] = df['claim_date'].dt.year
        df['claim_month'] = df['claim_date'].dt.month
        df['claim_day'] = df['claim_date'].dt.day
        df = df.drop('claim_date', axis=1)

    # Separate features and target
    X = df.drop([target_column], axis=1)
    y = df[target_column]

    print(f"Features shape: {X.shape}")
    print(f"Target shape: {y.shape}")

    # Identify categorical columns (non-numeric)
    categorical_columns = X.select_dtypes(include=['object']).columns
    label_encoders = {}

    print(f"Categorical columns to encode: {list(categorical_columns)}")

    # Encode categorical variables; each encoder is kept so the same mapping
    # can be replayed at prediction time.
    for col in categorical_columns:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col].astype(str))
        label_encoders[col] = le
        print(f"Encoded {col}: {le.classes_}")

    # Handle any remaining non-numeric columns by converting to numeric
    for col in X.columns:
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = pd.to_numeric(X[col], errors='coerce')

    # Fill any remaining NaN values with the per-column mean.
    # NOTE(review): the means are taken over every row passed in, i.e. before
    # any train/test split the caller performs afterwards.
    X = X.fillna(X.mean())

    # Save label encoders
    joblib.dump(label_encoders, 'label_encoders.pkl')

    return X, y, label_encoders

def train_best_model(X_train, y_train, X_test, y_test):
    """
    Grid-search an XGBoost classifier and report its hold-out performance.

    A 3-fold cross-validated grid search (scored by ROC-AUC) is run over the
    training data; the refitted best estimator is then evaluated on the test
    data and the metrics are printed.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels (0 = genuine, 1 = fraud).
        X_test: Hold-out feature matrix.
        y_test: Hold-out labels.

    Returns:
        The best estimator found by the grid search.
    """
    print("Training XGBoost model with hyperparameter tuning...")

    # Negative-to-positive ratio, handed to XGBoost to up-weight fraud rows.
    negatives = len(y_train[y_train == 0])
    positives = len(y_train[y_train == 1])
    imbalance_ratio = negatives / positives

    # Candidate hyperparameter values explored exhaustively by the search.
    search_space = {
        'n_estimators': [100, 200, 300],
        'max_depth': [6, 8, 10],
        'learning_rate': [0.05, 0.1, 0.15],
        'subsample': [0.8, 0.9, 1.0],
        'colsample_bytree': [0.8, 0.9, 1.0],
        'min_child_weight': [1, 3, 5]
    }

    base_estimator = xgb.XGBClassifier(
        random_state=42,
        eval_metric='auc',
        scale_pos_weight=imbalance_ratio
    )

    print("Performing hyperparameter tuning...")
    tuner = GridSearchCV(
        estimator=base_estimator,
        param_grid=search_space,
        scoring='roc_auc',
        cv=3,  # few folds to keep the search time manageable
        n_jobs=-1,
        verbose=1
    )
    tuner.fit(X_train, y_train)

    best_model = tuner.best_estimator_
    print(f"Best parameters: {tuner.best_params_}")

    # Hard labels for the threshold metrics, fraud-class scores for ROC-AUC.
    predicted = best_model.predict(X_test)
    fraud_scores = best_model.predict_proba(X_test)[:, 1]

    scores = {
        'accuracy': accuracy_score(y_test, predicted),
        'precision': precision_score(y_test, predicted),
        'recall': recall_score(y_test, predicted),
        'f1': f1_score(y_test, predicted),
        'roc_auc': roc_auc_score(y_test, fraud_scores),
    }

    print(f"Model Performance:")
    print(f"Accuracy: {scores['accuracy']:.4f}")
    print(f"Precision: {scores['precision']:.4f}")
    print(f"Recall: {scores['recall']:.4f}")
    print(f"F1-Score: {scores['f1']:.4f}")
    print(f"ROC-AUC Score: {scores['roc_auc']:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, predicted))

    return best_model

def train_with_smote(X_train, y_train, X_test, y_test):
    """
    Fit an XGBoost classifier on SMOTE-resampled training data.

    The training set is oversampled with SMOTE, a fixed-hyperparameter
    XGBoost model is fitted on the resampled data, and its performance on
    the (untouched) test data is printed.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels (0 = genuine, 1 = fraud).
        X_test: Hold-out feature matrix.
        y_test: Hold-out labels.

    Returns:
        The fitted XGBoost classifier.
    """
    print("Training with SMOTE for class imbalance handling...")

    # Oversample the training data only; the test data is left as-is.
    resampler = SMOTE(random_state=42)
    X_balanced, y_balanced = resampler.fit_resample(X_train, y_train)

    print(f"Original training set shape: {X_train.shape}")
    print(f"Balanced training set shape: {X_balanced.shape}")
    print(f"Original fraud rate: {y_train.mean():.3f}")
    print(f"Balanced fraud rate: {y_balanced.mean():.3f}")

    # Negative-to-positive ratio measured on the resampled labels.
    negatives = len(y_balanced[y_balanced == 0])
    positives = len(y_balanced[y_balanced == 1])
    imbalance_ratio = negatives / positives

    model = xgb.XGBClassifier(
        n_estimators=200,
        max_depth=8,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        eval_metric='auc',
        scale_pos_weight=imbalance_ratio
    )
    model.fit(X_balanced, y_balanced)

    # Hard labels for the threshold metrics, fraud-class scores for ROC-AUC.
    predicted = model.predict(X_test)
    fraud_scores = model.predict_proba(X_test)[:, 1]

    scores = {
        'accuracy': accuracy_score(y_test, predicted),
        'precision': precision_score(y_test, predicted),
        'recall': recall_score(y_test, predicted),
        'f1': f1_score(y_test, predicted),
        'roc_auc': roc_auc_score(y_test, fraud_scores),
    }

    print(f"SMOTE Model Performance:")
    print(f"Accuracy: {scores['accuracy']:.4f}")
    print(f"Precision: {scores['precision']:.4f}")
    print(f"Recall: {scores['recall']:.4f}")
    print(f"F1-Score: {scores['f1']:.4f}")
    print(f"ROC-AUC Score: {scores['roc_auc']:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, predicted))

    return model

def train_ensemble_model(X_train, y_train, X_test, y_test):
    """
    Train an XGBoost + LightGBM pair and evaluate their averaged prediction.

    Both models are fitted on the same training data with matching
    hyperparameters and the same positive-class weight. Their fraud
    probabilities on the test data are averaged and thresholded at 0.5,
    and the resulting metrics are printed.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels (0 = genuine, 1 = fraud).
        X_test: Hold-out feature matrix.
        y_test: Hold-out labels.

    Returns:
        Tuple ``(xgb_model, lgb_model, ensemble_pred_proba)``: the two fitted
        classifiers and the averaged fraud probability for each test row.
    """
    print("Training ensemble model (XGBoost + LightGBM)...")

    # Calculate scale_pos_weight for handling class imbalance
    scale_pos_weight = len(y_train[y_train == 0]) / len(y_train[y_train == 1])

    # Train XGBoost model
    xgb_model = xgb.XGBClassifier(
        n_estimators=200,
        max_depth=8,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        eval_metric='auc',
        scale_pos_weight=scale_pos_weight
    )
    xgb_model.fit(X_train, y_train)

    # Train LightGBM model
    lgb_model = lgb.LGBMClassifier(
        n_estimators=200,
        max_depth=8,
        learning_rate=0.1,
        subsample=0.8,
        # LightGBM only applies `subsample` when bagging is switched on with a
        # non-zero frequency; with the default of 0 the 0.8 above is ignored.
        subsample_freq=1,
        colsample_bytree=0.8,
        random_state=42,
        objective='binary',
        metric='auc',
        # Same class weighting as the XGBoost model, so the two averaged
        # probabilities are produced under the same treatment of imbalance.
        scale_pos_weight=scale_pos_weight
    )
    lgb_model.fit(X_train, y_train)

    # Get fraud-class probabilities from both models
    xgb_pred = xgb_model.predict_proba(X_test)[:, 1]
    lgb_pred = lgb_model.predict_proba(X_test)[:, 1]

    # Ensemble prediction (unweighted average, thresholded at 0.5)
    ensemble_pred_proba = (xgb_pred + lgb_pred) / 2
    ensemble_pred = (ensemble_pred_proba > 0.5).astype(int)

    # Evaluate the ensemble model
    accuracy = accuracy_score(y_test, ensemble_pred)
    precision = precision_score(y_test, ensemble_pred)
    recall = recall_score(y_test, ensemble_pred)
    f1 = f1_score(y_test, ensemble_pred)
    roc_auc = roc_auc_score(y_test, ensemble_pred_proba)

    print(f"Ensemble Model Performance:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"ROC-AUC Score: {roc_auc:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, ensemble_pred))

    # Return both models for later use
    return xgb_model, lgb_model, ensemble_pred_proba

def main():
    """
    Run the full training workflow and a smoke test on the external test file.

    Steps:
      1. Load and preprocess the training CSV (returns early if it is absent).
      2. Make a stratified 80/20 train/test split and standardize features.
      3. Train and print metrics for three variants: grid-searched XGBoost,
         XGBoost on SMOTE-resampled data, and an XGBoost + LightGBM ensemble.
      4. Persist the grid-searched XGBoost model and the scaler.
      5. Score '/content/test_2021.csv' with the saved model and print the
         first few predictions.

    Files written to the current working directory: 'scaler.pkl' and
    'fraud_detection_model.pkl' ('label_encoders.pkl' is written by
    ``preprocess_data``).
    """
    print("Starting Insurance Fraud Detection Model Training...")

    # Load and preprocess data
    df, target_column = load_and_preprocess_data()
    if df is None:
        return

    X, y, label_encoders = preprocess_data(df, target_column)

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"Training set shape: {X_train.shape}")
    print(f"Test set shape: {X_test.shape}")
    print(f"Training target distribution:\n{y_train.value_counts()}")

    # Scale the features (statistics are learned from the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Save the scaler with feature names
    scaler.feature_names_in_ = X.columns.tolist()
    joblib.dump(scaler, 'scaler.pkl')

    # Train the grid-searched XGBoost model; this is the one that gets saved.
    best_model = train_best_model(X_train_scaled, y_train, X_test_scaled, y_test)

    # The next two variants are trained for their printed comparison metrics.
    train_with_smote(X_train_scaled, y_train, X_test_scaled, y_test)
    train_ensemble_model(X_train_scaled, y_train, X_test_scaled, y_test)

    # Persist the grid-searched model. No automatic comparison between the
    # three variants is made here.
    joblib.dump(best_model, 'fraud_detection_model.pkl')
    print("\nModel training completed!")
    print("Best model saved as 'fraud_detection_model.pkl'")
    print("Scaler saved as 'scaler.pkl'")
    print("Label encoders saved as 'label_encoders.pkl'")

    # Test with a sample from your test data
    print("\nTesting with your test data...")
    try:
        test_df = pd.read_csv('/content/test_2021.csv')
    except FileNotFoundError:
        print("Test data file not found at /content/test_2021.csv")
        return

    print(f"Loaded test data with shape: {test_df.shape}")

    # Preprocess test data the same way as the training data
    if 'claim_number' in test_df.columns:
        test_df = test_df.drop('claim_number', axis=1)

    if 'claim_date' in test_df.columns:
        test_df['claim_date'] = pd.to_datetime(test_df['claim_date'], errors='coerce')
        test_df['claim_year'] = test_df['claim_date'].dt.year
        test_df['claim_month'] = test_df['claim_date'].dt.month
        test_df['claim_day'] = test_df['claim_date'].dt.day
        test_df = test_df.drop('claim_date', axis=1)

    # Apply label encoding to categorical columns
    for col, encoder in label_encoders.items():
        if col not in test_df.columns:
            continue
        known_labels = set(encoder.classes_)
        fallback_label = encoder.classes_[0]
        # Labels never seen during training are mapped to the first known one
        test_df[col] = test_df[col].apply(
            lambda x: x if str(x) in known_labels else fallback_label
        )
        test_df[col] = encoder.transform(test_df[col].astype(str))

    # Put the columns in training order. Features absent from the test file
    # and missing values are filled with the training-split mean held by the
    # scaler: a raw 0 would sit far from the data for most features, and the
    # model was trained on mean-imputed inputs.
    expected_features = list(scaler.feature_names_in_)
    training_means = pd.Series(scaler.mean_, index=expected_features)
    test_df = test_df.reindex(columns=expected_features).fillna(training_means)

    # Scale the test data
    test_scaled = scaler.transform(test_df)

    # Make predictions
    test_predictions = best_model.predict(test_scaled)
    test_probabilities = best_model.predict_proba(test_scaled)[:, 1]

    print(f"Test predictions: {test_predictions[:10]} (first 10)")
    print(f"Test probabilities: {test_probabilities[:10]} (first 10)")

    # Show some detailed results
    for i, (label, score) in enumerate(zip(test_predictions[:5], test_probabilities[:5])):
        print(f"Sample {i+1}: Prediction={label}, Probability={score:.3f}")

# Run the main training function
main()

In [None]:
# Install required packages
!pip install ipywidgets

import pandas as pd
import numpy as np
import joblib
from IPython.display import display, clear_output
import ipywidgets as widgets

# Load the trained model, scaler, and label encoders
# These three files are the ones written by the training cell. joblib.load
# unpickles arbitrary objects, so only load files you produced yourself.
try:
    model = joblib.load('fraud_detection_model.pkl')
    scaler = joblib.load('scaler.pkl')
    label_encoders = joblib.load('label_encoders.pkl')
    print("✅ Models loaded successfully!")
except Exception as e:
    # The error is only reported; any name whose load did not complete stays
    # undefined, so the prediction callbacks below will fail until it is fixed.
    print(f"❌ Error loading models: {e}")
    print("Make sure you have run the training script first and the model files exist.")

# Define categorical options
# NOTE(review): these lists are hard-coded; values not known to the fitted
# label encoders are replaced by the encoder's first class at prediction time.
gender_options = ['M', 'F']
marital_status_options = [0.0, 1.0]
living_status_options = ['Own', 'Rent']
claim_day_options = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
accident_site_options = ['Highway', 'Local', 'Parking Lot']
channel_options = ['Broker', 'Online', 'Phone']
vehicle_category_options = ['Compact', 'Large', 'Medium']
vehicle_color_options = ['black', 'blue', 'gray', 'other', 'red', 'silver', 'white']

# Create input widgets (one per model feature; the variable names match the
# feature names used as dictionary keys in the prediction callback)
age_of_driver = widgets.IntSlider(value=40, min=18, max=80, description='Age of Driver:')
gender = widgets.Dropdown(options=gender_options, value='M', description='Gender:')
marital_status = widgets.Dropdown(options=marital_status_options, value=1.0, description='Marital Status:')
safty_rating = widgets.IntSlider(value=75, min=1, max=100, description='Safety Rating:')
annual_income = widgets.IntText(value=35000, description='Annual Income ($):')
high_education_ind = widgets.Dropdown(options=[0, 1], value=1, description='High Education:')
address_change_ind = widgets.Dropdown(options=[0, 1], value=0, description='Address Change:')
living_status = widgets.Dropdown(options=living_status_options, value='Own', description='Living Status:')
zip_code = widgets.IntText(value=50001, description='ZIP Code:')
# Labelled "Day of Week" so it cannot be confused with the day-of-month
# slider below, which keeps the "Claim Day" label next to year and month.
claim_day_of_week = widgets.Dropdown(options=claim_day_options, value='Monday', description='Day of Week:')
accident_site = widgets.Dropdown(options=accident_site_options, value='Local', description='Accident Site:')
past_num_of_claims = widgets.IntSlider(value=0, min=0, max=10, description='Past Claims:')
witness_present_ind = widgets.Dropdown(options=[0.0, 1.0], value=1.0, description='Witness:')
liab_prct = widgets.FloatSlider(value=0.5, min=0.0, max=1.0, step=0.01, description='Liability %:')
channel = widgets.Dropdown(options=channel_options, value='Broker', description='Channel:')
policy_report_filed_ind = widgets.Dropdown(options=[0, 1], value=1, description='Policy Filed:')
claim_est_payout = widgets.IntText(value=5000, description='Claim Payout ($):')
age_of_vehicle = widgets.FloatSlider(value=5.0, min=0.0, max=30.0, step=0.1, description='Vehicle Age:')
vehicle_category = widgets.Dropdown(options=vehicle_category_options, value='Medium', description='Category:')
vehicle_price = widgets.IntText(value=20000, description='Vehicle Price ($):')
vehicle_color = widgets.Dropdown(options=vehicle_color_options, value='white', description='Color:')
vehicle_weight = widgets.FloatText(value=3000.0, description='Vehicle Weight (lbs):')
claim_year = widgets.IntSlider(value=2022, min=2015, max=2025, description='Claim Year:')
claim_month = widgets.IntSlider(value=6, min=1, max=12, description='Claim Month:')
claim_day = widgets.IntSlider(value=15, min=1, max=31, description='Claim Day:')

# Create prediction output area (shared by both button callbacks)
output = widgets.Output()

def predict_fraud(button):
    """
    Button callback: score the claim currently entered in the form widgets.

    Reads every input widget, replays the training-time encoding and scaling
    using the loaded ``label_encoders`` and ``scaler``, queries ``model`` and
    prints the verdict into the shared ``output`` area. Any exception raised
    while preparing or scoring the row is printed instead of propagated.

    Args:
        button: The clicked widget (supplied by ipywidgets; not used).
    """
    with output:
        clear_output()
        print("🔍 Making prediction...")

        # Snapshot of the form, keyed by model feature name
        claim = {
            "age_of_driver": age_of_driver.value,
            "gender": gender.value,
            "marital_status": float(marital_status.value),
            "safty_rating": safty_rating.value,
            "annual_income": float(annual_income.value),
            "high_education_ind": high_education_ind.value,
            "address_change_ind": address_change_ind.value,
            "living_status": living_status.value,
            "zip_code": zip_code.value,
            "claim_day_of_week": claim_day_of_week.value,
            "accident_site": accident_site.value,
            "past_num_of_claims": past_num_of_claims.value,
            "witness_present_ind": float(witness_present_ind.value),
            "liab_prct": float(liab_prct.value),
            "channel": channel.value,
            "policy_report_filed_ind": policy_report_filed_ind.value,
            "claim_est_payout": float(claim_est_payout.value),
            "age_of_vehicle": float(age_of_vehicle.value),
            "vehicle_category": vehicle_category.value,
            "vehicle_price": float(vehicle_price.value),
            "vehicle_color": vehicle_color.value,
            "vehicle_weight": float(vehicle_weight.value),
            "claim_year": claim_year.value,
            "claim_month": claim_month.value,
            "claim_day": claim_day.value
        }

        try:
            # Single-row frame holding the entered claim
            features = pd.DataFrame([claim])

            # Replay the training-time label encoding
            for column, encoder in label_encoders.items():
                if column not in features.columns:
                    continue
                # A label the encoder never saw falls back to its first class
                features[column] = features[column].apply(
                    lambda raw: raw if str(raw) in encoder.classes_ else encoder.classes_[0]
                )
                features[column] = encoder.transform(features[column].astype(str))

            # Any feature the form does not supply is filled with 0, then the
            # columns are put in the order the scaler was fitted with
            expected_features = scaler.feature_names_in_
            for absent in set(expected_features) - set(features.columns):
                features[absent] = 0
            features = features.reindex(columns=expected_features, fill_value=0)

            scaled = scaler.transform(features)

            prediction = model.predict(scaled)[0]
            probability = model.predict_proba(scaled)[0][1]  # fraud-class probability

            flagged = prediction == 1
            status = '⚠️ FRAUDULENT' if flagged else '✅ GENUINE'

            print("📊 Prediction Results:")
            print(f"Status: {status}")
            print(f"Probability of Fraud: {probability:.3f} ({probability*100:.1f}%)")
            print(f"Confidence: {(1-probability)*100:.1f}% for genuine, {probability*100:.1f}% for fraud")

            if not flagged:
                print("\n✅ This claim appears to be genuine.")
                print("✅ Recommendation: Standard processing can proceed")
            else:
                print("\n🚨 ALERT: This claim is flagged as potentially fraudulent!")
                print("💡 Recommendation: Investigate this claim further")

        except Exception as e:
            print(f"❌ Error making prediction: {e}")

# Button that scores the values currently entered in the form
predict_button = widgets.Button(description="🔍 Check for Fraud")
predict_button.on_click(predict_fraud)

# Form layout: two HTML headers, one horizontal row per group of inputs,
# then the button and the shared output area
form_items = [
    widgets.HTML("<h2>🚗 Insurance Fraud Detection System</h2>"),
    widgets.HTML("<p>Enter claim details below and click 'Check for Fraud' to get prediction</p>"),
    *(
        widgets.HBox(list(row))
        for row in (
            (age_of_driver, gender),
            (marital_status, safty_rating),
            (annual_income, high_education_ind),
            (address_change_ind, living_status),
            (zip_code, claim_day_of_week),
            (accident_site, past_num_of_claims),
            (witness_present_ind, liab_prct),
            (channel, policy_report_filed_ind),
            (claim_est_payout, age_of_vehicle),
            (vehicle_category, vehicle_price),
            (vehicle_color, vehicle_weight),
            (claim_year, claim_month, claim_day),
        )
    ),
    predict_button,
    output,
]

form = widgets.VBox(form_items)
print("🎯 Interactive Fraud Detection UI Ready:")
display(form)

# Create a sample test button
def test_with_sample(button):
    """
    Button callback: score one randomly chosen row of the external test file.

    A row is drawn at random from '/content/test_2021.csv', converted to the
    model's feature layout (date parts derived from the row's 'claim_date',
    label encoding, training-mean imputation, scaling) and scored with the
    loaded ``model``. Results are printed into the shared ``output`` area;
    any exception is printed instead of propagated.

    Args:
        button: The clicked widget (supplied by ipywidgets; not used).
    """
    with output:
        clear_output()
        print("Testing with sample data from your test dataset...")

        try:
            # Load test data
            test_df = pd.read_csv('/content/test_2021.csv')

            # Select a random sample
            sample_idx = np.random.randint(0, len(test_df))
            sample_row = test_df.iloc[sample_idx]

            print(f"Testing with sample {sample_idx+1} from test dataset:")
            print(f"Claim Number: {sample_row['claim_number']}")
            print(f"Claim Date: {sample_row['claim_date']}")
            print(f"Claim Payout: ${sample_row['claim_est_payout']}")

            # Derive the date features from the row's own claim date, exactly
            # as training does; fall back to 2021-01-01 only when the date
            # cannot be parsed.
            parsed_date = pd.to_datetime(sample_row['claim_date'], errors='coerce')
            if pd.isna(parsed_date):
                year, month, day = 2021, 1, 1
            else:
                year, month, day = parsed_date.year, parsed_date.month, parsed_date.day

            # Prepare data for prediction
            data = {
                "age_of_driver": int(sample_row['age_of_driver']),
                "gender": str(sample_row['gender']),
                "marital_status": float(sample_row['marital_status']),
                "safty_rating": int(sample_row['safty_rating']),
                "annual_income": float(sample_row['annual_income']),
                "high_education_ind": int(sample_row['high_education_ind']),
                "address_change_ind": int(sample_row['address_change_ind']),
                "living_status": str(sample_row['living_status']),
                "zip_code": int(sample_row['zip_code']),
                "claim_day_of_week": str(sample_row['claim_day_of_week']),
                "accident_site": str(sample_row['accident_site']),
                "past_num_of_claims": int(sample_row['past_num_of_claims']),
                "witness_present_ind": float(sample_row['witness_present_ind']),
                "liab_prct": float(sample_row['liab_prct']),
                "channel": str(sample_row['channel']),
                "policy_report_filed_ind": int(sample_row['policy_report_filed_ind']),
                "claim_est_payout": float(sample_row['claim_est_payout']),
                "age_of_vehicle": float(sample_row['age_of_vehicle']),
                "vehicle_category": str(sample_row['vehicle_category']),
                "vehicle_price": float(sample_row['vehicle_price']),
                "vehicle_color": str(sample_row['vehicle_color']),
                "vehicle_weight": float(sample_row['vehicle_weight']),
                "claim_year": year,
                "claim_month": month,
                "claim_day": day
            }

            # Create DataFrame from input
            input_df = pd.DataFrame([data])

            # Apply label encoding to categorical columns
            for col, encoder in label_encoders.items():
                if col in input_df.columns:
                    # Handle unseen labels by using the first label
                    input_df[col] = input_df[col].apply(
                        lambda x: x if str(x) in encoder.classes_ else encoder.classes_[0]
                    )
                    input_df[col] = encoder.transform(input_df[col].astype(str))

            # Put the columns in training order. Absent features and missing
            # values (test rows may contain NaN) are filled with the training
            # mean held by the scaler, matching the mean imputation applied
            # to the training data.
            expected_features = list(scaler.feature_names_in_)
            training_means = pd.Series(scaler.mean_, index=expected_features)
            input_df = input_df.reindex(columns=expected_features).fillna(training_means)

            # Scale the features
            input_scaled = scaler.transform(input_df)

            # Make prediction
            prediction = model.predict(input_scaled)[0]
            probability = model.predict_proba(input_scaled)[0][1]  # Probability of fraud

            print("\n📊 Prediction Results:")
            print(f"Status: {'⚠️ FRAUDULENT' if prediction == 1 else '✅ GENUINE'}")
            print(f"Probability of Fraud: {probability:.3f} ({probability*100:.1f}%)")
            print(f"Confidence: {(1-probability)*100:.1f}% for genuine, {probability*100:.1f}% for fraud")

            if prediction == 1:
                print("\n🚨 ALERT: This claim is flagged as potentially fraudulent!")
            else:
                print("\n✅ This claim appears to be genuine.")

        except Exception as e:
            print(f"❌ Error testing with sample: {e}")

# Button that scores a random row of the external test file
sample_button = widgets.Button(description="🎲 Test with Sample Data")
sample_button.on_click(test_with_sample)

# Same layout as the first form, with both buttons side by side above the
# shared output area
form_items_with_sample = [
    widgets.HTML("<h2>🚗 Insurance Fraud Detection System</h2>"),
    widgets.HTML("<p>Enter claim details below and click 'Check for Fraud' to get prediction</p>"),
    *(
        widgets.HBox(list(row))
        for row in (
            (age_of_driver, gender),
            (marital_status, safty_rating),
            (annual_income, high_education_ind),
            (address_change_ind, living_status),
            (zip_code, claim_day_of_week),
            (accident_site, past_num_of_claims),
            (witness_present_ind, liab_prct),
            (channel, policy_report_filed_ind),
            (claim_est_payout, age_of_vehicle),
            (vehicle_category, vehicle_price),
            (vehicle_color, vehicle_weight),
            (claim_year, claim_month, claim_day),
            (predict_button, sample_button),
        )
    ),
    output,
]

form_with_sample = widgets.VBox(form_items_with_sample)
print("🎯 Interactive Fraud Detection UI with Sample Testing:")
display(form_with_sample)

In [None]:
import joblib
from google.colab import files

# Send the three saved artifacts to the browser, model first
# (the model file is the grid-searched XGBoost classifier)
for artifact in ('fraud_detection_model.pkl', 'scaler.pkl', 'label_encoders.pkl'):
    files.download(artifact)

# Summary of what was downloaded
print("✅ Downloaded models:")
print("- fraud_detection_model.pkl (XGBoost model)")
print("- scaler.pkl (Feature scaler)")
print("- label_encoders.pkl (Categorical encoders)")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Downloaded models:
- fraud_detection_model.pkl (XGBoost model)
- scaler.pkl (Feature scaler)
- label_encoders.pkl (Categorical encoders)
