# Insurance Challenge 3 - Baseline Submission

This notebook provides a simple baseline for **Insurance Challenge 3: Fraud Detection**.

- **Goal**: Predict `FraudLabel` (0/1) for each insurance claim.
- **Metric**: Macro-F1 score (higher is better).

## Instructions:
1. **Replace API credentials** in the first cell with your team's API key and name
2. **Run all cells** to generate and submit baseline predictions
3. **Check the output** for your submission score

This baseline uses only tabular claim data with a simple Random Forest classifier.


In [None]:
# 1. Initialize Client and Load Data
!pip install xgboost


import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, IsolationForest
from agentds import BenchmarkClient

# Competition client -- replace both placeholder values with your own credentials.
client = BenchmarkClient(
    api_key="your-api-key-here",        # available on your team dashboard
    team_name="your-team-name-here",    # must match your team name exactly
)

# Locations of the claim tables on the shared PVC mount.
TRAIN_CLAIMS_CSV = "/home/jovyan/shared/datasets/Insurance/train_claims.csv"
TEST_CLAIMS_CSV = "/home/jovyan/shared/datasets/Insurance/test_claims.csv"

print("üìÇ Loading Insurance Challenge 3 data...")

# Read the train and test claim tables.
train_claims = pd.read_csv(TRAIN_CLAIMS_CSV)
test_claims = pd.read_csv(TEST_CLAIMS_CSV)

# Summarise what was loaded: shapes first, then column names, for both tables.
load_summary = [
    "‚úÖ Data loaded:",
    f"   Train claims: {train_claims.shape}",
    f"   Test claims: {test_claims.shape}",
    f"   Train columns: {list(train_claims.columns)}",
    f"   Test columns: {list(test_claims.columns)}",
]
print("\n".join(load_summary))


In [None]:
# 2. Tabular-Only Baseline Model and Predictions

# From data inspection - meaningful claim features available in test data:
# ReportedDamage, NumParties, ClaimType, FraudLabel (train only)

# Select meaningful features for baseline (excluding PolicyID - just an identifier)
claim_features = ['ReportedDamage', 'NumParties', 'ClaimType']
print(f"üìä Using claim features: {claim_features}")

# Columns actually passed to the model, in the same order for train and test.
model_features = ['ReportedDamage', 'NumParties', 'ClaimType_encoded']

# Learn the ClaimType vocabulary from the TRAINING data only. Reusing this one
# category list for both frames guarantees a given claim type maps to the same
# integer code in train and test; letting pandas infer categories separately
# per frame shifts the codes whenever the two sets of claim types differ.
claim_type_categories = pd.Categorical(train_claims['ClaimType']).categories


def _encode_claim_features(claims, categories):
    """Build the numeric model matrix for one claims table.

    Parameters
    ----------
    claims : pandas.DataFrame
        Claims table containing every column listed in ``claim_features``.
    categories : pandas.Index
        Fixed ClaimType vocabulary used to produce the integer codes.

    Returns
    -------
    pandas.DataFrame
        The ``model_features`` columns, with missing values replaced by 0.
        ClaimType values that are missing or not in ``categories`` get the
        code -1.
    """
    features = claims[claim_features].copy()
    features['ClaimType_encoded'] = pd.Categorical(
        features['ClaimType'], categories=categories
    ).codes
    return features[model_features].fillna(0)


# Prepare training data with categorical encoding
X_train = _encode_claim_features(train_claims, claim_type_categories)
y_train = train_claims['FraudLabel']  # Binary target (0/1)

# Prepare test data with the encoding learned from the training data
X_test = _encode_claim_features(test_claims, claim_type_categories)

# Train simple Random Forest baseline (fixed seed so reruns give the same model)
print("ü§ñ Training Random Forest classifier...")
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Create submission file (format: ClaimID,FraudLabel)
submission_df = pd.DataFrame({
    'ClaimID': test_claims['ClaimID'],
    'FraudLabel': predictions
})

# Save predictions
submission_df.to_csv("insurance_challenge3_predictions.csv", index=False)
print(f"‚úÖ Predictions saved: {submission_df.shape[0]} predictions")
print(f"   Preview: {submission_df.head(3)}")
print(f"   Fraud rate: {predictions.mean():.3f} ({predictions.sum()} fraudulent out of {len(predictions)})")


In [None]:
# 3. Submit Predictions

# Send the saved prediction file to the competition server and report the outcome.
print("üöÄ Submitting predictions...")

try:
    result = client.submit_prediction("Insurance", 3, "insurance_challenge3_predictions.csv")

    if not result['success']:
        # Rejected submission: show the validation errors if the response has any.
        print("‚ùå Submission failed!")
        failure_details = result.get('details', {})
        validation_errors = failure_details.get('validation_errors', 'Unknown error')
        print(f"   Error details: {validation_errors}")
    else:
        # Accepted submission: report score, metric name and validation status.
        print("‚úÖ Submission successful!")
        print(f"   üìä Score: {result['score']:.4f}")
        print(f"   üìè Metric: {result['metric_name']}")
        validation_status = 'Passed' if result['validation_passed'] else 'Failed'
        print(f"   ‚úîÔ∏è  Validation: {validation_status}")

except Exception as submit_error:
    # Any failure in the call or in reading the response lands here.
    print(f"üí• Submission error: {submit_error}")
    print("üîß Check your API key and team name are correct!")

# Closing hints, printed whether or not the submission succeeded.
print(
    "\nüéØ Next steps:",
    "   1. Try incorporating relevant information outside this table!",
    "   2. You've completed all Insurance challenges!",
    sep="\n",
)
