# Experiment 005: Final Ensemble (Majority Vote Top 3)

## Strategy
- Use majority vote of top 3 experiments (exp_000, exp_001, exp_002)
- Exclude exp_003 (XGBoost) which failed badly on LB
- The resulting ensemble differs from exp_000 (best LB) on only 2 predictions

## Submission History
- exp_000: LB 0.7799 (BEST)
- exp_001: LB 0.7727
- exp_002: LB 0.7703
- exp_003: LB 0.7584 (WORST - excluded)

In [None]:
import pandas as pd
import numpy as np

# Read the saved prediction files of the three experiments used in the ensemble.
# The files are read in order 000, 001, 002 and bound to one DataFrame each.
exp_000, exp_001, exp_002 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/home/code/submission_candidates/candidate_000.csv',
        '/home/code/submission_candidates/candidate_001.csv',
        '/home/code/submission_candidates/candidate_002.csv',
    )
)

# Report how often each label occurs in every experiment's 'Survived' column.
# The LB scores in the labels are fixed text, not computed here.
print("Prediction distributions:")
for label, predictions in (
    ("exp_000 (LB 0.7799)", exp_000),
    ("exp_001 (LB 0.7727)", exp_001),
    ("exp_002 (LB 0.7703)", exp_002),
):
    print(f"{label}: {predictions['Survived'].value_counts().to_dict()}")

In [None]:
# Create majority vote ensemble (top 3 only)

# The vote adds the three 'Survived' columns row by row, so every file must
# list the same passengers in the same order. Fail loudly on a mismatch rather
# than silently voting across different passengers.
# NOTE(review): assumes exp_001 and exp_002 carry a 'PassengerId' column like
# exp_000 does - confirm against the candidate files.
reference_ids = exp_000['PassengerId'].to_numpy()
for exp_name, exp_frame in (('exp_001', exp_001), ('exp_002', exp_002)):
    if not np.array_equal(reference_ids, exp_frame['PassengerId'].to_numpy()):
        raise ValueError(
            f"PassengerId column of {exp_name} does not match exp_000; "
            "cannot combine predictions row by row"
        )

# A row is predicted 1 when at least two of the three experiments vote 1
# (presumably 'Survived' holds 0/1 labels - verify against the inputs).
vote_sum = exp_000['Survived'] + exp_001['Survived'] + exp_002['Survived']
majority_vote = (vote_sum >= 2).astype(int)

# Derive the totals from the data instead of hard-coding the test-set size.
n_total = len(majority_vote)
n_agree = (majority_vote == exp_000['Survived']).sum()
n_differ = (majority_vote != exp_000['Survived']).sum()

print(f"\nMajority vote distribution: {majority_vote.value_counts().to_dict()}")
print(f"\nAgreement with exp_000 (best LB): {n_agree}/{n_total}")
print(f"Differences from exp_000: {n_differ}")

In [None]:
# Assemble the submission: passenger ids from exp_000 paired with the
# ensemble's majority-vote label.
submission_columns = {
    'PassengerId': exp_000['PassengerId'],
    'Survived': majority_vote,
}
submission = pd.DataFrame(submission_columns)

# Write the CSV without the DataFrame index column.
submission.to_csv('/home/submission/submission.csv', index=False)

# Confirm what was written: row count and label distribution.
n_rows = len(submission)
label_counts = submission['Survived'].value_counts().to_dict()
print(f"\nSubmission saved: {n_rows} predictions")
print(f"Distribution: {label_counts}")

In [None]:
# Summary
# Count the rows where the ensemble disagrees with exp_000, so the reported
# figure always reflects the current predictions instead of a hard-coded value.
n_changed = int((majority_vote != exp_000['Survived']).sum())

banner = "=" * 60
print(banner)
print("EXPERIMENT 005 SUMMARY: Majority Vote Ensemble (Top 3)")
print(banner)
print("\nEnsemble of: exp_000, exp_001, exp_002")
print("Excluded: exp_003 (XGBoost - failed badly on LB)")
print(f"\nChanges from exp_000 (best LB): {n_changed} predictions")
print("\nThis is a LOW-RISK ensemble that stays close to our best model.")
print("\nNOTE: Target of 1.0 is IMPOSSIBLE for Titanic.")
print("Best realistic scores are 80-82%.")
print("Our best LB is 0.7799 (77.99%).")
print(banner)