In [None]:
# Minimal reproducible example using sklearn and synthetic data
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Build a small synthetic dataset (swap in real data when available).
# The seeded generator keeps the notebook reproducible on a fresh kernel.
rng = np.random.RandomState(0)
n = 300
X = rng.normal(size=(n, 5))

# Binary, survival-like label: a noisy linear score dominated by X[:, 0]
# (with a smaller contribution from X[:, 1]), thresholded at zero.
noise = rng.normal(scale=0.6, size=n)
score = X[:, 0] + 0.3 * X[:, 1] + noise
y = (score > 0).astype(int)

# Feature columns f0..f4 plus the target column in a single frame.
feature_names = [f'f{i}' for i in range(5)]
df = pd.DataFrame(X, columns=feature_names).assign(survived=y)

df.head()

In [None]:
# Hold out 25% of the rows, fit a RandomForest on the remainder,
# and report accuracy plus per-class metrics on the held-out set.
y = df['survived']
X = df.drop(columns='survived')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Fixed random_state on both the split and the forest keeps results repeatable.
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
pred = clf.predict(X_test)

print('Accuracy:', accuracy_score(y_test, pred))
print(classification_report(y_test, pred))

## Next steps
- Replace synthetic data with the actual dataset (CSV)
- Add feature engineering, cross-validation & hyperparameter tuning
- Add explanations, charts and model interpretability (SHAP/LIME)