In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import xgboost as xgb

In [4]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
test_passenger_ids = test_df["PassengerId"]

In [6]:
combined = pd.concat([train_df, test_df], sort=False)

In [8]:
combined['Age'] = combined['Age'].fillna(combined['Age'].median())
combined['Fare'] = combined['Fare'].fillna(combined['Fare'].median())
combined['Embarked'] = combined['Embarked'].fillna(combined['Embarked'].mode()[0])

# Handle missing values

In [10]:
combined['Title'] = combined['Name'].apply(lambda x: x.split(',')[1].split('.')[0].strip())
combined['FamilySize'] = combined['SibSp'] + combined['Parch'] + 1
combined['IsAlone'] = (combined['FamilySize'] == 1).astype(int)

In [12]:
combined.drop(['Cabin', 'Ticket', 'Name'], axis=1, inplace=True)

In [14]:
label_cols = ['Sex', 'Embarked', 'Title']
for col in label_cols:
    le = LabelEncoder()
    combined[col] = le.fit_transform(combined[col])

In [16]:
train = combined[:len(train_df)]
test = combined[len(train_df):]

In [18]:
X = train.drop(['Survived', 'PassengerId'], axis=1)
y = train['Survived']
X_test_final = test.drop(['Survived', 'PassengerId'], axis=1)

In [20]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [24]:
# Predict on validation set and check accuracy
val_predictions = model.predict(X_val)
accuracy = accuracy_score(y_val, val_predictions)
print(f"✅ Validation Accuracy: {accuracy:.4f}")


✅ Validation Accuracy: 0.8045


In [26]:
model.fit(X, y)
predictions = model.predict(X_test_final)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [28]:
submission = pd.DataFrame({
    "PassengerId": test_passenger_ids,
    "Survived": predictions
})

In [30]:
# Save to CSV
submission.to_csv("submission.csv", index=False)
print("✅ Submission file created: submission.csv")

✅ Submission file created: submission.csv
