In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

# Load Data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Combine datasets for easier preprocessing
full_df = pd.concat([train_df, test_df], sort=False)

# Basic Cleaning and Feature Engineering
full_df['Sex'] = full_df['Sex'].map({'male': 0, 'female': 1})
full_df['Embarked'] = full_df['Embarked'].fillna('S')
full_df['Embarked'] = full_df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
full_df['Fare'] = full_df['Fare'].fillna(full_df['Fare'].median())
full_df['Age'] = full_df['Age'].fillna(full_df['Age'].median())

# Features to Use
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

# Split the data again
X_train = full_df.loc[full_df['Survived'].notnull(), features]
y_train = full_df.loc[full_df['Survived'].notnull(), 'Survived']
X_test = full_df.loc[full_df['Survived'].isnull(), features]

# Build the Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict
predictions = model.predict(X_test)

# Prepare Submission File
submission = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Survived': predictions
})

submission.to_csv('submission.csv', index=False)
print("Submission file 'submission.csv' saved!")


Submission file 'submission.csv' saved!


In [3]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Combine for consistent preprocessing
full_df = pd.concat([train_df, test_df], sort=False)

# Handle missing values and convert categories
full_df['Sex'] = full_df['Sex'].map({'male': 0, 'female': 1})
full_df['Embarked'] = full_df['Embarked'].fillna('S')
full_df['Embarked'] = full_df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
full_df['Fare'] = full_df['Fare'].fillna(full_df['Fare'].median())
full_df['Age'] = full_df['Age'].fillna(full_df['Age'].median())

# Features to use
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

# Prepare training and test sets
X_train = full_df.loc[full_df['Survived'].notnull(), features]
y_train = full_df.loc[full_df['Survived'].notnull(), 'Survived']
X_test  = full_df.loc[full_df['Survived'].isnull(), features]

# Create XGBoost model
model = xgb.XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric='logloss', random_state=42)

# Train the model
model.fit(X_train, y_train)

# Predict on test set
predictions = model.predict(X_test)

# Save predictions
submission = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Survived': predictions
})

submission.to_csv('xgb_submission.csv', index=False)
print(" XGBoost predictions saved to 'xgb_submission.csv'")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ XGBoost predictions saved to 'xgb_submission.csv'
