In [1]:
import numpy as np 
import pandas as pd 



# Locations of the three input CSVs in the Kaggle Titanic dataset directory.
train_filepath = '/kaggle/input/titanic/train.csv'
test_filepath = '/kaggle/input/titanic/test.csv'
submission_filepath = '/kaggle/input/titanic/gender_submission.csv'

# Read each file into its own DataFrame.
train_data = pd.read_csv(train_filepath)
test_data_unmerged = pd.read_csv(test_filepath)
submission = pd.read_csv(submission_filepath)

# Attach the columns of the submission file to the test rows, matching on
# PassengerId. The inner join keeps only ids that appear in both frames.
test_data = test_data_unmerged.merge(submission, how='inner', on='PassengerId')


## Making features and preparing data for the classification model

In [2]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer


# Columns the model is allowed to see, grouped by how they are preprocessed.
categorical_features = ['Sex', 'Pclass']
numeric_features = ['Age', 'Fare']

# Separate the target column from the predictors, for train and test alike.
y_train = train_data['Survived']
X_train = train_data.drop(columns=['Survived'])
y_test = test_data['Survived']
X_test = test_data.drop(columns=['Survived'])

# Per-group preprocessing:
#   'cat' - one-hot encode; categories unseen during fit are ignored at
#           transform time instead of raising.
#   'num' - forwarded unchanged.
column_steps = [
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ('num', 'passthrough', numeric_features),
]
preprocessor = ColumnTransformer(transformers=column_steps)



In [3]:
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline

# Gradient-boosted tree classifier with fixed hyperparameters.
# `use_label_encoder` is deliberately not passed: it is a deprecated XGBoost
# argument that newer releases no longer use and only warn about.
xgb_model = XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=5,
    eval_metric='logloss',
)

# Chain preprocessing and the classifier so that fit and predict apply exactly
# the same column handling. The step name 'preproscessing' is kept verbatim so
# that any lookup by step name elsewhere keeps working.
pipeline = Pipeline(
    steps=[
        ('preproscessing', preprocessor),
        ('model', xgb_model),
    ]
)

# Train on the training split, then predict a label for every test row.
pipeline.fit(X_train, y_train)

y_prediction = pipeline.predict(X_test)

In [4]:
from sklearn.metrics import accuracy_score

# Keep the result under its own name. Binding it to `accuracy_score` would
# replace the imported function with a number, and any later call to
# accuracy_score(...) would then raise a TypeError.
# NOTE(review): y_test is the 'Survived' column that was merged in from
# gender_submission.csv; on Kaggle that file is an example submission rather
# than ground truth, so confirm this score measures what is intended.
test_accuracy = accuracy_score(y_test, y_prediction)

print(test_accuracy)

0.8564593301435407


In [5]:
# Build the submission table: one row per test passenger with its predicted
# label. The id column is spelled 'PassengerId', exactly as in the input data,
# so the written file can be matched back to the test rows.
# Note: this rebinds `submission`, which until now held the frame read from
# gender_submission.csv.
submission = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],
    'Survived': y_prediction,
})

# Write without the DataFrame index so the file contains only the two columns.
submission.to_csv('submission.csv', index=False)

