In [55]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

from xgboost import XGBClassifier

In [30]:
# Read both splits with the same settings; PassengerId becomes the row index
# so it is never treated as a feature.
train_data, test_data = (
    pd.read_csv(csv_name, index_col='PassengerId')
    for csv_name in ('train.csv', 'test.csv')
)

In [31]:
# Split the training frame into the target ('Survived') and the remaining
# predictor columns.
y_train = train_data['Survived']
X_train = train_data.drop(columns=['Survived'])

In [52]:
# Same split for the hold-out frame; this requires test_data to carry a
# 'Survived' column as well.
y_test = test_data['Survived']
X_test = test_data.drop(columns=['Survived'])

In [68]:
# Numeric features: fill missing values with the column mean, then standardise.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical features: fill missing values with the most frequent level, then
# one-hot encode. handle_unknown='ignore' encodes a level that was not seen
# during fit as all zeros instead of raising, so a rare category missing from
# a CV training fold (or new in the test data) cannot crash transform().
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('1hot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group to its pipeline; columns not listed here are dropped
# (ColumnTransformer's default remainder behaviour).
preprocessor = ColumnTransformer([
    ('numerical', num_pipeline, ['Age', 'SibSp', 'Parch', 'Fare']),
    ('categorical', cat_pipeline, ['Pclass', 'Sex', 'Embarked']),
])

# Full model: preprocessing followed by an XGBoost classifier with default
# hyper-parameters. The step name 'xgb' is what the 'xgb__*' grid-search keys
# refer to, so it must not be renamed.
model_xgb = Pipeline([
    ('preprocessor', preprocessor),
    ('xgb', XGBClassifier()),
])

In [45]:
# Fit the baseline pipeline (preprocessing + default XGBClassifier) on the
# full training set; this fitted model is the one used for the baseline
# test-set prediction.
model_xgb.fit(X_train, y_train)

In [46]:
# Baseline estimate: mean score over 5 cross-validation folds of the training
# data.
baseline_cv_scores = cross_val_score(model_xgb, X_train, y_train, cv=5)
baseline_cv_scores.mean()

0.8125980792166217

In [53]:
# Hold-out accuracy of the baseline model. accuracy_score's signature is
# (y_true, y_pred); the ground truth goes first so the call matches the
# documented order and the tuned-model evaluation later in the notebook.
# (Accuracy is symmetric, so the reported value is unchanged.)
baseline_test_pred = model_xgb.predict(X_test)
accuracy_score(y_test, baseline_test_pred)

0.7440191387559809

In [72]:
# Hyper-parameter grid for the 'xgb' pipeline step: 4 x 4 x 4 = 64 candidate
# combinations.
params = [
    {
        'xgb__max_depth': [2, 4, 6, 8],
        'xgb__learning_rate': [0.05, 0.1, 0.15, 0.2],
        'xgb__gamma': [0.05, 0.1, 0.15, 0.2],
    }
]

# Exhaustive search over the grid, scoring each candidate with 3-fold CV.
grd_search = GridSearchCV(estimator=model_xgb, param_grid=params, cv=3)

In [73]:
# Run the grid search on the training data (every candidate in `params` is
# evaluated with the cv setting given to GridSearchCV).
grd_search.fit(X_train, y_train)

In [74]:
# Show the parameter combination that achieved the best mean CV score.
grd_search.best_params_

{'xgb__gamma': 0.15, 'xgb__learning_rate': 0.15, 'xgb__max_depth': 6}

In [76]:
# Keep a handle on the pipeline configured with the winning hyper-parameters.
best_xgb_model = grd_search.best_estimator_

In [84]:
# Fit the tuned pipeline on the full training set.
# NOTE(review): GridSearchCV was created without overriding `refit`, whose
# documented default (True) already refits best_estimator_ on the whole
# training data, so this call looks redundant -- confirm and consider
# dropping this cell.
best_xgb_model.fit(X_train, y_train)

In [85]:
# Mean 3-fold CV score of the tuned pipeline on the training data.
# NOTE(review): the baseline CV score above used cv=5, so the two means are
# not computed on the same folds. This is also the data the hyper-parameters
# were selected on, so the estimate is likely optimistic.
tuned_cv_scores = cross_val_score(best_xgb_model, X_train, y_train, cv=3)
tuned_cv_scores.mean()

0.8271604938271606

In [86]:
# Hold-out accuracy of the tuned model (ground truth first, predictions second).
tuned_test_pred = best_xgb_model.predict(X_test)
accuracy_score(y_test, tuned_test_pred)

0.7535885167464115