In [24]:
import pandas as pd
import numpy as np
import os

In [3]:
data = pd.read_csv(os.path.join(os.getcwd(), "titanic_data","train.csv"))
test = pd.read_csv(os.path.join(os.getcwd(), "titanic_data","test.csv"))
test_ids = test["PassengerId"]

In [4]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
def clean(data):
    data = data.drop(["Ticket", "Cabin", "Name", "PassengerId"], axis=1)
    
    cols = ["SibSp", "Parch", "Fare", "Age"]
    for col in cols:
        data[col].fillna(data[col].median(), inplace=True)
        
    data.Embarked.fillna("U", inplace=True)
    return data

In [6]:
data = clean(data)
test = clean(test)

In [7]:
data.head(5)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [10]:
from sklearn import preprocessing

In [11]:
label_encoder = preprocessing.LabelEncoder()

cols = ["Sex", "Embarked"]

for col in cols:
    data[col] = label_encoder.fit_transform(data[col])
    test[col] = label_encoder.transform(test[col])
    print(label_encoder.classes_)
    
data.head(5)

[0 1]
[0 1 2 3]


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2


In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

y = data["Survived"]
X = data.drop("Survived", axis=1)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
classifier = LogisticRegression(random_state=0, max_iter=1000).fit(X_train, y_train)

In [14]:
predictions = classifier.predict(X_val)

In [15]:
from sklearn.metrics import accuracy_score
accuracy_score(y_val, predictions)

0.8100558659217877

In [16]:
submission_preds = classifier.predict(test)

In [17]:
df = pd.DataFrame({"PassengerId":test_ids.values, 
                   "Survived":submission_preds,
                  })

In [18]:
df.to_csv("submission.csv", index=False)

In [19]:
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

In [48]:
random_forest_classifier = RandomForestClassifier(min_samples_split=50, random_state=42, max_depth=4)

In [65]:
y_train = np.array(y_train).reshape(-1,1)
X_train = np.array(X_train)
y_val = np.array(y_val).reshape(-1,1)
X_val = np.array(X_val)
print(X_train.shape)

(712, 7)


In [50]:
random_forest_classifier.fit(X_train, y_train)
predictions_forest = random_forest_classifier.predict(X_val)

  random_forest_classifier.fit(X_train, y_train)


In [51]:
accuracy_score(y_val, predictions_forest)

0.8156424581005587

In [40]:
from xgboost import XGBClassifier

In [41]:
xgb_model = XGBClassifier(random_state = 42)

In [70]:
# make a dictionary of hyparameter values to search --> this is called search space
search_space = {
    "n_estimators": [25, 50, 100, 200, 250],
    "max_depth": [2, 4, 6, 8],
    "gamma": [0.01, 0.1], # minimum amount of loss (info gain) required to do more splits
    "learning_rate": [0.001, 0.01, 0.1, 1]
}

In [71]:
from sklearn.model_selection import GridSearchCV

In [72]:
GS = GridSearchCV(estimator=xgb_model, # The model that we want to find the hyperparameters for
                  param_grid=search_space, # The search space that has the hyperparameters to try out
                  scoring = ["r2", "neg_root_mean_squared_error"], # This is how we will evaluate the models and get the best one
                  refit = "r2",
                  cv = 5, # k-fold cross validation
                  verbose = 4)

In [73]:
GS.fit(X_train, y_train)

Fitting 5 folds for each of 160 candidates, totalling 800 fits
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=2, n_estimators=25; neg_root_mean_squared_error: (test=-0.473) r2: (test=0.048) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=2, n_estimators=25; neg_root_mean_squared_error: (test=-0.410) r2: (test=0.286) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=2, n_estimators=25; neg_root_mean_squared_error: (test=-0.510) r2: (test=-0.114) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=2, n_estimators=25; neg_root_mean_squared_error: (test=-0.467) r2: (test=0.067) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=2, n_estimators=25; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.163) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=2, n_estimators=50; neg_root_mean_squared_error: (test=-0.473) r2: (test=0.048) total time=   0.0s
[CV 2/

[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=50; neg_root_mean_squared_error: (test=-0.442) r2: (test=0.167) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=50; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=50; neg_root_mean_squared_error: (test=-0.482) r2: (test=0.007) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=50; neg_root_mean_squared_error: (test=-0.428) r2: (test=0.217) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=50; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.163) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=100; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimato

[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=2, n_estimators=100; neg_root_mean_squared_error: (test=-0.496) r2: (test=-0.054) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=2, n_estimators=100; neg_root_mean_squared_error: (test=-0.482) r2: (test=0.007) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=2, n_estimators=100; neg_root_mean_squared_error: (test=-0.452) r2: (test=0.133) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=2, n_estimators=200; neg_root_mean_squared_error: (test=-0.450) r2: (test=0.137) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=2, n_estimators=200; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=2, n_estimators=200; neg_root_mean_squared_error: (test=-0.460) r2: (test=0.097) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=2, n_estimator

[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.392) r2: (test=0.345) total time=   0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.436) r2: (test=0.187) total time=   0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.420) r2: (test=0.247) total time=   0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.163) total time=   0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=6, n_estimators=250; neg_root_mean_squared_error: (test=-0.410) r2: (test=0.286) total time=   0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=6, n_estimators

[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=2, n_estimators=200; neg_root_mean_squared_error: (test=-0.411) r2: (test=0.283) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.392) r2: (test=0.345) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.410) r2: (test=0.286) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.428) r2: (test=0.217) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.420) r2: (test=0.247) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.394) r2: (test=0.343) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=4, n_estimators=25; ne

[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=250; neg_root_mean_squared_error: (test=-0.428) r2: (test=0.223) total time=   0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.435) r2: (test=0.197) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.435) r2: (test=0.197) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.436) r2: (test=0.187) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.411) r2: (test=0.278) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.420) r2: (test=0.253) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=8, n_estimators=50; neg_roo

[CV 4/5] END gamma=0.01, learning_rate=1, max_depth=4, n_estimators=25; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.157) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=1, max_depth=4, n_estimators=25; neg_root_mean_squared_error: (test=-0.452) r2: (test=0.133) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=4, n_estimators=50; neg_root_mean_squared_error: (test=-0.458) r2: (test=0.107) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=1, max_depth=4, n_estimators=50; neg_root_mean_squared_error: (test=-0.488) r2: (test=-0.012) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=1, max_depth=4, n_estimators=50; neg_root_mean_squared_error: (test=-0.452) r2: (test=0.127) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=1, max_depth=4, n_estimators=50; neg_root_mean_squared_error: (test=-0.467) r2: (test=0.067) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=1, max_depth=4, n_estimators=50; neg_root_mean_squared

[CV 5/5] END gamma=0.01, learning_rate=1, max_depth=8, n_estimators=50; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.163) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=8, n_estimators=100; neg_root_mean_squared_error: (test=-0.466) r2: (test=0.078) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=1, max_depth=8, n_estimators=100; neg_root_mean_squared_error: (test=-0.480) r2: (test=0.018) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=1, max_depth=8, n_estimators=100; neg_root_mean_squared_error: (test=-0.475) r2: (test=0.037) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=1, max_depth=8, n_estimators=100; neg_root_mean_squared_error: (test=-0.460) r2: (test=0.097) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=1, max_depth=8, n_estimators=100; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.163) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=8, n_estimators=200; neg_root_mean_sq

[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=4, n_estimators=100; neg_root_mean_squared_error: (test=-0.467) r2: (test=0.067) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=4, n_estimators=100; neg_root_mean_squared_error: (test=-0.460) r2: (test=0.104) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=4, n_estimators=200; neg_root_mean_squared_error: (test=-0.426) r2: (test=0.226) total time=   0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=4, n_estimators=200; neg_root_mean_squared_error: (test=-0.401) r2: (test=0.316) total time=   0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=4, n_estimators=200; neg_root_mean_squared_error: (test=-0.467) r2: (test=0.067) total time=   0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=4, n_estimators=200; neg_root_mean_squared_error: (test=-0.467) r2: (test=0.067) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=4, n_estimators

[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=8, n_estimators=200; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=8, n_estimators=200; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.157) total time=   0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=8, n_estimators=200; neg_root_mean_squared_error: (test=-0.411) r2: (test=0.278) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=8, n_estimators=200; neg_root_mean_squared_error: (test=-0.411) r2: (test=0.283) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=8, n_estimators=250; neg_root_mean_squared_error: (test=-0.442) r2: (test=0.167) total time=   0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=8, n_estimators=250; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=8, n_estimators

[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=4, n_estimators=250; neg_root_mean_squared_error: (test=-0.392) r2: (test=0.345) total time=   0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=4, n_estimators=250; neg_root_mean_squared_error: (test=-0.436) r2: (test=0.187) total time=   0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=4, n_estimators=250; neg_root_mean_squared_error: (test=-0.452) r2: (test=0.127) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=4, n_estimators=250; neg_root_mean_squared_error: (test=-0.402) r2: (test=0.313) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=25; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=25; neg_root_mean_squared_error: (test=-0.401) r2: (test=0.316) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=25; neg_

[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=2, n_estimators=50; neg_root_mean_squared_error: (test=-0.452) r2: (test=0.127) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=2, n_estimators=50; neg_root_mean_squared_error: (test=-0.452) r2: (test=0.133) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=2, n_estimators=100; neg_root_mean_squared_error: (test=-0.418) r2: (test=0.256) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=2, n_estimators=100; neg_root_mean_squared_error: (test=-0.401) r2: (test=0.316) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=2, n_estimators=100; neg_root_mean_squared_error: (test=-0.411) r2: (test=0.278) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=2, n_estimators=100; neg_root_mean_squared_error: (test=-0.428) r2: (test=0.217) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=2, n_estimators=100; neg_root_m

[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=100; neg_root_mean_squared_error: (test=-0.402) r2: (test=0.308) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=100; neg_root_mean_squared_error: (test=-0.411) r2: (test=0.283) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.435) r2: (test=0.197) total time=   0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.450) r2: (test=0.137) total time=   0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.452) r2: (test=0.127) total time=   0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=200; neg_root_mean_squared_error: (test=-0.411) r2: (test=0.278) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=200; neg_root

[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=2, n_estimators=200; neg_root_mean_squared_error: (test=-0.428) r2: (test=0.217) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=2, n_estimators=200; neg_root_mean_squared_error: (test=-0.482) r2: (test=0.007) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=1, max_depth=2, n_estimators=200; neg_root_mean_squared_error: (test=-0.436) r2: (test=0.193) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.480) r2: (test=0.018) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.442) r2: (test=0.167) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=2, n_estimators=250; neg_root_mean_squared_error: (test=-0.428) r2: (test=0.217) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=2, n_estimators=250; neg_root_mean_squared_

[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=6, n_estimators=250; neg_root_mean_squared_error: (test=-0.475) r2: (test=0.037) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=1, max_depth=6, n_estimators=250; neg_root_mean_squared_error: (test=-0.420) r2: (test=0.253) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.426) r2: (test=0.226) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.480) r2: (test=0.018) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.444) r2: (test=0.157) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=8, n_estimators=25; neg_root_mean_squared_error: (test=-0.489) r2: (test=-0.024) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=1, max_depth=8, n_estimators=25; neg_root_mean_squared_erro

GridSearchCV(cv=5,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     callbacks=None, colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None,
                                     early_stopping_rounds=None,
                                     enable_categorical=False, eval_metric=None,
                                     feature_types=None, gamma=0.1, gpu_id=None,
                                     grow_policy=None, importance_type=None,
                                     interaction_constraints=None,
                                     learning_rate=0.1, ma...
                                     max_leaves=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, 

In [74]:
print(GS.best_score_)
print(GS.best_params)

0.2877177898695812
{'gamma': 0.01, 'learning_rate': 0.1, 'max_depth': 2, 'n_estimators': 250}


In [75]:
xgb_model = XGBClassifier(random_state=42, 
                          gamma=0.01, 
                          learning_rate=0.1, 
                          max_depth=2, 
                          n_estimators=250)

In [76]:
xgb_model.fit(X_train, y_train, 
              early_stopping_rounds=10,
              eval_set=[(X_val, y_val)])

[0]	validation_0-logloss:0.65472
[1]	validation_0-logloss:0.62374
[2]	validation_0-logloss:0.59855
[3]	validation_0-logloss:0.57795
[4]	validation_0-logloss:0.55894
[5]	validation_0-logloss:0.54481
[6]	validation_0-logloss:0.53110
[7]	validation_0-logloss:0.52060
[8]	validation_0-logloss:0.51054
[9]	validation_0-logloss:0.50306
[10]	validation_0-logloss:0.49681
[11]	validation_0-logloss:0.49035
[12]	validation_0-logloss:0.48597
[13]	validation_0-logloss:0.48111
[14]	validation_0-logloss:0.47867
[15]	validation_0-logloss:0.47450
[16]	validation_0-logloss:0.47120
[17]	validation_0-logloss:0.46816
[18]	validation_0-logloss:0.46506
[19]	validation_0-logloss:0.46251
[20]	validation_0-logloss:0.46022
[21]	validation_0-logloss:0.45773
[22]	validation_0-logloss:0.45613
[23]	validation_0-logloss:0.45425
[24]	validation_0-logloss:0.45071
[25]	validation_0-logloss:0.44906
[26]	validation_0-logloss:0.44724
[27]	validation_0-logloss:0.44641
[28]	validation_0-logloss:0.44427
[29]	validation_0-loglos



[32]	validation_0-logloss:0.43767
[33]	validation_0-logloss:0.43605
[34]	validation_0-logloss:0.43535
[35]	validation_0-logloss:0.43431
[36]	validation_0-logloss:0.43305
[37]	validation_0-logloss:0.43251
[38]	validation_0-logloss:0.43175
[39]	validation_0-logloss:0.43164
[40]	validation_0-logloss:0.43105
[41]	validation_0-logloss:0.42832
[42]	validation_0-logloss:0.42757
[43]	validation_0-logloss:0.42665
[44]	validation_0-logloss:0.42623
[45]	validation_0-logloss:0.42625
[46]	validation_0-logloss:0.42563
[47]	validation_0-logloss:0.42483
[48]	validation_0-logloss:0.42440
[49]	validation_0-logloss:0.42405
[50]	validation_0-logloss:0.42338
[51]	validation_0-logloss:0.42352
[52]	validation_0-logloss:0.42296
[53]	validation_0-logloss:0.42083
[54]	validation_0-logloss:0.42050
[55]	validation_0-logloss:0.42067
[56]	validation_0-logloss:0.42058
[57]	validation_0-logloss:0.42018
[58]	validation_0-logloss:0.41957
[59]	validation_0-logloss:0.41926
[60]	validation_0-logloss:0.41948
[61]	validatio

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=0.01, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=2, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=250, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=42, ...)

In [77]:
xgb_model.best_iteration

96

In [78]:
print(f"Metrics train:\n\tAccuracy score: {accuracy_score(xgb_model.predict(X_train),y_train):.4f} \
\nMetrics test:\n\tAccuracy score: {accuracy_score(xgb_model.predict(X_val),y_val):.4f}")

Metrics train:
	Accuracy score: 0.8567 
Metrics test:
	Accuracy score: 0.8212


In [79]:
submission_preds = xgb_model.predict(test)

In [80]:
df = pd.DataFrame({"PassengerId":test_ids.values, 
                   "Survived":submission_preds,
                  })

In [81]:
df.to_csv("submission2.csv", index=False)