# Import Libraries

In [1]:
import re
import pandas as pd 
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split


In [2]:
# Remove pandas' cap on displayed rows so frames and series print in full.
pd.options.display.max_rows = None

# Load the Data

In [3]:
# Read the CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='new_data_v2.csv')

# Getting to Know the Data

In [4]:
# Print each column's non-null count and dtype to check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 889 entries, 0 to 888
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  889 non-null    int64  
 1   Survived     889 non-null    int64  
 2   Pclass       889 non-null    int64  
 3   Sex          889 non-null    float64
 4   Age          889 non-null    float64
 5   SibSp        889 non-null    int64  
 6   Parch        889 non-null    int64  
 7   Fare         889 non-null    float64
 8   Embarked     889 non-null    float64
dtypes: float64(4), int64(5)
memory usage: 62.6 KB


Notes:

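The `info()` output above shows 889 non-null values in every one of the nine columns, so this file contains no missing data. The gaps in **Age**, **Cabin** and **Embarked** found in the raw Titanic data appear to have been handled before `new_data_v2.csv` was written (note that **Cabin** is no longer a column, and **Sex** and **Embarked** are already numeric).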

# Setting up and Using Several Machine Learning Methods

In [5]:
# Keep every column except the prediction target ('Survived') and 'PassengerId'.
features = [col for col in data.columns if col not in ('Survived', 'PassengerId')]

In [6]:
# Report how many feature columns were kept.
print(f"Total Number of Features: {len(features)}")

Total Number of Features: 7


In [7]:
# Separate the model inputs (x) from the target labels (y).
x, y = data[features], data['Survived']

In [8]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split identical between runs.
x_train, x_valid, y_train, y_valid = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=42,
)

The success rate — the fraction of Titanic passengers in the validation set whose survival is predicted correctly — will be used to judge the quality of the ML model.

In [9]:
def success_rate(preds, y_valid):
    """Return the fraction of predictions that equal their true label.

    Predictions and labels are compared pairwise in iteration order; pairing
    stops at the shorter of the two inputs.

    Args:
        preds: Sized iterable of predicted labels.
        y_valid: Iterable of true labels.

    Returns:
        Number of matching pairs divided by ``len(preds)``.

    Raises:
        ZeroDivisionError: If ``preds`` is empty.
    """
    matches = sum(1 for guess, truth in zip(preds, y_valid) if guess == truth)
    return matches / len(preds)

## Using the Gradient Boosting Classifier

In [10]:
from sklearn.ensemble import GradientBoostingClassifier

In [18]:
# NOTE(review): learning_rate, n_estimators and subsample are all points on the
# grid searched by `booster` further down; presumably they are the best
# combination it reported — confirm against that cell's "Best Results" line.
GB_model = GradientBoostingClassifier(
    learning_rate=0.12689473684210525,
    n_estimators=250,
    subsample=0.12,
    random_state=42,
).fit(x_train, y_train)  # fit() returns the fitted estimator itself
GB_preds = GB_model.predict(x_valid)

In [19]:
# Report the validation success rate as a whole-number percentage.
print(f"The Success Rate of the Gradient Boosting Classifier is {success_rate(GB_preds, y_valid)*100:0.0f}%.")

The Success Rate of the Gradient Boosting Classifier is 87%.


In [13]:
def booster(xs, ys, zs, x_train, y_train, x_valid, y_valid, verbose=True):
    """Grid-search GradientBoostingClassifier settings on a validation split.

    One model (``random_state=42``) is fitted for every combination of
    learning rate, subsample fraction and estimator count. Each model is
    scored with ``success_rate`` on the validation data, and every
    combination that ties for the best score is printed.

    Args:
        xs: Re-iterable collection of ``learning_rate`` values.
        ys: Re-iterable collection of ``subsample`` values.
        zs: Re-iterable collection of ``n_estimators`` values; each one is
            converted with ``int()`` before being passed to the model.
        x_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        x_valid: Validation features passed to ``predict``.
        y_valid: Validation labels the predictions are scored against.
        verbose: When True (the default), print one
            ``score learning_rate subsample n_estimators`` line per
            combination. Pass False to print only the best results.

    Returns:
        None. Results are only printed.
    """
    results = []
    for learning_rate in xs:
        for subsample in ys:
            for n_estimators in zs:
                model = GradientBoostingClassifier(
                    learning_rate=learning_rate,
                    n_estimators=int(n_estimators),
                    subsample=subsample,
                    random_state=42,
                )
                model.fit(x_train, y_train)
                # Score once and reuse the value for both the log line and
                # the stored result.
                score = success_rate(model.predict(x_valid), y_valid)
                if verbose:
                    print(score, learning_rate, subsample, n_estimators)
                results.append((score, learning_rate, subsample, n_estimators))

    # An empty grid leaves nothing to rank; max() would raise on it.
    if not results:
        print("No parameter combinations to evaluate.")
        return

    best_score = max(entry[0] for entry in results)
    for score, learning_rate, subsample, n_estimators in results:
        if score == best_score:
            # success_rate returns a fraction, so scale it before adding '%'.
            print(f"Best Results: SR: {score * 100:.1f}%, LR: {learning_rate}, SS: {subsample}, N: {n_estimators} ")

In [14]:
# Candidate values for the grid search in `booster`: learning rates (xs),
# subsample fractions (ys) and estimator counts (zs).
xs = list(np.linspace(start=0.001, stop=0.3, num=20))
ys = list(np.linspace(start=0.01, stop=1, num=10))
zs = list(np.linspace(start=50, stop=500, num=10))

In [15]:
# Exhaustive search: 20 x 10 x 10 = 2000 combinations, one model fit each.
# Expect a long run and one printed line per combination.
booster(xs, ys, zs, x_train, y_train, x_valid, y_valid)

0.6123595505617978 0.001 0.01 50.0
0.6123595505617978 0.001 0.01 100.0
0.6123595505617978 0.001 0.01 150.0
0.6123595505617978 0.001 0.01 200.0
0.6123595505617978 0.001 0.01 250.0
0.6123595505617978 0.001 0.01 300.0
0.6292134831460674 0.001 0.01 350.0
0.6685393258426966 0.001 0.01 400.0
0.7191011235955056 0.001 0.01 450.0
0.7359550561797753 0.001 0.01 500.0
0.6123595505617978 0.001 0.12 50.0
0.6123595505617978 0.001 0.12 100.0
0.6123595505617978 0.001 0.12 150.0
0.6123595505617978 0.001 0.12 200.0
0.7303370786516854 0.001 0.12 250.0
0.7921348314606742 0.001 0.12 300.0
0.8033707865168539 0.001 0.12 350.0
0.8033707865168539 0.001 0.12 400.0
0.8033707865168539 0.001 0.12 450.0
0.8033707865168539 0.001 0.12 500.0
0.6123595505617978 0.001 0.23 50.0
0.6123595505617978 0.001 0.23 100.0
0.6123595505617978 0.001 0.23 150.0
0.6123595505617978 0.001 0.23 200.0
0.7584269662921348 0.001 0.23 250.0
0.8033707865168539 0.001 0.23 300.0
0.8033707865168539 0.001 0.23 350.0
0.8033707865168539 0.001 0.23 4

0.8258426966292135 0.016736842105263158 1.0 200.0
0.8258426966292135 0.016736842105263158 1.0 250.0
0.8202247191011236 0.016736842105263158 1.0 300.0
0.8314606741573034 0.016736842105263158 1.0 350.0
0.8146067415730337 0.016736842105263158 1.0 400.0
0.8202247191011236 0.016736842105263158 1.0 450.0
0.8258426966292135 0.016736842105263158 1.0 500.0
0.7584269662921348 0.032473684210526314 0.01 50.0
0.7640449438202247 0.032473684210526314 0.01 100.0
0.7808988764044944 0.032473684210526314 0.01 150.0
0.7921348314606742 0.032473684210526314 0.01 200.0
0.7865168539325843 0.032473684210526314 0.01 250.0
0.7752808988764045 0.032473684210526314 0.01 300.0
0.7584269662921348 0.032473684210526314 0.01 350.0
0.7808988764044944 0.032473684210526314 0.01 400.0
0.8033707865168539 0.032473684210526314 0.01 450.0
0.7078651685393258 0.032473684210526314 0.01 500.0
0.8089887640449438 0.032473684210526314 0.12 50.0
0.8202247191011236 0.032473684210526314 0.12 100.0
0.8258426966292135 0.032473684210526314 

0.8314606741573034 0.04821052631578947 0.56 350.0
0.8202247191011236 0.04821052631578947 0.56 400.0
0.8146067415730337 0.04821052631578947 0.56 450.0
0.8202247191011236 0.04821052631578947 0.56 500.0
0.8258426966292135 0.04821052631578947 0.67 50.0
0.8146067415730337 0.04821052631578947 0.67 100.0
0.8370786516853933 0.04821052631578947 0.67 150.0
0.8258426966292135 0.04821052631578947 0.67 200.0
0.8258426966292135 0.04821052631578947 0.67 250.0
0.8258426966292135 0.04821052631578947 0.67 300.0
0.8258426966292135 0.04821052631578947 0.67 350.0
0.8314606741573034 0.04821052631578947 0.67 400.0
0.8314606741573034 0.04821052631578947 0.67 450.0
0.8314606741573034 0.04821052631578947 0.67 500.0
0.8202247191011236 0.04821052631578947 0.78 50.0
0.8146067415730337 0.04821052631578947 0.78 100.0
0.8258426966292135 0.04821052631578947 0.78 150.0
0.8370786516853933 0.04821052631578947 0.78 200.0
0.8258426966292135 0.04821052631578947 0.78 250.0
0.8370786516853933 0.04821052631578947 0.78 300.0
0.

0.8539325842696629 0.07968421052631579 0.23 200.0
0.8426966292134831 0.07968421052631579 0.23 250.0
0.8426966292134831 0.07968421052631579 0.23 300.0
0.848314606741573 0.07968421052631579 0.23 350.0
0.8370786516853933 0.07968421052631579 0.23 400.0
0.8314606741573034 0.07968421052631579 0.23 450.0
0.8202247191011236 0.07968421052631579 0.23 500.0
0.8033707865168539 0.07968421052631579 0.34 50.0
0.8089887640449438 0.07968421052631579 0.34 100.0
0.8089887640449438 0.07968421052631579 0.34 150.0
0.8202247191011236 0.07968421052631579 0.34 200.0
0.8314606741573034 0.07968421052631579 0.34 250.0
0.8314606741573034 0.07968421052631579 0.34 300.0
0.8370786516853933 0.07968421052631579 0.34 350.0
0.8258426966292135 0.07968421052631579 0.34 400.0
0.8202247191011236 0.07968421052631579 0.34 450.0
0.8146067415730337 0.07968421052631579 0.34 500.0
0.8089887640449438 0.07968421052631579 0.45 50.0
0.8146067415730337 0.07968421052631579 0.45 100.0
0.8146067415730337 0.07968421052631579 0.45 150.0
0.8

0.8033707865168539 0.09542105263157893 0.89 450.0
0.8033707865168539 0.09542105263157893 0.89 500.0
0.8258426966292135 0.09542105263157893 1.0 50.0
0.8258426966292135 0.09542105263157893 1.0 100.0
0.8202247191011236 0.09542105263157893 1.0 150.0
0.8258426966292135 0.09542105263157893 1.0 200.0
0.8258426966292135 0.09542105263157893 1.0 250.0
0.8314606741573034 0.09542105263157893 1.0 300.0
0.8258426966292135 0.09542105263157893 1.0 350.0
0.8202247191011236 0.09542105263157893 1.0 400.0
0.8089887640449438 0.09542105263157893 1.0 450.0
0.8033707865168539 0.09542105263157893 1.0 500.0
0.5955056179775281 0.1111578947368421 0.01 50.0
0.6123595505617978 0.1111578947368421 0.01 100.0
0.6123595505617978 0.1111578947368421 0.01 150.0
0.6123595505617978 0.1111578947368421 0.01 200.0
0.6123595505617978 0.1111578947368421 0.01 250.0
0.6123595505617978 0.1111578947368421 0.01 300.0
0.6123595505617978 0.1111578947368421 0.01 350.0
0.6123595505617978 0.1111578947368421 0.01 400.0
0.6123595505617978 0

0.8202247191011236 0.12689473684210525 0.56 300.0
0.8146067415730337 0.12689473684210525 0.56 350.0
0.8089887640449438 0.12689473684210525 0.56 400.0
0.8089887640449438 0.12689473684210525 0.56 450.0
0.8033707865168539 0.12689473684210525 0.56 500.0
0.8202247191011236 0.12689473684210525 0.67 50.0
0.8258426966292135 0.12689473684210525 0.67 100.0
0.8146067415730337 0.12689473684210525 0.67 150.0
0.8146067415730337 0.12689473684210525 0.67 200.0
0.8033707865168539 0.12689473684210525 0.67 250.0
0.8202247191011236 0.12689473684210525 0.67 300.0
0.8146067415730337 0.12689473684210525 0.67 350.0
0.8089887640449438 0.12689473684210525 0.67 400.0
0.8033707865168539 0.12689473684210525 0.67 450.0
0.7921348314606742 0.12689473684210525 0.67 500.0
0.8146067415730337 0.12689473684210525 0.78 50.0
0.8146067415730337 0.12689473684210525 0.78 100.0
0.8258426966292135 0.12689473684210525 0.78 150.0
0.8146067415730337 0.12689473684210525 0.78 200.0
0.8033707865168539 0.12689473684210525 0.78 250.0
0.

0.8202247191011236 0.15836842105263157 0.23 200.0
0.8258426966292135 0.15836842105263157 0.23 250.0
0.8146067415730337 0.15836842105263157 0.23 300.0
0.8258426966292135 0.15836842105263157 0.23 350.0
0.7921348314606742 0.15836842105263157 0.23 400.0
0.797752808988764 0.15836842105263157 0.23 450.0
0.797752808988764 0.15836842105263157 0.23 500.0
0.8258426966292135 0.15836842105263157 0.34 50.0
0.8258426966292135 0.15836842105263157 0.34 100.0
0.8089887640449438 0.15836842105263157 0.34 150.0
0.8089887640449438 0.15836842105263157 0.34 200.0
0.8202247191011236 0.15836842105263157 0.34 250.0
0.8258426966292135 0.15836842105263157 0.34 300.0
0.8314606741573034 0.15836842105263157 0.34 350.0
0.8089887640449438 0.15836842105263157 0.34 400.0
0.8146067415730337 0.15836842105263157 0.34 450.0
0.8258426966292135 0.15836842105263157 0.34 500.0
0.8089887640449438 0.15836842105263157 0.45 50.0
0.797752808988764 0.15836842105263157 0.45 100.0
0.8202247191011236 0.15836842105263157 0.45 150.0
0.837

0.797752808988764 0.17410526315789474 0.89 450.0
0.8033707865168539 0.17410526315789474 0.89 500.0
0.8202247191011236 0.17410526315789474 1.0 50.0
0.8202247191011236 0.17410526315789474 1.0 100.0
0.8089887640449438 0.17410526315789474 1.0 150.0
0.8314606741573034 0.17410526315789474 1.0 200.0
0.8202247191011236 0.17410526315789474 1.0 250.0
0.8258426966292135 0.17410526315789474 1.0 300.0
0.8370786516853933 0.17410526315789474 1.0 350.0
0.8146067415730337 0.17410526315789474 1.0 400.0
0.8258426966292135 0.17410526315789474 1.0 450.0
0.8202247191011236 0.17410526315789474 1.0 500.0
0.5617977528089888 0.18984210526315787 0.01 50.0
0.4943820224719101 0.18984210526315787 0.01 100.0
0.4943820224719101 0.18984210526315787 0.01 150.0
0.4943820224719101 0.18984210526315787 0.01 200.0
0.4943820224719101 0.18984210526315787 0.01 250.0
0.4943820224719101 0.18984210526315787 0.01 300.0
0.4943820224719101 0.18984210526315787 0.01 350.0
0.4943820224719101 0.18984210526315787 0.01 400.0
0.49438202247

0.8089887640449438 0.20557894736842103 0.56 200.0
0.8258426966292135 0.20557894736842103 0.56 250.0
0.7921348314606742 0.20557894736842103 0.56 300.0
0.797752808988764 0.20557894736842103 0.56 350.0
0.7921348314606742 0.20557894736842103 0.56 400.0
0.7921348314606742 0.20557894736842103 0.56 450.0
0.7865168539325843 0.20557894736842103 0.56 500.0
0.8033707865168539 0.20557894736842103 0.67 50.0
0.8258426966292135 0.20557894736842103 0.67 100.0
0.8314606741573034 0.20557894736842103 0.67 150.0
0.8202247191011236 0.20557894736842103 0.67 200.0
0.8146067415730337 0.20557894736842103 0.67 250.0
0.8033707865168539 0.20557894736842103 0.67 300.0
0.797752808988764 0.20557894736842103 0.67 350.0
0.7921348314606742 0.20557894736842103 0.67 400.0
0.8033707865168539 0.20557894736842103 0.67 450.0
0.7921348314606742 0.20557894736842103 0.67 500.0
0.8202247191011236 0.20557894736842103 0.78 50.0
0.8089887640449438 0.20557894736842103 0.78 100.0
0.8146067415730337 0.20557894736842103 0.78 150.0
0.82

0.7640449438202247 0.23705263157894735 0.23 200.0
0.7303370786516854 0.23705263157894735 0.23 250.0
0.7078651685393258 0.23705263157894735 0.23 300.0
0.6797752808988764 0.23705263157894735 0.23 350.0
0.6797752808988764 0.23705263157894735 0.23 400.0
0.702247191011236 0.23705263157894735 0.23 450.0
0.449438202247191 0.23705263157894735 0.23 500.0
0.8258426966292135 0.23705263157894735 0.34 50.0
0.797752808988764 0.23705263157894735 0.34 100.0
0.8089887640449438 0.23705263157894735 0.34 150.0
0.8146067415730337 0.23705263157894735 0.34 200.0
0.8089887640449438 0.23705263157894735 0.34 250.0
0.797752808988764 0.23705263157894735 0.34 300.0
0.8033707865168539 0.23705263157894735 0.34 350.0
0.8033707865168539 0.23705263157894735 0.34 400.0
0.8033707865168539 0.23705263157894735 0.34 450.0
0.7752808988764045 0.23705263157894735 0.34 500.0
0.7921348314606742 0.23705263157894735 0.45 50.0
0.8314606741573034 0.23705263157894735 0.45 100.0
0.8314606741573034 0.23705263157894735 0.45 150.0
0.8258

0.8089887640449438 0.2527894736842105 1.0 200.0
0.8089887640449438 0.2527894736842105 1.0 250.0
0.8146067415730337 0.2527894736842105 1.0 300.0
0.8089887640449438 0.2527894736842105 1.0 350.0
0.8089887640449438 0.2527894736842105 1.0 400.0
0.797752808988764 0.2527894736842105 1.0 450.0
0.8033707865168539 0.2527894736842105 1.0 500.0
0.43820224719101125 0.26852631578947367 0.01 50.0
0.43820224719101125 0.26852631578947367 0.01 100.0
0.38764044943820225 0.26852631578947367 0.01 150.0
0.38764044943820225 0.26852631578947367 0.01 200.0
0.38764044943820225 0.26852631578947367 0.01 250.0
0.38764044943820225 0.26852631578947367 0.01 300.0
0.38764044943820225 0.26852631578947367 0.01 350.0
0.38764044943820225 0.26852631578947367 0.01 400.0
0.38764044943820225 0.26852631578947367 0.01 450.0
0.38764044943820225 0.26852631578947367 0.01 500.0
0.7752808988764045 0.26852631578947367 0.12 50.0
0.6853932584269663 0.26852631578947367 0.12 100.0
0.4101123595505618 0.26852631578947367 0.12 150.0
0.41011

0.797752808988764 0.28426315789473683 0.56 450.0
0.7921348314606742 0.28426315789473683 0.56 500.0
0.8258426966292135 0.28426315789473683 0.67 50.0
0.8089887640449438 0.28426315789473683 0.67 100.0
0.8146067415730337 0.28426315789473683 0.67 150.0
0.8033707865168539 0.28426315789473683 0.67 200.0
0.797752808988764 0.28426315789473683 0.67 250.0
0.7865168539325843 0.28426315789473683 0.67 300.0
0.7865168539325843 0.28426315789473683 0.67 350.0
0.797752808988764 0.28426315789473683 0.67 400.0
0.797752808988764 0.28426315789473683 0.67 450.0
0.7865168539325843 0.28426315789473683 0.67 500.0
0.8258426966292135 0.28426315789473683 0.78 50.0
0.8202247191011236 0.28426315789473683 0.78 100.0
0.8089887640449438 0.28426315789473683 0.78 150.0
0.797752808988764 0.28426315789473683 0.78 200.0
0.7921348314606742 0.28426315789473683 0.78 250.0
0.797752808988764 0.28426315789473683 0.78 300.0
0.8033707865168539 0.28426315789473683 0.78 350.0
0.797752808988764 0.28426315789473683 0.78 400.0
0.8033707