In [6]:
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer


In [7]:
class FalseScaler:
    def __init__(self):
        pass

    def fit(self, X):
        return X

    def transform(self, X):
        return X

    def fit_transform(self, X):
        return X

    def inverse_transform(self, X):
        return X


In [8]:
def load_and_preprocess_data( data: str, drop=[], X_slice=slice(0, -1), y_slice=-1, x_label=[], test_size=0.2,
        y_label=False, columns_to_encode=[], columns_to_scale=[], scale_y=True, random_state=None, shuffle=True, drop_first=True):
    """load dataset from csv file and preprocess it

    Args:
        data (str): path to dataset csv file
        drop (list, optional): columns to drop from dataset. Defaults to [].
        X_slice (slice, optional): independent variables slice. Defaults to slice(0, -1).
        y_slice (int|slice, optional): dependent variables slice. Defaults to -1.
        x_label (list, optional): independent variables to be label encoded. Defaults to [].
        y_label (bool, optional): label encode dependent variables. Defaults to False.
        columns_to_encode (list, optional): columns to get_dummies. Defaults to [].
        columns_to_scale (list, optional): columns to normalize. Defaults to [].
        scale_y (bool, optional): normalize the dependent variables. Defaults to True.
        random_state (int, optional): train test split random state. Defaults to None.
        drop_first (bool, optional): drop the first dummy column. Defaults to True.
    """
    raw_dataset = pd.read_csv(data)
    dataset = raw_dataset.drop(drop, axis=1)
    X = dataset.iloc[:, X_slice]
    y = dataset.iloc[:, y_slice].values

    # X_label_encoder= ColumnTransformer([('Label',LabelEncoder(), x_labels)], remainder='passthrough')
    # X = X_label_encoder.fit_transform(X)

    # imputer_embarked  = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
    # imputer_age = SimpleImputer(missing_values=np.nan, strategy="mean")

    # X['Embarked'] = imputer_embarked.fit_transform(X['Embarked'].values.reshape(-1,1))
    # X['Age'] = imputer_age.fit_transform(X['Age'].values.reshape(-1,1))
    X['Embarked'].fillna('unknown', inplace=True)
    X['Age'].fillna(29, inplace=True)
    # return X
    # print(X.isna().sum().sum())
    # X = X.dropna(axis=0)

    X_label_encoders = {}
    y_label_encoder = LabelEncoder()

    if x_label != []:
        for label in x_label:
            X_label_encoders[label] = LabelEncoder()
            X_label_encoders[label].fit(X[label])
    def X_label_encode(X_new):
        X_new = X_new.copy()
        if x_label != []:
            for label in x_label:
                # X_label_encoders[label] = LabelEncoder()
                X_new[label] = X_label_encoders[label].transform(X_new[label])
        return X_new

    X = X_label_encode(X)

    # X = X_label_encoder.fit_transform(X)
    y_label_encoder.fit(y)

    def y_label_encode(y_new):
        if y_label:
            y_new = y_label_encoder.transform(y_new)
            return y_new
        else:
            return y_new
    y = y_label_encode(y)

    # column_transformer = ColumnTransformer([('OneHotEncode', OneHotEncoder(drop='first'), columns_to_encode)], remainder='passthrough')
    # X = column_transformer.fit_transform(X)
    one_hot_encoder = OneHotEncoder(drop='first', handle_unknown='ignore')
    encoded_columns = []
    if columns_to_encode != []:
        one_hot_encoder.fit(X[columns_to_encode])
        encoded_columns = one_hot_encoder.get_feature_names_out()
    def get_dummies(X_new):
        if columns_to_encode != []:
            X_new_dummies = one_hot_encoder.transform(X_new[columns_to_encode]).toarray()
            # print(X_dummies.shape)
            # print(one_hot_encoder.get_feature_names_out())
        #     X_new = pd.get_dummies(
            X_new = pd.concat([X_new.drop(columns_to_encode,axis=1), pd.DataFrame(data=X_new_dummies,columns=encoded_columns)], axis=1)
        #         X_new, columns=columns_to_encode, drop_first=drop_first)
        return X_new
    X_dummies = get_dummies(X)


    # print(column_transformer
    column_order = X_dummies.columns.values
    
    X_train, X_test, y_train, y_test = X_dummies,X_dummies,y,y
    if test_size > 0:
        X_train, X_test, y_train, y_test = train_test_split(
            X_dummies, y, test_size=test_size, random_state=random_state,shuffle=shuffle)

    # X_scaler = ColumnTransformer(
    #         [('Scaler', StandardScaler(), columns_to_scale)], remainder='passthrough')
    # y_scaler = ColumnTransformer([('Scaler', StandardScaler(), scale_y)], remainder='passthrough')
    X_scaler = StandardScaler()
        
    y_scaler = StandardScaler() if scale_y else FalseScaler()
    X_train_scaled = X_train.copy()
    if columns_to_scale != []:
        # print(X_train)
        X_train_scaled[columns_to_scale] = X_scaler.fit_transform(X_train[columns_to_scale])
    X_train_scaled = X_train_scaled[column_order]
    X_test_scaled = X_test.copy()
    if columns_to_scale != []:
        X_test_scaled[columns_to_scale] = X_scaler.transform(X_test[columns_to_scale])
    X_test_scaled = X_test_scaled[column_order]
    y_train_scaled = y_scaler.fit_transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)
    X_tf_validation, X_tf_test,y_tf_validation, y_tf_test = train_test_split(X_test_scaled,y_test, test_size=0.5)
    y_tf_validation = y_scaler.transform(y_tf_validation)

    def scaler(X_new):
        X_new_scaled = X_new.copy()
        if columns_to_scale != []:
            X_new_scaled[columns_to_scale] = X_scaler.transform(X_new[columns_to_scale])
        return X_new_scaled

    def preprocess(path):
        X_new_raw = pd.read_csv(path)
        X_new = X_new_raw.drop(drop, axis=1)
        # X_new2 = X_new_raw.drop(drop, axis=1).reset_index(drop=True)
        # print(X_new['Sex'].unique())
        X_new['Embarked'].fillna('unknown', inplace=True)
        X_new['Age'].fillna(29, inplace=True)
        X_new['Fare'].fillna(0, inplace=True)
        X_new = X_label_encode(X_new)
        X_new = get_dummies(X_new)
        # X_new.drop(['SibSp_8'],axis=1)
        X_new_scaled = scaler(X_new)
        return {
            "X_test": X_new,
            # "X_test2": X_new2,
            "X_test_raw": X_new_raw,
            "X_test_scaled": X_new_scaled
        }




    return {
        "X":X,
        # "X_raw":X_raw,
        "X_train": X_train,
        "X_dummies": X_dummies,
        "X_train_scaled": X_train_scaled,
        "X_test": X_test,
        "X_test_scaled": X_test_scaled,
        "X_tf_test": X_tf_test,
        "X_tf_validation": X_tf_validation,
        "X_scaler": X_scaler,
        # "X_label_encoder": X_label_encoder,
        "preprocess": preprocess,
        "y":y,
        "y_train": y_train,
        "y_train_scaled": y_train_scaled,
        "y_test": y_test,
        "y_test_scaled": y_test_scaled,
        "y_tf_test": y_tf_test,
        "y_tf_validation": y_tf_validation,
        "y_scaler": y_scaler,
        "y_label_encoder": y_label_encoder,
    }


In [87]:
all_features = ['PassengerId','Pclass','Name','Sex','Age','SibSp','Parch','Ticket','Fare','Cabin','Embarked']

target_feature = ['Survived']

# train_features = ['Pclass','Sex','Age','Fare']
train_features = ['Pclass','Sex','Age','SibSp','Parch']


drop_features = [feat for feat in all_features if feat not in train_features]

numerical_features = ['Age']
# numerical_features = ['Age','Fare']

label_features = ['Sex']

categorical_features = [feat for feat in train_features if feat not in numerical_features+label_features]

# categorical_features_no_label = [feat for feat in categorical_features if feat not in label_features]

print('Drop features: {}\nCategorical: {}\n'.format(drop_features,categorical_features))

Drop features: ['PassengerId', 'Name', 'Ticket', 'Fare', 'Cabin', 'Embarked']
Categorical: ['Pclass', 'SibSp', 'Parch']



In [113]:
raw_dataset = pd.read_csv('train.csv')
dataset = raw_dataset
X = dataset[train_features]
y = dataset[target_feature].values

dataset_test = pd.read_csv('test.csv')
test_data_raw_raw = dataset_test[dataset_test['Sex']=='male']
test_data_raw = test_data_raw_raw[train_features]
# test_data_raw.loc[test_data_raw['Parch']==9,'Parch'] = np.nan
# test_data_raw.iloc[342]
#

In [114]:
test_data_raw

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch
0,3,male,34.5,0,0
2,2,male,62.0,0,0
3,3,male,27.0,0,0
5,3,male,14.0,0,0
7,2,male,26.0,1,1
...,...,...,...,...,...
407,1,male,50.0,1,1
413,3,male,,0,0
415,3,male,38.5,0,0
416,3,male,,0,0


In [115]:
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.feature_extraction.text import _VectorizerMixin
from sklearn.feature_selection._base import SelectorMixin

num = make_pipeline(IterativeImputer(max_iter=10, random_state=0),StandardScaler())
cat = make_pipeline(SimpleImputer(missing_values=np.nan, strategy='most_frequent'),OneHotEncoder( drop='first',handle_unknown='ignore', sparse=False))

ct = ColumnTransformer([
    ('Numerical', num, numerical_features),
    ('Label', OrdinalEncoder(), label_features), 
    # ('Fill categorical', SimpleImputer(missing_values=np.nan, strategy='most_frequent'), categorical_features),
    ('Categorical', cat, categorical_features),
], remainder='passthrough')

X_ct = ct.fit_transform(X)

test_data = ct.transform(test_data_raw)

# print(X_ct)




In [116]:

def get_feature_out(estimator, feature_in):
    if hasattr(estimator,'get_feature_names'):
        if isinstance(estimator, _VectorizerMixin):
            # handling all vectorizers
            return [f'vec_{f}' \
                for f in estimator.get_feature_names_out()]
        else:
            return estimator.get_feature_names_out(feature_in)
    elif isinstance(estimator, SelectorMixin):
        return np.array(feature_in)[estimator.get_support()]
    else:
        return feature_in


def get_ct_feature_names(ct):
    # handles all estimators, pipelines inside ColumnTransfomer
    # doesn't work when remainder =='passthrough'
    # which requires the input column names.
    output_features = []

    for name, estimator, features in ct.transformers_:
        if name!='remainder':
            if isinstance(estimator, Pipeline):
                current_features = features
                for step in estimator:
                    current_features = get_feature_out(step, current_features)
                features_out = current_features
            else:
                features_out = get_feature_out(estimator, features)
            output_features.extend(features_out)
        elif estimator=='passthrough':
            output_features.extend(ct.feature_names_in_[features])
                
    return output_features

In [117]:
X_ct

array([[-0.5924806 ,  1.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.63878901,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.2846632 ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.2846632 ,  1.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.17706291,  1.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [118]:
pd.DataFrame(X_ct, 
             columns=get_ct_feature_names(ct))

Unnamed: 0,Age,Sex,Pclass_2,Pclass_3,SibSp_1,SibSp_2,SibSp_3,SibSp_4,SibSp_5,SibSp_8,Parch_1,Parch_2,Parch_3,Parch_4,Parch_5,Parch_6
0,-0.592481,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.638789,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.284663,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.407926,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.407926,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,-0.207709,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
887,-0.823344,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
888,0.000000,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
889,-0.284663,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [119]:
pd.DataFrame(test_data, 
             columns=get_ct_feature_names(ct)).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Age       266 non-null    float64
 1   Sex       266 non-null    float64
 2   Pclass_2  266 non-null    float64
 3   Pclass_3  266 non-null    float64
 4   SibSp_1   266 non-null    float64
 5   SibSp_2   266 non-null    float64
 6   SibSp_3   266 non-null    float64
 7   SibSp_4   266 non-null    float64
 8   SibSp_5   266 non-null    float64
 9   SibSp_8   266 non-null    float64
 10  Parch_1   266 non-null    float64
 11  Parch_2   266 non-null    float64
 12  Parch_3   266 non-null    float64
 13  Parch_4   266 non-null    float64
 14  Parch_5   266 non-null    float64
 15  Parch_6   266 non-null    float64
dtypes: float64(16)
memory usage: 33.4 KB


In [120]:
X_train, X_test, y_train, y_test = train_test_split(X_ct, y, test_size=0.20, random_state=0)

X_test2, X_val, y_test2, y_val = train_test_split(X_test,y_test,test_size=0.5, random_state=0)

In [121]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(random_state = 0)
classifier.fit(X_train,y_train)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[94 16]
 [24 45]]


0.776536312849162

In [122]:
from sklearn.ensemble import RandomForestClassifier
classifier2 = RandomForestClassifier(n_estimators = 1000,)
classifier2.fit(X_train, y_train)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier2.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

  classifier2.fit(X_train, y_train)


[[95 15]
 [20 49]]


0.8044692737430168

In [123]:
# Training the K-NN model on the Training set
from sklearn.neighbors import KNeighborsClassifier
classifier4 = KNeighborsClassifier(n_neighbors = 21, metric = 'minkowski', p = 2)
classifier4.fit(X_train,y_train)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier4.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[91 19]
 [22 47]]


  return self._fit(X, y)


0.770949720670391

In [124]:
from sklearn.svm import SVC
classifier3 = SVC(kernel = 'rbf')
classifier3.fit(X_train, y_train)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier3.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[98 12]
 [22 47]]


  y = column_or_1d(y, warn=True)


0.8100558659217877

In [125]:
from sklearn.svm import SVC
classifier5 = SVC(kernel = 'linear')
classifier5.fit(X_train, y_train)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier5.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[96 14]
 [22 47]]


  y = column_or_1d(y, warn=True)


0.7988826815642458

In [126]:
import tensorflow as tf
output_size = 2

batch_size = 50

input_size = 9

max_epochs = 200

hidden_layer_size = 1500

model = tf.keras.Sequential([
    # tf.keras.layers.Dense(input_size,activation='relu'),
    # tf.keras.layers.Dense(input_size,activation='sigmoid'),
    # tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    # # tf.keras.layers.Dense(hidden_layer_size,activation='sigmoid'),
    # tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    # tf.keras.layers.Dense(2,activation='relu'),
    # tf.keras.layers.Dense(hidden_layer_size/2,activation='sigmoid'),
    tf.keras.layers.Dense(2,activation='softmax'),
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stopper = tf.keras.callbacks.EarlyStopping(patience=5)

model.fit(
    X_train,
    y_train,
    epochs=max_epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=2,
    callbacks=[early_stopper]
    )

Epoch 1/200
15/15 - 7s - loss: 0.6355 - accuracy: 0.6194 - val_loss: 0.5542 - val_accuracy: 0.7556
Epoch 2/200
15/15 - 3s - loss: 0.5067 - accuracy: 0.7921 - val_loss: 0.4591 - val_accuracy: 0.7889
Epoch 3/200
15/15 - 3s - loss: 0.4516 - accuracy: 0.8160 - val_loss: 0.4570 - val_accuracy: 0.7556
Epoch 4/200
15/15 - 3s - loss: 0.4233 - accuracy: 0.8315 - val_loss: 0.4545 - val_accuracy: 0.7667
Epoch 5/200
15/15 - 3s - loss: 0.4220 - accuracy: 0.8315 - val_loss: 0.5179 - val_accuracy: 0.7667
Epoch 6/200
15/15 - 3s - loss: 0.4337 - accuracy: 0.8174 - val_loss: 0.4724 - val_accuracy: 0.7667
Epoch 7/200
15/15 - 5s - loss: 0.4217 - accuracy: 0.8258 - val_loss: 0.5062 - val_accuracy: 0.7556
Epoch 8/200
15/15 - 2s - loss: 0.4064 - accuracy: 0.8216 - val_loss: 0.4760 - val_accuracy: 0.7556
Epoch 9/200
15/15 - 3s - loss: 0.3979 - accuracy: 0.8315 - val_loss: 0.4676 - val_accuracy: 0.7778


<keras.callbacks.History at 0x1e27b613460>

In [127]:
test_loss, test_accuracy = model.evaluate(X_test2,y_test2)
# test_loss, test_accuracy = model.evaluate(data['X_test_scaled'],data['y_test_scaled'])

print(f'Test loss: {"%.4f"% test_loss}, Accuracy: {"%.2f" % (test_accuracy *100)}%')

Test loss: 0.4288, Accuracy: 83.15%


In [128]:
y_tf_pred_raw = model.predict(test_data)
# y_tf_pred_raw = data['y_scaler'].inverse_transform(model.predict(data['X_tf_test']))
y_tf_pred = np.array([ [np.argmax(x)] for x in y_tf_pred_raw])
y_tf_pred


array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
    

In [129]:
y_tf_pred_raw.shape

(266, 2)

In [131]:
data_targets_raw = pd.read_csv('submission_perfect.csv')
data_targets = data_targets_raw[data_targets_raw['PassengerId'].isin(test_data_raw_raw['PassengerId'])]['Survived'].values.reshape(-1,1)
data_targets


array([[0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
    

In [133]:

y_tf_pred_raw = model.predict(test_data)
# y_tf_pred_raw = data['y_scaler'].inverse_transform(model.predict(data['X_tf_test']))
y_tf_pred = np.array([ [np.argmax(x)] for x in y_tf_pred_raw])

results = pd.DataFrame({
    "DecisionTree": [accuracy_score(data_targets,classifier.predict(test_data))],
    "RandomForest": [accuracy_score(data_targets,classifier2.predict(test_data))],
    "KNeigbors": [accuracy_score(data_targets,classifier4.predict(test_data))],
    "SVC -rbf": [accuracy_score(data_targets,classifier3.predict(test_data))],
    "SVC - linear": [accuracy_score(data_targets,classifier5.predict(test_data))],
    "TensorFlow": [accuracy_score(data_targets,y_tf_pred)],
})

results

Unnamed: 0,DecisionTree,RandomForest,KNeigbors,SVC -rbf,SVC - linear,TensorFlow
0,0.736842,0.740602,0.808271,0.815789,0.804511,0.808271


In [None]:
# output = pd.DataFrame({'PassengerId': dataset_test['PassengerId'], 'Survived': y_tf_pred.ravel()})
# output.to_csv('submission.csv', index=False)