In [162]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier

RANDOM_STATE = 1234

In [163]:
train = pd.read_csv('D:/Titanic/titanic/train.csv')
test = pd.read_csv("D:/Titanic/titanic/test.csv")
test_passengerId = test['PassengerId']

In [164]:
train.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
test.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

In [165]:
train['Age'].fillna(value=train['Age'].mean(), inplace=True)

test['Age'].fillna(value=test['Age'].mean(), inplace=True)
test['Fare'].fillna(value=test['Fare'].mean(), inplace=True)

In [166]:
train.dropna()
test.dropna().head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,male,34.5,0,0,7.8292,Q
1,3,female,47.0,1,0,7.0,S
2,2,male,62.0,0,0,9.6875,Q
3,3,male,27.0,0,0,8.6625,S
4,3,female,22.0,1,1,12.2875,S


In [167]:
one_hot_columns = ['Embarked']

df_train = pd.get_dummies(data=train, prefix=one_hot_columns, columns=one_hot_columns, dtype=int)
df_train['Sex'] = (df_train['Sex'] == 'male').astype(int)

df_test = pd.get_dummies(data=test, prefix=one_hot_columns, columns=one_hot_columns, dtype=int)
df_test['Sex'] = (df_train['Sex'] == 'male').astype(int)

In [168]:
numerical_column = ['Age', 'Fare']
df_train[numerical_column] = StandardScaler().fit_transform(df_train[numerical_column])
df_test[numerical_column] = StandardScaler().fit_transform(df_test[numerical_column])

In [169]:
features = [x for x in df_train.columns if x not in 'Survived']
# X = df_train.iloc[:,[1,2,3,4,5,6,7,8,9]].astype(int)
X = df_train[features]
y = df_train['Survived'].to_numpy().reshape(-1, 1)

X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, random_state=RANDOM_STATE)

print('X train shape:', X_train.shape)
print('Y train shape:', y_train.shape)
print('X valid shape:', X_valid.shape)
print('Y valid shape:', y_valid.shape)

X train shape: (712, 9)
Y train shape: (712, 1)
X valid shape: (179, 9)
Y valid shape: (179, 1)


In [170]:
def create_model(learning_rate=0.001, kernel=0.001): 
    model = Sequential([
        Dense(units=30, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(kernel), input_dim=X_train.shape[1], name='L1'),
        Dense(units=15, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(kernel), name='L2'),
        Dense(units=1, activation='linear', kernel_regularizer=tf.keras.regularizers.l2(kernel), name='L3') 
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=['accuracy'])

    return model

In [171]:
model = KerasClassifier(model=create_model, 
                        learning_rate=None,
                        kernel=None,
                        epochs=None,
                        batch_size=None,
                        random_state=RANDOM_STATE)

param_grid = {
  "epochs": [20, 30, 50], 
  "batch_size": [50, 100, 150], 
  "kernel": [1e-4, 1e-3, 1e-2, 1e-1, 1],
  "learning_rate": [1e-5, 1e-4, 1e-3, 1e-2]
}

grid = RandomizedSearchCV(
  estimator=model,
  param_distributions=param_grid,
  scoring="accuracy",
  cv=5,
  return_train_score=True,
  n_iter=30,
  verbose=0,
  n_jobs=10
)

grid_result = grid.fit(X_train, y_train, verbose=0)

In [172]:
final_model = grid.best_estimator_
final_model.score(X_valid, y_valid)



0.8212290502793296

In [173]:
result = pd.DataFrame(grid.cv_results_)
result = result.sort_values("rank_test_score")
result.to_csv("result_randomized.csv")

In [174]:
# best_model = create_model(learning_rate=grid.best_params_['learning_rate'], kernel=grid.best_params_['kernel'])

In [175]:
# best_result = best_model.fit(X_train, y_train, epochs=grid.best_params_['epochs'], batch_size=grid.best_params_['batch_size'], verbose=0)

In [176]:
yhat = grid.predict(df_test)
yhat = tf.math.sigmoid(yhat)
yhat = (yhat.numpy() >= 0.5).astype(int)
yhat



InvalidArgumentError: Value for attr 'T' of int64 is not in the list of allowed values: bfloat16, half, float, double, complex64, complex128
	; NodeDef: {{node Sigmoid}}; Op<name=Sigmoid; signature=x:T -> y:T; attr=T:type,allowed=[DT_BFLOAT16, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128]> [Op:Sigmoid] name: 

In [None]:
yhat = np.reshape(yhat, (-1,))
yhat = pd.Series(data=yhat, name='Survived')

In [None]:
submission = pd.DataFrame(
  {
    'PassengerId': test_passengerId,
    'Survived': yhat
  }
)
submission.to_csv('submit_nn.csv', index=False)