#### Setup

In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

#### Load Data

In [13]:

test = pd.read_csv('test.csv', index_col = 'PassengerId')
train = pd.read_csv('train.csv', index_col = 'PassengerId')
data = [train, test]

#### Clean Data

In [14]:
# Null Values
for df in data:
    mean_age = df['Age'].mean()
    df['Age'] = df['Age'].fillna(mean_age)
    df['Age'] = df['Age'].astype(int)
    mode_embarked = df['Embarked'].mode()[0]
    df['Embarked'] = df['Embarked'].fillna(mode_embarked)
    df['Embarked'] = df['Embarked'].astype(str)

# Feature Engineering
for df in data:
    df['FamilySize'] = df['SibSp'] + df['Parch']
    df['DeltaFare'] = df['Fare'] - df['Fare'].mean()

for df in data:
    print (df.head())



             Survived  Pclass  \
PassengerId                     
1                   0       3   
2                   1       1   
3                   1       3   
4                   1       1   
5                   0       3   

                                                          Name     Sex  Age  \
PassengerId                                                                   
1                                      Braund, Mr. Owen Harris    male   22   
2            Cumings, Mrs. John Bradley (Florence Briggs Th...  female   38   
3                                       Heikkinen, Miss. Laina  female   26   
4                 Futrelle, Mrs. Jacques Heath (Lily May Peel)  female   35   
5                                     Allen, Mr. William Henry    male   35   

             SibSp  Parch            Ticket     Fare Cabin Embarked  \
PassengerId                                                           
1                1      0         A/5 21171   7.2500   NaN        S   
2

#### Data Preprocessing

In [17]:
cat_cols = ['Pclass', 'Sex', 'SibSp', 'Parch', 'Cabin', 'Embarked', 'FamilySize']
num_cols = ['Age', 'Fare', 'DeltaFare']
str_cols = ['Name', 'Ticket']

# Encoding
encoder = OneHotEncoder()
for df in data:
    for col in cat_cols:
        encoded_col = encoder.fit_transform(df[[col]])
        encoded_df = pd.DataFrame(encoded_col.toarray(), columns=encoder.get_feature_names_out([col]))
        df = df.join(encoded_df)
        df.drop(col, axis = 1, inplace=True)

# StandardScale
scaler = StandardScaler()
for df in data:
    for col in num_cols:
        scaled_col = scaler.fit_transform(df[[col]])
        df[col] = scaled_col

# Tokenize
tokenizer = Tokenizer()
for df in data:
    for col in str_cols:
        tokenizer.fit_on_texts(df[col])
        tokenized_col = tokenizer.texts_to_matrix(df[col])
        tokenized_df = pd.DataFrame(tokenized_col, columns=[f"{col}_{i}" for i in range(tokenized_col.shape[1])])
        df = df.join(tokenized_df)
        df.drop(col, axis = 1, inplace=True)

for df in data:
    print(df)


             Survived  Pclass  \
PassengerId                     
1                   0       3   
2                   1       1   
3                   1       3   
4                   1       1   
5                   0       3   
...               ...     ...   
887                 0       2   
888                 1       1   
889                 0       3   
890                 1       1   
891                 0       3   

                                                          Name     Sex  \
PassengerId                                                              
1                                      Braund, Mr. Owen Harris    male   
2            Cumings, Mrs. John Bradley (Florence Briggs Th...  female   
3                                       Heikkinen, Miss. Laina  female   
4                 Futrelle, Mrs. Jacques Heath (Lily May Peel)  female   
5                                     Allen, Mr. William Henry    male   
...                                                 

In [None]:
# Neural Network Processing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

model = Sequential([
    tf.keras.Layers.Dense(128, activation = 'relu', input_shape = (train.shape[1],)),
    tf.keras.Layers.Dense(128, activation = 'relu'),
    tf.keras.Layers.Dense(1, activation = 'sigmoid')
])

adam = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

model.fit(train, train['Survived'], epochs=10, batch_size=32)