#### Setup

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

#### Load Data

In [2]:

test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')
data = [train, test]

#### Clean Data

In [3]:
# Null Values
for df in data:
    mean_age = df['Age'].mean()
    df['Age'] = df['Age'].fillna(mean_age)
    df['Age'] = df['Age'].astype(int)
    mode_embarked = df['Embarked'].mode()[0]
    df['Embarked'] = df['Embarked'].fillna(mode_embarked)
    df['Embarked'] = df['Embarked'].astype(str)

#drop string columns
for df in data:
    df.drop(columns = ['Name', 'Ticket', 'Cabin'], inplace = True)

# Feature Engineering
for df in data:
    df['FamilySize'] = df['SibSp'] + df['Parch']
    df['DeltaFare'] = df['Fare'] - df['Fare'].mean()

for df in data:
    print (df.head())



   PassengerId  Survived  Pclass     Sex  Age  SibSp  Parch     Fare Embarked  \
0            1         0       3    male   22      1      0   7.2500        S   
1            2         1       1  female   38      1      0  71.2833        C   
2            3         1       3  female   26      0      0   7.9250        S   
3            4         1       1  female   35      1      0  53.1000        S   
4            5         0       3    male   35      0      0   8.0500        S   

   FamilySize  DeltaFare  
0           1 -24.954208  
1           1  39.079092  
2           0 -24.279208  
3           1  20.895792  
4           0 -24.154208  
   PassengerId  Pclass     Sex  Age  SibSp  Parch     Fare Embarked  \
0          892       3    male   34      0      0   7.8292        Q   
1          893       3  female   47      1      0   7.0000        S   
2          894       2    male   62      0      0   9.6875        Q   
3          895       3    male   27      0      0   8.6625        S

#### Data Preprocessing

In [4]:
cat_cols = ['Pclass', 'Sex', 'SibSp', 'Parch', 'Embarked', 'FamilySize']
num_cols = ['Age', 'Fare', 'DeltaFare']

#encoding
encoder = OneHotEncoder()
for i in range(len(data)):
    for col in cat_cols:
        encoded_col = encoder.fit_transform(data[i][[col]])
        encoded_df = pd.DataFrame(encoded_col.toarray(), columns=encoder.get_feature_names_out([col]))
        data[i] = data[i].join(encoded_df)

# StandardScale
scaler = StandardScaler()
for i in range(len(data)):
    for col in num_cols:
        scaled_col = scaler.fit_transform(data[i][[col]])
        data[i][col] = scaled_col

train.drop(columns = cat_cols, inplace = True)
test.drop(columns = cat_cols, inplace = True)
train.drop(columns = num_cols, inplace = True)
test.drop(columns = num_cols, inplace = True)
           


In [5]:
for df in data:
    print(df.columns)

Index(['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch',
       'Fare', 'Embarked', 'FamilySize', 'DeltaFare', 'Pclass_1', 'Pclass_2',
       'Pclass_3', 'Sex_female', 'Sex_male', 'SibSp_0', 'SibSp_1', 'SibSp_2',
       'SibSp_3', 'SibSp_4', 'SibSp_5', 'SibSp_8', 'Parch_0', 'Parch_1',
       'Parch_2', 'Parch_3', 'Parch_4', 'Parch_5', 'Parch_6', 'Embarked_C',
       'Embarked_Q', 'Embarked_S', 'FamilySize_0', 'FamilySize_1',
       'FamilySize_2', 'FamilySize_3', 'FamilySize_4', 'FamilySize_5',
       'FamilySize_6', 'FamilySize_7', 'FamilySize_10'],
      dtype='object')
Index(['PassengerId', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked', 'FamilySize', 'DeltaFare', 'Pclass_1', 'Pclass_2',
       'Pclass_3', 'Sex_female', 'Sex_male', 'SibSp_0', 'SibSp_1', 'SibSp_2',
       'SibSp_3', 'SibSp_4', 'SibSp_5', 'SibSp_8', 'Parch_0', 'Parch_1',
       'Parch_2', 'Parch_3', 'Parch_4', 'Parch_5', 'Parch_6', 'Parch_9',
       'Embarked_C', 'Embarked_Q', '

In [9]:
# Neural Network Processing
# Neural Network Processing
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

model = Sequential([
    Dense(128, activation = 'relu', input_shape = (train.shape[1],)),
    Dense(128, activation = 'relu'),
    Dense(1, activation = 'sigmoid')
])

adam = Adam(learning_rate=0.001)
model.compile(optimizer = adam, loss = 'binary_crossentropy', metrics = ['accuracy'])

model.fit(train, train['Survived'], epochs=400, batch_size=32)

Epoch 1/400


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 630us/step - accuracy: 0.5244 - loss: 2.5702 
Epoch 2/400
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 667us/step - accuracy: 0.5271 - loss: 2.1811
Epoch 3/400
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704us/step - accuracy: 0.5936 - loss: 0.8804
Epoch 4/400
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 704us/step - accuracy: 0.6817 - loss: 0.7268
Epoch 5/400
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 593us/step - accuracy: 0.7081 - loss: 0.6737
Epoch 6/400
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 630us/step - accuracy: 0.6769 - loss: 0.8154
Epoch 7/400
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 630us/step - accuracy: 0.7606 - loss: 0.5915
Epoch 8/400
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 593us/step - accuracy: 0.8178 - loss: 0.5582
Epoch 9/400
[1m28/28[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x242e4001580>

In [10]:
predictions = model.predict(train.drop(columns=['Survived']))

# Convert predictions to DataFrame
predictions_df = pd.DataFrame({
    'PassengerId': train['PassengerId'],
    'Survived': predictions.flatten()
})

# Save predictions to CSV
predictions_df.to_csv('predictions.csv', index=False)


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_3" is incompatible with the layer: expected axis -1 of input shape to have value 2, but received input with shape (32, 1)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 1), dtype=int64)
  • training=False
  • mask=None