In [59]:
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

In [60]:
# Load the training and test splits from the local dataset directory.
train = pd.read_csv('dataset/train.csv')
test = pd.read_csv('dataset/test.csv')

# Non-null count per column: a quick way to spot columns with missing values.
# Only the last expression of a cell is rendered, so this is the single
# expression the cell ends with.
train.count()

PassengerId    891
Survived       891
Pclass         891
Name           891
Sex            891
Age            714
SibSp          891
Parch          891
Ticket         891
Fare           891
Cabin          204
Embarked       889
dtype: int64

In [61]:
# Columns removed from both splits before modelling.
dropped_cols = ['PassengerId', 'Name', 'Cabin', 'Ticket']

# Save the test passenger ids before the column is dropped; they are needed to
# build the submission frame.
test_ids = test['PassengerId']

train = train.drop(dropped_cols, axis=1)
test = test.drop(dropped_cols, axis=1)
train.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [62]:
from sklearn.preprocessing import LabelEncoder

In [63]:
categorical_cols = ['Sex', 'Embarked']

# 'Embarked' has missing values in the training split (889 non-null of 891).
# Feeding NaN to LabelEncoder either fails or creates an extra, unintended
# class depending on the scikit-learn version, so impute first. The fill value
# is learned from the training split only and reused for the test split.
embarked_mode = train['Embarked'].mode()[0]
train['Embarked'] = train['Embarked'].fillna(embarked_mode)
test['Embarked'] = test['Embarked'].fillna(embarked_mode)

label_encoder = LabelEncoder()

# Fit the mapping on the training column, then apply the same mapping to the
# test column so both splits share identical integer codes.
for col in categorical_cols:
    train[col] = label_encoder.fit_transform(train[col])
    test[col] = label_encoder.transform(test[col])

train.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2


In [64]:
# Replace missing ages with the mean age of the training data.
# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form operates on an intermediate object
# and does not update `train` under pandas Copy-on-Write.
train['Age'] = train['Age'].fillna(train['Age'].mean())

In [65]:
from sklearn.model_selection import train_test_split

# Hold out part of the labelled data for validation (default split fraction).
# A fixed random_state makes the split, and therefore the validation metrics,
# repeatable across kernel restarts.
X_train, X_val, Y_train, Y_val = train_test_split(
    train.drop(columns=['Survived']),
    train['Survived'],
    random_state=42,
)

In [66]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [67]:
# Small feed-forward binary classifier: one hidden ReLU layer of 64 units
# followed by a single sigmoid output unit.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(n_features,)))
model.add(Dense(1, activation='sigmoid'))

model.summary()

In [68]:
# Binary cross-entropy loss with the Adam optimizer; accuracy is reported
# alongside the loss.
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer='Adam',
)

# Train for 50 epochs, evaluating on the held-out validation split after each.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=50,
    validation_data=(X_val, Y_val),
)

Epoch 1/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4895 - loss: 2.9109 - val_accuracy: 0.6682 - val_loss: 1.3024
Epoch 2/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6961 - loss: 1.0471 - val_accuracy: 0.6502 - val_loss: 0.8080
Epoch 3/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6916 - loss: 0.6790 - val_accuracy: 0.6637 - val_loss: 0.5889
Epoch 4/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6961 - loss: 0.5799 - val_accuracy: 0.6592 - val_loss: 0.5874
Epoch 5/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7051 - loss: 0.5737 - val_accuracy: 0.6637 - val_loss: 0.5751
Epoch 6/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7036 - loss: 0.5628 - val_accuracy: 0.6906 - val_loss: 0.5649
Epoch 7/50
[1m21/21[0m [32m━━━━━━━━━━

In [69]:
# Fill gaps in the test features using statistics taken from the training data,
# so nothing is learned from the test split.
# Assign the results back instead of using fillna(inplace=True) on a selected
# column: the chained in-place form operates on an intermediate object and does
# not update `test` under pandas Copy-on-Write.
test['Age'] = test['Age'].fillna(train['Age'].mean())
test['Fare'] = test['Fare'].fillna(train['Fare'].median())

In [70]:
# Predicted probabilities for the test passengers.
Y_pred = model.predict(test)

# Threshold the flattened probabilities at 0.5 to obtain 0/1 labels, paired
# with the passenger ids saved earlier.
submission = pd.DataFrame({'PassengerId': test_ids})
submission['Survived'] = (Y_pred.flatten() > 0.5).astype(int)

print(submission)

submission.to_csv('submission.csv', index=False)

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
     PassengerId  Survived
0            892         0
1            893         0
2            894         0
3            895         0
4            896         0
..           ...       ...
413         1305         0
414         1306         1
415         1307         0
416         1308         0
417         1309         0

[418 rows x 2 columns]
