In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

In [2]:
# Load the labelled training split and preview a few randomly chosen rows.
TRAIN_CSV = "../input/spaceship-titanic/train.csv"
train = pd.read_csv(TRAIN_CSV)
train.sample(n=6)

In [3]:
# Separate the label column from the input columns.
TARGET = "Transported"
y_train = train[TARGET]
x_train = train.drop(columns=[TARGET])

In [4]:
class SpaceshipData():
    """Convert raw passenger rows into a dense feature tensor.

    Call ``preprocess(frame, training=True)`` on the training frame to learn
    the imputation values, the cabin tokens, the one-hot column layout and the
    normalization statistics.  Call ``preprocess(frame, training=False)`` on
    any other frame to apply that same fitted transformation.
    """

    NUMERICAL = ["Age", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]
    CATEGORICAL = ["HomePlanet", "CryoSleep", "Destination", "VIP"]
    OTHER = ["PassengerId", "Cabin", "Name"]

    def __init__(self):
        self.normalize = tf.keras.layers.Normalization()
        # Distinct first / third "/"-separated Cabin fields seen while fitting.
        self.tokens_0 = set()
        self.tokens_2 = set()
        # Per-column imputation values learned while fitting.
        self.fill_values = {}
        # Ordered names of the non-numerical feature columns learned while fitting.
        self.feature_columns = None

    @staticmethod
    def _compute_fill_values(x):
        """Return {column: scalar} with the mean (numerical) or most frequent value (categorical)."""
        fill_values = {}
        for col in SpaceshipData.NUMERICAL:
            mean = x[col].mean()
            if pd.notna(mean):
                fill_values[col] = mean
        for col in SpaceshipData.CATEGORICAL:
            # mode() returns a Series; passing it to fillna would align on the
            # index and only fill row 0, so take the first mode as a scalar.
            modes = x[col].mode()
            if not modes.empty:
                fill_values[col] = modes.iloc[0]
        return fill_values

    @staticmethod
    def _cabin_token(cabin, position):
        """Return the "/"-separated field of ``cabin`` at ``position`` ("" when absent)."""
        fields = cabin.split("/")
        return fields[position] if position < len(fields) else ""

    def preprocess(self, x, training):
        """Impute, encode and normalize ``x``.

        Args:
            x: DataFrame holding the NUMERICAL, CATEGORICAL and OTHER columns.
                It is not modified.
            training: when True, (re)fit the imputation values, cabin tokens,
                encoded column layout and normalization layer on ``x`` before
                transforming it.  When False, reuse what was fitted earlier.

        Returns:
            A ``(features, ids)`` tuple: ``features`` is a 2-D tensor with the
            normalized numerical columns first, followed by the one-hot and
            cabin-token indicator columns; ``ids`` is the ``PassengerId`` Series.
        """
        if training:
            self.fill_values = self._compute_fill_values(x)
        # Reuse the fitted statistics at inference time; only fall back to the
        # frame's own statistics when nothing has been fitted yet.
        fill_values = self.fill_values or self._compute_fill_values(x)
        x = x.fillna(fill_values)

        x = pd.get_dummies(x, columns=SpaceshipData.CATEGORICAL)

        # A missing cabin becomes "//", i.e. three empty fields.
        cabins = x["Cabin"].fillna("//")
        first_field = cabins.map(lambda cabin: self._cabin_token(cabin, 0))
        third_field = cabins.map(lambda cabin: self._cabin_token(cabin, 2))
        if training:
            self.tokens_0.update(first_field)
            self.tokens_2.update(third_field)
            # discard() rather than remove(): "" is only present when some
            # cabin was missing, and its absence must not raise.
            self.tokens_0.discard("")
            self.tokens_2.discard("")

        # Compare against the parsed field (not a substring of the whole cabin
        # string) and iterate in sorted order so the column layout is stable.
        for token_0 in sorted(self.tokens_0):
            x["Token0" + token_0] = (first_field == token_0).astype(int)
        for token_2 in sorted(self.tokens_2):
            x["Token2" + token_2] = (third_field == token_2).astype(int)

        x_ids = x["PassengerId"]
        x = x.drop(columns=SpaceshipData.OTHER)

        categorical = x.drop(columns=SpaceshipData.NUMERICAL)
        if training:
            self.feature_columns = list(categorical.columns)
        elif self.feature_columns is not None:
            # Align with the fitted layout: categories unseen in this frame
            # become all-zero columns, unknown extra columns are dropped.
            categorical = categorical.reindex(columns=self.feature_columns, fill_value=0)

        numerical = x[SpaceshipData.NUMERICAL].to_numpy(dtype="float32")
        if training:
            self.normalize.adapt(numerical)

        x_numerical = self.normalize(numerical)
        # Cast explicitly: the indicator columns may mix bool and int dtypes.
        x_categorical = tf.convert_to_tensor(categorical.to_numpy(dtype="float32"),
                                             dtype=tf.float32)
        return tf.concat([x_numerical, x_categorical], axis=1), x_ids

In [5]:
# Fit the preprocessing pipeline on the training features.
# The input is rebuilt from `train` rather than read from `x_train`, because
# this cell rebinds `x_train` to the resulting tensor: feeding `x_train` back
# in would make the cell fail when it is run a second time.
spaceship = SpaceshipData()
x_train, train_ids = spaceship.preprocess(train.drop(columns="Transported"), training=True)

In [6]:
# Small fully connected network: two ReLU hidden layers and a single-logit output.
model = tf.keras.Sequential()
for units in (16, 32):
    model.add(tf.keras.layers.Dense(units, activation="relu"))
model.add(tf.keras.layers.Dense(1))

# The output layer has no activation, so the loss must interpret it as a logit.
criterion = tf.keras.losses.BinaryCrossentropy(from_logits=True)
model.compile(optimizer="adam", loss=criterion, metrics=[tf.keras.metrics.BinaryAccuracy()])

In [7]:
# Train on the full preprocessed training set for 25 epochs.
model.fit(
    x_train,
    y_train,
    epochs=25,
)

In [8]:
# Load the unlabelled test split and apply the already-fitted preprocessing.
test_raw = pd.read_csv("../input/spaceship-titanic/test.csv")
x_test, test_ids = spaceship.preprocess(test_raw, training=False)

In [9]:
# Map logits to probabilities, then round to obtain boolean class labels.
logits = model(x_test)
probabilities = tf.squeeze(tf.math.sigmoid(logits))
y_pred = tf.cast(tf.math.round(probabilities), dtype=tf.bool)

# Write the predictions next to their passenger ids.
submission = pd.DataFrame({"PassengerId": test_ids, "Transported": y_pred})
submission.to_csv("submission.csv", index=False)