In [None]:
import pandas as pd

# Load the labelled training set and the unlabelled test set from the
# working directory.
df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# Preview the first rows of each frame.
display(df.head())
display(test_df.head())

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() only appends a stray "None" to the output.
df.info()
test_df.info()

In [None]:
# Share of each Survived value, drawn as a pie with two-decimal percentages.
survival_counts = df["Survived"].value_counts()
survival_counts.plot.pie(autopct="%1.2f%%")

In [None]:
# Mean of Survived within each Sex group, shown as a bar chart.
survival_by_sex = df.groupby(["Sex"])[["Survived"]].mean()
survival_by_sex.plot.bar()

In [None]:
# Mean of Survived within each Pclass group, shown as a bar chart.
survival_by_pclass = df.groupby("Pclass")[["Survived"]].mean()
survival_by_pclass.plot.bar()

In [None]:
# Mean of Survived within each SibSp group, shown as a bar chart.
survival_by_sibsp = df.groupby("SibSp")[["Survived"]].mean()
survival_by_sibsp.plot.bar()

In [None]:
# Mean of Survived within each Parch group, shown as a bar chart.
survival_by_parch = df.groupby("Parch")[["Survived"]].mean()
survival_by_parch.plot.bar()

In [None]:
import seaborn as sns

# Rows with a recorded Age, keeping only the Age and Survived columns.
age_data = df.loc[df["Age"].notna(), ["Age", "Survived"]]

# Histogram of the recorded ages.
display(sns.histplot(age_data["Age"]))

# Filled KDE of Age, one curve per Survived value, with the x axis running
# from 0 to the largest Age in df. The trailing ';' hides the grid's repr.
ageFacet = sns.FacetGrid(df, hue="Survived", aspect=3)
ageFacet.map(sns.kdeplot, "Age", fill=True).set(xlim=(0, df["Age"].max())).add_legend();

In [None]:
# Filled KDE of Fare, one curve per Survived value, with the x axis limited
# to the 0-150 range.
fareFacet = sns.FacetGrid(df, hue="Survived", aspect=3)
fareFacet.map(sns.kdeplot, "Fare", fill=True).set(xlim=(0, 150)).add_legend()

In [None]:
# Most frequent Embarked value in the training frame (first entry of the
# mode Series).
df["Embarked"].mode().iloc[0]

In [None]:
def encode_categories(column, mapping):
    """Return `column` with each label replaced by its code from `mapping`.

    A column that is already numeric is returned unchanged. Without this
    guard, re-running the cell would map the already-encoded numbers through
    `mapping` again and silently turn every value into NaN.

    Labels that are absent from `mapping` become NaN (Series.map behaviour).
    """
    if pd.api.types.is_numeric_dtype(column):
        return column
    return column.map(mapping)


# Encode Sex as 0/1 in both frames.
gender_map = {"male": 0, "female": 1}
df["Sex"] = encode_categories(df["Sex"], gender_map)
test_df["Sex"] = encode_categories(test_df["Sex"], gender_map)

# Impute missing ages in both frames with the training-set mean. The mean is
# computed once, before any fill, so both frames provably use the same value.
train_age_mean = df["Age"].mean()
df["Age"] = df["Age"].fillna(train_age_mean)
test_df["Age"] = test_df["Age"].fillna(train_age_mean)

# Encode the port of embarkation as 0/1/2; missing values stay NaN here.
embarked_map = {"S": 0, "C": 1, "Q": 2}
df["Embarked"] = encode_categories(df["Embarked"], embarked_map)
test_df["Embarked"] = encode_categories(test_df["Embarked"], embarked_map)

# Inspect what is still missing and how the columns look after encoding.
display(df.isna().sum())
display(test_df.isna().sum())
display(df.describe())
display(test_df.describe())

# DataFrame.info() prints its report and returns None, so it is called
# directly instead of through display(), which would add a stray "None".
df.info()
test_df.info()

In [None]:
def scale_by_shared_max(train, test, column):
    """Divide `column` in both frames by its largest value across the two frames.

    Both frames are updated in place through column assignment.
    """
    shared_max = max(train[column].max(), test[column].max())
    train[column] = train[column] / shared_max
    test[column] = test[column] / shared_max


# Fill the gaps that must be closed before scaling. Only the test frame's
# Fare is filled (with the training mean); Embarked is filled in both frames
# with the training mode.
mean_Fare = df["Fare"].mean()
test_df["Fare"] = test_df["Fare"].fillna(mean_Fare)

mod_Embarked = df["Embarked"].mode()[0]
df["Embarked"] = df["Embarked"].fillna(mod_Embarked)
test_df["Embarked"] = test_df["Embarked"].fillna(mod_Embarked)

# Scale every model input by its maximum so the largest value becomes 1.
# NOTE(review): the maximum is taken over train AND test together, so test-set
# statistics influence preprocessing; switch to the training maximum alone if
# strict train/test separation is required.
for column in ["Pclass", "Age", "SibSp", "Parch", "Fare", "Embarked"]:
    scale_by_shared_max(df, test_df, column)

In [None]:
# Model inputs, in the column order used by every later cell.
features = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]

# The training frame leads with the label and the test frame with the
# passenger id; later cells rely on column 0 being that extra column.
train_features = ["Survived"] + features
test_features = ["PassengerId"] + features

df = df[train_features]
test_df = test_df[test_features]

# DataFrame.info() prints its report and returns None, so it is called
# directly instead of through display(), which would add a stray "None".
df.info()
test_df.info()
display(df.describe())
display(test_df.describe())

In [None]:
import tensorflow as tf

# Convert the whole training frame to a single tensor, then split it by
# column: column 0 is the Survived label, the remaining columns are the inputs.
tensor_data = tf.convert_to_tensor(df)
y = tensor_data[:, 0]
X = tensor_data[:, 1:]

X.shape

In [None]:
from keras import Sequential
from keras.layers import InputLayer, Dense, Dropout

# Fully connected binary classifier: three ReLU hidden layers (20, 20 and 5
# units) followed by a single sigmoid output unit. The input width is taken
# from the number of feature columns in X.
hidden_layers = [Dense(units, activation="relu") for units in (20, 20, 5)]
model = Sequential(
    [
        InputLayer((X.shape[1],)),
        *hidden_layers,
        Dense(1, activation="sigmoid"),
    ]
)

model.summary()

In [None]:
from keras.callbacks import EarlyStopping

# Stop when the held-out loss has not improved for 3 consecutive epochs and
# roll the model back to the weights of its best epoch. Monitoring the
# training loss instead would only stop once the model ceases to fit the
# training data, which says nothing about generalization.
callback = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Hold out 20% of the rows for validation instead of passing the training
# data itself as validation_data, which made the val_* curves meaningless.
# Keras takes the split from the end of X and y, before any shuffling.
history = model.fit(
    X,
    y,
    epochs=100,
    batch_size=16,
    validation_split=0.2,
    callbacks=[callback],
)

In [None]:
from matplotlib import pyplot as plt 
# Accuracy per epoch. The second curve is Keras's validation metric
# (val_accuracy), so it is labelled "validation": the test set has no labels
# and is never evaluated during fit().
fig, ax = plt.subplots()
ax.plot(history.history["accuracy"], label="train")
ax.plot(history.history["val_accuracy"], label="validation")
ax.set(xlabel="epoch", ylabel="acc")
ax.legend()
plt.show()

In [None]:
from matplotlib import pyplot as plt 
# Loss per epoch. The second curve is Keras's validation metric (val_loss),
# so it is labelled "validation": the test set has no labels and is never
# evaluated during fit().
fig, ax = plt.subplots()
ax.plot(history.history["loss"], label="train")
ax.plot(history.history["val_loss"], label="validation")
ax.set(xlabel="epoch", ylabel="loss")
ax.legend()
plt.show()

In [None]:
# Convert the test frame to a tensor and drop its leading PassengerId column
# so that only the model inputs remain.
test_tensor = tf.convert_to_tensor(test_df)
id_column_count = 1
X_test = test_tensor[:, id_column_count:]

In [None]:
# The network ends in a single sigmoid unit, so predict() yields one
# probability per row in a column; flatten it to one dimension first.
survival_probability = model.predict(X_test).ravel()

# Threshold at 0.5 in one vectorized step rather than relying on the truth
# value of one-element arrays inside a Python loop. The result stays a plain
# list of 0/1 ints, as before.
y_test = (survival_probability > 0.5).astype(int).tolist()

# Preview only the first few predictions instead of dumping the whole list.
y_test[:10]

In [None]:
# Pair each test passenger id with its predicted label; the frame keeps
# test_df's index and the column order PassengerId, Survived.
submission = test_df[["PassengerId"]].assign(Survived=y_test)

submission.describe()

In [None]:
# Write the submission to result.csv without the DataFrame index column.
submission.to_csv(path_or_buf="result.csv", index=False)