## Notebook d'application du modèle pour la génération de l'output

In [65]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

In [66]:
# Both CSV files are comma-separated and are read from the local ./data directory.
train_df = pd.read_csv(filepath_or_buffer="./data/train.csv", delimiter=",")
test_df = pd.read_csv(filepath_or_buffer="./data/test.csv", delimiter=",")

In [67]:
def prepare_df(df):
    """Return a cleaned copy of *df* ready to be split into features/target.

    The columns "Name", "Sex", "Ticket", "Cabin" and "Embarked" are removed,
    then every row that still holds at least one missing value is discarded.
    The input frame itself is never modified.

    Args:
        df: DataFrame containing (at least) the five columns listed above.

    Returns:
        A new DataFrame with the remaining columns and only complete rows;
        the original row labels of the kept rows are preserved.
    """
    # Work on an independent copy so the caller's frame stays untouched.
    working_df = df.copy(deep=True)
    working_df = working_df.drop(
        columns=["Name", "Sex", "Ticket", "Cabin", "Embarked"]
    )
    # Missing values are only checked on the columns that are left.
    return working_df.dropna()

In [68]:
# Apply the same cleaning step to the training and the test data.
prepared_train_df = prepare_df(train_df)
prepared_test_df = prepare_df(test_df)

# "Survived" is the target; "PassengerId" is an identifier and is kept out
# of the feature matrices.
y_train = prepared_train_df["Survived"]
X_train = prepared_train_df.drop(columns=["Survived", "PassengerId"])
X_test = prepared_test_df.drop(columns=["PassengerId"])

# Fit a logistic regression with its default settings, then predict the
# cleaned test rows.
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [69]:
# y_pred

In [70]:
# Values predicted by the model, labelled with the PassengerId of the test
# rows that were kept by the cleaning step.
y_pred_series = pd.Series(
    y_pred,
    index=prepared_test_df["PassengerId"],
    name="Survived",
)
# y_pred_series

In [71]:
# Test rows removed before prediction: passengers whose id has no entry in
# the model's predictions.
eliminated_mask = ~test_df["PassengerId"].isin(y_pred_series.index)
eliminated_df = test_df.loc[eliminated_mask]

# Those passengers receive a constant prediction of 0.
eliminated_pred_series = pd.Series(
    [0] * len(eliminated_df),
    index=eliminated_df["PassengerId"],
    name="Survived",
)
# eliminated_pred_series

In [72]:
# Combine the model predictions with the constant ones, order the result by
# its PassengerId index and write it (index included) to the submission file.
survival_pred_series = pd.concat([y_pred_series, eliminated_pred_series])
survival_pred_series = survival_pred_series.sort_index()
survival_pred_series.to_csv("./data/submission.csv", index=True)