In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Rare honorifics are folded into the three common ones.  The lists are kept
# as separate module-level names because other cells may refer to them.
mr = ['Rev', 'Col', 'Dr', 'Major', 'Don', 'Capt']
mrs = ['Dona', 'Countess']
miss = ['Mlle', 'Ms']

# Single lookup table built from the lists above: rare title -> common title.
HONORIFIC_MAP = {
    **dict.fromkeys(mr, 'Mr'),
    **dict.fromkeys(mrs, 'Mrs'),
    **dict.fromkeys(miss, 'Miss'),
}


def add_honorific(frame):
    """Return a copy of *frame* with a normalised ``honorific`` column.

    The honorific is the text between the first comma and the following
    period of the ``name`` column (e.g. ``"Smith, Dr. John"`` -> ``"Dr"``).
    A leading ``"the "`` is removed (``"the Countess"`` -> ``"Countess"``)
    and rare titles are mapped through ``HONORIFIC_MAP``.  Titles that are
    not in the map are kept unchanged; names that do not match the pattern
    yield a missing value.

    Args:
        frame: DataFrame with a string ``name`` column.

    Returns:
        A new DataFrame; the input is not modified.
    """
    out = frame.copy()
    title = out['name'].str.extract(r',\s*([^\.]*)\.')[0].str.strip()
    title = title.str.replace('the ', '', regex=False)
    out['honorific'] = title.replace(HONORIFIC_MAP)
    return out


def impute_age(frame):
    """Return the ``age`` column of *frame* with missing values filled.

    Missing ages are filled with the mean age of the passenger's
    ``honorific`` group.  When that group mean is itself missing (every age
    in the group is missing, or the honorific could not be extracted), the
    overall mean age of *frame* is used instead so that no NaN reaches the
    estimator, which rejects NaN input.

    Args:
        frame: DataFrame with ``honorific`` and numeric ``age`` columns.

    Returns:
        A Series aligned with ``frame.index``.
    """
    group_mean = frame.groupby('honorific')['age'].transform('mean')
    filled = frame['age'].fillna(group_mean)
    # Fallback for rows whose group mean is NaN.
    return filled.fillna(frame['age'].mean())


def encode_like(frame, columns):
    """One-hot encode *frame* and align it to the training *columns*.

    The frame is encoded WITHOUT ``drop_first``: the baseline categories
    dropped during training are simply absent from *columns*, so the reindex
    removes them.  Encoding the frame with its own ``drop_first=True`` would
    drop whichever category happens to sort first in *this* frame, which can
    be a real training feature; the reindex would then zero-fill it and the
    affected rows would silently be treated as the baseline category.

    Args:
        frame: DataFrame of raw features to encode.
        columns: Column index of the training design matrix.

    Returns:
        DataFrame with exactly *columns*; categories unseen in *frame* are 0
        and columns unknown to the training matrix are discarded.
    """
    return pd.get_dummies(frame).reindex(columns=columns, fill_value=0)


# The guard is true both in a notebook kernel and when run as a script, so the
# cell behaves as before; it only prevents file I/O and model fitting when the
# helpers above are imported from elsewhere.
if __name__ == "__main__":
    train = pd.read_csv("titanic_train.csv")
    test = pd.read_csv("titanic_reserved.csv")

    # --- training features -------------------------------------------------
    df = add_honorific(train)
    df['age'] = impute_age(df)

    df_model = df.drop(columns=['name'])
    X = pd.get_dummies(df_model.drop(columns=['survived']), drop_first=True)
    y = df_model['survived']

    # --- reserved (test) features -------------------------------------------
    test_proc = add_honorific(test)
    # NOTE(review): ages are imputed from the test set's own group means, as
    # before; consider reusing the training-set means instead - confirm which
    # behaviour the task expects.
    test_proc['age'] = impute_age(test_proc)

    test_model = test_proc.drop(columns=['name'])
    test_X = encode_like(test_model, X.columns)

    # --- fit and predict ----------------------------------------------------
    model = LogisticRegression(max_iter=2000, random_state=105)
    model.fit(X, y)

    preds = model.predict(test_X)
    list_preds = list(map(int, preds.tolist()))
    print(list_preds)


[0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0]
