In [100]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [101]:
data = pd.read_csv('../ML/datasets/titanic_reserved.csv')
data_train = pd.read_csv('../ML/datasets/titanic_train.csv')

In [102]:
data['fam_size'] = data['sibsp'] + data['parch']
data_train['fam_size'] = data_train['sibsp'] + data_train['parch']
data = data.drop(columns=['ticket', 'cabin', 'home.dest', 'sibsp', 'parch'])
data_train = data_train.drop(columns=['ticket', 'cabin', 'home.dest', 'sibsp', 'parch'])

In [103]:
data.embarked = data.embarked.fillna('S')

In [104]:
data

Unnamed: 0,pclass,name,sex,age,fare,embarked,fam_size
0,3,"Sage, Mr. Frederick",male,,69.5500,S,10
1,1,"Snyder, Mr. John Pillsbury",male,24.0000,82.2667,S,1
2,2,"Ashby, Mr. John",male,57.0000,13.0000,S,0
3,3,"Rosblom, Mr. Viktor Richard",male,18.0000,20.2125,S,2
4,2,"Doling, Miss. Elsie",female,18.0000,23.0000,S,1
...,...,...,...,...,...,...,...
323,1,"Brewe, Dr. Arthur Jackson",male,,39.6000,C,0
324,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45.0000,26.5500,S,0
325,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,82.1708,C,1
326,2,"Caldwell, Master. Alden Gates",male,0.8333,29.0000,S,2


In [105]:
list_honorific = []
for i in data['name']:
	fw = i.index(',')
	lw = i.index('.')
	list_honorific.append(i[fw+2:lw])
data['honorific'] = list_honorific
list_honorific = []
for i in data_train['name']:
	fw = i.index(',')
	lw = i.index('.')
	list_honorific.append(i[fw+2:lw])
data_train['honorific'] = list_honorific

In [106]:
data_train.honorific.value_counts()

Mr              572
Miss            194
Mrs             145
Master           46
Rev               6
Dr                6
Col               3
Mlle              2
Ms                2
Dona              1
the Countess      1
Major             1
Don               1
Capt              1
Name: honorific, dtype: int64

In [107]:
Mr = ['Rev', 'Col', 'Dr', 'Major', 'Don', 'Capt', 'Sir', 'Jonkheer']
Mrs = ['Dona', 'Countess', 'the Countess', 'Lady', 'Mme']
Miss = ['Mlle', 'Ms']

data.replace({'honorific': Mr}, 'Mr', inplace=True)
data.replace({'honorific': Mrs}, 'Mrs', inplace=True)
data.replace({'honorific': Miss}, 'Miss', inplace=True)

data_train.replace({'honorific': Mr}, 'Mr', inplace=True)
data_train.replace({'honorific': Mrs}, 'Mrs', inplace=True)
data_train.replace({'honorific': Miss}, 'Miss', inplace=True)

In [108]:
data[data.honorific == 'Mr'] = data[data.honorific == 'Mr'].fillna(data[data.honorific == 'Mr'].age.mean())
data[data.honorific == 'Miss'] = data[data.honorific == 'Miss'].fillna(data[data.honorific == 'Miss'].age.mean())
data[data.honorific == 'Mrs'] = data[data.honorific == 'Mrs'].fillna(data[data.honorific == 'Mrs'].age.mean())
data[data.honorific == 'Master'] = data[data.honorific == 'Master'].fillna(data[data.honorific == 'Master'].age.mean())

In [109]:
data_train[data_train.honorific == 'Mr'] = data_train[data_train.honorific == 'Mr'].fillna(data_train[data_train.honorific == 'Mr'].age.mean())
data_train[data_train.honorific == 'Miss'] = data_train[data_train.honorific == 'Miss'].fillna(data_train[data_train.honorific == 'Miss'].age.mean())
data_train[data_train.honorific == 'Mrs'] = data_train[data_train.honorific == 'Mrs'].fillna(data_train[data_train.honorific == 'Mrs'].age.mean())
data_train[data_train.honorific == 'Master'] = data_train[data_train.honorific == 'Master'].fillna(data_train[data_train.honorific == 'Master'].age.mean())

In [110]:
data = data.drop(columns='name')
data_train = data_train.drop(columns='name')

In [111]:
data

Unnamed: 0,pclass,sex,age,fare,embarked,fam_size,honorific
0,3,male,32.536913,69.5500,S,10,Mr
1,1,male,24.000000,82.2667,S,1,Mr
2,2,male,57.000000,13.0000,S,0,Mr
3,3,male,18.000000,20.2125,S,2,Mr
4,2,female,18.000000,23.0000,S,1,Miss
...,...,...,...,...,...,...,...
323,1,male,32.536913,39.6000,C,0,Mr
324,1,male,45.000000,26.5500,S,0,Mr
325,1,female,36.936170,82.1708,C,1,Mrs
326,2,male,0.833300,29.0000,S,2,Master


In [112]:
data = pd.get_dummies(data, drop_first=True)
data_train = pd.get_dummies(data_train, drop_first=True)

In [113]:
data

Unnamed: 0,pclass,age,fare,fam_size,sex_male,embarked_Q,embarked_S,honorific_Miss,honorific_Mr,honorific_Mrs
0,3,32.536913,69.5500,10,1,0,1,0,1,0
1,1,24.000000,82.2667,1,1,0,1,0,1,0
2,2,57.000000,13.0000,0,1,0,1,0,1,0
3,3,18.000000,20.2125,2,1,0,1,0,1,0
4,2,18.000000,23.0000,1,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...
323,1,32.536913,39.6000,0,1,0,0,0,1,0
324,1,45.000000,26.5500,0,1,0,1,0,1,0
325,1,36.936170,82.1708,1,0,0,0,0,0,1
326,2,0.833300,29.0000,2,1,0,1,0,0,0


In [114]:
data_train

Unnamed: 0,pclass,survived,age,fare,fam_size,sex_male,embarked_Q,embarked_S,honorific_Miss,honorific_Mr,honorific_Mrs
0,3,1,22.005765,7.7333,0,0,1,0,1,0,0
1,3,1,22.005765,7.7500,0,0,1,0,1,0,0
2,3,1,38.000000,7.2292,0,0,0,0,0,0,1
3,3,0,22.000000,7.8958,0,1,0,1,0,1,0
4,3,0,16.000000,9.5000,0,1,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
976,3,1,22.005765,8.0500,0,0,0,1,1,0,0
977,3,1,19.000000,8.0500,0,1,0,1,0,1,0
978,2,0,34.000000,13.0000,0,1,0,1,0,1,0
979,3,0,22.000000,8.0500,0,1,0,1,0,1,0


In [118]:
X = data_train.drop(columns='survived')
y = data_train['survived']

clr = LogisticRegression(random_state=1)
clr.fit(X, y)
pred = clr.predict(data)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [119]:
with open('../ML/outputs/file_log_reg_titanic.txt', 'w') as f:
    f.write(f'{list(pred)}')