In [35]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import random as rd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [15]:
dataset = pd.read_csv("./titanic.csv")

In [16]:
del dataset["PassengerId"]
del dataset["Name"]

In [17]:
feature = dataset.iloc[:, :-1]
label = dataset.iloc[:, -1]

In [18]:
feature["Age"].fillna(feature["Age"].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feature["Age"].fillna(feature["Age"].mean(), inplace=True)


In [19]:
feature["Cabin"].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feature["Cabin"].fillna(0, inplace=True)


In [20]:
label.fillna("S", inplace=True)

In [27]:
one_feature = pd.get_dummies(feature, dtype=int)

In [22]:
scaler = MinMaxScaler()

In [23]:
scaler.fit_transform(one_feature)

array([[0.        , 1.        , 0.27117366, ..., 0.        , 0.        ,
        0.        ],
       [1.        , 0.        , 0.4722292 , ..., 0.        , 0.        ,
        0.        ],
       [1.        , 1.        , 0.32143755, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 1.        , 0.36792055, ..., 0.        , 0.        ,
        0.        ],
       [1.        , 0.        , 0.32143755, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.39683338, ..., 0.        , 0.        ,
        0.        ]])

In [24]:
x_tr, x_ts, y_tr, y_ts = train_test_split(one_feature, label, test_size=0.2)

In [25]:
model1 = SVC()

In [26]:
model1.fit(x_tr, y_tr)

In [28]:
model2 = DecisionTreeClassifier()

In [29]:
model2.fit(x_tr, y_tr)

In [30]:
model3 = MLPClassifier(hidden_layer_sizes=(150,150,150))

In [31]:
model3.fit(x_tr, y_tr)

In [32]:
pred1 = model1.predict(x_ts)

In [33]:
pred2 = model2.predict(x_ts)

In [34]:
pred3 = model3.predict(x_ts)

In [36]:
print(accuracy_score(pred1, y_ts))
print(confusion_matrix(pred1, y_ts))

0.7262569832402235
[[  0   0   0]
 [  0   0   0]
 [ 38  11 130]]


In [37]:
print(accuracy_score(pred2, y_ts))
print(confusion_matrix(pred2, y_ts))

0.88268156424581
[[ 24   0   2]
 [  0   6   0]
 [ 14   5 128]]


In [38]:
print(accuracy_score(pred3, y_ts))
print(confusion_matrix(pred3, y_ts))

0.8156424581005587
[[ 15   2   2]
 [  0   7   4]
 [ 23   2 124]]
