In [51]:
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.svm import SVC, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
import warnings
# Suppress every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings('ignore')
%matplotlib inline

In [56]:
# Load the combined dataset and split it on its 'train' / 'test' flag columns.
df = pd.read_csv('./data/full_train_test.csv')
# .copy() gives each split its own independent frame, so cells that later add or
# overwrite columns (e.g. df_test['title_5'] = 0) write to a real DataFrame
# instead of a filtered selection of `df`, which pandas flags with
# SettingWithCopyWarning.
df_train = df[df['train'] == 1].copy()
df_test = df[df['test'] == 1].copy()

#### SVC

In [68]:
# SVC Training
# The raw 'family_size', 'group_size' and 'sex' columns are deliberately left out
# of this model; only their numbered variants listed below are used.
feature_names = [
    'pclass', 'age_scaled', 'true_fare_scaled',
    'family_size_1', 'family_size_2', 'family_size_3',
    'group_size_1', 'group_size_2', 'group_size_3',
    'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4',
]

features = df_train[feature_names]
# Pass the target as a 1-D Series: scikit-learn expects y of shape (n_samples,),
# and a one-column DataFrame only works via an internal ravel plus a
# DataConversionWarning.
label = df_train['survived']

model = SVC(
    C=1,
    gamma=0.1
)
model.fit(features, label)
# Accuracy measured on the training rows themselves, not on held-out data.
model.score(features, label)

0.835016835016835

In [70]:
# SVC Predicting

# This model's feature_names has no 'title_5' entry, so the shared df_test frame
# is left unmodified here (the previous in-place write was unused by this cell).
test_features = df_test[feature_names]

df_sub = pd.DataFrame()
df_sub['PassengerId'] = df_test['passengerid']
# NOTE(review): predictions come back as floats (0.0 / 1.0, see the recorded
# output) -- confirm that the consumer of the CSV accepts that format.
df_sub['Survived'] = model.predict(test_features)
# Every prediction cell writes to this same path, so the most recent run
# overwrites the previous submission file.
sub_path = './data/submission.csv'
df_sub.to_csv(sub_path, index=False)
# Preview the first rows only instead of rendering the whole frame.
df_sub.head(10)

Unnamed: 0,PassengerId,Survived
891,892,0.0
892,893,1.0
893,894,0.0
894,895,0.0
895,896,1.0
896,897,0.0
897,898,1.0
898,899,0.0
899,900,1.0
900,901,0.0


#### MLP 1

In [31]:
# MLP Training

feature_names = [
    'pclass', 'age_scaled', 'true_fare_scaled', 'family_size', 'group_size', 'family_size_1', 'family_size_2',
    'family_size_3', 'group_size_1', 'group_size_2', 'group_size_3', 'sex', 'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4', 'title_5', 'title_6', 'title_7'
]

features = df_train[feature_names]
# Pass the target as a 1-D Series: scikit-learn expects y of shape (n_samples,),
# and a one-column DataFrame only works via an internal ravel plus a
# DataConversionWarning.
label = df_train['survived']

model = MLPClassifier(
    alpha=1e-05,
    hidden_layer_sizes=(4,),  # one hidden layer with 4 units
    max_iter=1500,
    solver='lbfgs',
    # Fixed seed so the weight initialisation (and therefore the score and the
    # submission) is reproducible; a score recorded from an earlier unseeded
    # run may differ from what this produces.
    random_state=0,
)
model.fit(features, label)
# Accuracy measured on the training rows themselves, not on held-out data.
model.score(features, label)

0.8484848484848485

In [34]:
# MLP Predicting

# Force 'title_5' to 0 for the test rows on a derived frame: assign() returns a
# new DataFrame, so the shared df_test is not mutated and this cell gives the
# same result no matter how often or in which order it is run.
test_features = df_test.assign(title_5=0)[feature_names]

df_sub = pd.DataFrame()
df_sub['PassengerId'] = df_test['passengerid']
df_sub['Survived'] = model.predict(test_features)
# Every prediction cell writes to this same path, so the most recent run
# overwrites the previous submission file.
sub_path = './data/submission.csv'
df_sub.to_csv(sub_path, index=False)

#### MLP 2

In [65]:
# MLP Training

feature_names = [
    'pclass', 'age_scaled',
    'true_fare_scaled',
    'family_scaled', 'group_scaled',
    'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4',
]

features = df_train[feature_names]
# Pass the target as a 1-D Series: scikit-learn expects y of shape (n_samples,),
# and a one-column DataFrame only works via an internal ravel plus a
# DataConversionWarning.
label = df_train['survived']

model = MLPClassifier(
    alpha=0.001,
    hidden_layer_sizes=(7,),  # one hidden layer with 7 units
    max_iter=1500,
    solver='adam',
    # Fixed seed: 'adam' shuffles mini-batches and the weights are initialised
    # randomly, so without it every run yields a different model. A score
    # recorded from an earlier unseeded run may differ from what this produces.
    random_state=0,
)
model.fit(features, label)
# Accuracy measured on the training rows themselves, not on held-out data.
model.score(features, label)

0.8327721661054994

In [66]:
# MLP Predicting

# Select the model's input columns from the test rows.
test_features = df_test.loc[:, feature_names]

# Assemble the submission in one step: passenger ids (which also supply the
# index) next to the predicted labels.
df_sub = pd.DataFrame({
    'PassengerId': df_test['passengerid'],
    'Survived': model.predict(test_features),
})

# Write the submission without the index column.
sub_path = './data/submission.csv'
df_sub.to_csv(path_or_buf=sub_path, index=False)

In [67]:
# Preview the first rows only instead of rendering the whole submission frame.
df_sub.head(10)

Unnamed: 0,PassengerId,Survived
891,892,0.0
892,893,1.0
893,894,0.0
894,895,0.0
895,896,1.0
896,897,0.0
897,898,1.0
898,899,0.0
899,900,1.0
900,901,0.0


#### Linear SVM

In [48]:
# Linear SVM Training

# The raw 'family_size', 'group_size' and 'sex' columns are deliberately left out
# of this model; only their numbered variants listed below are used.
feature_names = [
    'pclass', 'age_scaled',
    'family_size_1', 'family_size_2', 'family_size_3',
    'group_size_1', 'group_size_2', 'group_size_3',
    'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4',
]

features = df_train[feature_names]
# Pass the target as a 1-D Series: scikit-learn expects y of shape (n_samples,),
# and a one-column DataFrame only works via an internal ravel plus a
# DataConversionWarning.
label = df_train['survived']

model = LinearSVC(
    C=1
)
model.fit(features, label)
# Accuracy measured on the training rows themselves, not on held-out data.
model.score(features, label)

0.8282828282828283

In [49]:
# Linear SVM Predicting

# Select the model's input columns from the test rows.
test_features = df_test.loc[:, feature_names]

# Assemble the submission in one step: passenger ids (which also supply the
# index) next to the predicted labels.
df_sub = pd.DataFrame({
    'PassengerId': df_test['passengerid'],
    'Survived': model.predict(test_features),
})

# Write the submission without the index column.
sub_path = './data/submission.csv'
df_sub.to_csv(path_or_buf=sub_path, index=False)

#### Logistic Regression

In [59]:
# Logistic Regression Training
feature_names = [
    'pclass', 'age_scaled',
    'true_fare_scaled',
    'family_scaled', 'group_scaled',
    'sex',
    'title_1', 'title_2', 'title_3', 'title_4',
]
features = df_train[feature_names]
# Pass the target as a 1-D Series: scikit-learn expects y of shape (n_samples,),
# and a one-column DataFrame only works via an internal ravel plus a
# DataConversionWarning.
label = df_train['survived']

# `multi_class` is no longer passed: the parameter is deprecated in recent
# scikit-learn releases. NOTE(review): this assumes 'survived' is a two-class
# target (the recorded predictions are only 0.0 / 1.0), for which the default
# behaves the same as the former 'ovr' setting -- confirm.
model = LogisticRegression(
    C=10,
    solver='newton-cg'
)
model.fit(features, label)
# Accuracy measured on the training rows themselves, not on held-out data.
model.score(features, label)

0.8305274971941639

In [60]:
# Logistic Regression Predicting

# Select the model's input columns from the test rows.
test_features = df_test.loc[:, feature_names]

# Assemble the submission in one step: passenger ids (which also supply the
# index) next to the predicted labels.
df_sub = pd.DataFrame({
    'PassengerId': df_test['passengerid'],
    'Survived': model.predict(test_features),
})

# Write the submission without the index column.
sub_path = './data/submission.csv'
df_sub.to_csv(path_or_buf=sub_path, index=False)

In [64]:
# Preview the first rows only instead of rendering the whole submission frame.
df_sub.head(10)

Unnamed: 0,PassengerId,Survived
891,892,0.0
892,893,1.0
893,894,0.0
894,895,0.0
895,896,1.0
896,897,0.0
897,898,1.0
898,899,0.0
899,900,1.0
900,901,0.0
