# 타이타닉 생존율 예측
- feature 값 : Pclass, Sex
- label 값 : Survived
1.   KNN 
2.   SVM



In [2]:
import tensorflow as tf
import keras
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [3]:
data = pd.read_csv("./trainClean.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 712 entries, 0 to 711
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   712 non-null    int64  
 1   PassengerId  712 non-null    int64  
 2   Pclass       712 non-null    int64  
 3   Sex          712 non-null    int64  
 4   Age          712 non-null    float64
 5   SibSp        712 non-null    int64  
 6   Parch        712 non-null    int64  
 7   Fare         712 non-null    float64
 8   Survived     712 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 50.2 KB


#### Data 분할하기, 8:2 비율

In [4]:
train, test = train_test_split(data,test_size=0.2)
print(train.shape, test.shape)

(569, 9) (143, 9)


#### train data 전처리

In [6]:
X_train = train[['Pclass','Sex']].values
Y_train = train['Survived'].values
print(X_train.shape, Y_train.shape)

(569, 2) (569,)


In [7]:
X_test = test[['Pclass','Sex']].values
Y_test = test['Survived'].values
print(X_test.shape, Y_test.shape)

(143, 2) (143,)


#### KNN 모델 학습

In [8]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, Y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

#### SVM 모델 학습

In [9]:
clf = svm.SVC(C = 1, gamma = 0.1)
clf.fit(X_train, Y_train)

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

#### 학습된 모델로 결과 예측

In [11]:
k_predict = knn.predict(X_test)
k_predict

array([0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

In [12]:
s_predict = clf.predict(X_test)
s_predict

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])

#### 정확도 확인

In [13]:
acc_score = metrics.accuracy_score(Y_test,k_predict)
print(f"ACC : {acc_score}")

ACC : 0.8041958041958042


In [14]:
acc_score = metrics.accuracy_score(Y_test,s_predict)
print(f"ACC : {acc_score}")

ACC : 0.7552447552447552


In [15]:
comparison = pd.DataFrame({'prediction_knn':k_predict, 'prediction_svm':s_predict, 'ground_truth':Y_test})
comparison

Unnamed: 0,prediction_knn,prediction_svm,ground_truth
0,0,0,0
1,0,0,0
2,0,0,0
3,1,0,1
4,1,0,0
...,...,...,...
138,0,0,0
139,0,0,0
140,0,0,0
141,0,0,0
