In [63]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk
from sklearn import preprocessing
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [21]:
df = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')

In [22]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [23]:
df = df.drop(columns=['Name','SibSp','Parch','Ticket','Cabin','Embarked'])

In [24]:
df = df.set_index('PassengerId')

In [25]:
df = pd.get_dummies(df, columns = ['Pclass'], prefix='Class')

In [26]:
df['Sex'] = [1 if sex == 'male' else 0 for sex in df['Sex']]

In [27]:
df

Unnamed: 0_level_0,Survived,Sex,Age,Fare,Class_1,Class_2,Class_3
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,1,22.0,7.2500,0,0,1
2,1,0,38.0,71.2833,1,0,0
3,1,0,26.0,7.9250,0,0,1
4,1,0,35.0,53.1000,1,0,0
5,0,1,35.0,8.0500,0,0,1
...,...,...,...,...,...,...,...
887,0,1,27.0,13.0000,0,1,0
888,1,0,19.0,30.0000,1,0,0
889,0,0,,23.4500,0,0,1
890,1,1,26.0,30.0000,1,0,0


In [35]:
df = df.dropna()

In [48]:
y = df['Survived']
X = df.drop(columns=['Survived'])

In [56]:
mm_scaler = preprocessing.MinMaxScaler()
X_minmax = mm_scaler.fit_transform(X)
X_minmax = pd.DataFrame(data=X_minmax, columns = X.columns)

In [58]:
X_minmax

Unnamed: 0,Sex,Age,Fare,Class_1,Class_2,Class_3
0,1.0,0.271174,0.014151,0.0,0.0,1.0
1,0.0,0.472229,0.139136,1.0,0.0,0.0
2,0.0,0.321438,0.015469,0.0,0.0,1.0
3,0.0,0.434531,0.103644,1.0,0.0,0.0
4,1.0,0.434531,0.015713,0.0,0.0,1.0
...,...,...,...,...,...,...
709,0.0,0.484795,0.056848,0.0,0.0,1.0
710,1.0,0.334004,0.025374,0.0,1.0,0.0
711,0.0,0.233476,0.058556,1.0,0.0,0.0
712,1.0,0.321438,0.058556,1.0,0.0,0.0


In [99]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1337)

In [100]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [101]:
lr = LinearRegression()
knn = KNeighborsClassifier()
gauss = GaussianNB()
svc = SVC()

In [102]:
lr.fit(X_train, y_train)
knn.fit(X_train, y_train)
gauss.fit(X_train, y_train)
svc.fit(X_train, y_train)

SVC()

In [104]:
lr_pred = lr.predict(X_test)
knn_pred = knn.predict(X_test)
gauss_pred = gauss.predict(X_test)
svc_pred = svc.predict(X_test)

In [105]:
lr_pred = [1 if i > 0.5 else 0 for i in lr_pred]

In [106]:
pd.DataFrame({'lr':lr_pred, 'knn':knn_pred, 'gauss':gauss_pred, 'svm':svc_pred, 'real':y_test}).head(50)

Unnamed: 0_level_0,lr,knn,gauss,svm,real
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
34,0,0,0,0,0
272,0,0,0,0,1
135,0,0,0,0,0
15,1,1,0,0,0
589,0,0,0,0,0
252,1,0,0,0,0
136,0,0,0,0,0
694,0,0,0,0,0
273,1,0,1,0,1
718,1,0,1,0,1


In [107]:
lr_acc = accuracy_score(lr_pred, y_test)
knn_acc = accuracy_score(knn_pred, y_test)
gauss_acc = accuracy_score(gauss_pred, y_test)
svc_acc = accuracy_score(svc_pred, y_test)

In [108]:
print('lr_acc: ', lr_acc)
print('knn_acc: ', knn_acc)
print('gauss_acc: ', gauss_acc)
print('svc_acc: ', svc_acc)

lr_acc:  0.7838983050847458
knn_acc:  0.6694915254237288
gauss_acc:  0.8305084745762712
svc_acc:  0.6822033898305084
