In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show which files are available in the input directory.
input_dir_listing = os.listdir("../input")
print(input_dir_listing)

# Any results you write to the current directory are saved as output.
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

In [None]:
# Read the training split and the test split, in that order.
train, test = map(pd.read_csv, ("../input/train.csv", "../input/test.csv"))

In [None]:
# Preview the first 30 rows of the training frame.
train.iloc[:30]

In [None]:
# Derive two features from the raw "Cabin" string, on both frames:
#   Cabin_Deck - the first character of the cabin string
#   Cabin_Room - the first run of digits found in characters 1-4, as a float
for frame in (train, test):
    cabin = frame["Cabin"]
    frame["Cabin_Deck"] = cabin.str.slice(0, 1)
    room_digits = cabin.str.slice(1, 5).str.extract("([0-9]+)", expand=False)
    frame["Cabin_Room"] = room_digits.astype("float")

In [None]:
# Fill the gaps in the cabin features: missing decks become the placeholder
# "N" and missing room numbers become the mean room number of the TRAINING
# frame (for both frames). The mean is re-evaluated on each pass, so the test
# pass sees the training column after it has already been filled.
for frame in (train, test):
    frame["Cabin_Deck"] = frame["Cabin_Deck"].fillna("N")
    frame["Cabin_Room"] = frame["Cabin_Room"].fillna(train["Cabin_Room"].mean())

In [None]:
# Impute missing values and add engineered features to both frames.
# NOTE: each frame is imputed with its own median/mode (the test frame does
# not reuse the training statistics here).
for df in (train, test):
    # Flag rows whose age is about to be imputed (1 = was missing, 0 = present).
    df["Age_was_missing"] = df["Age"].isnull().astype(int)

    # Assign the filled column back instead of `df[col].fillna(..., inplace=True)`:
    # the in-place call on a selected column is chained assignment, which
    # pandas warns about and which does not modify `df` under Copy-on-Write.
    df["Age"] = df["Age"].fillna(df["Age"].median())
    df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])
    df["Fare"] = df["Fare"].fillna(df["Fare"].median())

    # Passenger plus siblings/spouses plus parents/children.
    df["FamilySize"] = df["SibSp"] + df["Parch"] + 1
    # 1 when the passenger travels alone (family size of exactly one), else 0.
    df["Solo"] = (df["FamilySize"] == 1).astype(int)

    # The title is the text between ", " and the first "." of the name.
    df["Title"] = df["Name"].str.split(", ", expand=True)[1].str.split(".", expand=True)[0]

In [None]:
from sklearn import preprocessing

# Integer-encode the categorical columns.
# The encoder for each column is fitted ONCE on the values of both frames
# together, so a given category maps to the same integer in train and test.
# Fitting separately per frame would assign codes by each frame's own sorted
# category set, which can differ (a category present in only one frame shifts
# every code after it) and silently corrupts the test features.
label = preprocessing.LabelEncoder()
for column in ("Sex", "Embarked", "Cabin_Deck", "Title"):
    label.fit(pd.concat([train[column], test[column]]))
    for df in (train, test):
        df[column] = label.transform(df[column])

In [None]:
# Pull the label column out of the training frame (removes it in place).
TargetLabel = train.pop('Survived')

# Keep the test ids aside before the id column is dropped.
PassengerId = test["PassengerId"]

# Columns that are not used as model features.
drop_column = ["PassengerId","Cabin", "Ticket", "Name"]
for frame in (train, test):
    frame.drop(columns=drop_column, inplace=True)

In [None]:
# Row counts of the training and test feature frames.
ntrain = len(train)
ntest = len(test)

# 5-fold splitter shared by the out-of-fold helper.
# `shuffle=True` is required for `random_state` to have any effect; recent
# scikit-learn versions raise a ValueError when a seed is given without it.
kf = KFold(n_splits=5, shuffle=True, random_state=2017)
def get_oof(classifier, train_x, train_y, test_x, cv=None):
    """Build out-of-fold predictions for stacking.

    For every fold the classifier is fitted on the fold's training rows, then
    used to predict (a) the fold's held-out rows and (b) the whole test set.

    Parameters
    ----------
    classifier : object with ``fit(X, y)`` and ``predict(X)``
        Re-fitted in place on every fold.
    train_x : pandas.DataFrame
        Training features; rows are selected positionally with ``.iloc``.
    train_y : array-like
        Training labels, aligned by position with ``train_x``.
    test_x : pandas.DataFrame
        Test features.
    cv : splitter with ``split(X)``, optional
        Yields ``(train_idx, test_idx)`` positional index pairs. Defaults to
        the module-level ``kf``.

    Returns
    -------
    (oof_train, oof_test) : tuple of numpy.ndarray
        ``oof_train`` has shape ``(len(train_x), 1)`` and holds each row's
        held-out prediction. ``oof_test`` has shape ``(len(test_x), 1)`` and
        holds the per-fold test predictions averaged over the folds.
    """
    splitter = kf if cv is None else cv
    # Materialise the folds so the fold count comes from the splitter itself
    # rather than a hard-coded constant.
    folds = list(splitter.split(train_x))

    # Positional indexing on a plain array: indexing a Series with `[idx]`
    # is label-based and breaks when its index is not 0..n-1.
    labels = np.asarray(train_y)

    oof_train = np.zeros((len(train_x),))
    oof_test_skf = np.zeros((len(folds), len(test_x)))

    for i, (train_idx, test_idx) in enumerate(folds):
        classifier.fit(train_x.iloc[train_idx], labels[train_idx])
        oof_train[test_idx] = classifier.predict(train_x.iloc[test_idx])
        oof_test_skf[i, :] = classifier.predict(test_x)

    # Average the test predictions of the per-fold models.
    oof_test = oof_test_skf.mean(axis=0)

    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)

In [None]:
def tuneParams(classifier,params,train_x,train_y):
    """Grid-search `params` for `classifier` with 10-fold CV on accuracy.

    Returns a tuple ``(best_params, best_score)`` where ``best_params`` is the
    winning parameter combination and ``best_score`` is its mean CV accuracy
    expressed as a percentage rounded to two decimals.
    """
    search = GridSearchCV(
        estimator=classifier,
        param_grid=params,
        scoring='accuracy',
        cv=10,
        n_jobs=-1,  # use every available core
    )
    search.fit(train_x, train_y)

    winning_params = search.best_params_
    accuracy_pct = np.round(search.best_score_ * 100, 2)
    return winning_params, accuracy_pct

In [None]:
# Hyper-parameter grids searched for each base model.

# XGBoost: the key must be 'n_estimators'; the misspelt 'n_estimator' is not a
# recognised parameter, so the number of trees was never actually tuned.
xgbParams = {
    'max_depth': [3, 5, 8, 10],
    'learning_rate': [0.01, 0.1, 0.5],
    'n_estimators': [100, 300, 500],
    'gamma': [0.01, 0.05, 0.1, 0.5],
}
# AdaBoost: the duplicated 0.5 learning rate is removed (it only repeated the
# same fits); the remaining values keep their original order so that ties
# between candidates are broken exactly as before.
ABParams = {'n_estimators': [100, 300, 500], 'learning_rate': [0.01, 0.5, 0.1]}
# Random forest (seed fixed through the grid).
RFParams = {
    'n_estimators': [10, 30, 50],
    'max_depth': [3, 5, 8, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [2, 4, 10],
    'random_state': [5],
}
# Extra trees.
ETParams = {
    'n_estimators': [100, 300, 500],
    'max_depth': [3, 8, 12],
    'min_samples_leaf': [2, 4, 9],
    'verbose': [0],
}
# k-nearest neighbours.
KNNParams = {'n_neighbors': [3, 8, 14], 'leaf_size': [2, 5, 9], 'weights': ['uniform']}
# Support vector classifier.
SVCParams = {'C': [0.01, 0.1, 0.5], 'gamma': [0.01, 0.2]}

In [None]:
# Tune every base model on the training features, printing each result as
# soon as its search finishes. Each entry: (printed tag, estimator, grid).
_search_plan = [
    ("XGB:", XGBClassifier(), xgbParams),
    ("AdaBoost:", AdaBoostClassifier(), ABParams),
    ("RandomForest:", RandomForestClassifier(), RFParams),
    ("ExtraTrees:", ExtraTreesClassifier(), ETParams),
    ("KNeighbors:", KNeighborsClassifier(), KNNParams),
    ("SVC:", SVC(), SVCParams),
]

_tuned = []
for _tag, _estimator, _grid in _search_plan:
    _params, _score = tuneParams(_estimator, _grid, train, TargetLabel)
    print(_tag, _params, _score)
    _tuned.append((_params, _score))

# Bind the results to the names used by the following cells.
(
    (xgb_best_Params, xgb_best_score),
    (AB_best_Params, AB_best_score),
    (RF_best_Params, RF_best_score),
    (ET_best_Params, ET_best_score),
    (KNN_best_Params, KNN_best_score),
    (SVC_best_Params, SVC_best_score),
) = _tuned

In [None]:
# Rebuild each base model with its tuned parameters and collect its
# out-of-fold training predictions and fold-averaged test predictions.
_tuned_models = (
    XGBClassifier(**xgb_best_Params),
    AdaBoostClassifier(**AB_best_Params),
    RandomForestClassifier(**RF_best_Params),
    ExtraTreesClassifier(**ET_best_Params),
    KNeighborsClassifier(**KNN_best_Params),
    SVC(**SVC_best_Params),
)

(
    (xgb_oof_train, xgb_oof_test),
    (AB_oof_train, AB_oof_test),
    (RF_oof_train, RF_oof_test),
    (ET_oof_train, ET_oof_test),
    (KNN_oof_train, KNN_oof_test),
    (SVC_oof_train, SVC_oof_test),
) = [get_oof(_model, train, TargetLabel, test) for _model in _tuned_models]

In [None]:
# Second-level feature matrices: one column per base model, in the order
# XGB, RandomForest, ExtraTrees, KNeighbors, SVC.
# (The AdaBoost out-of-fold predictions are not part of the stack.)
final_train = np.hstack(
    [xgb_oof_train, RF_oof_train, ET_oof_train, KNN_oof_train, SVC_oof_train]
)
final_test = np.hstack(
    [xgb_oof_test, RF_oof_test, ET_oof_test, KNN_oof_test, SVC_oof_test]
)

In [None]:
# Tune the logistic-regression meta-learner on the stacked predictions.
# The solver is pinned to 'liblinear' inside the grid because it supports both
# the 'l1' and 'l2' penalties; the library's current default solver rejects
# 'l1', which would make half of the candidates fail. Keeping it in the grid
# also means the solver travels with `LR_best_Params` to the final model.
LRParams = {
    'penalty': ['l1', 'l2'],
    'C': [0.01, 0.05, 0.1, 0.2],
    'solver': ['liblinear'],
}
LR_best_Params,LR_best_score = tuneParams(LogisticRegression(),LRParams,final_train,TargetLabel)
print("LR:",LR_best_Params,LR_best_score)

In [None]:
# Train the meta-learner with its tuned parameters on the stacked training
# predictions, then classify the stacked test predictions.
LR_model = LogisticRegression(**LR_best_Params).fit(final_train, TargetLabel)
prediction = LR_model.predict(final_test)

In [None]:
# Assemble the submission table (passenger id + predicted label) and write
# it to the working directory without the index column.
output = pd.DataFrame({'PassengerId': PassengerId})
output['Survived'] = prediction
output.to_csv('submission.csv', index=False)