In [None]:
import re
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

from sklearn.linear_model import LinearRegression, SGDRegressor, LogisticRegression, SGDClassifier
from sklearn.svm import SVR, SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import (BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier,
                              VotingClassifier, AdaBoostClassifier)
from mlxtend.classifier import StackingClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from statistics import mode 
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the three input CSVs: `train` (includes the "Survived" label column),
# `test` (features only) and `sub_sample` (the example submission file).
train, test, sub_sample = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/train.csv",
        "../input/test.csv",
        "../input/gender_submission.csv",
    )
)

In [None]:
# Stack the training features (label removed) on top of the test features so
# that every transformation below is applied to both; rows are renumbered 0..N-1.
train_features = train.drop(columns="Survived")
full_data = pd.concat([train_features, test], ignore_index=True)

In [None]:
# Remove the columns that are not turned into model features.
full_data = full_data.drop(columns=["PassengerId", "Ticket", "Cabin"])

def find_title(name):
    """Extract the honorific (e.g. "Mr", "Miss") from a passenger name.

    The title is the first run of ASCII letters that is preceded by a space
    and immediately followed by a period, so "Braund, Mr. Owen Harris"
    yields "Mr".

    Parameters
    ----------
    name : str
        Full passenger name.

    Returns
    -------
    str
        The matched title without the trailing period, or "" when no title
        is found or ``name`` is not a string (e.g. a missing value).
    """
    if not isinstance(name, str):
        return ""
    # Raw string: "\." is a regex escape for a literal dot, not a Python
    # string escape (a non-raw "\." triggers an invalid-escape warning).
    match = re.search(r' ([A-Za-z]+)\.', name)
    return match.group(1) if match else ""

# --- Title -------------------------------------------------------------------
# Pull the honorific out of every name, then drop the raw name column.
titles = [find_title(name) for name in full_data["Name"]]
full_data["Title"] = titles
full_data = full_data.drop(columns="Name")

# Normalise titles. The replacements are applied to the Title column only, so
# an identical string appearing in any other column can never be rewritten.
rare_titles = ["Lady", "Countess", "Capt", "Col", "Don", "Dr", "Major", "Rev", "Sir", "Jonkheer", "Dona"]
full_data["Title"] = full_data["Title"].replace(rare_titles, "Others")
# "Mlle" (Mademoiselle) and "Ms" are grouped with "Miss"; "Mme" (Madame) with "Mrs".
full_data["Title"] = full_data["Title"].replace({"Mlle": "Miss", "Ms": "Miss", "Mme": "Mrs"})
title_map = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Others": 5}
# A title that is not a key of title_map would become NaN after .map(); fold
# such values into the "Others" code so the column stays fully numeric.
full_data["Title"] = full_data["Title"].map(title_map).fillna(title_map["Others"]).astype(int)

# --- Sex ---------------------------------------------------------------------
sex_map = {"male": 1, "female": 0}
full_data["Sex"] = full_data["Sex"].map(sex_map)

# --- Age ---------------------------------------------------------------------
# Missing ages are filled with the mean age of the combined train+test rows,
# then all ages are truncated to whole numbers.
full_data["Age"] = full_data["Age"].fillna(full_data["Age"].mean()).astype(int)

# --- Fare --------------------------------------------------------------------
# Missing fares are filled with the mean fare, then rescaled to [0, 1].
full_data["Fare"] = full_data["Fare"].fillna(full_data["Fare"].mean())
scaler = MinMaxScaler()
# The scaler expects a 2-D (n_rows, 1) array; flatten the result back to 1-D
# before storing it as a column.
scaled_fare = scaler.fit_transform(full_data[["Fare"]].to_numpy())
full_data["Fare"] = scaled_fare.ravel()

# --- Embarked ----------------------------------------------------------------
# Missing ports default to "S" (presumably the most frequent port - confirm).
embark_map = {"S": 0, "C": 1, "Q": 2}
full_data["Embarked"] = full_data["Embarked"].fillna("S").map(embark_map)

In [None]:
# Display the first five rows of the engineered feature table.
full_data.head(n=5)

In [None]:
# full_data holds the training rows first, followed by the test rows, so the
# first len(train) rows are the ones that have a label.
n_train = train.shape[0]
X = full_data.iloc[:n_train]
y = train["Survived"]

In [None]:
# Hold out 20% of the labelled rows for evaluation; the seed is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Single Weak Classifiers

In [None]:
# Fit each baseline model on the training split and print its accuracy on the
# held-out split. accuracy_score takes (y_true, y_pred) in that order.
lr = LogisticRegression()
lr_pred = lr.fit(X_train, y_train).predict(X_test)
print("Logistic Regression: ", accuracy_score(y_test, lr_pred))

knn = KNeighborsClassifier(n_neighbors=1)
# Fit and predict with `knn` itself so the reported score belongs to the
# nearest-neighbour model rather than to the logistic regression.
knn_pred = knn.fit(X_train, y_train).predict(X_test)
print("K Nearest Neighbors: ", accuracy_score(y_test, knn_pred))

svc = SVC()
svc_pred = svc.fit(X_train, y_train).predict(X_test)
print("Support Vector Classifier: ", accuracy_score(y_test, svc_pred))

# Depth-1 tree (a decision stump) split on information gain.
dt = DecisionTreeClassifier(criterion='entropy', max_depth=1)
dt_pred = dt.fit(X_train, y_train).predict(X_test)
print("Decision Tree: ", accuracy_score(y_test, dt_pred))

rc = RidgeClassifier()
rc_pred = rc.fit(X_train, y_train).predict(X_test)
print("Ridge Classifier: ", accuracy_score(y_test, rc_pred))

# Seeded so the forest (and therefore its score) is reproducible between runs.
rf = RandomForestClassifier(random_state=0)
rf_pred = rf.fit(X_train, y_train).predict(X_test)
print("Random Forest: ", accuracy_score(y_test, rf_pred))

## Voting Classifier

In [None]:
# Majority-vote ("hard" voting) ensemble built from the six baseline models.
voting_members = [
    ("lr", lr),
    ("knn", knn),
    ("svc", svc),
    ("dt", dt),
    ("rc", rc),
    ("rf", rf),
]
vt_clf = VotingClassifier(estimators=voting_members, voting='hard')
# fit() returns the estimator itself, so the hold-out accuracy can be chained
# and shown as the cell output.
vt_clf.fit(X_train, y_train).score(X_test, y_test)

## Bagging

In [None]:
# Bagged variants of four base models. Every ensemble uses 10 estimators, each
# trained on 80% of the rows and 80% of the features.
# NOTE(review): `base_estimator` was renamed to `estimator` in scikit-learn 1.2
# and removed in 1.4 - switch the keyword if the environment is upgraded.
bagging_kwargs = dict(n_estimators=10, max_samples=0.8, max_features=0.8)
bagging_lr = BaggingClassifier(base_estimator=lr, **bagging_kwargs)
bagging_svc = BaggingClassifier(base_estimator=svc, **bagging_kwargs)
bagging_knn = BaggingClassifier(base_estimator=knn, **bagging_kwargs)
bagging_dt = BaggingClassifier(base_estimator=dt, **bagging_kwargs)

In [None]:
# Compare each base model with its bagged counterpart using 5-fold
# cross-validated accuracy on all labelled rows.
# Labels are listed in the same order as clf_list; `lr` is a LogisticRegression.
clf_label = ['Logistic Regression', 'SVC', 'KNN', 'Decision Tree',
             'Bagging LR', 'Bagging SVC', 'Bagging K-NN', 'Bagging Tree']
clf_list = [lr, svc, knn, dt, bagging_lr, bagging_svc, bagging_knn, bagging_dt]
# zip() silently truncates to the shorter sequence, so guard against a
# label/model mismatch that would mislabel or drop results.
assert len(clf_label) == len(clf_list), "each classifier needs exactly one label"

for clf, label in zip(clf_list, clf_label):
    scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print("Accuracy: %.2f (+/- %.2f) %s" % (scores.mean(), scores.std(), label))

## Boosting

In [None]:
# AdaBoost over a depth-1 entropy tree, evaluated with 5-fold cross-validated
# accuracy for several ensemble sizes.
dt = DecisionTreeClassifier(max_depth=1, criterion='entropy')
num_est = [10, 20, 30, 100]
ada_labels = [
    "AdaBoost (n_est=10)",
    "AdaBoost (n_est=20)",
    "AdaBoost (n_est=30)",
    "AdaBoost (n_est=100)",
]

for n_est, label in zip(num_est, ada_labels):
    boosting = AdaBoostClassifier(base_estimator=dt, n_estimators=n_est)
    scores = cross_val_score(boosting, X, y, scoring='accuracy', cv=5)
    mean_acc, std_acc = scores.mean(), scores.std()
    print("Accuracy: %.2f (+/- %.2f) %s" % (mean_acc, std_acc, label))

In [None]:
# XGBoost evaluated with 5-fold cross-validated accuracy for several numbers
# of boosting rounds.
# `rf` is (re)defined here for any later cell that refers to it. It is not
# passed to XGBClassifier: XGBoost builds its own gradient-boosted trees and
# has no `base_estimator` parameter.
rf = RandomForestClassifier(random_state=0, max_depth=10)
num_est = [10, 20, 30, 40]
xgb_labels = ["XGBoost (n_est=10)", "XGBoost (n_est=20)", "XGBoost (n_est=30)", "XGBoost (n_est=40)"]

# The label list has its own name so the loop variable `label` cannot clobber it.
for n_est, label in zip(num_est, xgb_labels):
    boosting = XGBClassifier(n_estimators=n_est)
    scores = cross_val_score(boosting, X, y, cv=5, scoring='accuracy')
    print("Accuracy: %.2f (+/- %.2f) %s" % (scores.mean(), scores.std(), label))
    

In [None]:
# Final model: XGBoost with 10 boosting rounds. XGBClassifier has no
# `base_estimator` parameter, so only the number of rounds is configured.
boosting = XGBClassifier(n_estimators=10)

In [None]:
# Features of the unlabelled rows: everything in full_data after the training
# rows. Stored under its own name so the hold-out split `X_test` (paired with
# `y_test`) is not overwritten and earlier evaluation cells stay re-runnable.
X_submission = full_data.iloc[train.shape[0]:, :]
# Fit the final model on all labelled rows (X, y) rather than only the 80%
# training split, then predict the unlabelled rows.
pred = boosting.fit(X, y).predict(X_submission)

In [None]:
# Show the first five rows and the (rows, columns) shape of the sample submission.
(sub_sample.head(n=5), sub_sample.shape)

In [None]:
# Pair every test passenger id with its predicted "Survived" value.
submission_columns = {
    "PassengerId": test["PassengerId"],
    "Survived": pred,
}
submission = pd.DataFrame(data=submission_columns)

In [None]:
# Write the predictions to disk; the DataFrame index is not written.
submission.to_csv(path_or_buf="submission.csv", index=False)