In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, file_name) for file_name in file_names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Importing the plotting packages used below and applying notebook-wide settings
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Render matplotlib figures inline, below the cell that produces them
%matplotlib inline
# NOTE(review): this silences every warning for the rest of the notebook, including
# deprecation warnings from pandas / scikit-learn — consider narrowing the filter
warnings.filterwarnings("ignore")

In [None]:
# Reading the Titanic training and test CSV files from the Kaggle input directory
train = pd.read_csv(filepath_or_buffer="/kaggle/input/titanic/train.csv")
test = pd.read_csv(filepath_or_buffer="/kaggle/input/titanic/test.csv")

In [None]:
# Merging the 2 datasets (train rows first, then test rows) so both can be preprocessed together.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
# As with append, the original row labels are kept, so index values repeat across the two parts.
all_data = pd.concat([train, test])

In [None]:
# Having a look at our data and checking for missing values using the info method
# (info() prints the dtype and the non-null count of every column)
train.info()

In [None]:
# Dropping Cabin column.
# errors="ignore" keeps the cell re-runnable: without it, executing the cell a second
# time raises KeyError because Cabin is already gone.
train = train.drop(columns="Cabin", errors="ignore")

In [None]:
# Keeping only the rows where Embarked is known
train = train.dropna(subset=["Embarked"])

In [None]:
# Creating a column with the Last Names of the passengers:
# the last name is everything in Name before the first comma.
# The vectorized .str accessor replaces the per-row apply(lambda ...).
train["Last Name"] = train["Name"].str.split(",").str[0]

In [None]:
# Creating a column with the Title of each passenger:
# the title is the text between the first comma and the following period,
# with leading whitespace stripped.
# The vectorized .str accessor replaces the per-row apply(lambda ...).
train["Title"] = (
    train["Name"]
    .str.split(",").str[1]
    .str.split(".").str[0]
    .str.lstrip()
)

In [None]:
# Total family members on board = siblings/spouses (SibSp) plus parents/children (Parch)
train["Total Family Members"] = train["SibSp"].add(train["Parch"])

In [None]:
# Flag column: 1 when the passenger has a non-zero family-member count, 0 otherwise
train["Family"] = train["Total Family Members"].ne(0).astype("int64")

In [None]:
# Impute the missing values from Age with the median.
# The result is assigned back to the column instead of calling fillna(..., inplace=True)
# on train["Age"]: that chained in-place call acts on a temporary object under pandas
# Copy-on-Write and would leave the NaNs in the frame.
train["Age"] = train["Age"].fillna(train["Age"].median())

In [None]:
# Having a look at the tendencies of our data using the describe method
# (summary statistics of the numeric columns; the markdown below interprets them)
train.describe()

## From a very basic analysis we can see that according to our subset:
- 38.38% or just over one third of our passengers survived
- The average age of the passengers was almost 30 years old and the median was 28 years old
- The mean fare was about $32.2, but with a large standard deviation due to differences in class

# Exploratory Data Analysis (EDA)

In [None]:
# One bar chart per categorical column, showing how many passengers fall in each category
categorical_columns = ["Survived","Pclass","Sex","Embarked","Title","Family"]
train_cat = train[categorical_columns]
for column_name in categorical_columns:
    category_counts = train_cat[column_name].value_counts()
    category_counts.plot(kind="bar")
    print(f"Bar chart for {column_name} column")
    plt.show()

In [None]:
# One histogram per numeric column
numeric_columns = ["Age","SibSp","Parch","Fare"]
train_num = train[numeric_columns]
for column_name in numeric_columns:
    train_num[column_name].plot.hist()
    print(f"Histogram for {column_name} column")
    plt.show()

In [None]:
# Pairwise Pearson correlation between the numeric columns (pearson is the pandas default)
train_num.corr(method="pearson")

In [None]:
# Drawing the correlation matrix of the numeric columns as a heatmap
correlation_matrix = train_num.corr()
sns.heatmap(data=correlation_matrix)

In [None]:
# Comparison of survival rates with numerical columns: mean of each numeric column per Survived value.
# The columns are selected with a list; the bare-tuple form groupby(...)["a", "b"]
# was deprecated in pandas 1.0 and raises in pandas 2.0+.
train.groupby("Survived")[["Age","SibSp","Parch","Fare"]].mean()

### A Survivor was on average:
- 28 years old
- had fewer siblings/spouses on board than those who did not survive
- had more parents/children on board than those who did not survive
- paid a fare more than twice greater than those who did not make it

In [None]:
# Age distribution of the passengers who survived (Survived == 1)
survivor_ages = train.loc[train["Survived"].eq(1), "Age"]
survivor_ages.plot.hist()

In [None]:
# Age distribution of the passengers who did not survive (Survived == 0)
non_survivor_ages = train.loc[train["Survived"].eq(0), "Age"]
non_survivor_ages.plot.hist()

#### We can see that children (up to 20 years old) were a priority and the majority was saved

In [None]:
# Comparison of survival rates with categorical columns

In [None]:
# Survived and Pclass: passenger counts first, then each cell as a share of the grand total
crosstab_kwargs = dict(index=train["Survived"], columns=train["Pclass"], values=train["PassengerId"],
                       aggfunc='count', margins=True, margins_name="Total")
print(pd.crosstab(**crosstab_kwargs))
print("\n""In relative terms")
# Cells are formatted column by column with Series.map; DataFrame.applymap is
# deprecated since pandas 2.1
print(pd.crosstab(normalize=True, **crosstab_kwargs)
        .apply(lambda column: column.map(lambda x: "{0:.1f}%".format(100*x))))

In [None]:
# Survived and Sex: passenger counts first, then each cell as a share of the grand total
crosstab_kwargs = dict(index=train["Survived"], columns=train["Sex"], values=train["PassengerId"],
                       aggfunc='count', margins=True, margins_name="Total")
print(pd.crosstab(**crosstab_kwargs))
print("\n""In relative terms")
# Cells are formatted column by column with Series.map; DataFrame.applymap is
# deprecated since pandas 2.1
print(pd.crosstab(normalize=True, **crosstab_kwargs)
        .apply(lambda column: column.map(lambda x: "{0:.1f}%".format(100*x))))

In [None]:
# Survived and Embarked: passenger counts first, then each cell as a share of the grand total
crosstab_kwargs = dict(index=train["Survived"], columns=train["Embarked"], values=train["PassengerId"],
                       aggfunc='count', margins=True, margins_name="Total")
print(pd.crosstab(**crosstab_kwargs))
print("\n""In relative terms")
# Cells are formatted column by column with Series.map; DataFrame.applymap is
# deprecated since pandas 2.1
print(pd.crosstab(normalize=True, **crosstab_kwargs)
        .apply(lambda column: column.map(lambda x: "{0:.1f}%".format(100*x))))

In [None]:
# Survived and Title: passenger counts first, then each cell as a share of the grand total
crosstab_kwargs = dict(index=train["Survived"], columns=train["Title"], values=train["PassengerId"],
                       aggfunc='count', margins=True, margins_name="Total")
print(pd.crosstab(**crosstab_kwargs))
print("\n""In relative terms")
# Cells are formatted column by column with Series.map; DataFrame.applymap is
# deprecated since pandas 2.1
print(pd.crosstab(normalize=True, **crosstab_kwargs)
        .apply(lambda column: column.map(lambda x: "{0:.1f}%".format(100*x))))

In [None]:
# Survived and Family: passenger counts first, then each cell as a share of the grand total
crosstab_kwargs = dict(index=train["Survived"], columns=train["Family"], values=train["PassengerId"],
                       aggfunc='count', margins=True, margins_name="Total")
print(pd.crosstab(**crosstab_kwargs))
print("\n""In relative terms")
# Cells are formatted column by column with Series.map; DataFrame.applymap is
# deprecated since pandas 2.1
print(pd.crosstab(normalize=True, **crosstab_kwargs)
        .apply(lambda column: column.map(lambda x: "{0:.1f}%".format(100*x))))

### We get that most Survivors were: 1st class passengers and females and were part of a family

# Model Building

In [None]:
# importing relevant packages
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Applying all the variables we created and the preprocessing to the merged set too

# Dropping Cabin column (errors="ignore" keeps the cell re-runnable once Cabin is gone)
all_data = all_data.drop(columns="Cabin", errors="ignore")

# Dropping rows with null Embarked
# NOTE(review): this also removes any test row with a missing Embarked, which would then
# receive no prediction — confirm the test set has none
all_data = all_data.dropna(subset=["Embarked"])

# Creating a column with the Last Names of the passengers (text before the first comma)
all_data["Last Name"] = all_data["Name"].str.split(",").str[0]

# Creating a column with the Title of each passenger
# (text between the first comma and the following period, leading whitespace stripped)
all_data["Title"] = (
    all_data["Name"]
    .str.split(",").str[1]
    .str.split(".").str[0]
    .str.lstrip()
)

# Creating a column for total family member on board
all_data["Total Family Members"] = all_data["SibSp"] + all_data["Parch"]

# Creating a column if a passenger was part of a family
all_data["Family"] = all_data["Total Family Members"].apply(lambda x: 1 if x!=0 else 0)

# Impute the missing values from Age with the median of the merged set.
# Assigned back rather than fillna(..., inplace=True) on the selected column, which acts
# on a temporary object under pandas Copy-on-Write and would leave the NaNs in place.
all_data["Age"] = all_data["Age"].fillna(all_data["Age"].median())

In [None]:
# Getting dummy variables with OneHotEncoder method
# NOTE: everything below is wrapped in a triple-quoted string, so none of it executes;
# it is kept only as a record of the OneHotEncoder alternative. The get_dummies cell
# further down is what actually builds X_train / X_test / Y_train.
'''enc = OneHotEncoder(handle_unknown='ignore')
all_data_onehot=pd.DataFrame(enc.fit_transform(all_data[["Pclass","Sex","Embarked","Title","Family"]]).toarray())
all_data_onehot.columns=enc.get_feature_names()
all_data_model=all_data
all_data_model = all_data_model.join(all_data_onehot)
all_data_model

# Selecting relevant variables for our model
all_data_model=all_data_model.drop(columns=["Pclass","PassengerId","Name","Ticket","Sex","Embarked","Title","Family","Last Name"])

# Splitting the datasets again
test=all_data_model[all_data_model["Survived"].isnull()]
train=all_data_model[all_data_model["Survived"].notnull()]

X_train=train.drop(columns="Survived")
Y_train=train["Survived"]
X_test=test.drop(columns="Survived")'''

In [None]:
# Getting dummy variables with get_dummies method
# Pclass and Family are cast to str so that pd.get_dummies() treats them as categories
all_data["Pclass"] = all_data["Pclass"].astype(str)
all_data["Family"] = all_data["Family"].astype(str)

# Dummy variables are created for the string columns; numeric columns pass through unchanged
feature_columns = ['Pclass','Age','Sex','SibSp','Parch','Embarked','Title','Total Family Members','Family']
all_dummies = pd.get_dummies(all_data[feature_columns])

# Splitting the datasets again.
# Train rows are the ones with a known Survived value; test rows have it missing.
# This replaces the hard-coded row count (889) and column position (1), which silently
# break if the number of dropped rows or the column order ever changes.
# A plain NumPy mask is used because the merged frame has repeated index labels.
is_train_row = all_data["Survived"].notna().to_numpy()
X_train = all_dummies[is_train_row]
X_test = all_dummies[~is_train_row]

Y_train = all_data.loc[is_train_row, "Survived"]

# Model Building
We are going to test different classification models and we will select the one that performs best.
In order to get dummy variables we chose the get_dummies method instead of OneHotEncoder as it produced better results on our models.
The models we are going to use are:
- Naive Bayes (73.3%)
- Logistic Regression (82.1%)
- Decision Tree (78.3%)
- K Nearest Neighbor (80.5%)
- Random Forest (79.2%)
- Support Vector Classifier (65.1%)
- Xtreme Gradient Boosting (82.3%)
- **Soft Voting Classifier - All Models (82.7%)**

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier

In [None]:
# Naive Bayes: 5-fold cross-validation scores on the unscaled features, then their mean
gnb = GaussianNB()
cv = cross_val_score(estimator=gnb, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Logistic Regression: 5-fold cross-validation scores on the unscaled features, then their mean
lr = LogisticRegression(max_iter=2000)
cv = cross_val_score(estimator=lr, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Decision Tree: 5-fold cross-validation scores on the unscaled features, then their mean
dt = tree.DecisionTreeClassifier(random_state=1)
cv = cross_val_score(estimator=dt, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# K Nearest Neighbors: 5-fold cross-validation scores on the unscaled features, then their mean
knn = KNeighborsClassifier()
cv = cross_val_score(estimator=knn, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Random Forest: 5-fold cross-validation scores on the unscaled features, then their mean
rf = RandomForestClassifier(random_state=1)
cv = cross_val_score(estimator=rf, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Support Vector Classifier: 5-fold cross-validation scores on the unscaled features, then their mean.
# probability=True is needed so the model can take part in the soft-voting ensemble.
svc = SVC(probability=True)
cv = cross_val_score(estimator=svc, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Xtreme Gradient Boosting: 5-fold cross-validation scores on the unscaled features, then their mean
xgb = XGBClassifier(random_state=1, eval_metric="logloss")
cv = cross_val_score(estimator=xgb, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Soft Voting Classifier over the baseline models defined above (the decision tree is not included)
baseline_estimators = [('lr',lr),('knn',knn),('rf',rf),('gnb',gnb),('svc',svc),('xgb',xgb)]
voting_clf = VotingClassifier(estimators=baseline_estimators, voting='soft')
cv = cross_val_score(estimator=voting_clf, X=X_train, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

### We are going to try to scale the data in order to see if we get better results
For the scaled data the performances of the models are:


|Model|Normal Data|Scaled Data|
|-----|-----------|-----------|
|Naive Bayes| **73.3%** | 69.4%|
|Logistic Regression| 82.1%| 82.1%|
|Decision Tree| **78.3%**| 78.2%|
|K Nearest Neighbor| 80.5%|**82.1%**|
|Random Forest| 79.2%| **79.3%**|
|***Support Vector Classifier***| 65.1%| **82.8%**|
|Xtreme Gradient Boosting| **82.3%**| 82.2%|
|Soft Voting Classifier| **82.7%**| 82.6%|

In [None]:
# Scale data
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
all_data_scaled = all_dummies.copy()
# Only Age, SibSp and Parch are standardized; the remaining columns are left as they are.
# NOTE(review): the scaler is fitted on train and test rows together — confirm this is
# intended, since test-set statistics then influence the training features.
scaled_columns = ['Age','SibSp','Parch']
all_data_scaled[scaled_columns] = scale.fit_transform(all_data_scaled[scaled_columns])

# Splitting the datasets again.
# Train rows are the ones with a known Survived value; this replaces the hard-coded
# row count (889) and column position (1). A plain NumPy mask is used because the
# merged frame has repeated index labels.
is_train_row = all_data["Survived"].notna().to_numpy()
X_train_scaled = all_data_scaled[is_train_row]
X_test_scaled = all_data_scaled[~is_train_row]

Y_train_scaled = all_data.loc[is_train_row, "Survived"]

In [None]:
# Naive Bayes: 5-fold cross-validation scores on the scaled features, then their mean
gnb = GaussianNB()
cv = cross_val_score(estimator=gnb, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Logistic Regression: 5-fold cross-validation scores on the scaled features, then their mean
lr = LogisticRegression(max_iter=2000)
cv = cross_val_score(estimator=lr, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Decision Tree: 5-fold cross-validation scores on the scaled features, then their mean
dt = tree.DecisionTreeClassifier(random_state=1)
cv = cross_val_score(estimator=dt, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# K Nearest Neighbors: 5-fold cross-validation scores on the scaled features, then their mean
knn = KNeighborsClassifier()
cv = cross_val_score(estimator=knn, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Random Forest: 5-fold cross-validation scores on the scaled features, then their mean
rf = RandomForestClassifier(random_state=1)
cv = cross_val_score(estimator=rf, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Support Vector Classifier: 5-fold cross-validation scores on the scaled features, then their mean.
# Stored under its own name (svc_scaled) because the prediction cell fits and uses this model later.
svc_scaled = SVC(probability=True)
cv = cross_val_score(estimator=svc_scaled, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Xtreme Gradient Boosting: 5-fold cross-validation scores on the scaled features, then their mean
xgb = XGBClassifier(random_state=1, eval_metric="logloss")
cv = cross_val_score(estimator=xgb, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

In [None]:
# Soft Voting Classifier over the baseline models, evaluated on the scaled features
scaled_estimators = [('lr',lr),('knn',knn),('rf',rf),('gnb',gnb),('svc',svc_scaled),('xgb',xgb)]
voting_clf = VotingClassifier(estimators=scaled_estimators, voting='soft')
cv = cross_val_score(estimator=voting_clf, X=X_train_scaled, y=Y_train, cv=5)
print(cv, cv.mean(), sep="\n")

## Performance tuning
We will try to tune the above models so we can get better results. We won't tune Naive Bayes as there aren't many tuning options and we won't tune the Decision Tree as we are using Random Forest and Xtreme Gradient Boost. We are going to use the scaled data as they produced better results in general

The performances of the tuned models are:

|Model|Baseline (Scaled Data)|Tuned|
|-----|-----------|-----------|
|Logistic Regression| 82.1%| **82.3%**|
|K Nearest Neighbor| 82.1%| **82.2%**|
|***Random Forest***| 79.3%| **83.5%**|
|Support Vector Classifier| 82.8%| **83.3%**|
|Xtreme Gradient Boosting| 82.2%| **83.2%**|

In [None]:
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import RandomizedSearchCV 

In [None]:
# simple performance reporting function
def clf_performance(classifier, model_name):
    """Print a short report for a fitted hyper-parameter search.

    Args:
        classifier: object exposing ``best_score_`` and ``best_params_``
            (the search cells below pass fitted GridSearchCV / RandomizedSearchCV objects).
        model_name: label printed on the first line of the report.

    Returns:
        None; the report is written to stdout.
    """
    report_lines = (
        model_name,
        f'Best Score: {classifier.best_score_!s}',
        f'Best Parameters: {classifier.best_params_!s}',
    )
    for report_line in report_lines:
        print(report_line)

In [None]:
# Logistic Regression: exhaustive grid search (5-fold CV) on the scaled features
lr = LogisticRegression()
param_grid = dict(
    max_iter=[2000],
    penalty=['l1', 'l2'],
    C=np.logspace(-4, 4, 20),
    solver=['liblinear'],
)

clf_lr = GridSearchCV(estimator=lr, param_grid=param_grid, cv=5, verbose=True, n_jobs=-1)
# fit() returns the search object itself, so best_clf_lr and clf_lr are the same object
best_clf_lr = clf_lr.fit(X_train_scaled, Y_train)
clf_performance(best_clf_lr, 'Logistic Regression')

In [None]:
# K Nearest Neighbors: exhaustive grid search (5-fold CV) on the scaled features
knn = KNeighborsClassifier()
param_grid = dict(
    n_neighbors=[3,5,7,9],
    weights=['uniform', 'distance'],
    algorithm=['auto', 'ball_tree','kd_tree'],
    p=[1,2],
)
clf_knn = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5, verbose=True, n_jobs=-1)
# fit() returns the search object itself, so best_clf_knn and clf_knn are the same object
best_clf_knn = clf_knn.fit(X_train_scaled, Y_train)
clf_performance(best_clf_knn, 'KNN')

In [None]:
# Random Forest
# Because the total feature space is so large, I used a randomized search to narrow down the parameters for the model. I took the best model from this and did a more granular search
# NOTE: the search below is wrapped in a triple-quoted string, so it does not execute;
# the next cell runs with the single parameter combination that was kept.

'''rf = RandomForestClassifier(random_state = 1)
param_grid =  {'n_estimators': [1000], 
                                  'bootstrap': [True],
                                  'max_depth': [5],
                                  'max_features': ['auto','sqrt'],
                                  'min_samples_leaf': [1,2,4,10],
                                  'min_samples_split': [2,5,10]}
                                  
clf_rf_rnd = RandomizedSearchCV(rf, param_distributions = param_grid, n_iter = 100, cv = 5, verbose = True, n_jobs = -1)
best_clf_rf_rnd = clf_rf_rnd.fit(X_train_scaled,Y_train)
clf_performance(best_clf_rf_rnd,'Random Forest')'''

In [None]:
# Random Forest: 5-fold CV of the single parameter combination kept from the earlier search
rf = RandomForestClassifier(random_state = 1)
param_grid =  {'n_estimators': [900],
               'bootstrap': [True],
               'max_depth': [5],
               # 'sqrt' is what 'auto' meant for classifiers; 'auto' was deprecated in
               # scikit-learn 1.1 and is rejected from 1.3 onwards
               'max_features': ['sqrt'],
               'min_samples_leaf': [1],
               'min_samples_split': [4]}

# The grid holds exactly one candidate, so n_iter is 1; asking for 100 iterations only
# made scikit-learn warn and fall back to evaluating that single candidate.
clf_rf_rnd = RandomizedSearchCV(rf, param_distributions = param_grid, n_iter = 1, cv = 5, verbose = True, n_jobs = -1)
# fit() returns the search object itself, so best_clf_rf and clf_rf_rnd are the same object
best_clf_rf = clf_rf_rnd.fit(X_train_scaled,Y_train)
clf_performance(best_clf_rf,'Random Forest')

In [None]:
# Support Vector Classifier: exhaustive grid search (5-fold CV) on the scaled features.
# Three separate sub-grids, one per kernel, so each kernel is only combined with the
# parameters listed for it.
svc = SVC(probability=True)
regularization_values = [.1, 1, 10, 100, 1000]
tuned_parameters = [
    dict(kernel=['rbf'], gamma=[.1,.5,1,2,5,10], C=list(regularization_values)),
    dict(kernel=['linear'], C=list(regularization_values)),
    dict(kernel=['poly'], degree=[2,3,4,5], C=list(regularization_values)),
]
param_grid = tuned_parameters
clf_svc = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, verbose=True, n_jobs=-1)
# fit() returns the search object itself, so best_clf_svc and clf_svc are the same object
best_clf_svc = clf_svc.fit(X_train_scaled, Y_train)
clf_performance(best_clf_svc, 'SVC')

In [None]:
# XG Booster: 3-fold CV of a single, fixed parameter combination on the scaled features
xgb = XGBClassifier(random_state=1, eval_metric="logloss")

param_grid = dict(
    n_estimators=[10],
    colsample_bytree=[0.3],
    max_depth=[5],
    reg_alpha=[0.5],
    reg_lambda=[0.5],
    subsample=[0.9],
    learning_rate=[0.5],
    gamma=[0],
    min_child_weight=[0],
)

# NOTE(review): every list above has one value, so the 500 requested iterations collapse
# to a single candidate; cv=3 here differs from the cv=5 used for the other models
clf_xgb_rnd = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_grid,
    n_iter=500,
    cv=3,
    verbose=True,
    n_jobs=-1,
)
# fit() returns the search object itself, so best_clf_xgb and clf_xgb_rnd are the same object
best_clf_xgb = clf_xgb_rnd.fit(X_train_scaled, Y_train)
clf_performance(best_clf_xgb, 'XGB')

In [None]:
# Disabled XGB experiment: the whole cell is a triple-quoted string and does not execute.
# It differs from the active XGB cell above only in max_depth (10 instead of 5) and in the
# name the fitted search is stored under (best_clf_xgb_rnd).
'''xgb = XGBClassifier(random_state = 1,eval_metric="logloss")

param_grid = {
    'n_estimators': [10],
    'colsample_bytree': [0.3],
    'max_depth': [10],
    'reg_alpha': [0.5],
    'reg_lambda': [0.5],
    'subsample': [0.9],
    'learning_rate':[0.5],
    'gamma':[0],
    'min_child_weight':[0],
}

clf_xgb_rnd = RandomizedSearchCV(xgb, param_distributions = param_grid, n_iter = 500, cv = 3, verbose = True, n_jobs = -1)
best_clf_xgb_rnd = clf_xgb_rnd.fit(X_train_scaled,Y_train)
clf_performance(best_clf_xgb_rnd,'XGB')'''

## Creating ensembles of the tuned models using VotingClassifier

1) Experimented with a hard voting classifier of the three best estimators (RF, SVC, XGB) (83.0%)

2) Experimented with a soft voting classifier of the three best estimators (RF, SVC, XGB) (82.2%) (Best Performance)

3) Experimented with soft voting on all estimators (KNN, SVM, RF, LR, XGB) (83.1%)

4) Experimented with hard voting on all estimators (KNN, SVM, RF, LR, XGB) (83.2%)

In [None]:
# Tuned estimators taken from the fitted searches above
best_lr = best_clf_lr.best_estimator_
best_knn = best_clf_knn.best_estimator_
best_svc = best_clf_svc.best_estimator_
best_rf = best_clf_rf.best_estimator_
best_xgb = best_clf_xgb.best_estimator_

# Two ensembles over the three strongest tuned models, two over all five tuned models
top_three_estimators = [('xgb', best_xgb),('rf',best_rf),('svc',best_svc)]
all_tuned_estimators = [('knn',best_knn),('rf',best_rf),('svc',best_svc), ('xgb', best_xgb),('lr', best_lr)]

voting_clf_hard = VotingClassifier(estimators = top_three_estimators, voting = 'hard')
voting_clf_soft = VotingClassifier(estimators = top_three_estimators, voting = 'soft')
voting_clf_all_soft = VotingClassifier(estimators = all_tuned_estimators, voting = 'soft')
voting_clf_all_hard = VotingClassifier(estimators = all_tuned_estimators, voting = 'hard')

ensembles_to_score = [
    ('voting_clf_hard', voting_clf_hard),
    ('voting_clf_soft', voting_clf_soft),
    ('voting_clf_all_soft', voting_clf_all_soft),
    ('voting_clf_all_hard', voting_clf_all_hard),
]

# The ensembles are scored on X_train_scaled: the estimators were tuned on the scaled
# features and the final fit/predict cell also uses them, so scoring on the unscaled
# X_train measured a different setup from the one that is submitted.
# Each cross-validation is run once and reused for the per-fold scores and the mean
# (previously every ensemble was cross-validated twice).
for ensemble_label, ensemble in ensembles_to_score:
    fold_scores = cross_val_score(ensemble, X_train_scaled, Y_train, cv=5)
    print(ensemble_label + ' :', fold_scores)
    print(ensemble_label + ' mean :', fold_scores.mean())
    print()

In [None]:
# Make Predictions
# The voting ensembles and the baseline scaled SVC have only been cross-validated so far
# (cross_val_score works on clones), so they are fitted on the full training set here.
voting_clf_hard.fit(X_train_scaled, Y_train)
voting_clf_soft.fit(X_train_scaled, Y_train)
voting_clf_all_soft.fit(X_train_scaled, Y_train)
voting_clf_all_hard.fit(X_train_scaled, Y_train)
svc_scaled.fit(X_train_scaled, Y_train)
# clf_rf_rnd is not fitted again: the search was already fitted on X_train_scaled in the
# tuning cell, and fitting it here would repeat the whole cross-validated search.

# Y_train holds float labels, so predictions are cast to int for the submission files
y_hat_vc_hard = voting_clf_hard.predict(X_test_scaled).astype(int)
y_hat_vc_soft =  voting_clf_soft.predict(X_test_scaled).astype(int)
y_hat_vc_all_soft = voting_clf_all_soft.predict(X_test_scaled).astype(int)
y_hat_vc_all_hard = voting_clf_all_hard.predict(X_test_scaled).astype(int)
y_hat_rf = clf_rf_rnd.predict(X_test_scaled).astype(int)
y_hat_svc=svc_scaled.predict(X_test_scaled).astype(int)

In [None]:
# Convert output to dataframes: one two-column frame (PassengerId, Survived) per model
final_data = dict(PassengerId=test.PassengerId, Survived=y_hat_rf)
submission = pd.DataFrame(final_data)

final_data_2 = dict(PassengerId=test.PassengerId, Survived=y_hat_vc_hard)
submission_2 = pd.DataFrame(final_data_2)

final_data_3 = dict(PassengerId=test.PassengerId, Survived=y_hat_vc_soft)
submission_3 = pd.DataFrame(final_data_3)

final_data_4 = dict(PassengerId=test.PassengerId, Survived=y_hat_vc_all_soft)
submission_4 = pd.DataFrame(final_data_4)

final_data_5 = dict(PassengerId=test.PassengerId, Survived=y_hat_vc_all_hard)
submission_5 = pd.DataFrame(final_data_5)

final_data_6 = dict(PassengerId=test.PassengerId, Survived=y_hat_svc)
submission_6 = pd.DataFrame(final_data_6)

# Disabled side-by-side comparison of the predictions; it refers to y_hat_vc_all and
# y_hat_vc_xgb, which are not assigned anywhere in the cells shown here.
#final_data_comp = {'PassengerId': test.PassengerId, 'Survived_vc_hard': y_hat_vc_hard, 'Survived_rf': y_hat_rf, 'Survived_vc_soft' : y_hat_vc_soft, 'Survived_vc_all' : y_hat_vc_all,  'Survived_vc_xgb' : y_hat_vc_xgb}
#comparison = pd.DataFrame(data=final_data_comp)

In [None]:
# Prepare submission files: every frame is written to its own CSV, without the index column
submission_targets = [
    (submission, 'submission_rf.csv'),
    (submission_2, 'submission_vc_hard.csv'),
    (submission_3, 'submission_vc_soft.csv'),
    (submission_4, 'submission_vc_all_soft.csv'),
    (submission_5, 'submission_vc_all_hard.csv'),
    (submission_6, 'submission_vc_svc.csv'),
]
for submission_frame, csv_name in submission_targets:
    submission_frame.to_csv(csv_name, index=False)