In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Math

%matplotlib inline

In [2]:
train = pd.read_csv('data/titanic_train_model.csv', index_col="PassengerId")
X = train.drop('Survived', axis=1)
y = train.Survived

In [3]:
train.sort_values(by='Survived').head()

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title,Survived
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0.827377,-0.737695,-0.592481,0.432793,-0.473674,-0.502445,-0.571933,-0.742596,-0.71219,0
520,0.827377,-0.737695,0.177063,-0.474545,-0.473674,-0.489442,-0.571933,-0.742596,-0.71219,0
522,0.827377,-0.737695,-0.592481,-0.474545,-0.473674,-0.489442,-0.571933,-0.742596,-0.71219,0
523,0.827377,-0.737695,0.0,-0.474545,-0.473674,-0.502949,1.000883,-0.742596,-0.71219,0
525,0.827377,-0.737695,0.0,-0.474545,-0.473674,-0.502864,1.000883,-0.742596,-0.71219,0


In [4]:
y.head()

PassengerId
1    0
2    1
3    1
4    1
5    0
Name: Survived, dtype: int64

In [5]:
X[X['Age'].isnull()]

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


## 1. Linear Models

![validation](images/linear_regression.png)

$$ \frac{1}{2m} \sum_i (h(x_i) - y_i)^2 \text{ mit } h(x) = m*x + t$$
$$  $$

In [6]:
from sklearn import linear_model
model_lr = linear_model.LogisticRegression()

### Model fitting

In [7]:
model_lr.fit(X,y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [8]:
model_lr.score(X,y)

0.81818181818181823

There are lots of estimators in sklearn. They all share a common interface. Thus it's easy to use estimators even when you don't fully understand how they work. Though make sure to read at least the doc string documentation about an estimator before using it.

## Estimator API  

### 1. estimator.fit(X_train, y_train)  
- Trains the model using training data X und training labels y

### 2. estimator.predict(X_test)
- Uses the prior trained model to predict labels (callsification), values (regression) for test data

### 3. estimator.score(X_val, y_val)
- Uses estimator.predict(X_val) to predict the labels/values, then uses the given labels y_val to score the result

## 2. How do wen know if our model is actually useful? => Validation
Every estimator has its advantages and drawbacks. Its generalization error can be decomposed in terms of bias, variance and noise. The bias of an estimator is its average error for different data sets. The variance of an estimator indicates how sensitive it is to varying data sets. Noise is a property of the data. The estimator has no impact on the noise in the data thus we can only try to lower the variance and the bias of an estimator. 

error = bias + variance + (noise)

Our goal is find an estimator which is able to generalize well to new/unseen data sets.

### How do we know if our estimator has high bias/variance?
We need some new/unknown data for which we know the correct labels in order to test our model. We can't use our test data as we don't know the correct labels. That's why there is now other possibility but to take it from our valuable training data. This data set is called validation data set.

**Overfitting:** Small training error + Large validation error => Reduce model complexity + Regularization  
**Underfitting:** Large training error + Large validation error => Increase model complexity

In [9]:
train.sort_values(by='Survived', inplace=True)
X = train.drop('Survived', axis=1)
y = train.Survived

### Split data: data => train data, validation data

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=3, stratify=None, shuffle=False)

In [11]:
X_val[y_val == 0]

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


In [12]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=3, stratify=y, shuffle=True)

In [13]:
X_val[y_val == 0].head(10)

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
168,0.827377,1.355574,1.177469,0.432793,4.49154,-0.086664,-0.571933,0.771484,0.163067
500,0.827377,-0.737695,-0.438572,-0.474545,-0.473674,-0.491456,-0.571933,-0.742596,-0.71219
154,0.827377,-0.737695,0.831175,-0.474545,2.008933,-0.356469,-0.571933,-0.742596,-0.71219
264,-1.566107,-0.737695,0.792698,-0.474545,-0.473674,-0.648422,-0.571933,-0.742596,-0.71219
374,-1.566107,-0.737695,-0.592481,-0.474545,-0.473674,2.082512,1.000883,-0.742596,-0.71219
355,0.827377,-0.737695,0.0,-0.474545,-0.473674,-0.502949,1.000883,-0.742596,-0.71219
424,0.827377,1.355574,-0.130754,0.432793,0.76763,-0.358482,-0.571933,0.771484,0.163067
516,-1.566107,-0.737695,1.331378,-0.474545,-0.473674,0.036577,-0.571933,-0.742596,-0.71219
251,0.827377,-0.737695,0.0,-0.474545,-0.473674,-0.502445,-0.571933,-0.742596,-0.71219
120,0.827377,1.355574,-2.131568,3.154809,2.008933,-0.018709,-0.571933,2.285565,1.038323


In [14]:
model_lr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [15]:
model_lr.score(X_val, y_val)

0.83582089552238803

## Regularization (L1 and L2)

In [16]:
model_l1 = linear_model.LogisticRegression(penalty='l1', C=0.1) # C = 1/lambda
model_l2 = linear_model.LogisticRegression(penalty='l2', C=0.01) # C = 1/lambda

In [17]:
model_l1.fit(X_train, y_train)
model_l2.fit(X_train, y_train);

In [18]:
model_l1.score(X_val, y_val), model_l2.score(X_val, y_val)


(0.82462686567164178, 0.81343283582089554)

## First submission

In [19]:
submission = pd.read_csv("data/titanic_gender_submission.csv", index_col="PassengerId")
test = pd.read_csv("data/titanic_test_model.csv", index_col="PassengerId")

In [20]:
model_l1.fit(X,y)

LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [21]:
model_l1.score(X,y)

0.8125701459034792

In [22]:
prediction = model_l1.predict(test)

In [23]:
test.head()

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
892,0.827377,-0.737695,0.369449,-0.474545,-0.473674,-0.490783,-0.571933,-0.742596,-0.71219
893,0.827377,1.355574,1.331378,0.432793,-0.473674,-0.507479,1.000883,0.771484,0.163067
894,-0.369365,-0.737695,2.485693,-0.474545,-0.473674,-0.453367,-0.571933,-0.742596,-0.71219
895,0.827377,-0.737695,-0.207709,-0.474545,-0.473674,-0.474005,1.000883,-0.742596,-0.71219
896,0.827377,1.355574,-0.592481,0.432793,0.76763,-0.401017,1.000883,0.771484,0.163067


In [24]:
prediction = pd.DataFrame(data=prediction, index=test.index, columns=['Survived'])

In [25]:
## TEST
#prediction.Survived = 0 # result: 0.62679 (public leaderboard)

In [26]:
prediction.head()

Unnamed: 0_level_0,Survived
PassengerId,Unnamed: 1_level_1
892,0
893,1
894,0
895,0
896,1


In [27]:
prediction.to_csv("data/submission_01.csv", index=True, index_label="PassengerId")

## Error analysis
The concept is quite simple. You make predictions for the validation set. Then you compare your results with the correct labels. It takes a lot of time but can give you some additional insights.

In [37]:
X_val_prediction = model_l1.predict(X_val)

In [38]:
X_val_prediction = pd.DataFrame(data=X_val_prediction, index=X_val.index, columns=['Survived'])

In [39]:
X_val_prediction.head()

Unnamed: 0_level_0,Survived
PassengerId,Unnamed: 1_level_1
577,1
168,0
500,0
154,0
193,1


In [39]:
X_val_prediction['Survived'] == y_val[1]

PassengerId
577    False
168     True
500     True
154     True
193    False
751    False
264     True
208     True
374     True
355     True
508     True
424    False
413    False
516     True
59     False
251     True
120    False
623     True
305     True
847     True
316    False
40     False
503    False
405    False
112    False
586    False
177     True
60      True
320    False
872    False
       ...  
434     True
812     True
860     True
497    False
380     True
94      True
24      True
557    False
340     True
286     True
284     True
221     True
825     True
9      False
41     False
389     True
638     True
170     True
319    False
393     True
194    False
400    False
426     True
488     True
859    False
813     True
655    False
109     True
127     True
45     False
Name: Survived, Length: 268, dtype: bool

In [40]:
X_val[X_val_prediction['Survived'] == y_val[1]]

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
168,0.827377,1.355574,1.177469e+00,0.432793,4.491540,-0.086664,-0.571933,0.771484,0.163067
500,0.827377,-0.737695,-4.385719e-01,-0.474545,-0.473674,-0.491456,-0.571933,-0.742596,-0.712190
154,0.827377,-0.737695,8.311749e-01,-0.474545,2.008933,-0.356469,-0.571933,-0.742596,-0.712190
264,-1.566107,-0.737695,7.926977e-01,-0.474545,-0.473674,-0.648422,-0.571933,-0.742596,-0.712190
208,0.827377,-0.737695,-2.846632e-01,-0.474545,-0.473674,-0.270141,1.000883,-0.742596,-0.712190
355,0.827377,-0.737695,-2.232906e-16,-0.474545,-0.473674,-0.502949,1.000883,-0.742596,-0.712190
508,-1.566107,-0.737695,-2.232906e-16,-0.474545,-0.473674,-0.113846,-0.571933,-0.742596,-0.712190
516,-1.566107,-0.737695,1.331378e+00,-0.474545,-0.473674,0.036577,-0.571933,-0.742596,-0.712190
251,0.827377,-0.737695,-2.232906e-16,-0.474545,-0.473674,-0.502445,-0.571933,-0.742596,-0.712190
623,0.827377,-0.737695,-7.463893e-01,0.432793,0.767630,-0.331467,1.000883,-0.742596,-0.712190


## 2. Decison Trees

In [41]:
from sklearn.tree import DecisionTreeClassifier
model_dt = DecisionTreeClassifier(criterion='gini')

In [46]:
model_dt.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [47]:
model_dt.score(X_train, y_train) , model_dt.score(X_val, y_val)

(0.9887640449438202, 0.77238805970149249)

Our model is overfitting.
### Excercise: Create a new model which doesn't overfit

In [45]:
## Your Code here


## 3. K Nearest Neighbor

In [46]:
from sklearn.neighbors import KNeighborsClassifier
model_k = KNeighborsClassifier(n_neighbors=1)

In [47]:
model_k.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')

In [48]:
model_k.score(X_train, y_train), model_k.score(X_val, y_val)

(0.9839486356340289, 0.7649253731343284)

In [49]:
train_pred = model_k.predict(X_train)

In [50]:
X_train[train_pred != y_train]

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22,1.682199,-0.737695,0.330972,-0.474545,-0.473674,-0.386671,-0.571933,-0.742596,-0.71219
580,-0.820037,-0.737695,0.177063,-0.474545,-0.473674,-0.488854,-0.571933,-0.742596,-0.71219
668,-0.820037,-0.737695,0.0,-0.474545,-0.473674,-0.491874,-0.571933,-0.742596,-0.71219
833,-0.820037,-0.737695,0.0,-0.474545,-0.473674,-0.502864,1.000883,-0.742596,-0.71219
644,-0.820037,-0.737695,0.0,-0.474545,-0.473674,0.489104,-0.571933,-0.742596,-0.71219
401,-0.820037,-0.737695,0.715743,-0.474545,-0.473674,-0.488854,-0.571933,-0.742596,-0.71219
829,-0.820037,-0.737695,0.0,-0.474545,-0.473674,-0.492378,2.573699,-0.742596,-0.71219
693,-0.820037,-0.737695,0.0,-0.474545,-0.473674,0.489104,-0.571933,-0.742596,-0.71219
569,-0.820037,-0.737695,0.0,-0.474545,-0.473674,-0.502864,1.000883,-0.742596,-0.71219
525,-0.820037,-0.737695,0.0,-0.474545,-0.473674,-0.502864,1.000883,-0.742596,-0.71219


### Excercise: Find the best parameter for k

In [51]:
## Your Code here

## 4. Hyperparameter optimization
Normal parameters: Parameters which can be learnt by the model  
Hyper paramaters: Parameters which define the structure or learning process of the model

In [52]:
from sklearn.model_selection import GridSearchCV

### KNN

In [56]:
## KNN
param_grid = [
  {'n_neighbors': [1,2,3,4,5,6,7,8]}]
gs_knn = GridSearchCV(model_k,param_grid=param_grid)

In [60]:
gs_knn.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [61]:
gs_knn.best_params_

{'n_neighbors': 6}

In [62]:
gs_knn.best_score_

0.8186195826645265

In [63]:
model_k = gs_knn.best_estimator_

### Decision Trees

In [64]:
param_grid = [
    {'max_depth': (range(1,10)),
     'max_features': (range(1,10)),
     'min_samples_leaf': (range(1,10)),}
    ]  

model_dt = DecisionTreeClassifier()

gs_dt = GridSearchCV(model_dt,param_grid)   

In [216]:
gs_dt.fit(X_train, y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'max_depth': range(1, 10), 'max_features': range(1, 10), 'min_samples_leaf': range(1, 10)}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [217]:
gs_dt.best_params_

{'max_depth': 4, 'max_features': 4, 'min_samples_leaf': 4}

### Further reading: RandomSearchCV, scikit-optimize

## 5. Ensemble methods

### Bagging and Random Forest

In [65]:
from sklearn.ensemble.forest import RandomForestClassifier
model_rf = RandomForestClassifier(n_estimators=15)

In [66]:
model_rf.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=15, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [67]:
model_rf.score(X_val, y_val)

0.84701492537313428

### General bagging classifier (combine whatever models you want)

In [68]:
from sklearn.ensemble.bagging import BaggingClassifier
model_bagging = BaggingClassifier()

### Boosting and GBDT

In [72]:
from sklearn.ensemble.gradient_boosting import GradientBoostingClassifier
model_gbdt = GradientBoostingClassifier(learning_rate=0.1)

In [73]:
model_gbdt.fit(X_train, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

In [74]:
model_gbdt.score(X_val, y_val)

0.85447761194029848

In [75]:
model_gbdt.fit(X,y)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

In [76]:
prediction = model_gbdt.predict(test)

In [77]:
prediction = pd.DataFrame(data=prediction, index=test.index, columns=['Survived'])

In [78]:
prediction.head()

Unnamed: 0_level_0,Survived
PassengerId,Unnamed: 1_level_1
892,0
893,0
894,0
895,0
896,1


In [243]:
prediction.to_csv("data/submission_02.csv", index=True, index_label="PassengerId")

In [79]:
gb_grid_params = {'learning_rate': [0.1, 0.05, 0.02, 0.01],
              'max_depth': [4, 6, 8, 10],
              'min_samples_leaf': [20, 50,100,150],
              #'max_features': [1.0, 0.3, 0.1] 
              }

gb_gs = GradientBoostingClassifier(n_estimators = 600)

clf = GridSearchCV(model_gbdt, gb_grid_params, cv=2);

In [80]:
clf.fit(X,y)

GridSearchCV(cv=2, error_score='raise',
       estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'learning_rate': [0.1, 0.05, 0.02, 0.01], 'max_depth': [4, 6, 8, 10], 'min_samples_leaf': [20, 50, 100, 150]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [81]:
clf.best_params_

{'learning_rate': 0.05, 'max_depth': 8, 'min_samples_leaf': 20}

In [82]:
prediction_new = clf.predict(test)

In [83]:
prediction_new = pd.DataFrame(data=prediction_new, index=test.index, columns=['Survived'])

In [84]:
test[prediction_new['Survived'] != prediction['Survived']]

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,PersonType,Title
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
913,-0.820037,-0.737695,-1.592887,-0.474545,0.76763,-0.584579,1.000883,2.285565,1.91358
928,-0.820037,1.355574,0.0,-0.474545,-0.473674,-0.486337,1.000883,0.771484,1.038323
929,-0.820037,1.355574,-0.669435,-0.474545,-0.473674,-0.474005,1.000883,0.771484,1.038323
996,-0.820037,1.355574,-1.054207,0.432793,0.76763,-0.476941,2.573699,0.771484,0.163067
1003,-0.820037,1.355574,0.0,-0.474545,-0.473674,-0.49179,-0.571933,0.771484,1.038323
1045,-0.820037,1.355574,0.48488,-0.474545,2.008933,-0.403115,1.000883,0.771484,0.163067
1052,-0.820037,1.355574,0.0,-0.474545,-0.473674,-0.492714,-0.571933,0.771484,1.038323
1061,-0.820037,1.355574,-0.592481,-0.474545,-0.473674,-0.467965,1.000883,0.771484,1.038323
1084,-0.820037,-0.737695,-1.400501,0.432793,0.76763,-0.356469,1.000883,2.285565,1.91358
1108,-0.820037,1.355574,0.0,-0.474545,-0.473674,-0.489776,-0.571933,0.771484,1.038323


In [85]:
prediction_new.to_csv("data/submission_03.csv", index=True, index_label="PassengerId")