In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC

In [3]:
# Synthetic classification data: 10,000 rows, 10 features, 3 of them informative.
# random_state is fixed so the generated dataset is identical after every
# Restart & Run All; without it each run draws different data and the scores
# recorded in this notebook cannot be reproduced.
X, Y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=3,
    random_state=42,
)


In [4]:
# Hold out 20% of the rows for testing; the split is seeded for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Show the dimensions of the training features and training labels.
(X_train.shape, Y_train.shape)

((8000, 10), (8000,))

In [6]:
# Baseline: a single seeded decision tree, scored on the held-out test set.
# fit() returns the estimator itself, so construction and fitting are chained.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, Y_train)
y_pred = dt.predict(X_test)
print(accuracy_score(y_true=Y_test, y_pred=y_pred))

0.899


# Bagging

In [7]:
# Bagging: 500 decision trees, each fitted on a bootstrap sample (drawn with
# replacement) containing 25% as many rows as the training set.
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4; the positional form runs on both old and new releases.
bag = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
)

In [8]:
# Train the bagged-tree ensemble on the training split.
bag.fit(X=X_train, y=Y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.25,
                  n_estimators=500, random_state=42)

In [9]:
# Test-set accuracy of the bagged decision trees.
y_pred1 = bag.predict(X_test)
accuracy_score(y_true=Y_test, y_pred=y_pred1)

0.9255

In [10]:
# Number of row indices drawn for the first estimator of the ensemble.
np.shape(bag.estimators_samples_[0])

(2000,)

In [11]:
# Number of feature indices used by the first estimator of the ensemble.
np.shape(bag.estimators_features_[0])

(10,)

# Using SVM

In [12]:
# Bagging with support-vector classifiers: 300 SVCs, each fitted on a bootstrap
# sample containing 30% as many rows as the training set, trained in parallel
# on all available cores (n_jobs=-1).
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4; the positional form runs on both old and new releases.
bag1 = BaggingClassifier(
    SVC(),
    n_estimators=300,
    max_samples=0.30,
    bootstrap=True,
    random_state=42,
    n_jobs=-1,
)

In [13]:
# Train the bagged-SVC ensemble on the training split.
bag1.fit(X=X_train, y=Y_train)

BaggingClassifier(base_estimator=SVC(), max_samples=0.3, n_estimators=300,
                  n_jobs=-1, random_state=42)

In [14]:
# Test-set accuracy of the bagged SVCs.
y_pred2 = bag1.predict(X_test)
accuracy_score(y_true=Y_test, y_pred=y_pred2)

0.9035

In [15]:
# Number of feature indices used by the first SVC of the ensemble.
np.shape(bag1.estimators_features_[0])

(10,)

In [16]:
# Number of row indices drawn for the first SVC of the ensemble.
np.shape(bag1.estimators_samples_[0])

(2400,)

# Pasting Using DecisionTree

In [17]:
# Pasting: same setup as bagging, but bootstrap=False so each tree's 25% row
# sample is drawn WITHOUT replacement. verbose=1 prints joblib progress.
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4; the positional form runs on both old and new releases.
bag2 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    random_state=42,
    verbose=1,
    n_jobs=-1,
)

In [18]:
# Train the pasting ensemble on the training split.
bag2.fit(X=X_train, y=Y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    4.4s remaining:    4.4s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    4.5s finished


BaggingClassifier(base_estimator=DecisionTreeClassifier(), bootstrap=False,
                  max_samples=0.25, n_estimators=500, n_jobs=-1,
                  random_state=42, verbose=1)

In [19]:
# Test-set accuracy of the pasting ensemble.
y_pred3 = bag2.predict(X_test)
accuracy_score(y_true=Y_test, y_pred=y_pred3)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.2s remaining:    0.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.4s finished


0.9265

# Random Subspaces

In [20]:
# Random Subspaces: every tree sees all training rows (max_samples=1.0,
# bootstrap=False) but only a random draw of features sized at half the
# feature count (max_features=0.5), drawn with replacement
# (bootstrap_features=True).
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4; the positional form runs on both old and new releases.
bag3 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=False,
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
    n_jobs=-1,
)

In [21]:
# Train the random-subspaces ensemble on the training split.
bag3.fit(X=X_train, y=Y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), bootstrap=False,
                  bootstrap_features=True, max_features=0.5, n_estimators=500,
                  n_jobs=-1, random_state=42)

In [22]:
# Test-set accuracy of the random-subspaces ensemble.
# Stored as y_pred4 (not y_pred3) so the pasting model's predictions held in
# y_pred3 are not silently overwritten and the y_predN numbering stays
# one-to-one with the models.
y_pred4 = bag3.predict(X_test)
accuracy_score(Y_test, y_pred4)

0.91

# Random Patches

<p>We do both column and row sampling here</p>

In [23]:
# Random Patches: each tree gets a random subset of BOTH rows and columns —
# 25% of the rows drawn without replacement (bootstrap=False) and a feature
# draw sized at half the feature count, with replacement
# (bootstrap_features=True).
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4; the positional form runs on both old and new releases.
bag4 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=False,
    max_features=0.5,
    bootstrap_features=True,
    random_state=42,
    n_jobs=-1,
)

In [24]:
# Train the random-patches ensemble on the training split.
bag4.fit(X=X_train, y=Y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), bootstrap=False,
                  bootstrap_features=True, max_features=0.5, max_samples=0.25,
                  n_estimators=500, n_jobs=-1, random_state=42)

In [25]:
# Test-set accuracy of the random-patches ensemble.
y_pred5 = bag4.predict(X_test)
accuracy_score(y_true=Y_test, y_pred=y_pred5)

0.901

# OOB Score

In [26]:
# Bagging with out-of-bag evaluation: oob_score=True scores the ensemble on the
# rows left out of each tree's bootstrap sample, giving a validation estimate
# without touching the test set. It requires bootstrap=True.
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4; the positional form runs on both old and new releases.
bag5 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    oob_score=True,
    random_state=42,
    n_jobs=-1,
)

In [27]:
# Train the OOB-scored bagging ensemble on the training split.
bag5.fit(X=X_train, y=Y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.25,
                  n_estimators=500, n_jobs=-1, oob_score=True, random_state=42)

In [28]:
# Out-of-bag score recorded on bag5 during fit (available because oob_score=True).
bag5.oob_score_

0.924625

In [29]:
# Test-set accuracy of the OOB-scored ensemble, for comparison with oob_score_.
y_pred6 = bag5.predict(X_test)
accuracy_score(y_true=Y_test, y_pred=y_pred6)

0.9255