
# Contraception Dataset



### Import all modules.


In [None]:

from time import time
import numpy as np
from pandas import read_csv
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from scipy.stats import randint as sp_randint



### Define global functions to be used in the project.


In [None]:

def report(results, n_top=3) -> None:
    """
    Utility function to report best scores of Grid/Random Search CV.

    For every rank from 1 to ``n_top`` it prints each candidate holding that rank,
    followed by its mean/std test score and its parameters.

    :param results: the search results; read through the keys 'rank_test_score',
        'mean_test_score', 'std_test_score' and 'params'.
    :param n_top: the number of top scores to show.
    """
    for i in range(1, n_top + 1):
        # Keep the matching positions under their own name: rebinding ``results``
        # here would throw away the score/params entries that are read below.
        matches = np.flatnonzero(results['rank_test_score'] == i)
        for match in matches:
            print('Model with rank: {0}'.format(i))
            print('Mean validation score: {0:.3f} (std: {1:.3f})'.format(
                  results['mean_test_score'][match],
                  results['std_test_score'][match]))
            print('Parameters: {0}'.format(results['params'][match]))
            print('')



### Prepare the dataset.


In [None]:

# Load the contraception CSV into a DataFrame.
dataset = read_csv('datasets/contraception.csv', engine='python')
# Every column except the last becomes the feature matrix; the last column is the target.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)



### Create the Bagging Ensemble.


In [16]:

# Specify the parameter distributions to sample from, the number of candidate
# settings to draw, and the classifier to be tuned.
param_dist = {'max_depth': sp_randint(4, 30),
              'max_features': sp_randint(1, 9),
              'min_samples_split': sp_randint(2, 11),
              'criterion': ['gini', 'entropy'],
              'n_estimators': sp_randint(10, 80)}
candidates = 8
clf = RandomForestClassifier(random_state=0)

# Run a random search CV.
# n_iter is passed by keyword because current scikit-learn accepts only the
# estimator and the parameter distributions positionally. The fixed random_state
# makes the sampled candidates reproducible, like the other seeded steps here.
random_search = RandomizedSearchCV(clf, param_dist, n_iter=candidates, cv=10,
                                   n_jobs=-1, verbose=5, random_state=0)
start = time()
# NOTE(review): the search is fitted on the full dataset (X, y), which includes
# the rows held out in X_test - confirm this is intended.
random_search.fit(X, y)
# Use str.format replacement fields; printf-style '%' placeholders are not
# substituted by .format() and would be printed literally.
print('RandomizedSearchCV took {0:.2f} seconds for {1:d} candidates parameter settings.'.format(
    time() - start, candidates))
report(random_search.cv_results_)





KeyboardInterrupt: 

In [None]:

# Create a bagging ensemble of 100 estimators built on `clf` (the
# RandomForestClassifier instantiated above), fit it on the training split and
# predict the held-out test split.
bagging = BaggingClassifier(clf, n_estimators=100, n_jobs=-1, random_state=0)
model = bagging.fit(X_train, y_train)
y_pred = model.predict(X_test)
# The format string needs a replacement field; without one the classification
# report is computed but never shown.
print('Final Result:\n{0}'.format(classification_report(y_test, y_pred)))