# Bootstrap: How Reliable are the results?

In this section I perform a bootstrap to get an idea of how dispersed the scoring metrics are. The steps are:
- Load the features and target matrices
- Randomly split the data into train and test sets
- Fit to train data
- Predict the test data
- Make note of metrics
- Rinse and repeat

In [1]:
import pandas as pd
import numpy as np
import glob
import os
from time import time
import pickle

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_selector, make_column_transformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix,plot_roc_curve,plot_precision_recall_curve,plot_confusion_matrix, classification_report

from tqdm import tqdm


import xgboost as xgb

### Utils

In [2]:
def load_obj(path):
    """Read a pickled Python object back from disk.

    Parameters
    ----------
    path : str or path-like
        Location of the pickle file to read.

    Returns
    -------
    object
        The object that was stored in the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only call this on files
    that come from a trusted source.
    """
    with open(path, mode='rb') as stream:
        restored = pickle.load(stream)
    return restored

In [3]:
def save_obj(obj, path):
    """Pickle ``obj`` to ``path`` using the highest available protocol.

    Parameters
    ----------
    obj : object
        Any picklable Python object.
    path : str or path-like
        Destination file; it is created, or overwritten if it already exists.

    Returns
    -------
    None
    """
    with open(path, mode='wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)
        

In [4]:
# Mount Google Drive inside the Colab runtime at /content/drive so the pickled
# data can be read and the per-trial reports written back.
# NOTE: the data cells below use the relative path 'drive/MyDrive/...', which
# resolves to this mount only when the working directory is /content.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Load the pickled features (X) and target (y) for the whole data set from
# the ROC_Derivation folder on the mounted Drive; X is read first, then y.
X, y = (
    load_obj(path=os.path.join('drive', 'MyDrive', 'ROC_Derivation', file_name))
    for file_name in ('X_AllData_raw.pkl', 'y_AllData.pkl')
)

### Pipeline

In [6]:
# Controls the balance of positive and negative weights; useful for
# unbalanced classes. A typical value to consider is
#   sum(negative instances) / sum(positive instances)
# Hard-coded from prior observations of the data.
scale_pos_weight = 88.83

# Preprocessing:
#   - 'description' column -> TF-IDF over 1- to 3-grams, keeping at most 200
#     terms whose document frequency lies between 10% and 90%.
#   - every remaining column -> MinMaxScaler.
preprocessor = ColumnTransformer(
    transformers=[
        ('tfidfvect',
         TfidfVectorizer(ngram_range=(1, 3),
                         max_df=0.9,
                         min_df=0.1,
                         max_features=200),
         'description'),  # apply the vectorizer to this column only
    ],
    remainder=MinMaxScaler(),
    n_jobs=-1,
)

# Full model: preprocessing followed by a gradient-boosted tree classifier.
#
# Changes relative to the previous definition:
#   - `num_boost_round=15` was removed. It is an argument of `xgb.train`, not
#     of the scikit-learn wrapper; the wrapper takes the number of boosting
#     rounds from `n_estimators` and forwarded `num_boost_round` as an unused
#     booster parameter, so it never had any effect on the fitted model.
#   - `eta` is now spelled `learning_rate`, the wrapper's own name for the
#     same setting, so it cannot clash with the wrapper's default value.
#
# NOTE(review): tree_method='gpu_hist' is deprecated from XGBoost 2.0 onwards
# in favour of tree_method='hist' together with device='cuda'. Switch once the
# XGBoost version used by the runtime is confirmed.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('clf', xgb.XGBClassifier(n_estimators=100,
                              max_depth=12,
                              subsample=0.9,
                              scale_pos_weight=scale_pos_weight,
                              learning_rate=0.9,
                              tree_method='gpu_hist')),
])

## Bootstrap Trials

In [None]:
# --- Trial configuration -----------------------------------------------------
# NOTE(review): the start index 867 looks like a manual resume point from an
# interrupted earlier run - confirm. Finished trials are now detected on disk
# and skipped, so TRIAL_START can be lowered without redoing saved work.
TRIAL_START = 867
TRIAL_STOP = 1001   # exclusive upper bound, as in range()
TEST_SIZE = 0.05    # fraction of the data held out in every trial

# One pickled classification report is written per trial into this folder.
REPORT_DIR = os.path.join('drive', 'MyDrive', 'ROC_Derivation', 'classification_dicts')
os.makedirs(REPORT_DIR, exist_ok=True)  # avoid losing a fit to a missing folder

# Key under which classification_report files the positive class.
# NOTE(review): assumes the positive label is reported as '1' - confirm against y.
POSITIVE_LABEL = '1'

# Positive-class metrics, one entry per trial that has a report.
f1_arr = []
precision_arr = []
recall_arr = []

for i in tqdm(range(TRIAL_START, TRIAL_STOP)):
    report_path = os.path.join(REPORT_DIR, 'report_dict' + str(i) + '.pkl')

    if os.path.exists(report_path):
        # Trial already completed in an earlier run: reuse its saved report.
        report_dict = load_obj(report_path)
    else:
        # Stratified random split. Seeding with the trial index keeps every
        # trial reproducible while still giving each trial a different split.
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=TEST_SIZE,
                                                            stratify=y,
                                                            random_state=i)

        # Refit the whole pipeline on this trial's training portion.
        pipeline.fit(X_train, y_train)

        # Score the held-out portion.
        y_pred = pipeline.predict(X_test)
        report_dict = classification_report(y_test,
                                            y_pred,
                                            output_dict=True)

        # Persist immediately so an interruption costs at most one trial.
        save_obj(obj=report_dict, path=report_path)

    # Collect the positive-class metrics; skipped when the report has no
    # entry under POSITIVE_LABEL.
    positive_metrics = report_dict.get(POSITIVE_LABEL)
    if positive_metrics is not None:
        f1_arr.append(positive_metrics['f1-score'])
        precision_arr.append(positive_metrics['precision'])
        recall_arr.append(positive_metrics['recall'])



  5%|▍         | 432/9134 [4:18:14<86:14:33, 35.68s/it]