In [356]:
import pandas as pd
import numpy as np
from numpy import argmax
from numpy import sqrt
import math
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.utils import resample
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from matplotlib import pyplot
import pickle
import csv
import warnings
warnings.filterwarnings("ignore")

In [292]:
def output_values(Y_data):
    Y_t = []
    for e in Y_data:
        if e == 'passed':
            Y_t.append(1)
        else:
            Y_t.append(0) 
    return Y_t

In [293]:
def get_pass_streak(y_project):
    p = y_project[0]
    pass_streak = [y_project[0]]
    for i in range(1, len(y_project)):
        if y_project[i] == 1:
            p += 1
        else:
            p = 0
        pass_streak.append(p)
    return pass_streak

In [449]:
cloud_controller = pd.read_csv('metrics_data/cloud_controller_ng_metrics.csv')

with open('../cloud_controller_ng_indexes.pkl', 'rb') as load_file:
    cloud_train_build_ids = pickle.load(load_file)
    cloud_test_build_ids = pickle.load(load_file)

cloud_controller = cloud_controller [ cloud_controller['tr_build_id'].isin(cloud_train_build_ids)]
cloud_train_build_ids = cloud_controller['tr_build_id'].tolist()
res_cloud_controller = pd.read_csv('../data/cloud_controller_ng.csv', usecols = ['tr_build_id', 'tr_status'])
y_cloud_controller = res_cloud_controller [ res_cloud_controller['tr_build_id'].isin(cloud_train_build_ids)]['tr_status'].tolist()
y_cloud_controller = output_values(y_cloud_controller)
cloud_controller['tr_status'] = y_cloud_controller

In [450]:
geoserver = pd.read_csv('metrics_data/geoserver_metrics.csv')

with open('../geoserver_indexes.pkl', 'rb') as load_file:
    geoserver_train_build_ids = pickle.load(load_file)
    geoserver_test_build_ids = pickle.load(load_file)

geoserver = geoserver [ geoserver['tr_build_id'].isin(geoserver_train_build_ids)]
geoserver_train_build_ids = geoserver['tr_build_id'].tolist()
res_geoserver = pd.read_csv('../data/geoserver.csv', usecols = ['tr_build_id', 'tr_status'])
y_geoserver = res_geoserver [ res_geoserver['tr_build_id'].isin(geoserver_train_build_ids)]['tr_status'].tolist()
y_geoserver = output_values(y_geoserver)
geoserver['tr_status'] = y_geoserver

In [451]:
gradle = pd.read_csv('metrics_data/gradle_metrics copy 2.csv')

with open('../gradle_indexes.pkl', 'rb') as load_file:
    gradle_train_build_ids = pickle.load(load_file)
    gradle_test_build_ids = pickle.load(load_file)

gradle = gradle [ gradle['tr_build_id'].isin(gradle_train_build_ids)]
gradle_train_build_ids = gradle['tr_build_id'].tolist()
res_gradle = pd.read_csv('../data/gradle.csv', usecols = ['tr_build_id', 'tr_status'])
y_gradle = res_gradle [ res_gradle['tr_build_id'].isin(gradle_train_build_ids)]['tr_status'].tolist()
y_gradle = output_values(y_gradle)
gradle['tr_status'] = y_gradle

In [452]:
projects = [gradle, cloud_controller, geoserver]

In [453]:
gradle.drop('num_commits', inplace=True, axis=1)
gradle.drop('reviewer_experience', inplace=True, axis=1)
gradle.drop('num_of_reviewers', inplace=True, axis=1)

cloud_controller.drop('num_commits', inplace=True, axis=1)
cloud_controller.drop('reviewer_experience', inplace=True, axis=1)
cloud_controller.drop('num_of_reviewers', inplace=True, axis=1)

geoserver.drop('num_commits', inplace=True, axis=1)
geoserver.drop('reviewer_experience', inplace=True, axis=1)
geoserver.drop('num_of_reviewers', inplace=True, axis=1)

In [454]:
gradle['num_of_passes'] = get_pass_streak(y_gradle)
cloud_controller['num_of_passes'] = get_pass_streak(y_cloud_controller)
geoserver['num_of_passes'] = get_pass_streak(y_geoserver)

In [439]:
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
max_depth = [int(x) for x in np.linspace(10, 110, num = 5)]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]

In [440]:
param_grid = {'n_estimators': n_estimators,
               'max_depth': max_depth
             }

In [441]:
forest = RandomForestClassifier()
grid_search = GridSearchCV(estimator = forest, param_grid = param_grid, 
                      cv = 3, n_jobs = -1, verbose = 2)

In [303]:
#estimating for geoserver

X_train = geoserver
y_train = y_geoserver

geoserver = pd.read_csv('metrics_data/geoserver_metrics.csv')
X_test = np.array(geoserver [ geoserver['tr_build_id'].isin(geoserver_test_build_ids)])
geoserver_train_build_ids = geoserver['tr_build_id'].tolist()
res_geoserver = pd.read_csv('../data/geoserver.csv', usecols = ['tr_build_id', 'tr_status'])
y_geoserver = res_geoserver [ res_geoserver['tr_build_id'].isin(geoserver_test_build_ids)]['tr_status'].tolist()
y_test = np.array(output_values(y_geoserver))

In [304]:
print(X_train)

      tr_build_id  patch_size  num_of_comments  test_file_changes  \
0        11905123           0                0                  0   
1        11905554           0                0                  0   
2        11909050           0                0                 31   
3        11939345         397                0                 21   
4        11939991           0                0                  0   
...           ...         ...              ...                ...   
2085    146183118          37                0                  0   
2086    146593522          13                0                 15   
2087    146603882           0                0                  0   
2088    146629589          43                0                 39   
2089    146642184           9                0                104   

      files_added  files_deleted  frequency_file_change  developer_experience  \
0               0              0                      1                     0   
1        

In [305]:
sample_size = int(len(geoserver)*0.7)

best_precision = 0
best_recall = 0
best_accuracy = 0
best_threshold = 0

best_precision_sample = np.array([0])
best_precision_sample_result = np.array([0])
best_recall_sample = np.array([0])
best_recall_sample_result = np.array([0])
best_accuracy_sample = np.array([0])
best_accuracy_sample_result = np.array([0])

best_precision_estimator = 0
best_recall_estimator = 0
best_accuracy_estimator = 0

count = 0

#bootstrapping 100 times
for i in range(100):
    
    file_name = 'rq2_model_' + str(i+1) + '_model.pkl'
    pickle_dump_objects = []
    
    #getting required sample from training data
    sample_train = resample(X_train, replace=True, n_samples=sample_size)
    #sample_train_result = np.array(sample_train['tr_status'])
    sample_train_result = sample_train['tr_status']
    
    #getting the build_ids that are used in training 
    build_ids = sample_train['tr_build_id'].tolist()
    
    #extracting remaining build id's to use for testing
    sample_test = X_train [~X_train['tr_build_id'].isin(build_ids)]
    #sample_test_result = np.array(sample_test['tr_status'])
    sample_test_result = sample_test['tr_status']

    
    print(len(sample_train))
    print(len(sample_test))
    
    #dropping result column and build ids column
    sample_train.drop('tr_status', inplace=True, axis=1)
    sample_train.drop('tr_build_id', inplace=True, axis=1)
    sample_test.drop('tr_status', inplace=True, axis=1)
    sample_test.drop('tr_build_id', inplace=True, axis=1)
    
    
    pickle_dump_objects.append(sample_train)
    pickle_dump_objects.append(sample_train_result)
    pickle_dump_objects.append(sample_test)
    pickle_dump_objects.append(sample_test_result)
    
    #training
    grid_search.fit(sample_train, sample_train_result)
    
    #predicting with sample_test
    sample_pred_result = grid_search.predict_proba(sample_test)
    
    pickle_dump_objects.append(grid_search.best_estimator_)
    pickle_dump_objects.append(sample_pred_result)
    
    
    #getting the best threshold
    pred_vals = sample_pred_result[:, 1]
    fpr, tpr, thresholds = roc_curve(sample_test_result, pred_vals)
    gmeans = sqrt(tpr * (1-fpr))
    ix = argmax(gmeans)
    best_threshold = thresholds[ix]
    
    final_pred_result = []
    #threshold setting
    for i in range(len(pred_vals)):
        if pred_vals[i] > best_threshold:
            final_pred_result.append(1)
        else:
            final_pred_result.append(0)
    
    if count < 3:
        print(sample_test_result)
        print(final_pred_result)
        count += 1
    
    pickle_dump_objects.append(final_pred_result)
    
    with open(file_name, 'wb') as dump_file:
        for obj in pickle_dump_objects:
            pickle.dump(obj, dump_file)
    
    accuracy = accuracy_score(sample_test_result, final_pred_result)
    precision = precision_score(sample_test_result, final_pred_result)
    recall = recall_score(sample_test_result, final_pred_result)
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_accuracy_sample = sample_train
        best_accuracy_sample_result = sample_train_result
        best_accuracy_estimator = grid_search.best_estimator_
        
    if precision > best_precision:
        best_precision = precision
        best_precision_sample = sample_train
        best_precision_sample_result = sample_train_result
        best_precision_estimator = grid_search.best_estimator_
        best_threshold = thresholds[ix]
        
    if recall > best_recall:
        best_recall = recall
        best_recall_sample = sample_train
        best_recall_sample_result = sample_train_result
        best_recall_estimator = grid_search.best_estimator_
    
    print(precision, recall, accuracy)
        
    

1463
892
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1       0
2       0
4       0
6       0
7       0
       ..
2077    1
2078    1
2079    1
2086    0
2089    0
Name: tr_status, Length: 892, dtype: int64
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


0       0
3       0
4       0
5       0
7       0
       ..
2080    1
2082    0
2083    1
2085    0
2089    0
Name: tr_status, Length: 904, dtype: int64
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


0       0
1       0
3       0
6       0
10      0
       ..
2078    1
2082    0
2084    0
2085    0
2087    0
Name: tr_status, Length: 900, dtype: int64
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979508196721312 0.9989270386266095
1463
922
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978118161925602 0.9989154013015185
1463
902
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977777777777778 0.9988913525498891
1463
906
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977578475336323 0.9988962472406181
1463
895
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977116704805492 0.9988826815642458
1463
905
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977924944812362 0.9988950276243094
1463
867
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977628635346756 0.9988465974625144
1463
916
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978768577494692 0.9989082969432315
1463
897
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978118161925602 0.9988851727982163
1463
908
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978354978354979 0.998898678414097
1463
921
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979716024340771 0.998914223669924
1463
897
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977827050997783 0.9988851727982163
1463
927
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979381443298969 0.9989212513484358
1463
894
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978540772532188 0.9988814317673378
1463
917
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977924944812362 0.9989094874591058
1463
921
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978813559322034 0.998914223669924
1463
925
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997872340425532 0.9989189189189189
1463
928
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979550102249489 0.9989224137931034
1463
899
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978260869565218 0.9988876529477196
1463
917
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979035639412998 0.9989094874591058
1463
911
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977324263038548 0.9989023051591658
1463
914
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977728285077951 0.9989059080962801
1463
900
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977924944812362 0.9988888888888889
1463
906
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978354978354979 0.9988962472406181
1463
889
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997716894977169 0.9988751406074241
1463
906
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977924944812362 0.9988962472406181
1463
947
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979633401221996 0.9989440337909187
1463
917
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978991596638656 0.9989094874591058
1463
914
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977777777777778 0.9989059080962801
1463
922
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978494623655914 0.9989154013015185
1463
928
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997872340425532 0.9989224137931034
1463
918
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978070175438597 0.9989106753812637
1463
922
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978902953586498 0.9989154013015185
1463
946
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979123173277662 0.9989429175475687
1463
904
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979757085020243 0.9988938053097345
1463
913
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.5s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.7s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.4s
[CV] END ...................

1.0 0.9977876106194691 0.9989047097480832
1463
909
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978021978021978 0.9988998899889989
1463
905
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.7s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.5s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.2s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.7s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.6s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.0s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.4s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.5s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.6s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   1.9s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.6s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   1.8s
[CV] END ...................

1.0 0.9977827050997783 0.9988950276243094
1463
910
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.7s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.7s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.6s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=600; total time=   0.5s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.5s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.6s
[CV] END ...................

1.0 0.9979338842975206 0.9989010989010989
1463
912
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977924944812362 0.9989035087719298
1463
900
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978070175438597 0.9988888888888889
1463
873
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978070175438597 0.9988545246277205
1463
958
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979123173277662 0.9989561586638831
1463
917
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997867803837953 0.9989094874591058
1463
923
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979423868312757 0.9989165763813651
1463
911
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979253112033195 0.9989023051591658
1463
909
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977827050997783 0.9988998899889989
1463
898
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978494623655914 0.9988864142538976
1463
925
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979508196721312 0.9989189189189189
1463
873
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9976415094339622 0.9988545246277205
1463
925
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979253112033195 0.9989189189189189
1463
921
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979591836734694 0.998914223669924
1463
943
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979338842975206 0.9989395546129375
1463
917
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978902953586498 0.9989094874591058
1463
922
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978768577494692 0.9989154013015185
1463
897
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977876106194691 0.9988851727982163
1463
919
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978213507625272 0.998911860718172
1463
924
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978813559322034 0.9989177489177489
1463
905
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978260869565218 0.9988950276243094
1463
908
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978540772532188 0.998898678414097
1463
903
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997867803837953 0.9988925802879292
1463
903
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978813559322034 0.9988925802879292
1463
924
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978213507625272 0.9989177489177489
1463
920
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977528089887641 0.9989130434782608
1463
903
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978070175438597 0.9988925802879292
1463
908
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978858350951374 0.998898678414097
1463
881
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9976798143851509 0.9988649262202043
1463
912
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977528089887641 0.9989035087719298
1463
900
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977728285077951 0.9988888888888889
1463
921
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978632478632479 0.998914223669924
1463
913
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978494623655914 0.9989047097480832
1463
918
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


[CV] END .....................max_depth=85, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.4s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   2.0s
[CV] END ....................max_depth=110, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=110, n_estimators=800; total time=   0.9s
[CV] END ...................max_depth=110, n_estimators=1400; total time=   1.6s
[CV] END ...................max_depth=110, n_estimators=2000; total time=   2.3s
[CV] END ..................max_depth=None, n_estimators=1000; total time=   1.2s
[CV] END ..................max_depth=None, n_estimators=1400; total time=   1.6s
[CV] END ..................max_depth=None, n_estimators=2000; total time=   1.9s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END ...................

1.0 0.9979035639412998 0.9989106753812637
1463
918
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


[CV] END ..................max_depth=None, n_estimators=2000; total time=   1.7s
[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   2.0s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   2.0s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.6s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.6s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.3s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.1s
[CV] END ...................

[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.3s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.2s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.4s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   2.0s
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.5s
[CV] END ....................max_depth=110, n_estimators=800; total time=   0.9s
[CV] END ...................

1.0 0.9977728285077951 0.9989106753812637
1463
916
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


[CV] END ....................max_depth=110, n_estimators=200; total time=   0.2s
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.5s
[CV] END ....................max_depth=110, n_estimators=600; total time=   0.7s
[CV] END ...................max_depth=110, n_estimators=1000; total time=   1.2s
[CV] END ...................max_depth=110, n_estimators=1400; total time=   1.6s
[CV] END ...................max_depth=110, n_estimators=2000; total time=   2.4s
[CV] END ..................max_depth=None, n_estimators=1200; total time=   1.5s
[CV] END ..................max_depth=None, n_estimators=1600; total time=   1.8s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.6s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.6s
[CV] END ...................

1.0 0.9978494623655914 0.9989082969432315
1463
914
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.5s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.6s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.9s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   2.1s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   2.1s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   2.0s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1600; total time=   1.8s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.3s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.3s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   1.8s
[CV] END ....................max_depth=85, n_estimators=2000; total time=   2.2s
[CV

[CV] END ..................max_depth=None, n_estimators=1600; total time=   1.8s
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.3s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.7s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.6s
[CV] END ...................

[CV] END ..................max_depth=None, n_estimators=1600; total time=   1.8s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.3s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.3s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.7s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.4s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.5s
[CV] END ...................

[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.4s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.7s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.9s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.3s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.6s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   2.1s
[CV] END ....................max_depth=110, n_estimators=800; total time=   0.9s
[CV] END ...................max_depth=110, n_estimators=1400; total time=   1.6s
[CV] END ...................

1.0 0.997872340425532 0.9989059080962801
1463
912
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978448275862069 0.9989035087719298
1463
906
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977876106194691 0.9988962472406181
1463
922
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997907949790795 0.9989154013015185
1463
920
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978448275862069 0.9989130434782608
1463
908
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977528089887641 0.998898678414097
1463
928
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9957173447537473 0.9978448275862069
1463
908
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978768577494692 0.998898678414097
1463
919
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979035639412998 0.998911860718172
1463
906
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978991596638656 0.9988962472406181
1463
911
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978494623655914 0.9989023051591658
1463
894
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979035639412998 0.9988814317673378
1463
922
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979166666666667 0.9989154013015185
1463
888
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978118161925602 0.9988738738738738
1463
897
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997872340425532 0.9988851727982163
1463
922
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979253112033195 0.9989154013015185
1463
897
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9979166666666667 0.9988851727982163
1463
910
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978586723768736 0.9989010989010989
1463
909
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978540772532188 0.9988998899889989
1463
911
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.997872340425532 0.9989023051591658
1463
898
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9976958525345622 0.9988864142538976
1463
892
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9977777777777778 0.9988789237668162
1463
912
Fitting 3 folds for each of 60 candidates, totalling 180 fits


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


1.0 0.9978401727861771 0.9989035087719298
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.5s
[CV] END ....................max_depth=110, n_estimators=600; total time=   0.7s
[CV] END ...................max_depth=110, n_estimators=1000; total time=   1.1s
[CV] END ...................max_depth=110, n_estimators=1600; total time=   1.8s
[CV] END ...................max_depth=None, n_estimators=200; total time=   0.2s
[CV] END ...................max_depth=None, n_estimators=200; total time=   0.2s
[CV] END ...................max_depth=None, n_estimators=400; total time=   0.4s
[CV] END ...................max_depth=None, n_estimators=400; total time=   0.4s
[CV] END ...................max_depth=None, n_estimators=800; total time=   0.9s
[CV] END ..................max_depth=None, n_estimators=1400; total time=   1.6s
[CV] END ..................max_depth=None, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=10, n_estimators=600; total

[CV] END .....................max_depth=85, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   1.8s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.2s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.2s
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=110, n_estimators=600; total time=   0.7s
[CV] END ...................max_depth=110, n_estimators=1000; total time=   1.1s
[CV] END ...................max_depth=110, n_estimators=1600; total time=   1.8s
[CV] END ...................max_depth=110, n_estimators=2000; total time=   2.2s
[CV] END ..................max_depth=None, n_estimators=1200; total time=   1.3s
[CV] END ..................max_depth=None, n_estimators=1800; total time=   1.9s
[CV] END ...................

[CV] END ...................max_depth=None, n_estimators=400; total time=   0.5s
[CV] END ...................max_depth=None, n_estimators=600; total time=   0.7s
[CV] END ..................max_depth=None, n_estimators=1000; total time=   1.1s
[CV] END ..................max_depth=None, n_estimators=1600; total time=   1.8s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.5s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.6s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.9s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   2.0s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.9s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.9s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.2s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.5s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   2.0s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.3s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   2.0s
[CV] END ...................

[CV] END .....................max_depth=85, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.2s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.2s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.2s
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=110, n_estimators=600; total time=   0.7s
[CV] END ...................max_depth=110, n_estimators=1000; total time=   1.1s
[CV] END ...................max_depth=110, n_estimators=1600; total time=   1.8s
[CV] END ...................max_depth=110, n_estimators=2000; total time=   2.1s
[CV] END ..................max_depth=None, n_estimators=1200; total time=   1.3s
[CV] END ..................max_depth=None, n_estimators=1800; total time=   1.9s
[CV] END ...................

[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.5s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.9s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.3s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   2.0s
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=110, n_estimators=800; total time=   0.9s
[CV] END ...................max_depth=110, n_estimators=1200; total time=   1.3s
[CV] END ...................max_depth=110, n_estimators=1800; total time=   2.0s
[CV] END ...................

In [306]:
with open('dump_rq2_ml_streak_chron_model', 'wb') as dump_file:
    pickle.dump(best_precision_estimator, dump_file)
    pickle.dump(best_recall_estimator, dump_file)
    pickle.dump(best_accuracy_estimator, dump_file)
    pickle.dump(best_precision, dump_file)
    pickle.dump(best_precision_sample, dump_file)
    pickle.dump(best_precision_sample_result, dump_file)
    pickle.dump(best_accuracy, dump_file)
    pickle.dump(best_accuracy_sample, dump_file)
    pickle.dump(best_accuracy_sample_result, dump_file)
    pickle.dump(best_recall, dump_file)
    pickle.dump(best_recall_sample, dump_file)
    pickle.dump(best_recall_sample_result, dump_file)

In [395]:
geoserver = pd.read_csv('metrics_data/geoserver_metrics.csv')
geoserver.drop('num_commits', inplace=True, axis=1)
geoserver.drop('reviewer_experience', inplace=True, axis=1)
geoserver.drop('num_of_reviewers', inplace=True, axis=1)


X_test = geoserver [ geoserver['tr_build_id'].isin(geoserver_test_build_ids)]
#print(X_test)
geoserver_test_build_ids = X_test['tr_build_id'].tolist()

X_test.drop('tr_build_id', inplace=True, axis=1)

res_geoserver = pd.read_csv('../data/geoserver.csv', usecols = ['tr_build_id', 'tr_status'])
y_geoserver = res_geoserver [ res_geoserver['tr_build_id'].isin(geoserver_test_build_ids)]['tr_status'].tolist()

y_test = output_values(y_geoserver)
X_test['num_of_passes'] = get_pass_streak(y_test)
print(X_test)

      patch_size  num_of_comments  test_file_changes  files_added  \
1712           0                0                  0            0   
1713          18                0                  0            0   
1714           4                0                  0            0   
1715          10                0                 20            0   
1716           0                0                  0            0   
...          ...              ...                ...          ...   
1860           3                0                 53            1   
1861           0                0                  0            0   
1862           7                0                 34            0   
1863           0                0                  0            0   
1864         898                0                 54            3   

      files_deleted  frequency_file_change  developer_experience  \
1712              0                    233                  1266   
1713              0                

In [384]:
from sklearn.metrics import roc_auc_score
with open('Pickled_data/rq2_model_1_model.pkl', 'rb') as load_file:
    sample_train = pickle.load(load_file)
    sample_train_result = pickle.load(load_file)
    sample_test = pickle.load(load_file)
    sample_test_result = pickle.load(load_file)
    estimator = pickle.load(load_file)
    pred_result = pickle.load(load_file)



sample_pred_result = grid_search.predict_proba(sample_test)

#getting the best threshold
pred_vals = sample_pred_result[:, 1]
fpr, tpr, thresholds = roc_curve(sample_test_result, pred_vals)
gmeans = sqrt(tpr * (1-fpr))
ix = argmax(gmeans)
best_threshold = thresholds[ix]

final_pred_result = []
#threshold setting
for i in range(len(pred_vals)):
    if pred_vals[i] > best_threshold:
        final_pred_result.append(1)
    else:
        final_pred_result.append(0)

roc_auc_score(sample_test_result, final_pred_result)

0.9988938053097345

In [394]:
# estimating the most precise estimator for testing geoserver



estimator = best_precision_estimator
estimator.fit(best_precision_sample, best_precision_sample_result)

pred_results = estimator.predict_proba(X_test)

pred_vals = pred_results[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, pred_vals)
gmeans = sqrt(tpr * (1-fpr))
ix = argmax(gmeans)
best_threshold = thresholds[ix]

final_pred_result = []
#threshold setting
for i in range(len(pred_vals)):
    if pred_vals[i] > best_threshold:
        final_pred_result.append(1)
    else:
        final_pred_result.append(0)

print(best_threshold)
print(roc_auc_score(y_test, final_pred_result))
print(precision_score(y_test, final_pred_result))
print(accuracy_score(y_test, final_pred_result))
print(f1_score(y_test, final_pred_result))

0.84
0.9939024390243902
1.0
0.9934640522875817
0.9938650306748467


In [389]:
print(final_pred_result)

[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [390]:
print(y_test)

[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


# Predictor for Gradle

In [396]:
X_train = gradle
y_train = y_gradle

gradle = pd.read_csv('metrics_data/gradle_metrics copy 2.csv')
X_test = np.array(gradle [ gradle['tr_build_id'].isin(gradle_test_build_ids)])
gradle_train_build_ids = gradle['tr_build_id'].tolist()
res_gradle = pd.read_csv('../data/gradle.csv', usecols = ['tr_build_id', 'tr_status'])
y_gradle = res_gradle [ res_gradle['tr_build_id'].isin(gradle_test_build_ids)]['tr_status'].tolist()
y_test = np.array(output_values(y_gradle))

In [399]:
sample_size = int(len(gradle)*0.7)

best_precision = 0
best_recall = 0
best_accuracy = 0
best_threshold = 0

best_precision_sample = np.array([0])
best_precision_sample_result = np.array([0])
best_recall_sample = np.array([0])
best_recall_sample_result = np.array([0])
best_accuracy_sample = np.array([0])
best_accuracy_sample_result = np.array([0])

best_precision_estimator = 0
best_recall_estimator = 0
best_accuracy_estimator = 0

count = 0

#bootstrapping 100 times
for i in range(100):
    
    file_name = 'Gradle_Pickle/rq2_gradle_model_' + str(i+1) + '_model.pkl'
    pickle_dump_objects = []
    
    #getting required sample from training data
    sample_train = resample(X_train, replace=True, n_samples=sample_size)
    #sample_train_result = np.array(sample_train['tr_status'])
    sample_train_result = sample_train['tr_status']
    
    #getting the build_ids that are used in training 
    build_ids = sample_train['tr_build_id'].tolist()
    
    #extracting remaining build id's to use for testing
    sample_test = X_train [~X_train['tr_build_id'].isin(build_ids)]
    #sample_test_result = np.array(sample_test['tr_status'])
    sample_test_result = sample_test['tr_status']

    
    print(len(sample_train))
    print(len(sample_test))
    
    #dropping result column and build ids column
    sample_train.drop('tr_status', inplace=True, axis=1)
    sample_train.drop('tr_build_id', inplace=True, axis=1)
    sample_test.drop('tr_status', inplace=True, axis=1)
    sample_test.drop('tr_build_id', inplace=True, axis=1)
    
    
    pickle_dump_objects.append(sample_train)
    pickle_dump_objects.append(sample_train_result)
    pickle_dump_objects.append(sample_test)
    pickle_dump_objects.append(sample_test_result)
    
    #training
    grid_search.fit(sample_train, sample_train_result)
    
    #predicting with sample_test
    sample_pred_result = grid_search.predict_proba(sample_test)
    
    pickle_dump_objects.append(grid_search.best_estimator_)
    pickle_dump_objects.append(sample_pred_result)
    
    
    #getting the best threshold
    pred_vals = sample_pred_result[:, 1]
    fpr, tpr, thresholds = roc_curve(sample_test_result, pred_vals)
    gmeans = sqrt(tpr * (1-fpr))
    ix = argmax(gmeans)
    best_threshold = thresholds[ix]
    
    final_pred_result = []
    #threshold setting
    for i in range(len(pred_vals)):
        if pred_vals[i] > best_threshold:
            final_pred_result.append(1)
        else:
            final_pred_result.append(0)
    
    if count < 3:
        print(list(sample_test_result))
        print(final_pred_result)
        count += 1
    
    pickle_dump_objects.append(final_pred_result)
    
    with open(file_name, 'wb') as dump_file:
        for obj in pickle_dump_objects:
            pickle.dump(obj, dump_file)
    
    accuracy = accuracy_score(sample_test_result, final_pred_result)
    precision = precision_score(sample_test_result, final_pred_result)
    recall = recall_score(sample_test_result, final_pred_result)
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_accuracy_sample = sample_train
        best_accuracy_sample_result = sample_train_result
        best_accuracy_estimator = grid_search.best_estimator_
        
    if precision > best_precision:
        best_precision = precision
        best_precision_sample = sample_train
        best_precision_sample_result = sample_train_result
        best_precision_estimator = grid_search.best_estimator_
        best_threshold = thresholds[ix]
        
    if recall > best_recall:
        best_recall = recall
        best_recall_sample = sample_train
        best_recall_sample_result = sample_train_result
        best_recall_estimator = grid_search.best_estimator_
    
    print(precision, recall, accuracy)

798
454
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1.0 0.9976905311778291 0.9977728285077951
798
456
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977272727272727 0.9978070175438597
798
446
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976958525345622 0.9977578475336323
798
438
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976470588235294 0.997716894977169
798
461
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.997737556561086 0.9978308026030369
798
431
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9975903614457832 0.9976798143851509
798
440
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976525821596244 0.9977272727272727
798
461
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977528089887641 0.9978308026030369
798
449
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976798143851509 0.9977728285077951
798
470
Fitting 3 folds for each of 60 candidates, totalling 180

[CV] END .....................max_depth=10, n_estimators=600; total time=   0.5s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.0s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.3s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.7s
[CV] END .....................max_depth=35, n_estimators=800; total time=   1.4s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   1.8s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   1.7s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.3s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.8s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.2s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.1s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   2.3s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.4s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.5s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   1.7s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   0.9s
[CV] END ...................

1.0 0.9978070175438597 0.9978768577494692
798
448
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.6s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.2s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.7s
[CV] END .....................max_depth=35, n_estimators=800; total time=   1.3s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   2.2s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.6s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.

[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.0s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.7s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.8s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   1.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.6s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.3s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.7s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=200; total time=   0.1s
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.5s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.0s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.5s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=35, n_estimators=800; total time=   1.3s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   2.0s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.5s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   1.7s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.3s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.8s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.4s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.4s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   2.1s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.2s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.5s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.5s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   0.9s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.3s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.8s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.6s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   1.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.4s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.3s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.7s
[CV] END ...................

1.0 0.9955555555555555 0.9957446808510638
798
470
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977924944812362 0.997872340425532
798
447
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976905311778291 0.9977628635346756
798
434
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976359338061466 0.9976958525345622
798
461
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.997737556561086 0.9978308026030369
798
442
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976635514018691 0.997737556561086
798
460
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977528089887641 0.9978260869565218
798
439
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976359338061466 0.9977220956719818
798
460
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977272727272727 0.9978260869565218
798
450
Fitting 3 folds for each of 60 candidates, totalling 180 

1.0 0.9977426636568849 0.9978118161925602
798
468
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END ....................max_depth=10, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   1.5s
[CV] END ....................max_depth=85, n_estimators=2000; total time=   1.9s
[CV] END ...................max_depth=110, n_estimators=1200; total time=   1.2s
[CV] END ...................max_depth=110, n_estimators=1800; total time=   1.

[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.3s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.3s
[CV] END ...................

[CV] END ....................max_depth=10, n_estimators=1800; total time=   1.7s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.6s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.6s
[CV] END .....................max_depth=85, n_estimators=800; total time=   1.2s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   3.1s
[CV] END ....................max_depth=85, n_estimators=2000; total time=   2.1s
[CV] END ...................max_depth=110, n_estimators=1000; total time=   1.0s
[CV] END ...................

[CV] END ...................max_depth=110, n_estimators=1200; total time=   1.2s
[CV] END ...................max_depth=110, n_estimators=1800; total time=   1.9s
[CV] END ...................max_depth=None, n_estimators=600; total time=   0.6s
[CV] END ..................max_depth=None, n_estimators=1000; total time=   1.0s
[CV] END ..................max_depth=None, n_estimators=1600; total time=   1.6s
[CV] END ..................max_depth=None, n_estimators=2000; total time=   1.5s
[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.5s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.5s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.2s
[CV] END ...................

1.0 0.9977578475336323 0.9978632478632479
798
461
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977477477477478 0.9978308026030369
798
440
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   1.8s
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=110, n_estimators=800; total time=   0.8s
[CV] END ...................max_depth=110, n_estimators=1200; total time=   1.2s
[CV] END ...................max_depth=110, n_estimators=1800; total time=   1.8s
[CV] END ...................max_depth=None, n_e

[CV] END .....................max_depth=60, n_estimators=600; total time=   0.6s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.2s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.6s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=85, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=110, n_estimators=800; total time=   0.8s
[CV] END ...................max_depth=110, n_estimators=1400; total time=   1.4s
[CV] END ...................max_depth=110, n_estimators=2000; total time=   2.2s
[CV] END ...................max_depth=None, n_estimators=800; total time=   0.9s
[CV] END ..................m

[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   1.6s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.2s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.2s
[CV] END ....................max_depth=110, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=110, n_estimators=600; total time=   0.6s
[CV] END ...................max_depth=110, n_estimators=1000; total time=   1.0s
[CV] END ...................max_depth=110, n_estimators=1600; total time=   1.6s
[CV] END ...................max_depth=110, n_estimators=2000; total time=   2.3s
[CV] END ..................m

[CV] END ..................max_depth=None, n_estimators=1200; total time=   1.2s
[CV] END ..................max_depth=None, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.2s
[CV] END ...................

1.0 0.9976635514018691 0.9977272727272727
798
448
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976689976689976 0.9977678571428571
798
459
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.997737556561086 0.9978213507625272
798
461
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977477477477478 0.9978308026030369
798
452
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976635514018691 0.9977876106194691
798
450
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9976958525345622 0.9977777777777778
798
464
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977678571428571 0.9978448275862069
798
464
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977728285077951 0.9978448275862069
798
455
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9977220956719818 0.9978021978021978
798
471
Fitting 3 folds for each of 60 candidates, totalling 18

In [426]:
gradle = pd.read_csv('metrics_data/gradle_metrics copy 2.csv')
gradle.drop('num_commits', inplace=True, axis=1)
gradle.drop('reviewer_experience', inplace=True, axis=1)
gradle.drop('num_of_reviewers', inplace=True, axis=1)


X_test = gradle [ gradle['tr_build_id'].isin(gradle_test_build_ids)]
#print(X_test)
gradle_test_build_ids = X_test['tr_build_id'].tolist()

X_test.drop('tr_build_id', inplace=True, axis=1)

res_gradle = pd.read_csv('../data/gradle.csv', usecols = ['tr_build_id', 'tr_status'])
y_gradle = res_gradle [ res_gradle['tr_build_id'].isin(gradle_test_build_ids)]['tr_status'].tolist()

y_test = output_values(y_gradle)
#X_test['num_of_passes'] = get_pass_streak(y_test)
print(X_test)

      patch_size  num_of_comments  test_file_changes  files_added  \
1008         212                0                  0            0   
1009          37                0                  0            0   
1010           6                0                  0            0   
1011           0                0                  0            0   
1012         315                0                  4            0   
...          ...              ...                ...          ...   
1135         139                0                 13            1   
1136           6                0                  0            0   
1137         168                0                  0            1   
1138         122                0                  0            0   
1139         144                0                  0            0   

      files_deleted  frequency_file_change  developer_experience  
1008              0                     83                     0  
1009              0                  

In [429]:
estimator = best_precision_estimator
estimator.fit(best_precision_sample, best_precision_sample_result)

queue = 1
model_best_threshold = best_threshold
final_pred_result = []
for val in range(len(X_test)):
    row = X_test.iloc[val]
    row['num_of_passes'] = queue
    build = np.array(row)
    build.reshape(-1, 1)
    pred = estimator.predict_proba([build])
    
    if pred[0][1] > best_threshold:
        final_pred_result.append(1)
        queue += 1
    else:
        final_pred_result.append(0)
        queue = 0    

print(y_test)
print(final_pred_result)

print(model_best_threshold)
print(roc_auc_score(y_test, final_pred_result))
print(precision_score(y_test, final_pred_result))
print(accuracy_score(y_test, final_pred_result))
print(f1_score(y_test, final_pred_result))

print('\n\n\n\n')
X_test['num_of_passes'] = get_pass_streak(y_test)

pred_results = estimator.predict_proba(X_test)

pred_vals = pred_results[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, pred_vals)
gmeans = sqrt(tpr * (1-fpr))
ix = argmax(gmeans)
current_best_threshold = thresholds[ix]

final_pred_result = []
#threshold setting
for i in range(len(pred_vals)):
    if pred_vals[i] > current_best_threshold:
        final_pred_result.append(1)
    else:
        final_pred_result.append(0)

print(final_pred_result)
print(current_best_threshold)
print(roc_auc_score(y_test, final_pred_result))
print(precision_score(y_test, final_pred_result))
print(accuracy_score(y_test, final_pred_result))
print(f1_score(y_test, final_pred_result))

[1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0.92
0.5
0.946969696969697
0.946969696969697
0.9727626459143969





[1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [428]:
grid_search.best_estimator_

RandomForestClassifier(max_depth=10, n_estimators=200)

      patch_size  num_of_comments  test_file_changes  files_added  \
1031           0                0                 58            0   
1032           0                0                  4            0   
1033           2                0                  0            0   
1050           0                0                 53            0   
1051         296                0               1301            6   
...          ...              ...                ...          ...   
1201           2                0                 24            0   
1202           0                0                  8            0   
1203           0                0                  0            0   
1204          74                0                246            0   
1205          11                0                 68            0   

      files_deleted  frequency_file_change  num_of_passes  
1031              0                     44              1  
1032              0                      4         

In [457]:
X_train = cloud_controller
X_train.drop('developer_experience', inplace=True, axis=1)
y_train = y_cloud_controller

geoserver = pd.read_csv('metrics_data/cloud_controller_ng_metrics.csv')
X_test = np.array(cloud_controller [ cloud_controller['tr_build_id'].isin(cloud_test_build_ids)])
cloud_train_build_ids = cloud_controller['tr_build_id'].tolist()
res_cloud_controller = pd.read_csv('../data/cloud_controller_ng.csv', usecols = ['tr_build_id', 'tr_status'])
y_cloud_controller = res_cloud_controller [ res_cloud_controller['tr_build_id'].isin(cloud_test_build_ids)]['tr_status'].tolist()
y_test = np.array(output_values(y_cloud_controller))

In [461]:
sample_size = int(len(cloud_controller)*0.7)

best_precision = 0
best_recall = 0
best_accuracy = 0
best_threshold = 0

best_precision_sample = np.array([0])
best_precision_sample_result = np.array([0])
best_recall_sample = np.array([0])
best_recall_sample_result = np.array([0])
best_accuracy_sample = np.array([0])
best_accuracy_sample_result = np.array([0])

best_precision_estimator = 0
best_recall_estimator = 0
best_accuracy_estimator = 0

count = 0

#bootstrapping 100 times
for i in range(100):
    
    file_name = 'Cloud_Pickle/rq2_gradle_model_' + str(i+1) + '_model.pkl'
    pickle_dump_objects = []
    
    #getting required sample from training data
    sample_train = resample(X_train, replace=True, n_samples=sample_size)
    #print(sample_train)
    #sample_train_result = np.array(sample_train['tr_status'])
    sample_train_result = sample_train['tr_status']
    
    #getting the build_ids that are used in training 
    build_ids = sample_train['tr_build_id'].tolist()
    
    #extracting remaining build id's to use for testing
    sample_test = X_train [~X_train['tr_build_id'].isin(build_ids)]
    #sample_test_result = np.array(sample_test['tr_status'])
    sample_test_result = sample_test['tr_status']

    
    print(len(sample_train))
    print(len(sample_test))
    
    #dropping result column and build ids column
    sample_train.drop('tr_status', inplace=True, axis=1)
    sample_train.drop('tr_build_id', inplace=True, axis=1)
    sample_test.drop('tr_status', inplace=True, axis=1)
    sample_test.drop('tr_build_id', inplace=True, axis=1)
    
    
    pickle_dump_objects.append(sample_train)
    pickle_dump_objects.append(sample_train_result)
    pickle_dump_objects.append(sample_test)
    pickle_dump_objects.append(sample_test_result)
    
    #training
    grid_search.fit(sample_train, sample_train_result)
    
    #predicting with sample_test
    sample_pred_result = grid_search.predict_proba(sample_test)
    
    pickle_dump_objects.append(grid_search.best_estimator_)
    pickle_dump_objects.append(sample_pred_result)
    
    
    #getting the best threshold
    pred_vals = sample_pred_result[:, 1]
    fpr, tpr, thresholds = roc_curve(sample_test_result, pred_vals)
    gmeans = sqrt(tpr * (1-fpr))
    ix = argmax(gmeans)
    best_threshold = thresholds[ix]
    
    final_pred_result = []
    #threshold setting
    for i in range(len(pred_vals)):
        if pred_vals[i] > best_threshold:
            final_pred_result.append(1)
        else:
            final_pred_result.append(0)
    
    if count < 3:
        print(list(sample_test_result))
        print(final_pred_result)
        count += 1
    
    pickle_dump_objects.append(final_pred_result)
    
    with open(file_name, 'wb') as dump_file:
        for obj in pickle_dump_objects:
            pickle.dump(obj, dump_file)
    
    accuracy = accuracy_score(sample_test_result, final_pred_result)
    precision = precision_score(sample_test_result, final_pred_result)
    recall = recall_score(sample_test_result, final_pred_result)
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_accuracy_sample = sample_train
        best_accuracy_sample_result = sample_train_result
        best_accuracy_estimator = grid_search.best_estimator_
        
    if precision > best_precision:
        best_precision = precision
        best_precision_sample = sample_train
        best_precision_sample_result = sample_train_result
        best_precision_estimator = grid_search.best_estimator_
        best_threshold = thresholds[ix]
        
    if recall > best_recall:
        best_recall = recall
        best_recall_sample = sample_train
        best_recall_sample_result = sample_train_result
        best_recall_estimator = grid_search.best_estimator_
    
    print(precision, recall, accuracy)

1554
1113
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 

1.0 0.9989235737351991 0.999104744852283
1554
1107
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9989010989010989 0.999096657633243
1554
1090
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988851727982163 0.9990825688073395
1554
1106
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988925802879292 0.9990958408679927
1554
1090
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988713318284425 0.9990825688073395
1554
1088
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988814317673378 0.9990808823529411
1554
1107
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988826815642458 0.999096657633243
1554
1098
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988888888888889 0.9990892531876139
1554
1118
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988974641675854 0.9991055456171736
1554
1128
Fitting 3 folds for each of 60 candidat

[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   2.0s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.9s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.0s
[CV] END ...................

1.0 0.9989010989010989 0.9990958408679927
1554
1098
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=10, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.7s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.0s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.6s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.1s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   

[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.3s
[CV] END ....................max_depth=10, n_estimators=1800; total time=   2.0s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.9s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.1s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   2.0s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.3s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.4s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=10, n_estimators=800; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.1s
[CV] END .....................max_depth=35, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.5s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.0s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   1.7s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.2s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.5s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   1.8s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.7s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.3s
[CV] END ...................

[CV] END .....................max_depth=10, n_estimators=400; total time=   0.4s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.4s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.2s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.3s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.8s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=60, n_estimators=800; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.3s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   1.6s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.4s
[CV] END ...................

1.0 0.9989071038251366 0.9990892531876139
1554
1110
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.6s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   0.9s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   1.7s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.4s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.8s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.2s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   1.9s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   

1.0 0.9988789237668162 0.9990990990990991
1554
1097
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988700564971752 0.9990884229717412
1554
1126
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9989035087719298 0.9991119005328597
1554
1115
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988801791713325 0.9991031390134529
1554
1097
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988738738738738 0.9990884229717412
1554
1103
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988674971687429 0.99909338168631
1554
1102
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988962472406181 0.9990925589836661
1554
1099
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.998876404494382 0.9990900818926297
1554
1105
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.998898678414097 0.9990950226244344
1554
1092
Fitting 3 folds for each of 60 candidate

1.0 0.9988726042841037 0.9990867579908675
1554
1125
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END ....................max_depth=10, n_estimators=1800; total time=   2.2s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.7s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.5s
[CV] END .....................max_depth=60, n_estimators=800; total time=   1.0s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.7s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.5s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.3s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=85, n_estimators=2000; total time=   2.5s
[CV] END ...................max_depth=110, n_estimators=1200; total time=   

1.0 0.9989094874591058 0.9991111111111111
1554
1112
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.6s
[CV] END .....................max_depth=60, n_estimators=800; total time=   1.0s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.7s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.4s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=85, n_estimators=2000; total time=   2.6s
[CV] END ...................max_depth=110, n_estimators=1200; total time=   1.5s
[CV] END ...................max_depth=110, n_estimators=1600; total time=   2.0s
[CV] END ...................max_depth=None, n_estimators=200; total time=   

[CV] END ..................max_depth=None, n_estimators=1000; total time=   1.3s
[CV] END ..................max_depth=None, n_estimators=1600; total time=   2.1s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   1.0s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.9s
[CV] END .....................max_depth=35, n_estimators=800; total time=   1.0s
[CV] END .....................max_depth=35, n_estimators=800; total time=   1.1s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   2.1s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.6s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.5s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   2.3s
[CV] END ...................

[CV] END .....................max_depth=35, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=35, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=35, n_estimators=800; total time=   1.1s
[CV] END .....................max_depth=35, n_estimators=800; total time=   1.0s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   2.0s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.5s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.5s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   2.0s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.5s
[CV] END ...................

1.0 0.9989023051591658 0.9991007194244604
1554
1101
Fitting 3 folds for each of 60 candidates, totalling 180 fits
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.3s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.5s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.6s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   2.3s
[CV] END ....................max_depth=110, n_estimators=400; total time=   

[CV] END ...................max_depth=None, n_estimators=800; total time=   1.0s
[CV] END ..................max_depth=None, n_estimators=1200; total time=   1.5s
[CV] END ..................max_depth=None, n_estimators=1800; total time=   2.2s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END .....................max_depth=10, n_estimators=800; total time=   1.0s
[CV] END ....................max_depth=10, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=10, n_estimators=1600; total time=   2.0s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.4s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.3s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   2.0s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.5s
[CV] END ...................

[CV] END ....................max_depth=10, n_estimators=1800; total time=   2.3s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.5s
[CV] END ....................max_depth=35, n_estimators=1400; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.5s
[CV] END .....................max_depth=60, n_estimators=800; total time=   1.0s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.5s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.2s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   2.0s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.3s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.3s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.3s
[CV] END ...................

1.0 0.9988674971687429 0.9990917347865577
1554
1098
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988839285714286 0.9990892531876139
1554
1104
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988814317673378 0.9990942028985508
1554
1117
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988925802879292 0.999104744852283
1554
1089
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988700564971752 0.9990817263544536
1554
1108
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988776655443322 0.9990974729241877
1554
1105
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988925802879292 0.9990950226244344
1554
1095
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988950276243094 0.9990867579908675
1554
1112
Fitting 3 folds for each of 60 candidates, totalling 180 fits
1.0 0.9988814317673378 0.9991007194244604
1554
1109
Fitting 3 folds for each of 60 candid

[CV] END ....................max_depth=10, n_estimators=1000; total time=   1.3s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.6s
[CV] END ....................max_depth=10, n_estimators=2000; total time=   2.7s
[CV] END ....................max_depth=35, n_estimators=1600; total time=   2.2s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=60, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=60, n_estimators=400; total time=   0.6s
[CV] END .....................max_depth=60, n_estimators=800; total time=   1.1s
[CV] END ....................max_depth=60, n_estimators=1400; total time=   2.0s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   2.5s
[CV] END .....................max_depth=85, n_estimators=800; total time=   1.1s
[CV] END ...................

[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.6s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.4s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.8s
[CV] END ....................max_depth=85, n_estimators=2000; total time=   2.8s
[CV] END ...................max_depth=110, n_estimators=1200; total time=   1.7s
[CV] END ...................max_depth=110, n_estimators=1600; total time=   2.2s
[CV] END ...................max_depth=None, n_estimators=200; total time=   0.3s
[CV] END ...................max_depth=None, n_estimators=400; total time=   0.5s
[CV] END ...................max_depth=None, n_estimators=600; total time=   0.8s
[CV] END ..................max_depth=None, n_estimators=1000; total time=   1.3s
[CV] END ..................max_depth=None, n_estimators=1600; total time=   2.1s
[CV] END .....................max_depth=10, n_estimators=200; total time=   0.2s
[CV] END ...................

[CV] END ....................max_depth=60, n_estimators=2000; total time=   2.6s
[CV] END ....................max_depth=85, n_estimators=1400; total time=   1.9s
[CV] END ....................max_depth=85, n_estimators=1800; total time=   2.4s
[CV] END ....................max_depth=110, n_estimators=800; total time=   1.1s
[CV] END ...................max_depth=110, n_estimators=1200; total time=   1.6s
[CV] END ...................max_depth=110, n_estimators=1800; total time=   2.3s
[CV] END ...................max_depth=None, n_estimators=600; total time=   0.8s
[CV] END ...................max_depth=None, n_estimators=800; total time=   1.0s
[CV] END ..................max_depth=None, n_estimators=1400; total time=   1.9s
[CV] END ..................max_depth=None, n_estimators=2000; total time=   2.3s
[CV] END .....................max_depth=10, n_estimators=600; total time=   0.7s
[CV] END ....................max_depth=10, n_estimators=1000; total time=   1.3s
[CV] END ...................

[CV] END .....................max_depth=35, n_estimators=600; total time=   0.8s
[CV] END ....................max_depth=35, n_estimators=1000; total time=   1.3s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.6s
[CV] END ....................max_depth=35, n_estimators=1800; total time=   2.3s
[CV] END .....................max_depth=60, n_estimators=600; total time=   0.8s
[CV] END ....................max_depth=60, n_estimators=1200; total time=   1.6s
[CV] END ....................max_depth=60, n_estimators=1800; total time=   2.5s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=85, n_estimators=800; total time=   1.1s
[CV] END ....................max_depth=85, n_estimators=1200; total time=   1.6s
[CV] END ....................max_depth=85, n_estimators=1600; total time=   2.1s
[CV] END ....................max_depth=110, n_estimators=200; total time=   0.3s
[CV] END ...................

[CV] END .....................max_depth=35, n_estimators=600; total time=   0.8s
[CV] END .....................max_depth=35, n_estimators=600; total time=   0.8s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=1200; total time=   1.7s
[CV] END ....................max_depth=35, n_estimators=2000; total time=   2.7s
[CV] END ....................max_depth=60, n_estimators=1000; total time=   1.3s
[CV] END ....................max_depth=60, n_estimators=1600; total time=   2.1s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=85, n_estimators=200; total time=   0.3s
[CV] END .....................max_depth=85, n_estimators=400; total time=   0.5s
[CV] END .....................max_depth=85, n_estimators=600; total time=   0.8s
[CV] END ....................max_depth=85, n_estimators=1000; total time=   1.5s
[CV] END ...................

In [465]:


cloud_controller = pd.read_csv('metrics_data/cloud_controller_ng_metrics.csv')
cloud_controller.drop('num_commits', inplace=True, axis=1)
cloud_controller.drop('reviewer_experience', inplace=True, axis=1)
cloud_controller.drop('num_of_reviewers', inplace=True, axis=1)
cloud_controller.drop('developer_experience', inplace=True, axis=1)


X_test = cloud_controller [ cloud_controller['tr_build_id'].isin(cloud_test_build_ids)]
#print(X_test)
cloud_test_build_ids = X_test['tr_build_id'].tolist()

X_test.drop('tr_build_id', inplace=True, axis=1)

res_cloud_controller = pd.read_csv('../data/cloud_controller_ng.csv', usecols = ['tr_build_id', 'tr_status'])
y_cloud_controller = res_cloud_controller [ res_cloud_controller['tr_build_id'].isin(cloud_test_build_ids)]['tr_status'].tolist()

y_test = output_values(y_cloud_controller)
X_test['num_of_passes'] = get_pass_streak(y_test)
print(X_train.iloc[1030])

tr_build_id              20317358
patch_size                      2
num_of_comments                 0
test_file_changes               0
files_added                     0
files_deleted                   0
frequency_file_change          42
tr_status                       1
num_of_passes                   4
Name: 1030, dtype: int64


In [466]:
estimator = best_precision_estimator
estimator.fit(best_precision_sample, best_precision_sample_result)

queue = 4
model_best_threshold = best_threshold
final_pred_result = []
for val in range(len(X_test)):
    row = X_test.iloc[val]
    row['num_of_passes'] = queue
    build = np.array(row)
    build.reshape(-1, 1)
    pred = estimator.predict_proba([build])
    
    if pred[0][1] > best_threshold:
        final_pred_result.append(1)
        queue += 1
    else:
        final_pred_result.append(0)
        queue = 0    

print(y_test)
print(final_pred_result)

print(model_best_threshold)
print(roc_auc_score(y_test, final_pred_result))
print(precision_score(y_test, final_pred_result))
print(accuracy_score(y_test, final_pred_result))
print(f1_score(y_test, final_pred_result))

print('\n\n\n\n')
X_test['num_of_passes'] = get_pass_streak(y_test)

pred_results = estimator.predict_proba(X_test)

pred_vals = pred_results[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, pred_vals)
gmeans = sqrt(tpr * (1-fpr))
ix = argmax(gmeans)
current_best_threshold = thresholds[ix]

final_pred_result = []
#threshold setting
for i in range(len(pred_vals)):
    if pred_vals[i] > current_best_threshold:
        final_pred_result.append(1)
    else:
        final_pred_result.append(0)

print(final_pred_result)
print(current_best_threshold)
print(roc_auc_score(y_test, final_pred_result))
print(precision_score(y_test, final_pred_result))
print(accuracy_score(y_test, final_pred_result))
print(f1_score(y_test, final_pred_result))

[1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0.863288740245262
0.5
0.8037974683544303
0.8037974

In [475]:
def total_hybrid_metrics(y_res):
    total = len(y_res)
    builds = 0
    max_batch_size = 4
    
    cur_batch = []
    for i in range(len(y_res)):
        if y_res[i] == 0:
            if len(cur_batch) < max_batch_size:
                #print('appended')
                cur_batch.append(y_res[i])
            
            if len(cur_batch) == max_batch_size:
                builds += 1
                if 0 in cur_batch:
                    builds += 4
            
                cur_batch.clear()
            
    if len(cur_batch) != 0:
        builds += 1
        if 0 in cur_batch:
            builds += len(cur_batch)
    print(builds)
    print(total)
    print(((total-builds)/total)*100)

In [476]:
total_hybrid_metrics([1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

10
132
92.42424242424242


In [477]:
#geoserver
total_hybrid_metrics([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

90
153
41.17647058823529


In [479]:
#cloud
total_hybrid_metrics([1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1])

40
158
74.68354430379746
