## Bovine Tuberculosis

<!--
import data_analytics.github as github
print(github.create_jupyter_notebook_header("markcrowe-com", "agriculture-data-analytics", "notebooks/notebook-3-02-ml-bovine-tuberculosis.ipynb", "master"))
-->
<table style="margin: auto;"><tr><td><a href="https://mybinder.org/v2/gh/markcrowe-com/agriculture-data-analytics/master?filepath=notebooks/notebook-3-02-ml-bovine-tuberculosis.ipynb" target="_parent"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder"/></a></td><td>online editors</td><td><a href="https://colab.research.google.com/github/markcrowe-com/agriculture-data-analytics/blob/master/notebooks/notebook-3-02-ml-bovine-tuberculosis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr></table>

### Objective

The objective is to build a machine learning (ML) model of the [bovine-tuberculosis-eda-output.csv](./../artifacts/bovine-tuberculosis-eda-output.csv).

### Setup

Import required third party Python libraries, import supporting functions and sets up data source file paths.

In [1]:
# Local
#!pip install -r script/requirements.txt --quiet --user
# Remote option
#!pip install -r https://github.com/markcrowe-com/agriculture-data-analytics/blob/master/notebooks/script/requirements.txt --quiet --user

In [2]:
from agriculture_data_analytics.project_manager import *
from agriculture_data_analytics.dataframe_labels import *
from pandas import DataFrame
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
import data_analytics.github as github
import numpy
import os
import pandas

In [3]:
artifact_manager: ProjectArtifactManager = ProjectArtifactManager()
artifact_manager.is_remote = True
github.display_jupyter_notebook_data_sources(
    [artifact_manager.get_county_bovine_tuberculosis_eda_filepath()])
artifact_manager.is_remote = False

https://github.com/markcrowe-com/agriculture-data-analytics/artifacts/county-bovine-tuberculosis-eda-output.csv?raw=true


### Load dataframe

In [4]:
filepath: str = artifact_manager.get_county_bovine_tuberculosis_eda_filepath()
county_bovine_tuberculosis_dataframe: DataFrame = pandas.read_csv(filepath)

In [5]:
print("Row, Column Count:", county_bovine_tuberculosis_dataframe.shape[0])

Row, Column Count: 319


### Check the types for machine learning

In [6]:
county_bovine_tuberculosis_dataframe.dtypes

Year                                   int64
Veterinary Office                     object
Animal Count                         float64
Herd Incidence Rate                  float64
Restricted Herds at end of Year      float64
Restricted Herds at start of Year    float64
Herds Tested                         float64
Herds Count                          float64
Reactors per 1000 Tests A.P.T.       float64
Reactors to date                     float64
Tests on Animals                     float64
dtype: object

Veterinary Office is an object, specifically a string. We must encode it as a number for machine learning.

In [7]:
dummy_values_dataframe = county_bovine_tuberculosis_dataframe[[
    "Veterinary Office"
]]

county_bovine_tuberculosis_dataframe.drop('Veterinary Office',
                                          axis=1,
                                          inplace=True)

dummy_values_dataframe = pandas.get_dummies(dummy_values_dataframe,
                                            columns=["Veterinary Office"],
                                            prefix=["Veterinary Office "])

In [8]:
county_bovine_tuberculosis_dataframe = county_bovine_tuberculosis_dataframe.join(dummy_values_dataframe)

### Set Year as Index

In [9]:
county_bovine_tuberculosis_dataframe.set_index(YEAR, drop=True, inplace=True)
county_bovine_tuberculosis_dataframe.head()

Unnamed: 0_level_0,Animal Count,Herd Incidence Rate,Restricted Herds at end of Year,Restricted Herds at start of Year,Herds Tested,Herds Count,Reactors per 1000 Tests A.P.T.,Reactors to date,Tests on Animals,Veterinary Office _Carlow,...,Veterinary Office _Offaly,Veterinary Office _Roscommon,Veterinary Office _Sligo,Veterinary Office _Tipperary North,Veterinary Office _Tipperary South,Veterinary Office _Waterford,Veterinary Office _Westmeath,Veterinary Office _Wexford,Veterinary Office _Wicklow E,Veterinary Office _Wicklow W
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010,86258.0,4.02,28.0,52.0,1295.0,1353.0,1.14,124.0,108584.0,1,...,0,0,0,0,0,0,0,0,0,0
2010,202119.0,5.32,124.0,257.0,4832.0,4915.0,3.13,981.0,313822.0,0,...,0,0,0,0,0,0,0,0,0,0
2010,237260.0,5.71,175.0,350.0,6134.0,6282.0,5.05,1947.0,385705.0,0,...,0,0,0,0,0,0,0,0,0,0
2010,462707.0,4.43,119.0,259.0,5849.0,5986.0,1.62,1078.0,664648.0,0,...,0,0,0,0,0,0,0,0,0,0
2010,417478.0,6.3,216.0,385.0,6107.0,6310.0,2.72,1592.0,586105.0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
county_bovine_tuberculosis_dataframe.dtypes

Animal Count                          float64
Herd Incidence Rate                   float64
Restricted Herds at end of Year       float64
Restricted Herds at start of Year     float64
Herds Tested                          float64
Herds Count                           float64
Reactors per 1000 Tests A.P.T.        float64
Reactors to date                      float64
Tests on Animals                      float64
Veterinary Office _Carlow               uint8
Veterinary Office _Cavan                uint8
Veterinary Office _Clare                uint8
Veterinary Office _Cork North           uint8
Veterinary Office _Cork South           uint8
Veterinary Office _Donegal              uint8
Veterinary Office _Dublin               uint8
Veterinary Office _Galway               uint8
Veterinary Office _Kerry                uint8
Veterinary Office _Kildare              uint8
Veterinary Office _Kilkenny             uint8
Veterinary Office _Laois                uint8
Veterinary Office _Leitrim        

In [11]:
county_bovine_tuberculosis_dataframe.isnull().sum()

Animal Count                          0
Herd Incidence Rate                   0
Restricted Herds at end of Year       0
Restricted Herds at start of Year     0
Herds Tested                          0
Herds Count                           0
Reactors per 1000 Tests A.P.T.        0
Reactors to date                      0
Tests on Animals                      0
Veterinary Office _Carlow             0
Veterinary Office _Cavan              0
Veterinary Office _Clare              0
Veterinary Office _Cork North         0
Veterinary Office _Cork South         0
Veterinary Office _Donegal            0
Veterinary Office _Dublin             0
Veterinary Office _Galway             0
Veterinary Office _Kerry              0
Veterinary Office _Kildare            0
Veterinary Office _Kilkenny           0
Veterinary Office _Laois              0
Veterinary Office _Leitrim            0
Veterinary Office _Limerick           0
Veterinary Office _Longford           0
Veterinary Office _Louth              0


### Define 20% Training set 80% Test set

In [12]:
# define target & feature variables

X = county_bovine_tuberculosis_dataframe.iloc[:, 2:].values
Y = county_bovine_tuberculosis_dataframe.iloc[:, 1].values.reshape(-1)
print(numpy.shape(X))
print(numpy.shape(Y))

# split train test split 20
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=2021)

(319, 36)
(319,)


### Model 1 RandomForest Regressor

In [13]:
random_forest_regressor = RandomForestRegressor(random_state=2021)

In [14]:
random_forest_params = {
    'n_estimators': [100, 200, 500],
    'max_features': ['auto', 'sqrt']
}

In [15]:
grid_search_cv = GridSearchCV(estimator=random_forest_regressor,
                              param_grid=random_forest_params)

In [16]:
# np.isnan(X_train).sum()
# np.nan_to_num(X_train)
# np.nan_to_num(Y_train)
grid_search_cv.fit(X_train, Y_train)  #do not run becuase of null values

GridSearchCV(estimator=RandomForestRegressor(random_state=2021),
             param_grid={'max_features': ['auto', 'sqrt'],
                         'n_estimators': [100, 200, 500]})

In [17]:
# print best model
print(grid_search_cv.best_estimator_)
print('Best model score', grid_search_cv.best_score_)

RandomForestRegressor(max_features='sqrt', n_estimators=500, random_state=2021)
Best model score 0.8539819096646728


### Model 2 XGBOOST Regressor

In [18]:
xgb_regressor = XGBRegressor(random_state=2021)

In [19]:
# make a search space of parameters to loop over

xgb_regressor_params = {
    'n_estimators': [20, 40, 80, 160, 340, 500],
    'max_depth': [3, 6, 9],
    'gamma': [0.01, 0.1],
    'learning_rate': [0.001, 0.01, 0.1, 1]
}

In [20]:
grid_search_cv = GridSearchCV(
    estimator=xgb_regressor,
    param_grid=xgb_regressor_params,
    #n_jobs=-1,
    scoring=['r2', 'neg_root_mean_squared_error'],
    refit='r2',
    cv=5,
    verbose=4)

In [21]:
grid_search_cv.fit(X_train, Y_train)

Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-4.259) r2: (test=-4.564) total time=   0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-4.545) r2: (test=-3.497) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-4.679) r2: (test=-2.330) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-4.834) r2: (test=-2.500) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-4.630) r2: (test=-2.896) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=40; neg_root_mean_squared_error: (test=-4.183) r2: (test=-4.367) total time=   0.0s
[

[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-3.507) r2: (test=-0.871) total time=   0.5s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-3.779) r2: (test=-1.138) total time=   0.5s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-3.552) r2: (test=-1.293) total time=   0.6s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-2.729) r2: (test=-1.284) total time=   0.8s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-3.089) r2: (test=-1.078) total time=   0.9s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-3.034) r2: (test=-0.400) total time=   0.9s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=6,

[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1.291) r2: (test=0.489) total time=   0.2s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1.576) r2: (test=0.459) total time=   0.2s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1.371) r2: (test=0.714) total time=   0.2s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1.960) r2: (test=0.425) total time=   0.2s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1.682) r2: (test=0.486) total time=   0.2s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=340; neg_root_mean_squared_error: (test=-0.806) r2: (test=0.801) total time=   0.5s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators

[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-3.611) r2: (test=-0.952) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-3.381) r2: (test=-1.077) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-2.078) r2: (test=-0.324) total time=   0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-2.497) r2: (test=-0.357) total time=   0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-2.314) r2: (test=0.186) total time=   0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-2.752) r2: (test=-0.134) total time=   0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=

[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-1.532) r2: (test=0.648) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-1.351) r2: (test=0.668) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-0.631) r2: (test=0.878) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-0.917) r2: (test=0.817) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-0.956) r2: (test=0.861) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-1.213) r2: (test=0.780) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root

[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-0.937) r2: (test=0.866) total time=   1.1s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-1.104) r2: (test=0.818) total time=   1.2s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-1.155) r2: (test=0.758) total time=   1.2s
[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-0.935) r2: (test=0.732) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1.576) r2: (test=0.459) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1.107) r2: (test=0.814) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=1, max_depth=3, n_estimators=20; neg_root_mean

[CV 3/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1.417) r2: (test=0.695) total time=   0.8s
[CV 4/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1.094) r2: (test=0.821) total time=   0.6s
[CV 5/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1.581) r2: (test=0.546) total time=   0.6s
[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-1.024) r2: (test=0.678) total time=   1.1s
[CV 2/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-1.599) r2: (test=0.444) total time=   0.9s
[CV 3/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-1.417) r2: (test=0.695) total time=   1.1s
[CV 4/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=500; neg_root_mean_s

[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-4.058) r2: (test=-2.585) total time=   0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-4.118) r2: (test=-1.579) total time=   0.2s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-4.322) r2: (test=-1.797) total time=   0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-4.122) r2: (test=-2.088) total time=   0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=340; neg_root_mean_squared_error: (test=-3.198) r2: (test=-2.136) total time=   0.5s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=340; neg_root_mean_squared_error: (test=-3.506) r2: (test=-1.677) total time=   0.5s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_esti

[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-4.026) r2: (test=-3.970) total time=   0.1s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-4.324) r2: (test=-3.070) total time=   0.1s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-4.433) r2: (test=-1.988) total time=   0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-4.604) r2: (test=-2.175) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-4.398) r2: (test=-2.515) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=160; neg_root_mean_squared_error: (test=-3.740) r2: (test=-3.291) total time=   0.2s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimator

[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-4.190) r2: (test=-1.630) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-3.979) r2: (test=-1.878) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-2.994) r2: (test=-1.750) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-3.333) r2: (test=-1.419) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-3.321) r2: (test=-0.678) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-3.617) r2: (test=-0.959) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=40; ne

[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-1.030) r2: (test=0.839) total time=   1.1s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-1.200) r2: (test=0.784) total time=   1.2s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-1.186) r2: (test=0.744) total time=   1.2s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1.018) r2: (test=0.682) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1.317) r2: (test=0.623) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1.203) r2: (test=0.780) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=3, n_estimators=20; neg_root_

[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-0.858) r2: (test=0.840) total time=   0.6s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-0.974) r2: (test=0.856) total time=   0.7s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1.091) r2: (test=0.822) total time=   0.6s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1.139) r2: (test=0.764) total time=   0.7s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-0.658) r2: (test=0.867) total time=   1.0s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-0.858) r2: (test=0.840) total time=   0.9s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=500; neg_root

[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-0.900) r2: (test=0.752) total time=   0.2s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1.555) r2: (test=0.474) total time=   0.2s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-0.975) r2: (test=0.856) total time=   0.2s
[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-0.928) r2: (test=0.871) total time=   0.2s
[CV 5/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1.097) r2: (test=0.781) total time=   0.2s
[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=340; neg_root_mean_squared_error: (test=-0.900) r2: (test=0.752) total time=   0.5s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=340; neg_root_mean_squared_

[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-0.911) r2: (test=0.746) total time=   0.1s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1.507) r2: (test=0.506) total time=   0.2s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1.572) r2: (test=0.624) total time=   0.1s
[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1.195) r2: (test=0.786) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1.239) r2: (test=0.721) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=160; neg_root_mean_squared_error: (test=-0.911) r2: (test=0.746) total time=   0.4s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=160; neg_root_mean_squared_error

GridSearchCV(cv=5,
             estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False, gamma=None,
                                    gpu_id=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,
                                    n...
                                    num_parallel_tree=None, predictor=None,
                                    random_state=2021, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                 

In [22]:
# print best model
print(grid_search_cv.best_estimator_)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0.01, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=340, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=2021,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)


In [23]:
# print best parameters
print('Best model Parameters', grid_search_cv.best_params_)
# best score
print('Best model R2 score', grid_search_cv.best_score_)

# write the Grid Search results to csv to choose best model with least resource consumption
bovine_tuberculosis_xgb_grid_search_dataframe = DataFrame(
    grid_search_cv.cv_results_)
bovine_tuberculosis_xgb_grid_search_dataframe = bovine_tuberculosis_xgb_grid_search_dataframe.sort_values(
    'rank_test_r2')

Best model Parameters {'gamma': 0.01, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 340}
Best model R2 score 0.8492299232624821


In [24]:
bovine_tuberculosis_xgb_grid_search_dataframe.to_csv(
    './../artifacts/grid-search-xgb-county-bovine-tuberculosis-results.csv')