## Read full dataset of Agriculture Production since 1991 to 2020

<!--
import data_analytics.github as github
print(github.create_jupyter_notebook_header("markcrowe-com", "agriculture-data-analytics", "notebooks/notebook-3-03-ml-milk-production.ipynb", "master"))
-->
<table style="margin: auto;"><tr><td><a href="https://mybinder.org/v2/gh/markcrowe-com/agriculture-data-analytics/master?filepath=notebooks/notebook-3-03-ml-milk-production.ipynb" target="_parent"><img src="https://mybinder.org/badge_logo.svg" alt="Open In Binder"/></a></td><td>online editors</td><td><a href="https://colab.research.google.com/github/markcrowe-com/agriculture-data-analytics/blob/master/notebooks/notebook-3-03-ml-milk-production.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr></table>

### Setup

Import required third party Python libraries, import supporting functions and sets up data source file paths.

In [1]:
# Local
#!pip install -r script/requirements.txt --quiet --user
# Remote option
#!pip install -r https://github.com/markcrowe-com/agriculture-data-analytics/blob/master/notebooks/script/requirements.txt --quiet --user

In [2]:
import pandas as pd
import numpy as np
import data_analytics.exploratory_data_analysis as eda
import data_analytics.exploratory_data_analysis_reports as eda_reports

### Load dataframe

In [3]:
df = pd.read_csv("./../artifacts/TA_inputoutputvalue_1990_2021_CSO.csv")
print("data dimensions \n",df.shape)
print()
print("data column info \n",df.info)

data dimensions 
 (32, 57)

data column info 
 <bound method DataFrame.info of     Unnamed: 0  Year          UNIT  Agricultural Output at Basic Prices  \
0            0  1990  Euro Million                               5200.0   
1            1  1991  Euro Million                               4994.4   
2            2  1992  Euro Million                               5374.0   
3            3  1993  Euro Million                               5625.9   
4            4  1994  Euro Million                               5781.5   
5            5  1995  Euro Million                               6035.5   
6            6  1996  Euro Million                               6134.7   
7            7  1997  Euro Million                               5753.1   
8            8  1998  Euro Million                               5831.7   
9            9  1999  Euro Million                               5651.4   
10          10  2000  Euro Million                               5985.5   
11          11  2001 

In [4]:
df.head()
eda_reports.print_dataframe_analysis_report(df)

Unnamed: 0.1,Unnamed: 0,Year,UNIT,Agricultural Output at Basic Prices,All Cereals,All Crops,All Livestock,All Livestock Products,All Livestock Products - Milk,All Livestock Products Other Products (excluding Milk),...,Livestock - Horses,Livestock - Pig,Livestock - Poultry,Livestock - Sheep,Net Value Added at Basic Prices,Operating Surplus,Other Subsidies Less Taxes on Production,Subsidies less Taxes on Products,Subsidies on Products,Taxes on Products
0,0,1990,Euro Million,5200.0,216.7,1123.5,2201.9,1360.0,1316.3,43.7,...,83.2,237.2,117.1,188.8,2306.3,1943.5,14.8,333.9,408.9,75.0
1,1,1991,Euro Million,4994.4,218.6,1127.3,2114.0,1301.9,1258.9,43.0,...,70.2,242.1,125.5,198.9,2126.2,1774.0,10.3,279.1,357.3,78.2
2,2,1992,Euro Million,5374.0,235.4,1154.4,2262.5,1410.7,1373.1,37.6,...,61.4,280.4,124.5,192.8,2484.9,2179.4,31.4,366.6,446.0,79.5
3,3,1993,Euro Million,5625.9,165.7,1101.7,2449.6,1476.7,1439.0,37.7,...,84.5,257.9,116.0,232.4,2560.6,2247.9,26.3,398.4,466.4,68.0
4,4,1994,Euro Million,5781.5,127.3,1157.0,2319.3,1487.6,1446.2,41.4,...,75.9,264.5,134.6,230.2,2513.7,2278.5,110.0,612.3,666.0,53.7


Unnamed: 0                                                                 int64
Year                                                                       int64
UNIT                                                                      object
Agricultural Output at Basic Prices                                      float64
All Cereals                                                              float64
All Crops                                                                float64
All Livestock                                                            float64
All Livestock Products                                                   float64
All Livestock Products - Milk                                            float64
All Livestock Products Other Products (excluding Milk)                   float64
Compensation of Employees                                                float64
Contract Work                                                            float64
Crops - Barley              

Unnamed: 0                                                                0
Year                                                                      0
UNIT                                                                      0
Agricultural Output at Basic Prices                                       0
All Cereals                                                               0
All Crops                                                                 0
All Livestock                                                             0
All Livestock Products                                                    0
All Livestock Products - Milk                                             0
All Livestock Products Other Products (excluding Milk)                    0
Compensation of Employees                                                 0
Contract Work                                                             0
Crops - Barley                                                            1
Crops - Fora

## Production of Milk

In [5]:
## Extract milk production dataset
# drop redundunt columns
df = df.drop('Unnamed: 0',axis = 1)

# extract milk dataset
df_milk = df[['Year',
#              'UNIT',
              'All Livestock Products - Milk',
              'Taxes on Products',
              'Subsidies on Products',
              'Compensation of Employees',
              'Contract Work',
              'Entrepreneurial Income',
              'Factor Income',
              'Fixed Capital Consumption - Farm Buildings',
              'Fixed Capital Consumption - Machinery, Equipment, etc',
              'Interest less FISIM',
              'Operating Surplus',
              'Livestock - Cattle',
              'Livestock - Sheep',
              'Land Rental',
              'Intermediate Consumption - Contract Work',
              'Intermediate Consumption - Crop Protection Products',
              'Intermediate Consumption - Energy and Lubricants',
              'Intermediate Consumption - Feeding Stuffs',
              'Intermediate Consumption - Fertilisers',
              'Intermediate Consumption - Financial Intermediation Services Indirect',
              'Intermediate Consumption - Forage Plants',
              'Intermediate Consumption - Maintenance and Repairs',
              'Intermediate Consumption - Seeds',
              'Intermediate Consumption - Services',
              'Intermediate Consumption - Veterinary Expenses',
              'Intermediate Consumption - Other Goods (Detergents, Small Tools, etc)',
              'Intermediate Consumption - Other Goods and Services'
              
             ]]
# Assign year as index
df_milk.set_index('Year',drop=True,inplace=True)

print("Milk production dataset dimenssions \n", df_milk.shape)

Milk production dataset dimenssions 
 (32, 27)


In [6]:
eda_reports.print_dataframe_analysis_report(df_milk)

Unnamed: 0_level_0,All Livestock Products - Milk,Taxes on Products,Subsidies on Products,Compensation of Employees,Contract Work,Entrepreneurial Income,Factor Income,Fixed Capital Consumption - Farm Buildings,"Fixed Capital Consumption - Machinery, Equipment, etc",Interest less FISIM,...,Intermediate Consumption - Feeding Stuffs,Intermediate Consumption - Fertilisers,Intermediate Consumption - Financial Intermediation Services Indirect,Intermediate Consumption - Forage Plants,Intermediate Consumption - Maintenance and Repairs,Intermediate Consumption - Seeds,Intermediate Consumption - Services,Intermediate Consumption - Veterinary Expenses,"Intermediate Consumption - Other Goods (Detergents, Small Tools, etc)",Intermediate Consumption - Other Goods and Services
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1990,1316.3,75.0,408.9,377.6,180.7,1577.2,2321.1,106.4,325.5,267.4,...,642.2,326.0,34.0,553.1,203.9,42.0,109.1,85.2,73.2,182.3
1991,1258.9,78.2,357.3,362.5,172.0,1440.2,2136.5,113.2,331.7,237.2,...,646.5,319.1,39.0,525.6,184.6,43.8,110.2,91.9,75.3,185.5
1992,1373.1,79.5,446.0,336.8,179.8,1842.6,2516.2,117.8,332.4,237.6,...,668.5,305.1,43.0,533.8,175.1,46.4,110.8,93.7,72.2,183.0
1993,1439.0,68.0,466.4,339.0,199.5,1985.6,2586.9,121.9,335.5,162.5,...,733.5,301.2,49.0,572.9,195.7,46.9,113.3,100.2,79.8,193.1
1994,1446.2,53.7,666.0,345.2,205.3,2051.7,2623.7,125.0,346.5,119.3,...,818.6,316.3,56.0,604.6,220.3,50.0,118.5,108.7,81.8,200.3


All Livestock Products - Milk                                            float64
Taxes on Products                                                        float64
Subsidies on Products                                                    float64
Compensation of Employees                                                float64
Contract Work                                                            float64
Entrepreneurial Income                                                   float64
Factor Income                                                            float64
Fixed Capital Consumption - Farm Buildings                               float64
Fixed Capital Consumption - Machinery, Equipment, etc                    float64
Interest less FISIM                                                      float64
Operating Surplus                                                        float64
Livestock - Cattle                                                       float64
Livestock - Sheep           

All Livestock Products - Milk                                             0
Taxes on Products                                                         1
Subsidies on Products                                                     1
Compensation of Employees                                                 0
Contract Work                                                             0
Entrepreneurial Income                                                    0
Factor Income                                                             0
Fixed Capital Consumption - Farm Buildings                                1
Fixed Capital Consumption - Machinery, Equipment, etc                     1
Interest less FISIM                                                       0
Operating Surplus                                                         0
Livestock - Cattle                                                        0
Livestock - Sheep                                                         0
Land Rental 

In [7]:
df_milk["Intermediate Consumption - Services"].unique()

array([109.1, 110.2, 110.8, 113.3, 118.5, 124.9, 140.1, 142.2, 147.6,
       153. , 175.8, 200.2, 210.2, 212.8, 205.7,   nan])

In [8]:
df_milk = df[['Year',
#              'UNIT',
              'All Livestock Products - Milk',
              'Taxes on Products',
              'Subsidies on Products',
              'Compensation of Employees',
              'Contract Work',
              'Entrepreneurial Income',
              'Factor Income',
              'Fixed Capital Consumption - Farm Buildings',
              'Fixed Capital Consumption - Machinery, Equipment, etc',
              'Interest less FISIM',
              'Operating Surplus',
              'Livestock - Cattle',
              'Livestock - Sheep',
              'Land Rental',
              'Intermediate Consumption - Contract Work',
              'Intermediate Consumption - Crop Protection Products',
              'Intermediate Consumption - Energy and Lubricants',
              'Intermediate Consumption - Feeding Stuffs',
              'Intermediate Consumption - Fertilisers',
              'Intermediate Consumption - Financial Intermediation Services Indirect',
              'Intermediate Consumption - Forage Plants',
              'Intermediate Consumption - Maintenance and Repairs',
              'Intermediate Consumption - Seeds',
              'Intermediate Consumption - Services',
              'Intermediate Consumption - Veterinary Expenses',
              'Intermediate Consumption - Other Goods (Detergents, Small Tools, etc)',
              'Intermediate Consumption - Other Goods and Services'
              
             ]]

### Define 20% Training set 80% Test set

In [9]:
# define target & feature variables

X = df_milk.iloc[:,2:].values
Y = df_milk.iloc[:,1].values.reshape(-1,1)
print(np.shape(X))
print(np.shape(Y))

# split train test split 20
from sklearn.model_selection import train_test_split
X_train, X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=2021)

(32, 26)
(32, 1)


### Model 1 RandomForest Regressor

In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor


In [11]:
rf_model_milk = RandomForestRegressor(random_state=2021)

In [12]:
params_rf_milk = {'n_estimators':[100,200,500]
            ,'max_features':['auto','sqrt']
            }

In [13]:
GS_rf_milk = GridSearchCV(estimator= rf_model_milk,
                     param_grid= params_rf_milk
                     )

In [14]:
# np.isnan(X_train).sum()
# np.nan_to_num(X_train)
# np.nan_to_num(Y_train)
GS_rf_milk.fit(X_train,Y_train) #do not run becuase of null values

30 fits failed out of a total of 30.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 681, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 327, in fit
    X, y = self._validate_data(
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py", line 576, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 956, in check_X_y
    X = check_array(
  File "C:\ProgramDat

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [15]:
# print best model
print(GS_rf_milk.best_estimator_)
print('Best model score', GS_rf_milk.best_score_)

RandomForestRegressor(random_state=2021)
Best model score nan


### Model 2 XGBOOST Regressor

In [16]:
# xgboost
#!pip install xgboost

In [17]:
from xgboost import XGBRegressor
xgb_model_milk = XGBRegressor(random_state=2021)

In [18]:
# make a search space of parameters to loop over

params_xgb_milk = {'n_estimators':[20,40,80,160,340,500],
             'max_depth':[3,6,9],
             'gamma':[0.01,0.1],
             'learning_rate':[0.001,0.01,0.1,1]
             }

In [19]:
GS_xgb_milk = GridSearchCV(estimator=xgb_model_milk,
                     param_grid=params_xgb_milk,
                     #n_jobs=-1,
                     scoring=['r2','neg_root_mean_squared_error'],
                     refit= 'r2',
                     cv=5,
                     verbose=4
                     )

In [20]:
GS_xgb_milk.fit(X_train,Y_train);

Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1719.914) r2: (test=-14.187) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1566.839) r2: (test=-33.486) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-2071.229) r2: (test=-6.465) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-2077.487) r2: (test=-13.834) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-1585.432) r2: (test=-49.993) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=3, n_estimators=40; neg_root_mean_squared_error: (test=-1689.258) r2: (test=-13.65

[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1164.893) r2: (test=-18.062) total time=   0.4s
[CV 3/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1657.422) r2: (test=-3.780) total time=   0.4s
[CV 4/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1620.849) r2: (test=-8.030) total time=   0.4s
[CV 5/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-1162.349) r2: (test=-26.409) total time=   0.4s
[CV 1/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-1132.534) r2: (test=-5.585) total time=   0.6s
[CV 2/5] END gamma=0.01, learning_rate=0.001, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-1007.256) r2: (test=-13.252) total time=   0.6s
[CV 3/5] END gamma=0.01, learning_rat

[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=80; neg_root_mean_squared_error: (test=-747.977) r2: (test=-10.350) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-414.072) r2: (test=0.120) total time=   0.1s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-381.344) r2: (test=-1.043) total time=   0.1s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-813.342) r2: (test=-0.151) total time=   0.1s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-561.985) r2: (test=-0.086) total time=   0.1s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-357.010) r2: (test=-1.586) total time=   0.1s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_dept

[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-1590.027) r2: (test=-3.399) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-1544.109) r2: (test=-7.195) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-1094.871) r2: (test=-23.319) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-854.619) r2: (test=-2.750) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-765.723) r2: (test=-7.236) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.01, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1233.907) r2: (test=-1.649) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.01, max_dept

[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-366.616) r2: (test=0.538) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-250.556) r2: (test=-0.274) total time=   0.0s
[CV 1/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-144.539) r2: (test=0.893) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-139.069) r2: (test=0.728) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-418.410) r2: (test=0.695) total time=   0.0s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators=40; neg_root_mean_squared_error: (test=-178.677) r2: (test=0.890) total time=   0.0s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=6, n_estimators

[CV 2/5] END gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-136.663) r2: (test=0.738) total time=   0.7s
[CV 3/5] END gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-309.625) r2: (test=0.833) total time=   0.7s
[CV 4/5] END gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-141.555) r2: (test=0.931) total time=   0.7s
[CV 5/5] END gamma=0.01, learning_rate=0.1, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-93.466) r2: (test=0.823) total time=   0.7s
[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-59.514) r2: (test=0.982) total time=   0.0s
[CV 2/5] END gamma=0.01, learning_rate=1, max_depth=3, n_estimators=20; neg_root_mean_squared_error: (test=-188.421) r2: (test=0.501) total time=   0.0s
[CV 3/5] END gamma=0.01, learning_rate=1, max_depth=3, n_estimators=20; 

[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-62.138) r2: (test=0.980) total time=   0.5s
[CV 2/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-240.123) r2: (test=0.190) total time=   0.5s
[CV 3/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-383.943) r2: (test=0.743) total time=   0.5s
[CV 4/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-378.726) r2: (test=0.507) total time=   0.5s
[CV 5/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-196.671) r2: (test=0.215) total time=   0.5s
[CV 1/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=500; neg_root_mean_squared_error: (test=-62.138) r2: (test=0.980) total time=   0.7s
[CV 2/5] END gamma=0.01, learning_rate=1, max_depth=6, n_estimators=500; neg_r

[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=80; neg_root_mean_squared_error: (test=-1496.025) r2: (test=-44.404) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1524.862) r2: (test=-10.938) total time=   0.2s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1371.015) r2: (test=-25.405) total time=   0.3s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1875.857) r2: (test=-5.123) total time=   0.1s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1873.511) r2: (test=-11.064) total time=   0.1s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-1384.416) r2: (test=-37.882) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=0.00

[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-2041.490) r2: (test=-6.252) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-2046.958) r2: (test=-13.401) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-1555.068) r2: (test=-48.059) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1632.439) r2: (test=-12.682) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1479.245) r2: (test=-29.738) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=0.001, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-1983.885) r2: (test=-5.849) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=0.001, max

[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=3, n_estimators=500; neg_root_mean_squared_error: (test=-99.698) r2: (test=0.798) total time=   0.6s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-1472.910) r2: (test=-10.138) total time=   0.0s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-1320.170) r2: (test=-23.483) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-1824.311) r2: (test=-4.791) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-1813.650) r2: (test=-10.306) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=6, n_estimators=20; neg_root_mean_squared_error: (test=-1330.828) r2: (test=-34.930) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=6,

[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=340; neg_root_mean_squared_error: (test=-470.049) r2: (test=0.616) total time=   0.4s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=340; neg_root_mean_squared_error: (test=-197.637) r2: (test=0.866) total time=   0.4s
[CV 5/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=340; neg_root_mean_squared_error: (test=-111.484) r2: (test=0.748) total time=   0.4s
[CV 1/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-130.016) r2: (test=0.913) total time=   0.6s
[CV 2/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-137.169) r2: (test=0.736) total time=   0.7s
[CV 3/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estimators=500; neg_root_mean_squared_error: (test=-382.781) r2: (test=0.745) total time=   0.6s
[CV 4/5] END gamma=0.1, learning_rate=0.01, max_depth=9, n_estim

[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=160; neg_root_mean_squared_error: (test=-136.589) r2: (test=0.738) total time=   0.2s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=160; neg_root_mean_squared_error: (test=-309.805) r2: (test=0.833) total time=   0.2s
[CV 4/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=160; neg_root_mean_squared_error: (test=-141.579) r2: (test=0.931) total time=   0.2s
[CV 5/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=160; neg_root_mean_squared_error: (test=-94.244) r2: (test=0.820) total time=   0.2s
[CV 1/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-127.565) r2: (test=0.916) total time=   0.5s
[CV 2/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=340; neg_root_mean_squared_error: (test=-136.583) r2: (test=0.738) total time=   0.5s
[CV 3/5] END gamma=0.1, learning_rate=0.1, max_depth=6, n_estimators=34

[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=80; neg_root_mean_squared_error: (test=-188.432) r2: (test=0.501) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=80; neg_root_mean_squared_error: (test=-420.475) r2: (test=0.692) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=80; neg_root_mean_squared_error: (test=-373.435) r2: (test=0.521) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=80; neg_root_mean_squared_error: (test=-160.335) r2: (test=0.478) total time=   0.1s
[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-59.475) r2: (test=0.982) total time=   0.2s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_squared_error: (test=-188.432) r2: (test=0.501) total time=   0.2s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=3, n_estimators=160; neg_root_mean_s

[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-238.403) r2: (test=0.202) total time=   0.0s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-387.297) r2: (test=0.739) total time=   0.0s
[CV 4/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-379.309) r2: (test=0.505) total time=   0.0s
[CV 5/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=40; neg_root_mean_squared_error: (test=-205.499) r2: (test=0.143) total time=   0.0s
[CV 1/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-61.971) r2: (test=0.980) total time=   0.1s
[CV 2/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squared_error: (test=-238.403) r2: (test=0.202) total time=   0.1s
[CV 3/5] END gamma=0.1, learning_rate=1, max_depth=9, n_estimators=80; neg_root_mean_squa

In [21]:
# print best model
print(GS_xgb_milk.best_estimator_)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0.01, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.1, max_delta_step=0,
             max_depth=9, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=340, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=2021,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)


In [24]:
# print best parameters
print('Best model Parameters',GS_xgb_milk.best_params_)
# best score
print('Best model R2 score',GS_xgb_milk.best_score_)

# write the Grid Search results to csv to choose best model with least resource consumption
GS_xgb_df_milk = pd.DataFrame(GS_xgb_milk.cv_results_)
GS_xgb_df_milk = GS_xgb_df_milk.sort_values('rank_test_r2')

Best model Parameters {'gamma': 0.01, 'learning_rate': 0.1, 'max_depth': 9, 'n_estimators': 340}
Best model R2 score 0.8483316258834732


In [26]:
GS_xgb_df_milk.to_csv('./../artifacts/grid-search-xgb-milk-results.csv')

In [27]:
predict(X_test)

NameError: name 'predict' is not defined