# Load Libraries

In [11]:
import pandas as pd
import numpy as numpy
import xgboost as xgb #contains both XGBClassifier and XGBRegressor

# Load Dataset 

In [12]:
# Training file, read into a DataFrame
TRAIN_CSV = 'train_yaOffsB.csv'
data = pd.read_csv(TRAIN_CSV)

In [13]:
# Feature frame: every column except 'ID' and the target
X = data.drop(columns=['ID', 'Crop_Damage'])

# Target: the 'Crop_Damage' column
y = data['Crop_Damage']

# Divide Data into Train and Test 

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test part; stratify on the target and pin
# random_state so the split is repeatable.
split_parts = train_test_split(
    X, y,
    test_size=0.20,
    random_state=101,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts

In [15]:
# Report the shape of each split, one line per object
for label, part in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(f'{label} : {part.shape}')

X_train : (71086, 8)
y_train : (71086,)
X_test : (17772, 8)
y_test : (17772,)


# Build Model with Tuning

# Grid Search (exhaustive, via GridSearchCV)

In [22]:
from sklearn.model_selection import GridSearchCV

In [46]:
# Search space: 4 max_depth values x 3 min_child_weight values x 5 reg_alpha
# values = 60 candidate settings.
param_test1 = dict(
    max_depth=range(3, 10, 2),         # 3, 5, 7, 9
    min_child_weight=range(1, 6, 2),   # 1, 3, 5
    reg_alpha=[0, 0.001, 0.005, 0.01, 0.05],
)

In [47]:
# Classifier created with XGBClassifier's default arguments.
# NOTE(review): no later cell visible here refers to xgbModel — confirm whether it is still needed.
xgbModel = xgb.XGBClassifier()

In [49]:
# Settings held fixed for every candidate. min_child_weight=1 is only the
# starting value: it is also a key of param_test1, so the search overrides it.
base_params = dict(
    learning_rate=0.1,
    n_estimators=140,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softprob',
    nthread=4,
    scale_pos_weight=1,
    seed=27,
)
base_model = xgb.XGBClassifier(**base_params)

# 5-fold cross-validated search over param_test1, using 4 parallel jobs.
gsearch1 = GridSearchCV(
    estimator=base_model,
    param_grid=param_test1,
    n_jobs=4,
    cv=5,
)

In [50]:
# Early stopping needs a monitoring set. Using (X_test, y_test) for that lets
# the held-out test data steer training, which makes the test accuracy
# reported afterwards optimistic. Carve a validation part out of the training
# data instead, so the test split is only ever used for the final evaluation.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train,
    test_size=0.20,
    random_state=101,
    stratify=y_train,
)

# The extra keyword arguments are forwarded by GridSearchCV to every
# underlying estimator fit.
# NOTE(review): recent xgboost releases expect early_stopping_rounds on the
# estimator constructor rather than in fit() — confirm against the installed
# version before upgrading.
gsearch1.fit(X_fit, y_fit, early_stopping_rounds=30,
             eval_set=[(X_val, y_val)], verbose=False)

KeyboardInterrupt: 

In [None]:
# Best estimator found by the search, its parameter setting and its score
search_summary = (
    gsearch1.best_estimator_,
    gsearch1.best_params_,
    gsearch1.best_score_,
)
search_summary

## Trial 1 - Accuracy

In [None]:
# Score the fitted search on each split, train first and then test
for split_name, features, labels in (
    ('Train', X_train, y_train),
    ('Test', X_test, y_test),
):
    print(f'{split_name} Accuracy - : {gsearch1.score(features, labels):.3f}')

### Prediction of Test

In [35]:
# File with the rows to predict, read into a DataFrame
TEST_CSV = 'test_pFkWwen.csv'
test = pd.read_csv(TEST_CSV)

In [36]:
# Same frame without the 'ID' column, used as model input
sub_test = test.drop(columns=['ID'])

In [38]:
# Predict one label per row, then cast the result to int
raw_pred = gsearch1.predict(sub_test)
sub_test_pred = raw_pred.astype(int)

In [39]:
# Pair each ID from the test file with its predicted Crop_Damage value
submission_columns = {
    'ID': test['ID'],
    'Crop_Damage': sub_test_pred,
}
AllSub = pd.DataFrame(submission_columns)

# Write the result to CSV, leaving out the DataFrame index
AllSub.to_csv("AV_JH_MK_in_Agri_XGB_sub3.csv", index=False)

# END

More on Hyperparameter Tuning - https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/