Prediction of Forest Cover Type using XGBoost

In [1]:
#Import all the dependencies
import xgboost as xgb
from sklearn.datasets import fetch_covtype #Forest covtype is dataset of multi-class classification
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
#Get data using sklearn datasets
data = fetch_covtype(as_frame=True)
# data

In [5]:
#Define features and target from the dataset
X = data.data
y = data.target

In [None]:
#Print the features first five rows to view the data
X.head()

In [6]:
#Encoding the cataegorical variables 
e = LabelEncoder()
ynew = e.fit_transform(y)

In [7]:
#Split train and test data, test size is 20 percent and train is 80 percent
xtrain, xtest, ytrain, ytest = train_test_split(X,ynew, test_size= 0.2, random_state= 42)

In [11]:
#Build model
model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class = 7,
    n_estimators = 100,
    max_depth = 6,
    learning_rate =0.1,
    subsample = 0.8,
    colsample_bytree = 0.8,
    early_stopping_rounds=10,
    random_state = 42
)

In [12]:
#Fit the model with traindata
model.fit(
    xtrain,ytrain,
    eval_set=[(xtest,ytest)],
    verbose=True)

[0]	validation_0-mlogloss:1.75951
[1]	validation_0-mlogloss:1.62840
[2]	validation_0-mlogloss:1.50944
[3]	validation_0-mlogloss:1.40690
[4]	validation_0-mlogloss:1.32478
[5]	validation_0-mlogloss:1.25304
[6]	validation_0-mlogloss:1.18553
[7]	validation_0-mlogloss:1.12574
[8]	validation_0-mlogloss:1.07936
[9]	validation_0-mlogloss:1.03193
[10]	validation_0-mlogloss:0.99015
[11]	validation_0-mlogloss:0.95427
[12]	validation_0-mlogloss:0.91867
[13]	validation_0-mlogloss:0.88842
[14]	validation_0-mlogloss:0.85929
[15]	validation_0-mlogloss:0.83440
[16]	validation_0-mlogloss:0.80938
[17]	validation_0-mlogloss:0.78695
[18]	validation_0-mlogloss:0.76799
[19]	validation_0-mlogloss:0.75121
[20]	validation_0-mlogloss:0.73439
[21]	validation_0-mlogloss:0.71910
[22]	validation_0-mlogloss:0.70393
[23]	validation_0-mlogloss:0.69048
[24]	validation_0-mlogloss:0.67709
[25]	validation_0-mlogloss:0.66447
[26]	validation_0-mlogloss:0.65382
[27]	validation_0-mlogloss:0.64362
[28]	validation_0-mlogloss:0.6

In [13]:
#Predict and evaluate
ypred = model.predict(xtest)

In [14]:
#Determine accuracy of the model
accuracy = accuracy_score(ytest,ypred)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.8149
