#### Import Necessary Libraries

In [1]:
import os 
import gcsfs
import unittest
import pandas as pd
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

#### Create Class for Model Prediction


In [2]:
class PetFinderClassifier:
    """Score a labelled CSV with a previously trained XGBoost classifier.

    Usage: call ``load_model_from_disk`` once, then ``prediction`` with the
    path of a CSV whose last column is the label.
    """

    # Google Cloud project name handed to gcsfs when opening the CSV.
    GCS_PROJECT = 'XGBoost_Classifier'

    def __init__(self):
        """Declare every attribute the other methods populate."""
        self.data = None
        self.features = None
        self.labels = None
        self.predictor = None
        # Declared up front so that prediction() can detect a missing model
        # and report it clearly instead of failing on an unknown attribute.
        self.new_model = None
        self.new_data = None
        self.new_features = None
        self.new_labels = None

    def load_data(self, path):
        """Read the CSV at ``path`` through gcsfs and split it into features and labels.

        Parameters
        ----------
        path : str
            Location of the CSV, opened with ``gcsfs.GCSFileSystem``.

        Returns
        -------
        tuple
            ``(data, features, labels, label_encoder)``: the raw frame, every
            column but the last cast to ``category``, the last column encoded
            as integers, and the ``LabelEncoder`` fitted on that last column.
        """
        fs = gcsfs.GCSFileSystem(project=self.GCS_PROJECT)

        # Only the read needs the remote handle; close it before transforming.
        with fs.open(path) as f:
            self.data = pd.read_csv(f)

        # Every column except the last is a feature; convert them to category dtype.
        self.features = self.data.iloc[:, :-1].astype("category")

        # The last column is the label; encode its string values as integers.
        label_encoder = LabelEncoder()
        self.labels = label_encoder.fit_transform(self.data.iloc[:, -1])

        return self.data, self.features, self.labels, label_encoder

    def load_model_from_disk(self, path):
        """Load a saved XGBoost model from ``path``, store it on the instance and return it."""
        self.new_model = XGBClassifier()
        self.new_model.load_model(path)
        return self.new_model

    @staticmethod
    def _iteration_range(model):
        """Return the ``iteration_range`` to predict with, or ``None`` for the library default.

        ``best_iteration`` is preferred because ``iteration_range`` counts
        boosting iterations. ``best_ntree_limit`` is only a fallback for models
        that expose nothing else; it is missing from newer XGBoost releases.
        ``getattr`` with a default also covers properties that raise
        ``AttributeError`` when the value is not available.
        """
        best_iteration = getattr(model, "best_iteration", None)
        if best_iteration is not None:
            return (0, int(best_iteration) + 1)

        best_ntree = getattr(model, "best_ntree_limit", None)
        if best_ntree is not None:
            return (0, int(best_ntree))

        return None

    def prediction(self, path):
        """Score the CSV at ``path`` with the loaded model and save the result.

        Writes the scored frame to ``output/results.csv`` (the directory is
        created when missing).

        Parameters
        ----------
        path : str
            Location of the CSV, forwarded to ``load_data``.

        Returns
        -------
        tuple
            ``(save_data, features, scored_data, label_encoder)``: a copy of
            the input frame taken before scoring, the feature frame, the input
            frame with an added ``Adopted_prediction`` column ('Yes'/'No'),
            and the ``LabelEncoder`` returned by ``load_data``.

        Raises
        ------
        RuntimeError
            If ``load_model_from_disk`` has not been called yet.
        """
        if self.new_model is None:
            raise RuntimeError(
                "No model loaded; call load_model_from_disk() before prediction()."
            )

        self.new_data, self.new_features, self.new_labels, label_encoder = self.load_data(path)

        # Snapshot of the input taken before the prediction column is added.
        save_data = self.new_data.copy(deep=True)

        y_pred = self.new_model.predict(
            self.new_features,
            iteration_range=self._iteration_range(self.new_model),
        )

        self.predictor = pd.DataFrame(y_pred, columns=["Adopted_prediction"])

        # Class 0 is reported as 'No', anything else as 'Yes'. Assigning the
        # raw array keeps the values positional whatever index the frame has.
        self.new_data['Adopted_prediction'] = (
            self.predictor['Adopted_prediction']
            .eq(0)
            .map({True: 'No', False: 'Yes'})
            .to_numpy()
        )

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs("output", exist_ok=True)
        self.new_data.to_csv("output/results.csv")

        return save_data, self.new_features, self.new_data, label_encoder
      

In [3]:
# Instantiate the classifier; it holds no model or data until the following cells run
Classifier = PetFinderClassifier()

In [4]:
# Restore the trained model from its JSON artifact on local disk.
# The call is the cell's last expression, so the returned XGBClassifier is displayed.
path = "artifacts/model/xgboost_classifier.json"

Classifier.load_model_from_disk(path)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=True,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='gpu_hist', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

In [5]:
# CSV hosted in a Google Cloud Storage bucket (gs:// URL)
path = "gs://cloud-samples-data/ai-platform-unified/datasets/tabular/petfinder-tabular-classification.csv"

# Score the CSV with the loaded model. Returns a copy of the input frame, the
# feature frame, the frame with the added Adopted_prediction column, and the
# LabelEncoder fitted on the label column; also writes output/results.csv.
real_data, features, result, label_encoder = Classifier.prediction(path) 

In [6]:
# Preview the first rows of the scored frame, including the Adopted_prediction column
result.head()

Unnamed: 0,Type,Age,Breed1,Gender,Color1,Color2,MaturitySize,FurLength,Vaccinated,Sterilized,Health,Fee,PhotoAmt,Adopted,Adopted_prediction
0,Cat,3,Tabby,Male,Black,White,Small,Short,No,No,Healthy,100,1,Yes,Yes
1,Cat,1,Domestic Medium Hair,Male,Black,Brown,Medium,Medium,Not Sure,Not Sure,Healthy,0,2,Yes,Yes
2,Dog,1,Mixed Breed,Male,Brown,White,Medium,Medium,Yes,No,Healthy,0,7,Yes,Yes
3,Dog,4,Mixed Breed,Female,Black,Brown,Medium,Short,Yes,No,Healthy,150,8,Yes,Yes
4,Dog,1,Mixed Breed,Male,Black,No Color,Medium,Short,No,No,Healthy,0,3,Yes,Yes


#### Create Unit Test Class

In [7]:
class TestInt(unittest.TestCase):
    """Integration checks for PetFinderClassifier against the saved model and the hosted CSV."""

    MODEL_PATH = "artifacts/model/xgboost_classifier.json"
    DATA_PATH = "gs://cloud-samples-data/ai-platform-unified/datasets/tabular/petfinder-tabular-classification.csv"

    @classmethod
    def setUpClass(cls):
        """Load the model and score the dataset once for the whole class.

        The tests only read the results, so sharing them avoids repeating the
        model load, the download and the prediction before every test.
        """
        cls.Classifier = PetFinderClassifier()
        cls.Classifier.load_model_from_disk(cls.MODEL_PATH)
        cls.real_data, cls.features, cls.result, cls.le = cls.Classifier.prediction(cls.DATA_PATH)

    def test_inputData(self):
        """ Check input data dimension """
        self.assertEqual((11537, 14), self.real_data.shape)

    def test_inputFeatures(self):
        """ Check if training features exclude label """
        self.assertEqual((11537, 13), self.features.shape)

    def test_resultDimension(self):
        """ Check if final output result includes Adopted prediction column """
        self.assertEqual((11537, 15), self.result.shape)

    def test_predictorProbability(self):
        """ Check encoded labels and raw predictions are either 1 or 0 """
        # Use the encoder returned by prediction(), not a notebook-level global,
        # so the test does not depend on which cells were run before it.
        encoded = [int(v) for v in self.le.transform(['No', 'Yes'])]
        self.assertEqual(encoded, [0, 1])

        predicted = set(self.Classifier.predictor['Adopted_prediction'].unique())
        self.assertTrue(predicted <= {0, 1}, predicted)

    def test_predictorValue(self):
        """ Check label classes and predicted classes are either Yes or No """
        self.assertEqual(list(self.le.classes_), ['No', 'Yes'])

        predicted = set(self.result['Adopted_prediction'].unique())
        self.assertTrue(predicted <= {'No', 'Yes'}, predicted)



In [8]:
# Run the test cases inside the notebook kernel.
# argv=[''] keeps unittest from parsing the kernel's own command-line arguments;
# exit=False keeps it from calling sys.exit() when the run finishes.
unittest.main(argv=[''], verbosity=2, exit=False)


test_inputData (__main__.TestInt)
Check input data dimension ... ok
test_inputFeatures (__main__.TestInt)
Check if training features exclude label ... ok
test_predictorProbability (__main__.TestInt)
Check predicted probablity values are either 1 or 0 ... ok
test_predictorValue (__main__.TestInt)
Check predicted classes are either YES or NO ... ok
test_resultDimension (__main__.TestInt)
Check if final output result includes Adopted prediction column ... ok

----------------------------------------------------------------------
Ran 5 tests in 2.883s

OK


<unittest.main.TestProgram at 0x2997bcbf9a0>