## Imports

In [None]:
# Install the XGBoost package from inside the notebook via a shell escape.
!pip install xgboost

In [None]:
import pandas as pd
import xgboost as xgb
import numpy as np
import collections

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.utils import shuffle

from google.cloud import aiplatform

In [None]:
# Google Cloud project and region used to initialise the Vertex AI SDK.
REGION = "europe-west2"
GCP_PROJECT = 'dt-tu-sandbox-dev'

# Display name given to the model when it is uploaded.
MODEL_NAME = 'mortgage_approval_xgboost'
# Version tag; not referenced by any of the visible cells.
VERSION_NAME = 'v1'

In [None]:
# Initialise the Vertex AI SDK with the project and region configured above.
aiplatform.init(
    location=REGION,
    project=GCP_PROJECT,
)

# Training the Model

## Download Data

In [None]:
# Copy the mortgage CSV from Cloud Storage into the current working directory.
!gsutil cp 'gs://mortgage_dataset_files/mortgage-small.csv' .

## Load Data

In [None]:
# Maps each CSV column to the dtype pandas should parse it as. Columns marked
# 'category' are the ones one-hot encoded later; 'approved' is the target.
COLUMN_NAMES = collections.OrderedDict([
    ('as_of_year', np.int16),
    ('agency_code', 'category'),
    ('loan_type', 'category'),
    ('property_type', 'category'),
    ('loan_purpose', 'category'),
    ('occupancy', np.int8),
    ('loan_amt_thousands', np.float64),
    ('preapproval', 'category'),
    ('county_code', np.float64),
    ('applicant_income_thousands', np.float64),
    ('purchaser_type', 'category'),
    ('hoepa_status', 'category'),
    ('lien_status', 'category'),
    ('population', np.float64),
    ('ffiec_median_fam_income', np.float64),
    ('tract_to_msa_income_pct', np.float64),
    ('num_owner_occupied_units', np.float64),
    ('num_1_to_4_family_units', np.float64),
    ('approved', np.int8),
])

In [None]:
# Read the CSV with the declared dtypes, discard rows containing missing
# values, then shuffle the rows with a fixed seed.
data = shuffle(
    pd.read_csv('mortgage-small.csv', dtype=COLUMN_NAMES, index_col=False).dropna(),
    random_state=2,
)
# Preview the first two rows.
data.head(2)

## Data Validation

In [None]:
# Target column 'approved' - 0: denied, 1: approved. Show the class counts.
print(data['approved'].value_counts())

# Keep the target as a NumPy array and remove it from the feature frame.
labels = data['approved'].to_numpy()
data = data.drop('approved', axis=1)

## Feature Engineering

In [None]:
# One-hot encode every column that was loaded with the 'category' dtype.
dummy_columns = list(data.dtypes[data.dtypes == 'category'].index)

# Pin the indicator dtype so the encoded frame stays purely numeric on every
# pandas version: from pandas 2.0 the default indicator dtype is bool, and
# mixing bool with the float columns makes `data.values` an object array.
# uint8 matches the pre-2.0 default.
data = pd.get_dummies(data, columns=dummy_columns, dtype=np.uint8)

# Preview the first two encoded rows.
data.head(2)

## Train-Test Split

In [None]:
# Feature matrix as a NumPy array, plus the target array extracted earlier.
x, y = data.values, labels

# Hold out a test set using the default split proportions. The seed is fixed
# so the split, and therefore the reported accuracy, is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

## Modelling

In [None]:
# Binary classifier for approved (1) vs denied (0). 'binary:logistic' is the
# classification objective; it outputs a probability like 'reg:logistic' but
# is the objective XGBoost documents for binary classification.
model = xgb.XGBClassifier(
    objective='binary:logistic',
)

In [None]:
# Train the classifier on the training portion of the split.
model.fit(X=x_train, y=y_train)

## Evaluation

In [None]:
# Predict on the held-out rows and report accuracy against the true labels.
# The predictions are rounded before being compared.
y_pred = model.predict(x_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred.round())
print(acc, '\n')

## Save Model

In [None]:
# Write the trained model to a local file so it can be uploaded afterwards.
model.save_model(fname='model.bst')

# Deploy the Model to Vertex AI Endpoints

In [None]:
# Upload the saved model file to Vertex AI under the configured display name.
# NOTE: this rebinds `model`; the in-memory XGBoost classifier is no longer
# reachable under that name after this cell runs.
model = aiplatform.Model.upload_xgboost_model_file(
    model_file_path="./model.bst",
    display_name=MODEL_NAME,
    sync=True,
)

print(model)

In [None]:
# Deploy the uploaded model; no arguments are passed, so the SDK's default
# deployment settings apply. The returned object is used for prediction below.
endpoint = model.deploy()

In [None]:
# A single example instance with 44 feature values.
# NOTE(review): presumably ordered like the columns of the encoded training
# frame (numeric columns first, then the one-hot indicators) - confirm.
example_input = [
    [
        2016.0, 1.0, 346.0, 27.0, 211.0, 4530.0, 86700.0, 132.13, 1289.0, 1408.0,
        0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0,
        0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,
        0.0, 0.0, 1.0, 0.0,
    ],
]

In [None]:
# Request an online prediction for the example instance. The result is
# captured and printed explicitly: the call is not the last expression in the
# cell, so the notebook would otherwise discard it.
prediction = endpoint.predict(instances=example_input)
print(prediction)

# Clean up: delete the endpoint (force=True is passed so it is removed even
# with a model deployed to it), then delete the uploaded model.
endpoint.delete(force=True)
model.delete()