# Classifier: 

## 1.a. Import: Libraries

In [2]:
#data organizing
import pandas #storage
import numpy as np #data-type conversion
from os import getcwd

#preprocessing - data splitting
from sklearn.model_selection import train_test_split

#classifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend
from tensorflow import feature_column
import tensorflow as tf

#classification result - statistical
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score

In [2]:
tf.compat.v1.enable_eager_execution()
tf.compat.v1.reset_default_graph()
tf.compat.v1.disable_eager_execution()
#tf.python.framework.ops.disable_eager_mode

https://www.tensorflow.org/tutorials/estimator/premade

https://www.tensorflow.org/tutorials/structured_data/feature_columns

https://www.tensorflow.org/api_docs/python/tf/estimator/DNNClassifier

https://stackoverflow.com/questions/56612386/defining-the-input-function-for-tensorflow-pre-made-estimator



## 1.b. Import: Dataset

In [3]:
#dtype changed from int64 to int32 to save space and speed up computation, no data was lost
def cvDefPay(prediction):
    mapper = {0: False, 1: True}
    return mapper.get(prediction)

url = getcwd() + '\\default of credit card clients.xls'
ccd = pandas.read_excel(io = url, \
                        sheet_name='Data', header = 1, index_col = 0, \
                        dtype = {'LIMIT_BAL': np.int32, 'AGE': np.int32, 'BILL_AMT1': np.int32, 'BILL_AMT2': np.int32, 'BILL_AMT3': np.int32, 'BILL_AMT4': np.int32, 'BILL_AMT5': np.int32, 'BILL_AMT6': np.int32, 'PAY_AMT1': np.int32, 'PAY_AMT2': np.int32, 'PAY_AMT3': np.int32, 'PAY_AMT4': np.int32, 'PAY_AMT5': np.int32, 'PAY_AMT6': np.int32})
                        #,converters = {'default payment next month': cvDefPay})

In [4]:
ccd.rename(columns = {'PAY_0': 'PAY_1'}, inplace = True)
ccd.rename(columns = {'default payment next month': 'default_payment_next_month'}, inplace = True)

## 3.a. Data Splitting

Data is split before oversampling to avoid synthetic datapoints in test dataset.

Test dataset is separated even though GridSearchCV uses Stratified K-Fold cross-validation so that model's accuracy can be tested independently.

In [5]:
ccdY = pandas.DataFrame(ccd['default_payment_next_month'])
ccdX = ccd.drop(['default_payment_next_month'], axis = 'columns')

In [6]:
trainX, testX, trainY, testY = train_test_split(ccdX, ccdY, test_size = 0.25, stratify = ccdY, random_state = 44)

trainX, validationX, trainY, validationY = train_test_split(trainX, trainY, test_size = 0.25, stratify = trainY, random_state = 44)

https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors

train_data = tf.data.Dataset.from_tensors((trainX.values, trainY.values))

validation_data = tf.data.Dataset.from_tensors((validationX.values, validationY.values))

test_data = tf.data.Dataset.from_tensors((testX.values, testY.values))

In [7]:
def input_fnx(features, labels, validation=True, batch_size=256):
    train_data = tf.data.Dataset.from_tensors((trainX.values, trainY.values))
    validation_data = tf.data.Dataset.from_tensors((validationX.values, validationY.values))
    test_data = tf.data.Dataset.from_tensors((testX.values, testY.values))
    #An input function for training or evaluating
    # Convert the inputs to a Dataset.
    dataset = train_data

    # Shuffle and repeat if you are in training mode.
    if validation:
        dataset = validation_data
    
    return dataset.batch(batch_size)

## 4. Classifier

In [8]:
limit_bal = feature_column.embedding_column('LIMIT_BAL', dimension = ccd['LIMIT_BAL'].nunique())

sex1 = feature_column.categorical_column_with_vocabulary_list('SEX', [1, 2])
sex = feature_column.indicator_column(sex1)

education1 = feature_column.categorical_column_with_vocabulary_list('EDUCATION', [0, 1, 2, 3, 4, 5, 6])
education = feature_column.indicator_column(education1)

marriage1 = feature_column.categorical_column_with_vocabulary_list('MARRIAGE', [0, 1, 2, 3])
marriage = feature_column.indicator_column(marriage1)

age = feature_column.numeric_column('AGE')

pay_11 = feature_column.categorical_column_with_vocabulary_list('PAY_1', [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
pay_1 = feature_column.indicator_column(pay_11)

pay_21 = feature_column.categorical_column_with_vocabulary_list('PAY_2', [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
pay_2 = feature_column.indicator_column(pay_21)

pay_31 = feature_column.categorical_column_with_vocabulary_list('PAY_3', [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
pay_3 = feature_column.indicator_column(pay_31)

pay_41 = feature_column.categorical_column_with_vocabulary_list('PAY_4', [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
pay_4 = feature_column.indicator_column(pay_41)

pay_51 = feature_column.categorical_column_with_vocabulary_list('PAY_5', [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
pay_5 = feature_column.indicator_column(pay_51)

pay_61 = feature_column.categorical_column_with_vocabulary_list('PAY_6', [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
pay_6 = feature_column.indicator_column(pay_61)

bill_amt1 = feature_column.numeric_column('BILL_AMT1')
bill_amt2 = feature_column.numeric_column('BILL_AMT2')
bill_amt3 = feature_column.numeric_column('BILL_AMT3')
bill_amt4 = feature_column.numeric_column('BILL_AMT4')
bill_amt5 = feature_column.numeric_column('BILL_AMT5')
bill_amt6 = feature_column.numeric_column('BILL_AMT6')

pay_amt1 = feature_column.numeric_column('PAY_AMT1')
pay_amt2 = feature_column.numeric_column('PAY_AMT2')
pay_amt3 = feature_column.numeric_column('PAY_AMT3')
pay_amt4 = feature_column.numeric_column('PAY_AMT4')
pay_amt5 = feature_column.numeric_column('PAY_AMT5')
pay_amt6 = feature_column.numeric_column('PAY_AMT6')

ccd_feature_columns = [limit_bal, sex, education, marriage, age,
                   pay_1, pay_2, pay_3, pay_4, pay_5, pay_5,
                   bill_amt1, bill_amt2, bill_amt3, bill_amt4, bill_amt5, bill_amt6,
                   pay_amt1, pay_amt2, pay_amt3, pay_amt4, pay_amt5, pay_amt6]

#initial_feature_count = 23
#dimention_reduced_count = 5

In [9]:
classifier = tf.estimator.DNNClassifier(feature_columns = ccd_feature_columns,
                                         hidden_units = [23, 5],
                                         n_classes = 2)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\ANKITA~1\\AppData\\Local\\Temp\\tmpw8_0vyfo', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [10]:
classifier.train(input_fn = lambda: input_fnx(features = trainX, labels = trainY, validation = False), steps=25)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.


ValueError: features should be a dictionary of `Tensor`s. Given type: <class 'tensorflow.python.framework.ops.Tensor'>

In [None]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))