# AFL Game Outcome Predictor Training
Heavily adapted from:
https://www.tensorflow.org/tutorials/structured_data/feature_columns

### Import Libraries and Setup

In [205]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split

# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop('target')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

### Load Data

In [206]:
csv_file = '../data/BankChurners_sanitized.csv'

dataframe = pd.read_csv(csv_file)

dataframe['target'] = np.where(dataframe['Attrition_Flag']=='Attrited Customer', 1, 0)

attrited_customers = dataframe[dataframe['Attrition_Flag']=='Attrited Customer']

existing_customers = dataframe[dataframe['Attrition_Flag']=='Existing Customer']

# lets make a dataset that is biased towards Attrited Customers
# We'd prefer to incorrectly identify a customer as attrited than an attrited customer as existing.

existing_customers = existing_customers.sample(round(len(attrited_customers)*0.8))

merged_dataframe = pd.concat([attrited_customers, existing_customers])

dataframe = dataframe.drop(columns=['Attrition_Flag'])

biased_dataframe = merged_dataframe.drop(columns=['Attrition_Flag'])

### Create and Train Models

In [207]:
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)

train_biased, test_biased = train_test_split(biased_dataframe, test_size=0.2)
train_biased, val_biased = train_test_split(biased_dataframe, test_size=0.2)

print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

6480 train examples
1621 validation examples
2026 test examples


In [208]:
feature_columns = []
for header in ['Customer_Age',
    'Dependent_count',
    'Months_on_book',
    'Total_Relationship_Count',
    'Months_Inactive_12_mon',
    'Contacts_Count_12_mon',
    'Credit_Limit',
    'Total_Revolving_Bal',
    'Avg_Open_To_Buy',
    'Total_Amt_Chng_Q4_Q1',
    'Total_Trans_Amt',
    'Total_Trans_Ct',
    'Total_Ct_Chng_Q4_Q1',
    'Avg_Utilization_Ratio'
]:
    feature_columns.append(feature_column.numeric_column(header))

for header in ['Gender', 'Education_Level', 'Marital_Status', 'Income_Category', 'Card_Category']:
    categorical_column = feature_column.categorical_column_with_vocabulary_list(
        header, dataframe[header].unique())
    indicator_column = feature_column.indicator_column(categorical_column)
    feature_columns.append(indicator_column)

In [209]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

batch_size = 2048
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

train_biased_ds = df_to_dataset(train_biased, batch_size=batch_size)
val_biased_ds = df_to_dataset(val_biased, shuffle=False, batch_size=batch_size)

attrited_customers_ds = df_to_dataset(attrited_customers, shuffle=False, batch_size=len(attrited_customers))


In [210]:
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dropout(.1),
    layers.Dense(1)
])

biased_model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dense(256, activation='relu'),
    layers.Dropout(.1),
    layers.Dense(1)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

biased_model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_ds,
          validation_data=val_ds,
          epochs=100,
          verbose=0)


biased_model.fit(train_biased_ds,
          validation_data=val_biased_ds,
          epochs=100,
          verbose=0)

Consider rewriting this model with the Functional API.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.


<tensorflow.python.keras.callbacks.History at 0x157ab4c2af0>

### Evaluate Raw Data Model

In [211]:
loss, accuracy = model.evaluate(test_ds)
print("Unbiased Accuracy Standard", accuracy)
loss, accuracy = model.evaluate(attrited_customers_ds)
print("Unbiased Accuracy Attrited", accuracy)

Unbiased Accuracy Standard 0.8529121279716492
Consider rewriting this model with the Functional API.
Unbiased Accuracy Attrited 0.030731407925486565


### Evaluate Rebiased Data Model

In [212]:
loss, accuracy = biased_model.evaluate(test_ds) # test against identical test data
print("Rebiased Accuracy Standard", accuracy)
loss, accuracy = biased_model.evaluate(attrited_customers_ds)
print("Rebiased Accuracy Attrited", accuracy)

Rebiased Accuracy Standard 0.800098717212677
Consider rewriting this model with the Functional API.
Rebiased Accuracy Attrited 0.6416717767715454
