## Credit Card Fraud Detection

In [None]:
# import required libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from google.datalab.ml import TensorBoard

import shutil
import pandas as pd
import tensorflow as tf

print(tf.__version__)

In [None]:
# Show INFO-level TensorFlow logs, and keep DataFrame output compact:
# at most 50 rows, floats rendered with one decimal place.
tf.logging.set_verbosity(tf.logging.INFO)
pd.set_option('display.max_rows', 50)
pd.set_option('display.float_format', '{:.1f}'.format)

In [None]:
# Directory handed to train_and_evaluate() as the model directory; it is
# deleted before each training run further down in this notebook.
OUTDIR = './trained_model'

In [None]:
# Seed the random number generators to make results reproducible (currently disabled)
#np.random.seed(seed=1) 
#tf.random.set_random_seed(seed)

In [None]:
# Load the preprocessed training split and preview its first rows.
df_train = pd.read_csv('./data/preprocess/creditcard_train.csv')
df_train.head()

In [None]:
df_train.describe()

In [None]:
# Load the preprocessed test split and preview its first rows.
df_test = pd.read_csv('./data/preprocess/creditcard_test.csv')
df_test.head()

In [None]:
df_test.describe()

In [None]:
# Raw CSV columns used as float32 numeric features, in this order.
# V8, V13, V15 and V20 are intentionally absent from the list.
INPUT_COLUMNS = [
  tf.feature_column.numeric_column(column_name, dtype=tf.float32)
  for column_name in (
    "Time",
    "V1", "V2", "V3", "V4", "V5", "V6", "V7",
    "V9", "V10", "V11", "V12",
    "V14",
    "V16", "V17", "V18", "V19",
    "V21",
    "Amount",
  )
]

In [None]:
# Engineered indicator columns produced by add_more_features(), exposed to
# the model as float32 numeric features in this order.
ADDITIONAL_COLUMNS = [
  tf.feature_column.numeric_column(column_name, dtype=tf.float32)
  for column_name in (
    "V1_", "V2_", "V3_", "V4_", "V5_", "V6_", "V7_",
    "V9_", "V10_", "V11_", "V12_",
    "V14_",
    "V16_", "V17_", "V18_", "V19_",
    "V21_",
    "Amount_max_fraud",
  )
]

In [None]:
# Define your feature columns
def create_feature_cols():
  """Return all feature columns used by the estimator.

  The result is a new list holding the raw ``INPUT_COLUMNS`` followed by
  the engineered ``ADDITIONAL_COLUMNS``.
  """
  feature_columns = list(INPUT_COLUMNS)
  feature_columns.extend(ADDITIONAL_COLUMNS)
  return feature_columns

In [None]:
def add_more_features(df):
  """Add binary indicator features derived from the raw columns.

  For every rule listed below a new column named ``<source>_`` is written:
  1 where the source value lies strictly beyond the threshold, 0 otherwise.
  ``Amount_max_fraud`` is 0 where ``Amount <= 2125.87`` and 1 otherwise.
  Comparisons with missing values are False, so NaN yields 0 for the
  ``V*_`` indicators and 1 for ``Amount_max_fraud``.

  Args:
    df: pandas DataFrame containing ``Amount`` and every source column
      named in the rules. It is modified in place.

  Returns:
    The same DataFrame object, with the indicator columns added.
  """
  # ToDo Add additional standardized columns
  # Each rule: (source column, True if the flag fires ABOVE the threshold
  # / False if it fires BELOW it, threshold).
  indicator_rules = (
    ('V1', False, -3),
    ('V2', True, 2.5),
    ('V3', False, -4),
    ('V4', True, 2.5),
    ('V5', False, -4.5),
    ('V6', False, -2.5),
    ('V7', False, -3),
    ('V9', False, -2),
    ('V10', False, -2.5),
    ('V11', True, 2),
    ('V12', False, -2),
    ('V14', False, -2.5),
    ('V16', False, -2),
    ('V17', False, -2),
    ('V18', False, -2),
    ('V19', True, 1.5),
    ('V21', True, 0.6),
  )

  # NOTE(review): 2125.87 is presumably the largest fraudulent amount seen
  # in the training data -- confirm where this constant comes from.
  df['Amount_max_fraud'] = (~(df['Amount'] <= 2125.87)).astype('int64')

  # Vectorized comparisons replace the per-element Python lambdas.
  for source, fires_above, threshold in indicator_rules:
    values = df[source]
    flagged = values > threshold if fires_above else values < threshold
    df[source + '_'] = flagged.astype('int64')
  return df

In [None]:
def transform_feature_cols(df):
  """
  Standardize every column except 'Class' to a mean of 0 and a standard
  deviation of 1; this helps with training the neural network.

  The mean and standard deviation are computed from ``df`` itself. A column
  without any spread (standard deviation of 0, or undefined because there
  is only one row) is only centered, which yields 0.0 instead of the NaN
  that a division by zero would produce.

  NOTE(review): because the statistics come from the frame passed in, the
  train and test sets are each scaled with their own statistics -- confirm
  that this is intended.

  Args:
    df: pandas DataFrame of numeric columns. It is modified in place.

  Returns:
    The same DataFrame object with the feature columns standardized.
  """
  for feature in list(df.columns):
    if feature == 'Class':
      continue
    column = df[feature]
    centered = column - column.mean()
    std = column.std()
    # `std > 0` is False for both 0 and NaN, so degenerate columns skip the
    # division. Plain column assignment (rather than df.loc[:, feature])
    # lets integer indicator columns become float without a dtype clash.
    df[feature] = centered / std if std > 0 else centered
  return df

In [None]:
def train_input_fn(df):
  """Build the training input function for the estimator.

  Feature engineering and standardization are applied to a copy of ``df``.
  The caller's DataFrame therefore stays raw, and calling this function
  again on the same frame (e.g. when the training cell is re-run) does not
  threshold and standardize data that was already transformed.

  Args:
    df: pandas DataFrame with the raw feature columns and a 'Class' label
      column.

  Returns:
    The input function created by ``tf.estimator.inputs.pandas_input_fn``,
    configured for shuffled batches of 256 rows with ``num_epochs=None``.
  """
  # NOTE(review): the feature frame still contains the 'Class' column, as
  # before; consider dropping it so the label never travels with the inputs.
  features = transform_feature_cols(add_more_features(df.copy()))
  return tf.estimator.inputs.pandas_input_fn(
    x = features,
    y = df['Class'],
    batch_size = 256,
    num_epochs = None,
    shuffle = True,
    queue_capacity = 1000,
    num_threads = 1
  )

In [None]:
def eval_input_fn(df):
  """Build the evaluation input function for the estimator.

  Feature engineering and standardization are applied to a copy of ``df``.
  The caller's DataFrame therefore stays raw, and calling this function
  again on the same frame does not threshold and standardize data that was
  already transformed.

  Args:
    df: pandas DataFrame with the raw feature columns and a 'Class' label
      column.

  Returns:
    The input function created by ``tf.estimator.inputs.pandas_input_fn``,
    configured for a single unshuffled pass in batches of 128 rows.
  """
  # NOTE(review): the feature frame still contains the 'Class' column, as
  # before; consider dropping it so the label never travels with the inputs.
  features = transform_feature_cols(add_more_features(df.copy()))
  return tf.estimator.inputs.pandas_input_fn(
    x = features,
    y = df['Class'],
    batch_size = 128,
    num_epochs = 1,
    shuffle = False,
    num_threads = 1
  )

In [None]:
def serving_input_fn():
  """Build the serving input receiver for the prediction service.

  One float32 placeholder of shape [None] is created per entry in
  ``INPUT_COLUMNS``; the placeholders are wrapped in a DataFrame and passed
  through ``add_more_features`` to obtain the model features.

  NOTE(review): ``add_more_features`` works with pandas comparisons, which
  is unlikely to work on placeholders. The exporter that would call this
  function is commented out in ``train_and_evaluate`` pending that ToDo.
  """
  receiver_tensors = {}
  for column in INPUT_COLUMNS:
    receiver_tensors[column.name] = tf.placeholder(tf.float32, [None])
  placeholder_frame = pd.DataFrame.from_dict(receiver_tensors.copy())
  features = add_more_features(placeholder_frame)
  return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

In [None]:
# ToDo implement for custom estimator
def dnn_model(img, mode, hparams):
  """Placeholder for the custom-estimator network; not implemented yet.

  All three arguments are currently ignored and None is returned.
  """
  return None

In [None]:
# ToDo implement for custom estimator
def transaction_classifier(features, labels, mode, params):
  """Transaction classifier (fraud vs. normal); not implemented yet.

  All four arguments are currently ignored and None is returned.
  """
  return None

In [None]:
def train_and_evaluate(output_dir, num_train_steps):
  """Train the DNN classifier and evaluate it while training.

  A two-class ``tf.estimator.DNNClassifier`` is built over the columns from
  ``create_feature_cols()``. It trains on the module-level ``df_train`` and
  is evaluated on the module-level ``df_test``.

  Args:
    output_dir: directory passed to the estimator as ``model_dir``.
    num_train_steps: value passed to ``TrainSpec`` as ``max_steps``.
  """
  EVAL_INTERVAL = 10  # passed to EvalSpec as throttle_secs
  feature_columns = create_feature_cols()
  estimator = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[37, 18, 9],
    optimizer=tf.train.AdamOptimizer(1e-4),
    n_classes=2,
    dropout=0.1,
    model_dir=output_dir)

  train_spec = tf.estimator.TrainSpec(
    input_fn=train_input_fn(df_train), max_steps=num_train_steps)
  # ToDo Make the engineered features work with the pandas-based input
  # functions at serving time, then re-enable the exporter:
  # exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
  # Until then no exporter is passed; referencing the commented-out
  # `exporter` here raised a NameError.
  eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn(df_test), throttle_secs=EVAL_INTERVAL)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

In [None]:
# Launch tensorboard
#TensorBoard().start(OUTDIR)

In [None]:
# Run the model: remove any existing output directory first (a missing
# directory is not an error), then train and evaluate for 10000 steps.
shutil.rmtree(OUTDIR, ignore_errors=True)
train_and_evaluate(output_dir=OUTDIR, num_train_steps=10000)

In [None]:
# Stop every running TensorBoard instance.
# NOTE(review): TensorBoard.list() appears to return a DataFrame that has no
# 'pid' column when nothing is running (the start call above is commented
# out), so indexing ['pid'] would raise KeyError. .get() falls back to an
# empty list in that case -- confirm against the datalab version in use.
for pid in TensorBoard.list().get('pid', []):
  TensorBoard().stop(pid)
  print('Stopped TensorBoard with pid {}'.format(pid))

<pre>
# Copyright 2018 Atos. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
</pre>