<a href="https://colab.research.google.com/github/Soothysay/Automated-ANN/blob/master/Fraud_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import keras
from keras import layers
from keras import models
from keras import utils
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Flatten,Dense,Activation,Embedding,LeakyReLU,BatchNormalization,Dropout
from keras.activations import relu,sigmoid,elu,softmax,tanh,softplus
from keras.regularizers import l2
from keras.optimizers import SGD
from keras.optimizers import RMSprop
from keras import datasets
from keras.callbacks import LearningRateScheduler
from keras.callbacks import History
from keras import losses
from keras import optimizers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
import seaborn as sns
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

Using TensorFlow backend.
  import pandas.util.testing as tm


In [0]:
def read():
  df=pd.read_csv('/content/drive/My Drive/ANN/PS_20174392719_1491204439457_log.csv')
  # list of columns whose data type is object i.e. string
  listOfColumnNames = list((df.dtypes[df.dtypes == np.object]).index)
  #filteredColumns = df.dtypes[df.dtypes == np.object]
  #listOfColumnNames = list(filteredColumns.index)
  #Removing the Name and Surname Columns (You can add more redundant parameters)
  if 'nameOrig' in listOfColumnNames:
    df=df.drop(['nameOrig'],axis=1)
  if 'nameDest' in listOfColumnNames:
    df=df.drop(['nameDest'],axis=1)
  if 'nameOrig' in listOfColumnNames:
    listOfColumnNames.remove('nameOrig')
  if 'nameDest' in listOfColumnNames:
    listOfColumnNames.remove('nameDest')
  #Replacing the object type values with classified values (Performance can be improved)
  for i in range(len(listOfColumnNames)):
    s=set(df[listOfColumnNames[i]])
    s=list(s)
    for j in range(len(s)):
      df=df.replace(to_replace=s[j],value=(j+1))
  #Done with data shaping
  return df

In [0]:
def divide(path,divider):
  #Division of X and Y
  x=path.drop(divider,axis=1)
  type(divider)
  y=path[divider]
  return x,y

In [0]:
# Splitting the dataset into the Training set and Test set
def splitter(x,y):
  
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
  # Feature Scaling
  sc = StandardScaler()
  x_train = sc.fit_transform(x_train)
  x_test = sc.transform(x_test)
  return x_train,y_train,x_test,y_test

In [0]:
def inputter(df):
  print("Enter the column you wish to use as  result")
  print("Your options are:",df.columns)
  divider=input()
  #We need to preprocess data a bit more if data is biased towards a particular outcome
  nonf=df[df[divider]==0]
  fra=df[df[divider]==1]
  #I have just taken 10. The value can change
  if nonf.shape>=(10*fra.shape): 
    nonf=nonf.sample(2*fra.shape[0])
    #print(nonf.shape)
    df=fra.append(nonf,ignore_index=True)
  if fra.shape>=(10*nonf.shape):
    fra=fra.sample(2*nonf.shape[0])
    #print(fra.shape)
    df=nonf.append(fra,ignore_index=True)
  return df,divider

In [0]:
def plotter(model):
  #Summarize history for accuracy
  plt.plot(model.history['accuracy'])
  plt.plot(model.history['val_accuracy'])
  plt.title('model accuracy')
  plt.ylabel('accuracy')
  plt.xlabel('epoch')
  plt.legend(['train', 'test'], loc='upper left')
  plt.show()
  #Summarize history for loss
  plt.plot(model.history['loss'])
  plt.plot(model.history['val_loss'])
  plt.title('model loss')
  plt.ylabel('loss')
  plt.xlabel('epoch')
  plt.legend(['train', 'test'], loc='upper left')
  plt.show()


In [0]:
def create_model(activation='relu',optimizer='adam',init_mode='uniform',dropout_rate=0.0,n=10,p=10,neuron=12,learning_rate=0.002):
  #Creating the Artificial Neural Network using Keras for hyperparameter tuning
  model=Sequential()
  #Input Layer
  model.add(Dense(neuron,input_dim=p,kernel_initializer=init_mode, activation=activation))
  #Adding hidden layers
  for i in range(2,n):
    model.add(Dense(neuron,kernel_initializer=init_mode,activation=activation))
    model.add(Dropout(dropout_rate))
  #Adding output layer
  model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
  #As output will have 2 values, so using binary crossentropy
  if(optimizer=='SGD'):
    opti=keras.optimizers.SGD(learning_rate=learning_rate)
  if(optimizer=='Adam'):
    opti=keras.optimizers.Adam(learning_rate=learning_rate)
  if(optimizer=='Adamax'):
    opti=keras.optimizers.Adamax(learning_rate=learning_rate)
  if(optimizer=='Nadam'):
    opti=keras.optimizers.Nadam(learning_rate=learning_rate)
  if(optimizer=='Adagrad'):
    opti=keras.optimizers.Adagrad(learning_rate=learning_rate)
  if(optimizer=='RMSprop'):
    opti=keras.optimizers.RMSprop(learning_rate=learning_rate)
  model.compile(loss='binary_crossentropy', optimizer=opti, metrics=['accuracy'])
  return(model)

In [0]:
def tuner(p,x_test,y_test,epochs,batch_size):
  model = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch_size, verbose=0)
  # Define the grid search parameters
  activation = ['softmax', 'relu', 'tanh', 'sigmoid','elu','softplus']
  optimizer = ['SGD', 'Adam', 'Adamax', 'Nadam','Adagrad','RMSprop']
  init_mode = ['glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
  # User Automated hyperparameters
  n=list()
  dropout_rate=list()
  neuron=list()
  learning_rate=list()
  while(True):
    print("Input n:")
    n.append(int(input()))
    print("Enter number of neurons in each layer")
    neuron.append(int(input()))
    co=input("Continue?(Y/N)")
    if(co=='N'):
      break
  while(True):
    print("Input Dropout percentage:")
    dropout_rate.append((float(input())*0.01))
    co=input("Continue?(Y/N)")
    if(co=='N'):
      break
  #Put learning_rate
  while(True):
    print("Input Learning rate:")
    learning_rate.append(float(input()))
    co=input("Continue?(Y/N)")
    if(co=='N'):
      break
  #Creation of variable parameters
  param_grid = dict(activation=activation,optimizer=optimizer,init_mode=init_mode,dropout_rate=dropout_rate,n=n,p=p,neuron=neuron,learning_rate=learning_rate)
  #Invoking Randomized search with cross validation value 3 (Baad mein mai ne user se hi puch liya)
  cv=int(input("Enter the value of the number of folds you want:"))
  grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid,n_jobs=-1,cv=cv)
  #grid=GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1,cv=cv)
  grid_result = grid.fit(x_test,y_test)
  # summarize results
  print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
  means = grid_result.cv_results_['mean_test_score']
  stds = grid_result.cv_results_['std_test_score']
  params = grid_result.cv_results_['params']
  for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
  cv_results_df = pd.DataFrame(grid_result.cv_results_)
  cv_results_df.to_csv('gridsearch.csv')
  print(cv_results_df)
  #plot_grid_search(grid_result.cv_results_, activation, optimizer, 'Activation Function', 'Optimizer')
      #Add GridsearchCV
      

In [0]:
def plot_grid_search(cv_results, grid_param_1, grid_param_2, name_param_1, name_param_2):
    # Get Test Scores Mean and std for each grid search
    scores_mean = cv_results['mean_test_score']
    #scores_mean = np.array(scores_mean).reshape(len(grid_param_2),len(grid_param_1))

    scores_sd = cv_results['std_test_score']
    #scores_sd = np.array(scores_sd).reshape(len(grid_param_2),len(grid_param_1))

    # Plot Grid search scores
    _, ax = plt.subplots(1,1)

    # Param1 is the X-axis, Param 2 is represented as a different curve (color line)
    for idx, val in enumerate(grid_param_2):
        ax.plot(grid_param_1, scores_mean[idx,:], '-o', label= name_param_2 + ': ' + str(val))

    ax.set_title("Grid Search Scores", fontsize=20, fontweight='bold')
    ax.set_xlabel(name_param_1, fontsize=16)
    ax.set_ylabel('CV Average Score', fontsize=16)
    ax.legend(loc="best", fontsize=15)
    ax.grid('on')


In [0]:
def crossmat(m,x_test,y_test):
  y_pred = m.predict(x_test)
  y_pred = (y_pred > 0.5)
  from sklearn.metrics import confusion_matrix
  #Creating confusion matrices
  cm = confusion_matrix(y_test, y_pred)
  #As binary classification
  TP = cm[1, 1]
  TN = cm[0, 0]
  FP = cm[0, 1]
  FN = cm[1, 0]
  a=cm.shape
  #Calculating false positives
  corrPred=0
  falsePred=0
  for row in range(a[0]):
    for c in range(a[1]):
      if row == c:
        corrPred +=cm[row,c]
      else:
        falsePred += cm[row,c]
  print('Correct predictions: ', corrPred)
  print('False predictions', falsePred)
  # Calculate the Accuracy
  from sklearn.metrics import accuracy_score
  score=accuracy_score(y_pred,y_test)
  #Calculate sensitivity
  from sklearn.metrics import recall_score
  sensitivity=recall_score(y_pred,y_test)
  #Calculate precision
  from sklearn.metrics import precision_score
  precision=precision_score(y_pred,y_test)
  #Calculate specificity
  specificity = TN / (TN + FP)
  #Calculate Cohen's Kappa Score
  from sklearn.metrics import cohen_kappa_score
  kappa=cohen_kappa_score(y_pred,y_test)
  #Showing classification Report 
  from sklearn.metrics import classification_report
  target_names = ['class 0', 'class 1']
  print(classification_report(y_pred, y_test, target_names=target_names))

  return cm,score,sensitivity,precision,specificity,kappa

In [0]:
def binary_class(x_train,nodes,activation,n):
  #Creating customized ANN Model
  model=Sequential()
  for i in range(len(nodes)):
    if(i==0):
      if(activation=='sigmoid'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'glorot_uniform',activation='sigmoid',input_dim = len(x_train[1])))
      if(activation=='relu'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'he_uniform',activation='relu',input_dim = len(x_train[1])))
      if(activation=='tanh'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'glorot_normal',activation='tanh',input_dim = len(x_train[1])))
      if(activation=='softmax'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'glorot_normal',activation='softmax',input_dim = len(x_train[1])))
      if(activation== 'elu'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'he_normal',activation='elu',input_dim = len(x_train[1])))
      if(activation=='softplus'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'he_normal',activation='softplus',input_dim = len(x_train[1])))
    else:
      if(activation=='sigmoid'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'glorot_uniform',activation='sigmoid'))
      if(activation=='relu'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'he_uniform',activation='relu'))
      if(activation=='tanh'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'glorot_normal',activation='tanh'))
      if(activation=='softmax'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'glorot_uniform',activation='softmax'))
      if(activation=='elu'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'he_normal',activation='elu'))
      if(activation=='softplus'):
        model.add(Dense(units = nodes[i], kernel_initializer = 'he_normal',activation='softplus'))
    model.add(Dropout(n))
  #Adding output layer
  model.add(Dense(units=1, kernel_initializer = 'glorot_uniform',activation='sigmoid'))
  #model.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])
  return model

In [0]:
def optibin(model,opt,x_train,y_train,spl,bs,epochs,x_test,y_test):
  #Choosing the proper optimizer to use
  if(opt=='sgd'):
    print("Enter Momentum:")
    mom=float(input())
    lr=float(input("Enter value of Learning rate:"))
    opti=keras.optimizers.SGD(learning_rate=lr, momentum=mom, nesterov=False)
  if(opt=='Adam'):
    lr=float(input("Enter value of Learning rate:"))
    opti=keras.optimizers.Adam(learning_rate=lr)
  if(opt=='Adamax'):
    lr=float(input("Enter value of Learning rate:"))
    beta_1=float(input("Enter value of beta 1 (Generally close to 1)"))
    beta_2=float(input("Enter value of beta 2 (Generally close to 1)"))
    opti=keras.optimizers.Adamax(learning_rate=lr, beta_1=beta_1, beta_2=beta_2)
  if(opt=='Nadam'):
    lr=float(input("Enter value of Learning rate:"))
    beta_1=float(input("Enter value of beta 1 (Generally close to 1)"))
    beta_2=float(input("Enter value of beta 2 (Generally close to 1)"))
    opti=keras.optimizers.Nadam(learning_rate=lr, beta_1=beta_1, beta_2=beta_2)
  if(opt=='RMSprop'):
    lr=float(input("Enter value of Learning rate:"))
    opti=keras.optimizers.RMSprop(learning_rate=lr)
  if(opt=='Adagrad'):
    lr=float(input("Enter value of Learning rate:"))
    opti=keras.optimizers.Adagrad(learning_rate=lr)
  model.compile(optimizer = opti, loss = 'binary_crossentropy', metrics = ['accuracy'])
  model_history=model.fit(x_train, y_train,validation_split=spl, batch_size = bs,epochs = epochs)
  return model_history, model
 # model.compile(optimizer = 'Adamax', loss = 'binary_crossentropy', metrics = ['accuracy'])


In [0]:
def main():
  seed = 7
  np.random.seed(seed)
  df=read()
  while True:
    print("Ready to work?(Y/N)")
    work=input()
    if(work=='N'):
      break
    df,divider=inputter(df)
    #Dividing dataset to x and y
    x,y=divide(df,divider)
    #Scaling the dataset and division of training and test dataset
    x_train,y_train,x_test,y_test=splitter(x,y)
    print("Choose appropriate Option:")
    print("1. Tune to determine best model using hyperparameter tuning")
    print("2. Train and test your custom model")
    option=input()
    if(option=='1'):
      #Hyperparameter tuning
      p=list()
      p.append(len(x_test[1]))
      print("Enter number of epochs (Try keeping between 40-100):")
      epochs=int(input())
      print("Enter batch size (between 10-20):")
      batch_size=int(input())
      # create function for tuning
      tuner(p,x_test,y_test,epochs,batch_size)
    if(option=='2'):
      print("Number of layers you want") #Between 2 to 9
      nl=int(input())
      nodes=list()
      for i in range(nl):
        nodes.append(int(input("Enter number of neurons in layer: ")))
      epochs=int(input("Enter the number of epochs you want"))
      bs=int(input("Enter the batch size you want"))
      spl=int(input("Enter the percentage of data you want to use for validation"))
      spl=spl*0.01
      print("Choose the activation function for your layers")
      print("1. sigmoid")
      print("2. relu (Recommended. Most worked activation function in industry)")
      print("3. tanh")
      print("4. softmax")
      print("5. elu")
      print("6. softplus")
      activation=input()
      n=int(input("Enter percentage of dropout"))
      n=n*0.01
      model=binary_class(x_train,nodes,activation,n)
      print("Enter optimizer:")
      print("1. sgd")
      print("2. Adam (Hyperparameter tuning shows generally best)")
      print( "3. Adamax")
      print("4. Nadam")
      print("5. Adagrad")
      print("6. RMSprop")
      opt=input()
      #model_history=model.fit(x_train, y_train,validation_split=spl, batch_size = bs,epochs = epochs)
      m1,m=optibin(model,opt,x_train,y_train,spl,bs,epochs,x_test,y_test)
      #from ann_visualizer.visualize import ann_viz;
      #ann_viz(m1, title="My first neural network")
      print(m1.history.keys())
      plotter(m1)

      cm,score,sensitivity,precision,specificity,kappa=crossmat(m,x_test,y_test)
      print("Confusion Matrix=",cm)
      print("Accuracy Score=",score)
      print("Sensitivity Score=",sensitivity)
      print("Precision Score=",precision)
      print("Specificity Score=",specificity)
      print("Cohen's Kappa Score=",kappa)
  print("Thank You!")
  

In [0]:
main()

Ready to work?(Y/N)
Y
Enter the column you wish to use as  result
Your options are: Index(['step', 'type', 'amount', 'oldbalanceOrg', 'newbalanceOrig',
       'oldbalanceDest', 'newbalanceDest', 'isFraud', 'isFlaggedFraud'],
      dtype='object')
isFraud
Choose appropriate Option:
1. Tune to determine best model using hyperparameter tuning
2. Train and test your custom model
1
Enter number of epochs (Try keeping between 40-100):
40
Enter batch size (between 10-20):
10
Input n:
4
Enter number of neurons in each layer
12
Continue?(Y/N)Y
Input n:
5
Enter number of neurons in each layer
10
Continue?(Y/N)N
Input Dropout percentage:
10
Continue?(Y/N)N
Input Learning rate:
0.001
Continue?(Y/N)Y
Input Learning rate:
0.002
Continue?(Y/N)N
Enter the value of the number of folds you want:3




Best: 0.966518 using {'p': 8, 'optimizer': 'Nadam', 'neuron': 10, 'n': 5, 'learning_rate': 0.001, 'init_mode': 'he_uniform', 'dropout_rate': 0.1, 'activation': 'tanh'}
0.693385 (0.014848) with: {'p': 8, 'optimizer': 'SGD', 'neuron': 12, 'n': 4, 'learning_rate': 0.001, 'init_mode': 'glorot_uniform', 'dropout_rate': 0.1, 'activation': 'relu'}
0.883519 (0.012010) with: {'p': 8, 'optimizer': 'RMSprop', 'neuron': 12, 'n': 5, 'learning_rate': 0.001, 'init_mode': 'glorot_normal', 'dropout_rate': 0.1, 'activation': 'sigmoid'}
0.784092 (0.014923) with: {'p': 8, 'optimizer': 'Adagrad', 'neuron': 10, 'n': 4, 'learning_rate': 0.002, 'init_mode': 'he_uniform', 'dropout_rate': 0.1, 'activation': 'softplus'}
0.910103 (0.009472) with: {'p': 8, 'optimizer': 'Adam', 'neuron': 10, 'n': 4, 'learning_rate': 0.001, 'init_mode': 'glorot_uniform', 'dropout_rate': 0.1, 'activation': 'softplus'}
0.928977 (0.001611) with: {'p': 8, 'optimizer': 'Adam', 'neuron': 12, 'n': 5, 'learning_rate': 0.002, 'init_mode': 'h

KeyboardInterrupt: ignored