In [25]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

In [39]:
# Feedforward Neural Network
class FNN():
  # this method is a constructor and is used to create an instance of the feedforward neural network model containing the training data
  # parameters
  # learning_rate: this contains a float which is used to change how big of a jump the data will make
  # epoch: this contains a integer which is used to repeat the training epoch times
  # x: this contains the training dataset examples
  # y: this contains the training dataset answers
  def __init__(self, learning_rate, epoch, x, y):
    self.learning_rate = learning_rate
    self.epoch = epoch
    self.x = x
    self.y = y
    self.number_data, self.number_feature = x.shape
    self.input_weights = np.zeros((self.number_feature, 4))
    self.bias = 1
    self.bias_hidden = 1
    self.bias_output = 1
    self.hidden_weights = np.zeros(4)
    self.hidden_weights2 = np.zeros((4, 4))

  # this method is used for back propagation
  # parameters
  # input_hidden2: this contains an array that contains the input to hidden values but with relu applied to it
  # hidden2_hidden: this contains an array that contains the hidden to hidden values but with relu applied to it
  # output_output: this contains an array that contains the hidden to output values but with sigmoid applied to it
  def backward_pass(self, input_hidden2, hidden2_hidden,hidden_output):
    # Calculate errors
    # x
    output_error = hidden_output - self.y
    # x by 4
    hidden_error = np.outer(output_error, self.hidden_weights)
    # x by 4
    hidden_error2 = np.dot(hidden_error, self.hidden_weights2)

    # Calculating gradients. x by 4
    output_weights_gradient = (1/self.number_data) * np.dot(hidden2_hidden.T, output_error)
    output_grad_bias = (1/self.number_data) * np.sum(output_error)

    # x by 4
    hidden_weights_gradient = (1/self.number_data) * np.dot(input_hidden2.T, hidden_error)
    hidden_grad_bias = (1/self.number_data) * np.sum(hidden_error)

    # 4 by features
    hidden_weights_gradient2 = (1/self.number_data) * np.dot(self.x.T, hidden_error2)
    hidden_grad_bias2 = (1/self.number_data) * np.sum(hidden_error2)

    # Updating weights and biases
    self.hidden_weights -= self.learning_rate * output_weights_gradient
    self.bias_output -= self.learning_rate * output_grad_bias

    self.hidden_weights2 -= self.learning_rate * hidden_weights_gradient
    self.bias_hidden -= self.learning_rate * hidden_grad_bias

    self.input_weights -= self.learning_rate * hidden_weights_gradient2
    self.bias -= self.learning_rate * hidden_grad_bias2


  # trains the model
  def fit(self):
    for p in range(self.epoch):

      # input to hidden. x by 4
      hidden_input = np.dot(self.x, self.input_weights) + self.bias
      # x by 4
      relu_activation = np.maximum(0, hidden_input)

      # hidden2 to hidden. x by 4
      hidden_input2 = np.dot(relu_activation, self.hidden_weights2) + self.bias_hidden
      # x by 4
      relu_activation2 = 1 / (1 + np.exp(-hidden_input2))

      # hidden to output using sigmoid since it is a binary classification. x
      output_hidden = np.dot(relu_activation2, self.hidden_weights) + self.bias_output
      # x
      Sigmoid = 1 / (1 + np.exp(-output_hidden))
      # gradiants
      self.backward_pass(relu_activation, relu_activation2, Sigmoid)


  # finds the accuracy with the testing dataset
  # parameters
  # x_test: contains the testing dataset examples
  # y_test: contains the testing dataset answers
  def accuracy(self, x_test, y_test):
    # calculate the outputs
    hidden_input = np.dot(x_test, self.input_weights) + self.bias
    relu_activation = np.maximum(0, hidden_input)

    # hidden2 to hidden. x by 4
    hidden_input2 = np.dot(relu_activation, self.hidden_weights2) + self.bias_hidden
    # x by 4
    relu_activation2 = 1 / (1 + np.exp(-hidden_input2))

    # hidden to output
    output_hidden = np.dot(relu_activation2, self.hidden_weights) + self.bias_output
    Sigmoid = 1 / (1 + np.exp(-output_hidden))

    accuracy = 0
    TP = 0
    FP = 0
    TN = 0
    FN = 0
    # finds accuracy
    for i in range(len(Sigmoid)):
      # uses the threshold to determine if it is 1 or 0
      if Sigmoid[i] > .5:
        # checks if the guess was correct, if it was increment by 1
        print("Fraud")
        if y_test[i] == 1:
          accuracy += 1
          TP += 1
        else:
          FP += 1
      else:
        # checks if the guess was correct, if it was increment by 1
        print("Not Fraud")
        if y_test[i] == 0:
          accuracy += 1
          TN += 1
        else:
          FN += 1
    print(TP)
    print(FP)
    print(FN)
    f1 = TP / (TP + ((1/2) * (FP + FN)))
    # returns the percentage
    return (accuracy / len(y_test)) * 100, f1
  
  # this method will save the weights of the model into a python file
    def saveWeights(self):
        # this is a data type that stores key pair values so it stores w1 with the w1 weights
        save = {
            "w1": self.w1,
            "w2": self.w2
        }

        # opens the json file in write mode and closes it when finished
        with open("save.json", "w") as f:
            # stores save in the json file
            json.dump(save, f)


    # this method will load the weights of the model into the code
    def loadWeights(self):
        # opens the json file in read mode and closes it when finished
        with open("save.json", "r") as f:
            # loads what is stored in the json file into the save
            save = json.load(f)
            # set the weights stored in save
            self.w1 = save["w1"]
            self.w2 = save["w2"]

In [27]:
def get_data(filename):
    df = pd.read_csv(filename)
    x = df.drop(columns=['merchant','first','last','gender','street','city','state','zip','job','dob','trans_num','unix_time','trans_date_trans_time','cc_num'])
    # y = df['is_fraud']
    dataset=x
    # dataset = np.column_stack((x, y))
    #np.random.shuffle(dataset)
    # Split data
    train_size = int(0.7 * len(dataset))
    train_dataset, test_dataset = dataset[:train_size], dataset[train_size:]
    #features
    train_dataset=np.delete(train_dataset,0,axis=1)
    test_dataset=np.delete(test_dataset,0,axis=1)
    #targets
    train_target=train_dataset[:,7]
    test_target=test_dataset[:,7]

    train_dataset=np.delete(train_dataset,7,axis=1)
    test_dataset=np.delete(test_dataset,7,axis=1)

    return train_dataset, test_dataset, train_target, test_target

In [41]:
scaler = StandardScaler()

#output get_data
train_sample,test_sample,train_target,test_target=get_data('fraudTrain.csv')

train_sample_scaled = scaler.fit_transform(train_sample)
test_sample_scaled = scaler.transform(test_sample)

# Feedforward Neural Networks running
print("-----------------------------------FNN-----------------------------------")
Feedforward_Neural_Network = FNN(0.1, 100, train_sample_scaled, train_target)
Feedforward_Neural_Network.fit()

-----------------------------------FNN-----------------------------------


In [42]:
accuracy, f1 = Feedforward_Neural_Network.accuracy(test_sample_scaled, test_target)
print("Accuracy:",accuracy)
print("F1 Score:", f1)


Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud
Not Fraud


In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

ModuleNotFoundError: No module named 'tensorflow.python'

In [None]:
# reshape so it can match the input shape of the model
X_train_reshaped = np.reshape(train_sample, (train_sample.shape[0], 1, train_sample.shape[1]))
X_test_reshaped = np.reshape(test_sample, (test_sample.shape[0], 1, test_sample.shape[1]))

# creates the model
model = Sequential()
model.add(LSTM(4, input_shape=(1, 30)))
model.add(Dense(1))

# compiles the model
model.compile(optimizer='adam', loss='mean_squared_error')

# trains the model
history = model.fit(X_train_reshaped, train_target, epochs=1000, verbose=2)

# evaluates the model
loss = model.evaluate(X_test_reshaped, train_sample,test_sample,train_target,test_target=get_data('fraudTrain.csv')
)

In [None]:
# Import the model we are using
from sklearn.ensemble import RandomForestRegressor
# Instantiate model with 1000 decision trees
rf = RandomForestRegressor(n_estimators = 1000, random_state = 42)
# Train the model on training data
rf.fit(train_sample, train_target)

In [None]:
# Make predictions
y_pred = rf.predict(test_sample)

accuracy = 0
# finds accuracy
for i in range(len(y_pred)):
  # uses the threshold to determine if it is 1 or 0
  if y_pred[i] > .5:
    # checks if the guess was correct, if it was increment by 1
    if test_target[i] == 1:
      accuracy += 1
  else:
    # checks if the guess was correct, if it was increment by 1
    if test_target[i] == 0:
      accuracy += 1

accuracy = (accuracy / len(test_target)) * 100

print("Accuracy:", accuracy)