In [10]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
import pandas as pd



In [11]:
# Read the transactions CSV from the working directory, then display
# per-column summary statistics as the cell output.
csv_path = 'creditcard.csv'
df = pd.read_csv(csv_path)
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,1.16598e-15,3.416908e-16,-1.37315e-15,2.086869e-15,9.604066e-16,1.490107e-15,-5.556467e-16,1.177556e-16,-2.406455e-15,...,1.656562e-16,-3.44485e-16,2.578648e-16,4.471968e-15,5.340915e-16,1.687098e-15,-3.666453e-16,-1.220404e-16,88.349619,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [12]:
# Collect the row labels of each class (Class == 1 is fraud, Class == 0 is
# normal) and report how many rows fall into each, plus the fraud share.
fraud = df.index[df.Class == 1]
normal = df.index[df.Class == 0]
no_of_frauds, no_of_normal = len(fraud), len(normal)

print("Normal Cases {}".format(no_of_normal))
print("Fraud Cases {}".format(no_of_frauds))
print("Ratio Of Fraud {}".format(no_of_frauds/(no_of_frauds+no_of_normal)))

Normal Cases 284315
Fraud Cases 492
Ratio Of Fraud 0.001727485630620034


# Autoencoder

In [13]:
# Draw 75% of the rows (without replacement, fixed seed) for training.
train_set = df.sample(frac=0.75, replace=False, random_state=123)
# Everything not sampled becomes the test set. Dropping by label avoids
# indexing .loc with a set (rejected by recent pandas versions) and keeps the
# remaining rows in their original order. Relies on df having a unique index,
# as the set-difference version did.
test_set = df.drop(index=train_set.index)

In [14]:
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training rows only so that the test rows' min/max do
# not leak into preprocessing. The test set is transformed with the training
# statistics and may therefore fall slightly outside [0, 1].
feature_drop_cols = ['Class', 'Time']
scaler = MinMaxScaler()
scaled_train_set = scaler.fit_transform(train_set.drop(feature_drop_cols, axis=1))
scaled_test_set = scaler.transform(test_set.drop(feature_drop_cols, axis=1))
print("Length Of Train {}".format(len(scaled_train_set)))
print("Length Of Test {}".format(len(scaled_test_set)))

Length Of Train 213605
Length Of Test 71202


In [15]:
# Layer sizes: the autoencoder reconstructs its own input, so the output
# width equals the input width. The width is read from the array shape rather
# than from the length of row 1, which would fail on a single-row array.
num_input = scaled_train_set.shape[1]
num_hidden = 2  # two hidden units so the encoding can be drawn as a 2-D scatter
num_output = num_input

# Optimiser step size and the probability of keeping a hidden unit in dropout.
learning_rate = 0.01
keep_prob = 0.5

# Start from an empty default graph so re-running the cells below does not
# pile duplicate ops onto a previous graph.
tf.compat.v1.reset_default_graph()

In [16]:
# Build the autoencoder as a TF1-style static graph (placeholders + sessions).
tf.compat.v1.disable_eager_execution()

# Input batch: any number of rows, num_input features each.
x = tf.compat.v1.placeholder(tf.float32, shape=(None, num_input))

# Weights (variance-scaling initialised) and zero-initialised biases for the
# encoder (input -> hidden) and decoder (hidden -> output).
initializer = tf.compat.v1.variance_scaling_initializer()
w = tf.Variable(initializer([num_input, num_hidden]))
w_out = tf.Variable(initializer([num_hidden, num_output]))

b = tf.Variable(tf.zeros([num_hidden]))
b_out = tf.Variable(tf.zeros([num_output]))

# Non-linearity applied to the hidden layer.
act_func = tf.nn.tanh

# Encoder -> dropout -> linear decoder.
hidden_layer = act_func(tf.add(tf.matmul(x, w), b))
# NOTE(review): rate is a Python constant, so dropout is applied every time
# output_layer (and anything derived from it) is evaluated, not only while
# training.
dropout_layer = tf.nn.dropout(hidden_layer, rate=1 - keep_prob)
output_layer = tf.add(tf.matmul(dropout_layer, w_out), b_out)

In [17]:
import numpy as np
# Reconstruction loss: mean absolute difference between the decoder output
# and the input it is supposed to reproduce.
reconstruction_error = tf.abs(output_layer - x)
loss = tf.reduce_mean(reconstruction_error)

# Adam update op for the loss, plus the op that initialises every variable.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
init = tf.compat.v1.global_variables_initializer()

def next_batch(x_data,batch_size):
    """Return `batch_size` distinct rows of `x_data`, chosen at random.

    Row positions are drawn without replacement from NumPy's global random
    state, so `batch_size` must not exceed the number of rows in `x_data`.
    """
    row_count = x_data.shape[0]
    picked_rows = np.random.choice(row_count, size=batch_size, replace=False)
    return x_data[picked_rows, :]

# Training

In [18]:
# Train the autoencoder with mini-batches and report the loss once per epoch.
num_step = 10        # number of passes (epochs) over the training data
batch_size = 150     # rows per optimiser step
num_batches = len(scaled_train_set)//batch_size
with tf.compat.v1.Session() as sess:
    sess.run(init)
    for step in range(num_step):
        for iteration in range(num_batches):
            x_batch = next_batch(scaled_train_set,batch_size)
            # Feed the sampled mini-batch. Feeding the entire training set on
            # every step made each epoch num_batches times more expensive and
            # left x_batch unused.
            sess.run(train,feed_dict={x:x_batch})
        # Loss over the whole training set after each epoch.
        err = loss.eval(feed_dict={x:scaled_train_set})
        print(step, "\tLoss : ",err)
    # Hidden-layer (pre-dropout) activations of every training row, computed
    # once after training while the session is still open.
    output_2d = hidden_layer.eval(feed_dict={x:scaled_train_set})

KeyboardInterrupt: 

In [None]:
import seaborn as sns
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Scatter the two hidden-unit activations of every training row, coloured by
# the row's Class label.
fig, ax = plt.subplots(figsize=(20, 8))
ax.scatter(output_2d[:, 0], output_2d[:, 1], c=train_set['Class'], alpha=0.7)

# Linear Classification


In [None]:
# Target column and feature frame for the linear classifier.
# NOTE(review): `x` was bound to the autoencoder's input placeholder earlier in
# this notebook; this assignment rebinds the name to a DataFrame.
y = df['Class']
x = df.drop(columns=['Class', 'Time'])
# Min-max scale every feature column into the 0-1 range.
x_scaled = x.sub(x.min()).div(x.max() - x.min())

In [None]:
# One numeric feature column for each of 'V1' .. 'V28', bound to nv1 .. nv28.
(nv1, nv2, nv3, nv4, nv5, nv6, nv7,
 nv8, nv9, nv10, nv11, nv12, nv13, nv14,
 nv15, nv16, nv17, nv18, nv19, nv20, nv21,
 nv22, nv23, nv24, nv25, nv26, nv27, nv28) = (
    tf.feature_column.numeric_column('V{}'.format(i)) for i in range(1, 29)
)
# The 'Amount' column; note the numbering skips nv29.
nv30 = tf.feature_column.numeric_column('Amount')


In [None]:
# Every feature column handed to the classifier, in definition order.
features = [
    nv1, nv2, nv3, nv4, nv5, nv6, nv7,
    nv8, nv9, nv10, nv11, nv12, nv13, nv14,
    nv15, nv16, nv17, nv18, nv19, nv20, nv21,
    nv22, nv23, nv24, nv25, nv26, nv27, nv28,
    nv30,
]

In [None]:
# Hold out 25% of the scaled rows for testing; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    x_scaled,
    y,
    train_size=0.75,
    random_state=101,
)

In [None]:
# Shuffled training input: batches of 100 rows, up to 1000 epochs available
# (the steps limit below ends training first if it is reached earlier).
input_func = tf.compat.v1.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=100,
    num_epochs=1000,
    shuffle=True,
)

# Binary linear classifier over the numeric feature columns.
model = tf.compat.v1.estimator.LinearClassifier(
    feature_columns=features,
    n_classes=2,
)
model.train(input_func, steps=1000)

# Evaluate on the training data itself: a single unshuffled pass.
result = model.evaluate(
    tf.compat.v1.estimator.inputs.pandas_input_fn(
        x=X_train,
        y=y_train,
        batch_size=10,
        num_epochs=1,
        shuffle=False,
    )
)

In [None]:
# Show the metrics returned by the most recent model.evaluate call.
print(result)

In [None]:
# Single unshuffled pass over the held-out rows, then evaluate the model on it.
test_eval = tf.compat.v1.estimator.inputs.pandas_input_fn(
    x=X_test,
    y=y_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)
result = model.evaluate(test_eval)


In [None]:
print("***************Results****************")
print(result)

In [None]:
# Estimator.predict takes an input_fn (not raw data) and returns a lazy
# generator, so wrap the single test row in an input function and consume
# the generator to actually obtain the prediction.
single_row_input = tf.compat.v1.estimator.inputs.pandas_input_fn(
    x=X_test.iloc[[1]],  # double brackets keep the row as a one-row DataFrame
    batch_size=1,
    num_epochs=1,
    shuffle=False,
)
result = list(model.predict(single_row_input))
# Predicted class id first, then the true label of the same row.
print(result[0]['class_ids'][0])
print(y_test.iloc[1])