
Commit ecefa79

Added explanations to understand the Autoencoder implementation better
1 parent 563b60d commit ecefa79

1 file changed (+12 −20 lines)

Python Tensorflow Keras Fraud Detection Autoencoder.ipynb

Lines changed: 12 additions & 20 deletions
@@ -332,10 +332,7 @@
 }
 ],
 "source": [
-"\n",
-"\n",
-"pd.value_counts(df['Class'], sort = True) #class comparison 0=Normal 1=Fraud\n",
-"\n"
+"pd.value_counts(df['Class'], sort = True) #class comparison 0=Normal 1=Fraud"
 ]
 },
 {
@@ -357,16 +354,13 @@
 }
 ],
 "source": [
-"\n",
-"\n",
 "#if you don't have an intuitive sense of how imbalanced these two classes are, let's go visual\n",
 "count_classes = pd.value_counts(df['Class'], sort = True)\n",
 "count_classes.plot(kind = 'bar', rot=0)\n",
 "plt.xticks(range(2), LABELS)\n",
 "plt.title(\"Frequency by observation number\")\n",
 "plt.xlabel(\"Class\")\n",
-"plt.ylabel(\"Number of Observations\");\n",
-"\n"
+"plt.ylabel(\"Number of Observations\");"
 ]
 },
 {
@@ -466,8 +460,6 @@
 }
 ],
 "source": [
-"\n",
-"\n",
 "#plot of high value transactions\n",
 "bins = np.linspace(200, 2500, 100)\n",
 "plt.hist(normal_df.Amount, bins, alpha=1, normed=True, label='Normal')\n",
@@ -476,8 +468,7 @@
 "plt.title(\"Amount by percentage of transactions (transactions \\$200+)\")\n",
 "plt.xlabel(\"Transaction amount (USD)\")\n",
 "plt.ylabel(\"Percentage of transactions (%)\");\n",
-"plt.show()\n",
-"\n"
+"plt.show()"
 ]
 },
 {
@@ -556,13 +547,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"\n",
-"\n",
 "#data = df.drop(['Time'], axis=1) #if you think the var is unimportant\n",
 "df_norm = df\n",
 "df_norm['Time'] = StandardScaler().fit_transform(df_norm['Time'].values.reshape(-1, 1))\n",
-"df_norm['Amount'] = StandardScaler().fit_transform(df_norm['Amount'].values.reshape(-1, 1))\n",
-"\n"
+"df_norm['Amount'] = StandardScaler().fit_transform(df_norm['Amount'].values.reshape(-1, 1))"
 ]
 },
 {
@@ -573,9 +561,9 @@
 "source": [
 "train_x, test_x = train_test_split(df_norm, test_size=TEST_PCT, random_state=RANDOM_SEED)\n",
 "train_x = train_x[train_x.Class == 0] #where normal transactions\n",
-"train_x = train_x.drop(['Class'], axis=1) #drop the class column\n",
-"\n",
+"train_x = train_x.drop(['Class'], axis=1) #drop the class column (the autoencoder is unsupervised and does not use labels for training)\n",
 "\n",
+"# test_x (without Class) for validation; test_y (the Class labels) for computing the MSE / reconstruction error later\n",
 "test_y = test_x['Class'] #save the class column for the test set\n",
 "test_x = test_x.drop(['Class'], axis=1) #drop the class column\n",
 "\n",
@@ -615,12 +603,16 @@
 "nb_epoch = 5\n",
 "batch_size = 32\n",
 "\n",
+"\n",
+"# Autoencoder: 30 => 14 => 7 => 7 => 30 dimensions\n",
 "input_dim = train_x.shape[1] #num of columns, 30\n",
 "encoding_dim = 14\n",
 "hidden_dim = int(encoding_dim / 2) #i.e. 7\n",
 "learning_rate = 1e-7\n",
 "\n",
+"# Dense = fully connected layer\n",
 "input_layer = Input(shape=(input_dim, ))\n",
+"# First parameter is the number of output units (14, then 7, then 7, then 30):\n",
 "encoder = Dense(encoding_dim, activation=\"tanh\", activity_regularizer=regularizers.l1(learning_rate))(input_layer)\n",
 "encoder = Dense(hidden_dim, activation=\"relu\")(encoder)\n",
 "decoder = Dense(hidden_dim, activation='tanh')(encoder)\n",
@@ -665,11 +657,11 @@
 " write_graph=True,\n",
 " write_images=True)\n",
 "\n",
-"history = autoencoder.fit(train_x, train_x,\n",
+"history = autoencoder.fit(train_x, train_x, # Autoencoder => the input is also the training target (reconstruct the input)\n",
 " epochs=nb_epoch,\n",
 " batch_size=batch_size,\n",
 " shuffle=True,\n",
-" validation_data=(test_x, test_x),\n",
+" validation_data=(test_x, test_x), # Autoencoder => the validation input doubles as the validation target\n",
 " verbose=1,\n",
 " callbacks=[cp, tb]).history"
 ]
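
The two comments added to the fit call make the same point: an autoencoder is trained with the same array as input and target, so its output has the same width as its input. A short sketch of that idea with the notebook's objects (the epoch and batch values simply mirror the cell above; the shape check is illustrative):

# x and y are the same data: the model learns to reconstruct its input
history = autoencoder.fit(train_x.values, train_x.values,
                          epochs=5,
                          batch_size=32,
                          shuffle=True,
                          validation_data=(test_x.values, test_x.values),
                          verbose=1).history

# Output width matches input width, e.g. (5, 30) for the first five test rows
print(autoencoder.predict(test_x.values[:5]).shape)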
