In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import tensorflow as ts
from tensorflow import keras

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
df = pd.read_csv('creditcard.csv')

In [5]:
X = df.drop(columns=['Class'], axis=1)
Y = df['Class'] 

In [6]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [7]:
weight_0 = 1.0/len(df[df['Class']==0])
weight_1 = 1.0/len(df[df['Class']==1])
print(weight_0, weight_1)

3.51722561243691e-06 0.0020325203252032522


In [8]:
from sklearn.preprocessing import normalize

In [61]:
mean = np.mean(x_train, axis=0)
x_train -= mean
x_test -= mean
std = np.std(x_train, axis=0)
x_train_norma /= std
x_test_norma /= std

### Строим нейронку

#### Создаем модель

In [62]:
hid_size = 256
input_shape = x_train_norma.shape[-1]

model = keras.Sequential(
    [
        keras.layers.Dense(hid_size, activation='relu', input_shape=(input_shape,)),
        keras.layers.Dense(hid_size, activation="relu"), # y^2
        keras.layers.Dropout(0.3),
        keras.layers.Dense(hid_size, activation="relu"), # y^3
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"), # y^4
    ]
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 256)               7936      
                                                                 
 dense_9 (Dense)             (None, 256)               65792     
                                                                 
 dropout_4 (Dropout)         (None, 256)               0         
                                                                 
 dense_10 (Dense)            (None, 256)               65792     
                                                                 
 dropout_5 (Dropout)         (None, 256)               0         
                                                                 
 dense_11 (Dense)            (None, 1)                 257       
                                                                 
Total params: 139,777
Trainable params: 139,777
Non-tr

#### Метрики

In [63]:
metrics= [
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
]

#### Компиляция модели (перевод из высокоуровневой оболочки в бинарный вид)

In [64]:
model.compile(
    optimizer = keras.optimizers.Adam(1e-2), loss = 'binary_crossentropy', metrics =metrics
)

#### CallBack (сохраняем информацию по эпохам)

In [65]:
callback = [keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.h5")]

#### Обучение модели

In [66]:
class_weight = {0: weight_0, 1: weight_1}

model.fit(
    x_train_norma,
    y_train,
    batch_size = 2048,
    epochs = 30,
    verbose = 2,
    callbacks=callback,
    validation_data = (x_test_norma, y_test),
    class_weight = class_weight,
)


Epoch 1/30
112/112 - 41s - loss: 4.8737e-06 - fn: 287.0000 - fp: 72073.0000 - tn: 155378.0000 - tp: 107.0000 - precision: 0.0015 - recall: 0.2716 - val_loss: 0.7067 - val_fn: 0.0000e+00 - val_fp: 56864.0000 - val_tn: 0.0000e+00 - val_tp: 98.0000 - val_precision: 0.0017 - val_recall: 1.0000 - 41s/epoch - 368ms/step
Epoch 2/30
112/112 - 39s - loss: 4.8713e-06 - fn: 74.0000 - fp: 182520.0000 - tn: 44931.0000 - tp: 320.0000 - precision: 0.0018 - recall: 0.8122 - val_loss: 0.6604 - val_fn: 98.0000 - val_fp: 0.0000e+00 - val_tn: 56864.0000 - val_tp: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - 39s/epoch - 347ms/step
Epoch 3/30
112/112 - 40s - loss: 4.8791e-06 - fn: 318.0000 - fp: 47657.0000 - tn: 179794.0000 - tp: 76.0000 - precision: 0.0016 - recall: 0.1929 - val_loss: 0.6931 - val_fn: 98.0000 - val_fp: 115.0000 - val_tn: 56749.0000 - val_tp: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - 40s/epoch - 358ms/step
Epoch 4/30
112/112 - 40s - loss: 4.8721e

Epoch 27/30
112/112 - 54s - loss: 4.8722e-06 - fn: 63.0000 - fp: 194229.0000 - tn: 33222.0000 - tp: 331.0000 - precision: 0.0017 - recall: 0.8401 - val_loss: 0.6812 - val_fn: 98.0000 - val_fp: 0.0000e+00 - val_tn: 56864.0000 - val_tp: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - 54s/epoch - 481ms/step
Epoch 28/30
112/112 - 60s - loss: 4.8737e-06 - fn: 340.0000 - fp: 33231.0000 - tn: 194220.0000 - tp: 54.0000 - precision: 0.0016 - recall: 0.1371 - val_loss: 0.6982 - val_fn: 0.0000e+00 - val_fp: 56864.0000 - val_tn: 0.0000e+00 - val_tp: 98.0000 - val_precision: 0.0017 - val_recall: 1.0000 - 60s/epoch - 540ms/step
Epoch 29/30
112/112 - 65s - loss: 4.8739e-06 - fn: 50.0000 - fp: 198312.0000 - tn: 29139.0000 - tp: 344.0000 - precision: 0.0017 - recall: 0.8731 - val_loss: 0.6719 - val_fn: 98.0000 - val_fp: 0.0000e+00 - val_tn: 56864.0000 - val_tp: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - 65s/epoch - 584ms/step
Epoch 30/30
112/112 - 48s - loss: 4.

<keras.callbacks.History at 0x1126e84d220>

In [67]:
model.evaluate(x_test, y_test)



[1277.1666259765625, 0.0, 56864.0, 0.0, 98.0, 0.001720445230603218, 1.0]

In [58]:
res = model.predict(x_test[:2])
res

array([[1.],
       [1.]], dtype=float32)

In [59]:
y_test[:2]

43428    1
49906    0
Name: Class, dtype: int64

In [50]:
df_res = pd.DataFrame(res, columns=['pred'])
df_res

Unnamed: 0,pred
0,1.0
1,1.0
2,1.0
3,1.0
4,1.0
...,...
56957,1.0
56958,1.0
56959,1.0
56960,1.0


In [49]:
y_test.value_counts()

0    56864
1       98
Name: Class, dtype: int64