In [None]:
import cv2
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.callbacks import Callback, ModelCheckpoint


In [None]:
data_dir = 'data/sign_data'
train_dir = f'{data_dir}/train'
test_dir = f'{data_dir}/test'

train_df = pd.read_csv(f"{data_dir}/train_data.csv", header=None)
test_df = pd.read_csv(f"{data_dir}/test_data.csv", header=None)
train_df.columns = ["orig_path", "comp_path", "is_forg"]
test_df.columns = ["orig_path", "comp_path", "is_forg"]

metrics = ['accuracy', 'precision', 'recall', 'auc']

In [None]:
train_df.head()

In [4]:
filenames = pd.concat([train_df.orig_path, train_df.comp_path], axis=0).unique()
images = {}

_dir = train_dir

width, height = 224, 224
shape = (width, height)
for filename in filenames:
    image = cv2.imread(f'{_dir}/{filename}', cv2.IMREAD_GRAYSCALE)
    try:
        image = cv2.resize(image, shape)
    except Exception as e:
        print(f'ERROR: {_dir}/{filename}')
        raise e

    image = image.reshape(shape)
    image = np.asarray(image, dtype=np.float16)
    images[filename] = image

In [5]:
class DataLoader:
    def __init__(self, dataset: pd.DataFrame, batch_size: int, _dir: str, shape: (int, int)):
        self.dataset = dataset
        self.batch_size = batch_size
        self.dir = _dir
        self.shape = shape

    def shuffle(self):
        return self.dataset.sample(frac=1)

    def generate(self):
        num_samples = len(self.dataset)
        while True:
            self.dataset = self.shuffle()
            for batch_begin in range(1, num_samples, self.batch_size):
                batch = self.dataset[batch_begin:batch_begin + self.batch_size]
                origs = np.array([images[i] for i in batch['orig_path']], dtype=np.float32)
                comps = np.array([images[i] for i in batch['comp_path']], dtype=np.float32)
                is_forgs = np.asarray(batch['is_forg'])

                yield (origs, comps), is_forgs

In [39]:
from tensorflow.keras import layers, losses, metrics, optimizers
from tensorflow.keras import Model

x1 = layers.Input(shape=(width, height))
x2 = layers.Input(shape=(width, height))
# параллельная модель
inputs = layers.Input(shape=(width, height))
feature = layers.Conv1D(32, 3, activation='relu')(inputs)
feature = layers.MaxPooling1D(2)(feature)
feature = layers.Conv1D(64, 3, activation='relu')(feature)
feature = layers.MaxPooling1D(2)(feature)
feature = layers.Conv1D(128, 3, activation='relu')(feature)
feature = layers.MaxPooling1D(2)(feature)
feature_model = Model(inputs=inputs, outputs=feature)

x1_net = feature_model(x1)
x2_net = feature_model(x2)
net = layers.Subtract()([x1_net, x2_net])
net = layers.Conv1D(128, 3, activation='relu')(net)
net = layers.MaxPooling1D(2)(net)
net = layers.Flatten()(net)
net = layers.Dense(512, activation='relu')(net)
net = layers.Dense(1, activation='softmax')(net)
classifier = Model(inputs=[x1, x2], outputs=net)
classifier.compile(loss='binary_crossentropy',
                   optimizer=optimizers.Adam(), metrics=['accuracy', Precision(), Recall(), AUC()])
classifier.summary()
classifier

<Functional name=functional_31, built=True>

In [40]:
checkpoint_filepath = './checkpoints/best/signatures_e30.keras'
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
    save_freq='epoch'
)

In [41]:
batch_size = 512
train_set, val_set = train_test_split(train_df, test_size=0.6)

train_loader = DataLoader(dataset=train_set, batch_size=batch_size, _dir=f'{data_dir}/train', shape=(width, height))
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, _dir=f'{data_dir}/train', shape=(width, height))
# next(train_loader.generate())

In [42]:

history = classifier.fit(
    train_loader.generate(),
    verbose=1,
    steps_per_epoch=int(len(train_set) / batch_size),
    epochs=30,
    validation_data=val_loader.generate(),
    validation_steps=int(len(val_set) / batch_size),
    callbacks=[model_checkpoint_callback]
)

Epoch 1/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 445ms/step - accuracy: 0.5019 - auc_7: 0.5029 - loss: 19.6282 - precision_7: 0.4614 - recall_7: 0.5363
Epoch 1: val_loss improved from inf to 0.95461, saving model to ./checkpoints/best/signatures_e30.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 945ms/step - accuracy: 0.5021 - auc_7: 0.5030 - loss: 19.1640 - precision_7: 0.4615 - recall_7: 0.5340 - val_accuracy: 0.5430 - val_auc_7: 0.5052 - val_loss: 0.9546 - val_precision_7: 0.0000e+00 - val_recall_7: 0.0000e+00
Epoch 2/30
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 372ms/step - accuracy: 0.5525 - auc_7: 0.5438 - loss: 0.7634 - precision_7: 0.6739 - recall_7: 0.0305
Epoch 2: val_loss improved from 0.95461 to 0.68225, saving model to ./checkpoints/best/signatures_e30.keras
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 785ms/step - accuracy: 0.5531 - auc_7: 0.5441 - loss: 0.7608 - precision_7: 0.672

In [17]:
classifier.save_weights('./checkpoints/checkpoint1.ckpt')

In [11]:
import os

os.getcwd()

'D:\\python\\DataProcessingLabs'