In [3]:
from datetime import datetime
from glob import glob
import os

from keras import Input, Model
from keras.layers import Activation, BatchNormalization, Conv2D, Dense, Dropout, Flatten, MaxPool1D
from keras.losses import SparseCategoricalCrossentropy
from keras.metrics import SparseCategoricalAccuracy
from keras.optimizers import Adam
from matplotlib import pyplot as plt
from numpy import interp
import numpy as np
import pandas as pd
from PIL import Image
import plotly.express as px
import seaborn as sns
from sklearn import metrics
from sklearn.preprocessing import label_binarize
import tensorflow as tf
import tensorflow.keras as k
from tensorflow.python.keras.callbacks import EarlyStopping
from tensorflow.python.ops.confusion_matrix import confusion_matrix


In [4]:
GPU_SET = 0

gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        if GPU_SET == 0:
    ## 1 필요한 만큼 메모리를 런타임에 할당
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        elif GPU_SET == 1:
    ## 2 GPU에 할당되는 전체 메모리 크기를 제한
            tf.config.experimental.set_virtual_device_configuration(
                    gpus[0],
                    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=6144)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)


1 Physical GPUs, 1 Logical GPUs


2022-07-06 09:15:18.891055: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-06 09:15:18.920353: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-06 09:15:18.920630: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-06 09:15:18.922246: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [5]:
DEBUG = 1
SAVE = 1

BATCH = 32
EPOCH = 64
ES = 8

MIN, MAX = 0, 255

CLASS = [*range(0, 19, 1)]
CLASS


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]

## FUNCTIONS

In [6]:
def log(l):
    if DEBUG == 1: print(l)


def draw_CM(label, predicted):
    cm = confusion_matrix(label, predicted)
    sns.heatmap(cm, annot=True, fmt="d")
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')

    # true : false rate
    true = 0
    false = 0
    for i, j in enumerate(label):
        if j != predicted[i]:
            false += 1
        else: true += 1
    classification_report = metrics.classification_report(label, predicted)
    multilabel_to_binary_matrics = metrics.multilabel_confusion_matrix(label, predicted)

    return plt.show(), print('true rate: ', true), print('false rate: ', false), print(), print('='*10,
                                                                                                'classification_report: ',
                                                                                                '\n',
                                                                                                classification_report), print(
            '='*10, 'multilabel_to_binary_matrics by class_num: ', '\n', '[[TN / FP] [FN / TP]]',
            '\n', multilabel_to_binary_matrics)


def draw_ROC_AUC(x, y, category_names):
    n_classes = len(category_names)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    for i in range(n_classes):
        fpr[i], tpr[i], _ = metrics.roc_curve(y[:, i], x[:, i])
        roc_auc[i] = metrics.auc(fpr[i], tpr[i])

    fpr["micro"], tpr["micro"], _ = metrics.roc_curve(y.ravel(), x.ravel())
    roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])

    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"])

    plt.figure()
    plt.plot(fpr["micro"], tpr["micro"],
             label=f'micro-average ROC curve (area = {roc_auc["micro"]:0.2f})',
             color='deeppink', linestyle=':', linewidth=1)

    plt.plot(fpr["macro"], tpr["macro"],
             label=f'macro-average ROC curve (area = {roc_auc["macro"]:0.2f})',
             color='navy', linestyle=':', linewidth=1)

    colors = (['purple', 'pink', 'red', 'green', 'yellow', 'cyan', 'magenta', 'blue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=1, label=f'Class {i} ROC curve (area = {roc_auc[i]:0.2f})')

    plt.plot([0, 1], [0, 1], 'k--', lw=1)
    plt.xlim([-.01, 1.0])
    plt.ylim([0.0, 1.01])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC & AUC')
    plt.legend(loc="lower right")

    return plt.show()


## DATASET

In [7]:
data_dir = f"/media/z/0/MVPC10/DATA/v1.1/RAW/device_03"
file = f"refined_concat.csv"
df = pd.read_csv(file)
df.sort_values(by=df.keys()[0], inplace=True, ascending=True)
log(df.head())


            data  pre_label  label  location
0  1650315681891          0      0       NaN
1  1650315681978          0     -1       NaN
2  1650315682128          0     -1       NaN
3  1650315682196          0     -1       NaN
4  1650315736806          0      0       NaN


In [8]:
## SHUFFLE
df = df.sample(frac=1).reset_index(drop=True)

In [9]:
# base_dir = "out"
# img_list = glob(f"{base_dir}/*.png")
# df = pd.read_csv(f"{base_dir}/output(err_dropped).csv")
# log(df.head)

# col = list(df.columns)
# log(col)


In [10]:
## DROP ERROR
df1 = df[df.iloc[:, 2] > 0]

## PATH TO REAL_PATH
img_dir = f"/media/z/0/MVPC10/DATA/v1.2/out"
for i in range(len(df1)):
    df1.iloc[i, 0] = f"{img_dir}/{df1.iloc[i, 0]}.png"
log(df1.iloc[1441])

col = list(df1.columns)
log(col)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1.iloc[i, 0] = f"{img_dir}/{df1.iloc[i, 0]}.png"


data         /media/z/0/MVPC10/DATA/v1.2/out/1651530976724.png
pre_label                                                    0
label                                                        1
location                                                   NaN
Name: 2283, dtype: object
['data', 'pre_label', 'label', 'location']


In [11]:
## GET H,W
sample_img = Image.open(df1.iloc[16, 0])
img_array = np.array(sample_img, int)
H, W = img_array.shape

len(df1)

8873

## PRE-PROCESS

In [12]:
## DATASET TO TENSOR
data = []
label = []
for index, row in df1.iterrows():
    try:
        img = Image.open(row[col[0]])
        img = data.append(list(img.getdata()))
        lbl = label.append(row[col[1]])
        if index%1000 == 0:  log(index)
    except FileNotFoundError as FNFE:
        log(FNFE)

data = np.array(data)
data = data.reshape(data.shape[0], H, W, 1)

label = np.array(label)
label = label.reshape(label.shape[0], 1)

log(data.shape)
log(label.shape)

0
1000
3000
6000
9000
10000
13000
14000
(8873, 60, 48, 1)
(8873, 1)


In [13]:
# ## Shuffle
# seed = 99
# np.random.seed(seed)
# np.random.shuffle(data)
# # np.random.seed(seed)
# np.random.shuffle(label)


## Nomalize
# log(data[0][0])
norm_data = data/MAX
# norm_data = data.astype("float")/MAX
# log(norm_data[0][0])


## TEST SPLIT
split1 = int(len(label)*0.9)
train_data, test_data = norm_data[:split1], norm_data[split1:]
train_label, test_label = label[:split1], label[split1:]
## VAL SPLIT
split2 = int(len(label)*0.9)
# train_data, val_data = train_data[:split2], train_data[split2:]
# train_label, val_label = train_label[:split2], train_label[split2:]


## MODEL

In [15]:
input = Input(shape=(H, W, 1))

x = Conv2D(128, (3, 3))(input)
x = BatchNormalization()(x)
x = Activation('selu')(x)
x = Dropout(.1)(x)

x = Conv2D(128, (3, 3))(x)
x = BatchNormalization()(x)
x = Activation('selu')(x)
x = Dropout(.1)(x)

x = Conv2D(128, (3, 3))(x)
x = BatchNormalization()(x)
x = Activation('selu')(x)
x = Dropout(.1)(x)

x = Conv2D(128, (3, 3))(x)
x = BatchNormalization()(x)
x = Activation('selu')(x)
x = Dropout(.1)(x)

# x = Conv2D(128, (3, 3))(x)
# x = BatchNormalization()(x)
# x = Activation('selu')(x)
# x = Dropout(.1)(x)
#
# x = Conv2D(64, (3, 3))(x)
# x = BatchNormalization()(x)
# x = Activation('selu')(x)
# x = Dropout(.1)(x)
#
# x = Conv2D(64, (3, 3))(x)
# x = BatchNormalization()(x)
# x = Activation('selu')(x)
# x = Dropout(.1)(x)

x = Conv2D(64, (3, 3))(x)
x = BatchNormalization()(x)
x = Activation('selu')(x)
x = Dropout(.1)(x)
#
x = Conv2D(32, (3, 3))(x)
x = BatchNormalization()(x)
x = Activation('selu')(x)
x = Dropout(.1)(x)

x = Conv2D(16, (3, 3))(x)
x = BatchNormalization()(x)
x = Activation('selu')(x)
x = Dropout(.1)(x)

# x = MaxPool1D(1)

x = Flatten()(x)
# print(x.shape)

# x = Dense(8192, activation="selu")(x)
# x = Dense(4096, activation="selu")(x)
# x = Dense(2048, activation="selu")(x)
# x = Dense(1024, activation="selu")(x)
# x = Dense(512, activation="selu")(x)
# x = Dense(256, activation="selu")(x)
x = Dense(128, activation="selu")(x)
# x = Dense(64, activation="selu")(x)
x = Dense(32, activation="selu")(x)

x = Dropout(.5)(x)

output = Dense(len(CLASS), activation="softmax")(x)

model = Model(input, output)

### COMPILE

In [16]:
## OPTIMIZER

# lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#         initial_learning_rate=0.001,
#         decay_steps=100000,
#         decay_rate=0.96,
#         staircase=True)

lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=0.001,
        decay_steps=10000, )

        # optimizer = 'adam'
# optimizer = Adam(0.001)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)


In [17]:
## LOSS

# loss = 'sparse_categorical_crossentropy'
loss = SparseCategoricalCrossentropy()


In [18]:
## METRICS

# metrics = ['accuracy']
metrics = [SparseCategoricalAccuracy()]

In [19]:
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 60, 48, 1)]       0         
                                                                 
 conv2d_7 (Conv2D)           (None, 58, 46, 128)       1280      
                                                                 
 batch_normalization_7 (Batc  (None, 58, 46, 128)      512       
 hNormalization)                                                 
                                                                 
 activation_7 (Activation)   (None, 58, 46, 128)       0         
                                                                 
 dropout_8 (Dropout)         (None, 58, 46, 128)       0         
                                                                 
 conv2d_8 (Conv2D)           (None, 56, 44, 128)       147584    
                                                           

## TRAIN

In [None]:
## fit
log_path = "logs/"+datetime.now().strftime("%Y%m%d-%H%M%S")
es = EarlyStopping(monitor="val_loss", patience=ES, mode="auto", verbose=2)
tensorboard_callback = k.callbacks.TensorBoard(log_dir=log_path, histogram_freq=1)

history = model.fit(train_data, train_label,
                    validation_split=0.2,
                    # validation_data=(val_data, val_label),
                    batch_size=BATCH,
                    epochs=EPOCH,
                    verbose=1,
                    # callbacks=[es],)
                    callbacks=[es, tensorboard_callback],)
print(history)

Epoch 1/64


2022-07-06 09:18:08.266209: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8401
2022-07-06 09:18:09.556356: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-07-06 09:18:09.910209: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/64
Epoch 3/64
Epoch 4/64
Epoch 5/64
Epoch 6/64
Epoch 7/64
Epoch 8/64
Epoch 9/64
Epoch 10/64
Epoch 11/64

In [None]:
## history to DF
hdf = pd.DataFrame(history.history)
hdf.keys()

## plot history
hdf.plot(figsize=(9, 6), grid=1, xlabel="epoch", label="accuracy")
plt.ylim([0, 3])
plt.show()


In [None]:

# df = px.data.gapminder().query("continent=='Oceania'")
# fig = px.line(hdf, x=hdf.index, y=hdf.values, color=hdf.keys())
# fig.show()


In [None]:
test_data[1]


## EVALUATE

In [None]:
loss, acc = model.evaluate(test_data, test_label, verbose=1)

predict = model.predict(test_data)
predicted = np.argmax(predict, axis=1)

n = 10
print(predicted[:n])
print(test_label[:n].reshape([n]))


In [None]:
# predict = model.predict(train_data)
# predicted = np.argmax(predict, axis=1)
#
# draw_CM(train_label, predicted)


In [None]:
## CM
draw_CM(test_label, predicted)

## ROC, AUC
x = label_binarize(predicted, classes=CLASS)
y = label_binarize(test_label, classes=CLASS)
draw_ROC_AUC(x, y, CLASS)

In [None]:
N = 10
# N = len(predicted)
for i in range(N):
    NUM = i

    size = 10
    test_img = Image.fromarray((test_data[NUM]*255).reshape(H,W)).convert('L').resize((W*size, H*size))
    display(test_img)

    log(f"predicted: {predicted[NUM]}, label: {test_label[NUM][0]}")
    log(f"difference: {abs(predicted[NUM]-test_label[NUM][0])}")


In [None]:
if SAVE == 1:
  file_name = "model/mvpc10_"+datetime.now().strftime("%Y%m%d-%H%M%S")
  model_format = ".h5"
  model_name = file_name+model_format
  model.save(model_name)
