In [42]:
import json
import os
import requests
import io
import pathlib
import math
import numpy as np
import glob
import shutil
from PIL import Image, ImageOps, ImageEnhance
from pprint import pprint
from collections import Counter
from datetime import datetime
from sklearn.model_selection import train_test_split

In [43]:
API_BASE_URL = 'http://fireeye-test-backend-container:9090/api/'
TF_SERVING_BASE_URL = 'http://fireeye-test-model-container:8501/'
task_id = '1ac1e8a095df4611af387d9934799251'
id_code_mapping = {
    'dbee3deebc5444f5b011da4e5518752c': '0',
    'edb4cb51d54644c08aa122d3f041bb0a': '1'}

In [44]:
def get_image_by_id(image_id):
    """Retrieve image by its ID."""
    r = requests.get(url=API_BASE_URL + 'image/' + image_id)
    if r.status_code == 200:
        return Image.open(io.BytesIO(r.content))
    else:
        raise RuntimeError(r.text)

In [45]:
import pprint
def get_image_records(task_id):
    """Fetch image records given a task ID."""
    resp = requests.get(
        url=API_BASE_URL + 'image',
        params={'task_id': task_id, 'has_truth': True}
    )
    if resp.status_code == 200:
        return resp.json()
    else:
        raise RuntimeError(resp.text)
image_records = get_image_records(task_id)
print(f'该类别下图片数量是：{len(image_records)}')

该类别下图片数量是：318


In [46]:
def crop_by_percentile(img, lower_percentile=5, upper_percentile=95):
    img_array = np.array(img.convert('L'))
    
    low_val, high_val = np.percentile(img_array, [lower_percentile,upper_percentile])
                         
    mask = np.logical_and(img_array > low_val, img_array < high_val)
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
                         
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]

    cropped_img = img.crop((cmin, rmin, cmax, rmax))
    return cropped_img

In [47]:
def normalize_image(img: Image.Image) -> np.ndarray:
    img_array = np.array(img)
    return img_array / 255.0

In [48]:
image_dir = "./images"
Category0_dir = os.path.join(image_dir, 'Category0')
Category1_dir = os.path.join(image_dir, 'Category1')
if os.path.exists(Category0_dir):
    shutil.rmtree(Category0_dir)
if os.path.exists(Category1_dir):
    shutil.rmtree(Category1_dir)


os.makedirs(Category0_dir)
os.makedirs(Category1_dir)

In [49]:
def clear_and_create_directory(directory):
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)

base_dir = './images'

for set_name in ['train', 'test', 'val']:
    for category in ['Category0', 'Category1']:
        directory = os.path.join(base_dir, set_name, category)
        clear_and_create_directory(directory)

In [50]:
labels = [id_code_mapping[record['truth_id']] for record in image_records]


train_records, test_records, train_labels, test_labels = train_test_split(
    image_records, labels, test_size=0.3, stratify=labels, random_state=42)

train_records, val_records, train_labels, val_labels = train_test_split(
    train_records, train_labels, test_size=0.1, stratify=train_labels, random_state=42)


for set_name, records in [('train', train_records), ('test', test_records), ('val', val_records)]:
    for record in records:
        try:
            img = get_image_by_id(record['id'])
            cropped_img = crop_by_percentile(img)
            normalized_img_array = np.array(cropped_img) / 255.0
            normalized_img = Image.fromarray((normalized_img_array * 255).astype(np.uint8))

            truth_id = record['truth_id']
            category = id_code_mapping[truth_id]

            directory = os.path.join(base_dir, set_name, f'Category{category}')
            file_path = os.path.join(directory, f'{record["id"]}.png')
            normalized_img.save(file_path, 'PNG')
        except Exception as e:
            print(f'Error processing image {record["id"]}. Error: {e}')

In [10]:
def download_image(image_id):
    response = requests.get(f"{API_BASE_URL}image/download/{image_id}")
    return response.content

In [11]:
def color_jitter(img: Image.Image, brightness=0.2, contrast=0.2, saturation=0.2) -> Image.Image:
    img = ImageEnhance.Brightness(img).enhance(1 + brightness * (2 * np.random.random() - 1))
    img = ImageEnhance.Contrast(img).enhance(1 + contrast * (2 * np.random.random() - 1))
    img = ImageEnhance.Color(img).enhance(1 + saturation * (2 * np.random.random() - 1))
    return img

In [12]:
def vertical_flip(img: Image.Image) -> Image.Image:
    return ImageOps.flip(img)

In [13]:
def horizontal_flip(img: Image.Image) -> Image.Image:
    return ImageOps.mirror(img)

In [14]:
train_directory = './images/train/'


def preprocess_and_save(img, image_id, category):
    color_jittered = color_jitter(img)
    color_jittered_path = os.path.join(train_directory, category, f'{image_id}_colorjittered.png')
    color_jittered.save(color_jittered_path, 'PNG')

    vflipped = vertical_flip(img)
    vflipped_path = os.path.join(train_directory, category, f'{image_id}_vflipped.png')
    vflipped.save(vflipped_path, 'PNG')

    hflipped = horizontal_flip(img)
    hflipped_path = os.path.join(train_directory, category, f'{image_id}_hflipped.png')
    hflipped.save(hflipped_path, 'PNG')

for record in train_records:
    image_id = record['id']
    img = get_image_by_id(image_id)
    truth_id = record['truth_id']
    category = f'Category{id_code_mapping[truth_id]}'
    preprocess_and_save(img, image_id, category)

In [25]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print(tf.__version__)
train_dir = './images/train'
val_dir = './images/val'
test_dir = './images/test'

img_height, img_width = 218, 175
input_shape = (img_height, img_width, 3)
BATCH_SIZE = 32

2.8.2


In [26]:
train_image_generator = ImageDataGenerator(rescale=1. / 255)
val_image_generator = ImageDataGenerator(rescale=1. / 255)
test_image_generator = ImageDataGenerator(rescale=1. / 255)

train_data_gen = train_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(img_height, img_width),
                                                           class_mode='binary')

val_data_gen = val_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                       directory=val_dir,
                                                       target_size=(img_height, img_width),
                                                       class_mode='binary')

test_data_gen = test_image_generator.flow_from_directory(batch_size=BATCH_SIZE,
                                                         directory=test_dir,
                                                         target_size=(img_height, img_width),
                                                         class_mode='binary')


def create_advanced_cnn(input_shape):
    input_layer = Input(shape=input_shape)

    x = Conv2D(32, (3, 3), activation='relu')(input_layer)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input_layer, outputs=output)
    return model


model = create_advanced_cnn(input_shape)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


Found 796 images belonging to 2 classes.
Found 23 images belonging to 2 classes.
Found 110 images belonging to 3 classes.


2023-09-14 02:21:08.012080: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-09-14 02:21:08.012119: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-09-14 02:21:08.012135: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (58c881efe57c): /proc/driver/nvidia/version does not exist
2023-09-14 02:21:08.012373: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [27]:
history = model.fit(
    train_data_gen,
    steps_per_epoch=train_data_gen.samples // BATCH_SIZE,
    epochs=30,
    validation_data=val_data_gen,
    validation_steps=val_data_gen.samples // BATCH_SIZE
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [29]:
test_loss, test_accuracy = model.evaluate(test_data_gen)
print(f'Test accuracy: {test_accuracy}')
model_path = "./saved_model/my_model"
model.save(model_path)
print("Model saved to", model_path)

Test accuracy: 0.00909090880304575
INFO:tensorflow:Assets written to: ./saved_model/my_model/assets
Model saved to ./saved_model/my_model


In [30]:
import pytz
from datetime import datetime

#model_version = datetime.now(pytz.timezone('Asia/Shanghai')).strftime('%Y%m%d%H%M%S')
#tf.keras.models.save_model(
#    model,
#    f'/models/slot1/{model_version}/',
#    overwrite=True,
#)
model_version = datetime.now(pytz.timezone('Asia/Shanghai')).strftime('%Y%m%d%H%M%S')
model_save_path = f'/models/slot1/{model_version}/'

tf.keras.models.save_model(
    model,
    model_save_path,
    overwrite=True
)

INFO:tensorflow:Assets written to: /models/slot1/20230914102913/assets


In [51]:
import base64
import json
import os
import requests
import io
import PIL
import PIL.Image as PImage
from PIL import ImageEnhance
import math
from pprint import pprint
import glob
from collections import Counter
from datetime import datetime
import shutil
import numpy as np
import math


def predict_image(images):
    bimages = []
    for image in images:
        with open(image, 'rb') as fimage:
            content = fimage.read()
        bimage = base64.urlsafe_b64encode(content).decode()
        bimages.append(bimage)
    req_data ={
      'inputs': bimages,
    }
    response = requests.post(TF_SERVING_BASE_URL+f'v1/models/slot1/versions/{model_version}:predict', # 根据部署地址填写
                             json=req_data,
                             headers={"content-type": "application/json"})
    if response.status_code != 200:
        raise RuntimeError('Request tf-serving failed: ' + response.text)
    resp_data = json.loads(response.text)    
    if 'outputs' not in resp_data \
                        or type(resp_data['outputs']) is not list:
        raise ValueError('Malformed tf-serving response')
    outputs = np.argmax(resp_data['outputs'], axis=1).tolist()
    return outputs


def test_image_model(test_dir, code, batch_size=10):    
    images = list(pathlib.Path(test_dir).joinpath(str(code)).glob('./*.png')) 
    codes = []
    for step in range(math.ceil(len(images)/batch_size)):
        outputs = predict_image(images[step*batch_size:(step+1)*batch_size])
        for i, o in zip(images, outputs):            
            if o != code:
                print('错误图片：', i)
        codes.extend(outputs)
    try:
        accuracy = round(codes.count(code) / len(codes), 4)
    except ZeroDivisionError:
        print('No codes avai')
        accuracy = 0
    return accuracy, codes

accuracy, codes = test_image_model(test_dir, 0)
print('类别0的准确率', accuracy)
print('类别0的测试结果', codes)
accuracy, codes = test_image_model(test_dir, 1)
print('类别1的准确率', accuracy)
print('类别1的测试结果', codes)

nonono
类别0的准确率 0
类别0的测试结果 []
nonono
类别1的准确率 0
类别1的测试结果 []
