In [1]:
import json
import os
import requests
import io
import pathlib
import math
import numpy as np
import glob
import shutil
from PIL import Image, ImageOps, ImageEnhance
from pprint import pprint
from collections import Counter
from datetime import datetime
from sklearn.model_selection import train_test_split

In [2]:
# Base URL of the backend REST API; the image endpoints used below are built by appending to it.
API_BASE_URL = 'http://fireeye-test-backend-container:9090/api/'
# Base URL of the TensorFlow Serving REST endpoint queried by the prediction cell.
TF_SERVING_BASE_URL = 'http://fireeye-test-model-container:8501/'
# Task whose labelled image records are fetched for this notebook.
task_id = '1ac1e8a095df4611af387d9934799251'
# Maps a record's `truth_id` to the class code used in the `Category<code>` directory names.
id_code_mapping = {
    'dbee3deebc5444f5b011da4e5518752c': '0',
    'edb4cb51d54644c08aa122d3f041bb0a': '1'}

In [3]:
def get_image_by_id(image_id, timeout=30):
    """Retrieve image by its ID.

    Args:
        image_id: Identifier appended to the ``image/`` endpoint of the API.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the notebook indefinitely.

    Returns:
        The response body opened as a ``PIL.Image.Image``.

    Raises:
        RuntimeError: If the API answers with a status other than 200; the
            message is the response text.
        requests.exceptions.RequestException: If the request itself fails
            (for example on timeout).
    """
    r = requests.get(url=API_BASE_URL + 'image/' + image_id, timeout=timeout)
    if r.status_code != 200:
        raise RuntimeError(r.text)
    return Image.open(io.BytesIO(r.content))

In [4]:
import pprint
def get_image_records(task_id, timeout=30):
    """Fetch image records given a task ID.

    Only records flagged with ``has_truth`` are requested.

    Args:
        task_id: Task identifier sent as the ``task_id`` query parameter.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            connection would block the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the API answers with a status other than 200; the
            message is the response text.
        requests.exceptions.RequestException: If the request itself fails
            (for example on timeout).
    """
    resp = requests.get(
        url=API_BASE_URL + 'image',
        params={'task_id': task_id, 'has_truth': True},
        timeout=timeout,
    )
    if resp.status_code != 200:
        raise RuntimeError(resp.text)
    return resp.json()
# Fetch the labelled records of the task once; later cells split this list into train/val/test.
image_records = get_image_records(task_id)
# Runtime message (Chinese): "number of images in this category".
print(f'该类别下图片数量是：{len(image_records)}')

该类别下图片数量是：320


In [5]:
def crop_by_percentile(img, lower_percentile=5, upper_percentile=95):
    """Crop an image to the bounding box of its mid-intensity pixels.

    The image is converted to greyscale, the two intensity percentiles are
    computed, and the bounding box of every pixel lying strictly between
    them is cut out of the original image.

    Args:
        img: Image object providing ``convert``, ``crop`` and ``copy``.
        lower_percentile: Percentile giving the lower intensity bound.
        upper_percentile: Percentile giving the upper intensity bound.

    Returns:
        The cropped image. If no pixel lies strictly between the two
        bounds (for example a flat image), an uncropped copy is returned.
    """
    img_array = np.array(img.convert('L'))
    low_val, high_val = np.percentile(img_array, [lower_percentile, upper_percentile])

    mask = (img_array > low_val) & (img_array < high_val)
    row_idx = np.flatnonzero(mask.any(axis=1))
    col_idx = np.flatnonzero(mask.any(axis=0))
    if row_idx.size == 0:
        # Nothing to anchor a bounding box on; indexing [0, -1] would raise.
        return img.copy()

    rmin, rmax = int(row_idx[0]), int(row_idx[-1])
    cmin, cmax = int(col_idx[0]), int(col_idx[-1])

    # The right/lower edges of a crop box are exclusive, so add one to keep
    # the last matching row and column inside the result.
    return img.crop((cmin, rmin, cmax + 1, rmax + 1))

In [6]:
def normalize_image(img: Image.Image) -> np.ndarray:
    """Return the image's pixel array divided by 255.0."""
    pixels = np.array(img)
    scaled = pixels / 255.0
    return scaled

In [7]:
image_dir = "./images"
Category0_dir = os.path.join(image_dir, 'Category0')
Category1_dir = os.path.join(image_dir, 'Category1')

# Drop whatever an earlier run left behind in both category folders ...
for stale_dir in (Category0_dir, Category1_dir):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

# ... then recreate them empty.
for fresh_dir in (Category0_dir, Category1_dir):
    os.makedirs(fresh_dir)

In [8]:
def clear_and_create_directory(directory):
    """Leave `directory` existing and empty.

    An existing directory tree at that path is deleted first; missing
    parent directories are created as needed.
    """
    already_present = os.path.exists(directory)
    if already_present:
        shutil.rmtree(directory)
    os.makedirs(directory)

base_dir = './images'

# Start from empty <split>/<category> folders so files from an earlier run
# cannot leak into this one.
for set_name in ('train', 'test', 'val'):
    for category in ('Category0', 'Category1'):
        clear_and_create_directory(os.path.join(base_dir, set_name, category))

In [9]:
# Class code ('0' / '1') of every record, used to stratify both splits.
labels = [id_code_mapping[record['truth_id']] for record in image_records]

# First split: hold out 30 % of the records as the test set.
train_records, test_records, train_labels, test_labels = train_test_split(
    image_records, labels, test_size=0.3, stratify=labels, random_state=42)

# Second split: carve 10 % of the remaining records out as the validation set.
train_records, val_records, train_labels, val_labels = train_test_split(
    train_records, train_labels, test_size=0.1, stratify=train_labels, random_state=42)


# Download, crop and store the images of all three splits as
# <base_dir>/<split>/Category<code>/<record id>.png.
for set_name, records in [('train', train_records), ('test', test_records), ('val', val_records)]:
    for record in records:
        try:
            img = get_image_by_id(record['id'])
            cropped_img = crop_by_percentile(img)

            truth_id = record['truth_id']
            category = id_code_mapping[truth_id]

            directory = os.path.join(base_dir, set_name, f'Category{category}')
            file_path = os.path.join(directory, f'{record["id"]}.png')
            # Save the cropped image directly. The earlier "/ 255.0, * 255,
            # astype(uint8)" round trip could at best reproduce the same
            # pixels (the cast truncates instead of rounding) and rebuilt the
            # image from a bare array, discarding its mode/palette. Scaling to
            # [0, 1] is done at load time by the generators' `rescale`.
            cropped_img.save(file_path, 'PNG')
        except Exception as e:
            # Best effort: a record that cannot be fetched or processed is
            # reported and skipped so the remaining images are still written.
            print(f'Error processing image {record["id"]}. Error: {e}')

In [10]:
def download_image(image_id, timeout=30):
    """Download the raw bytes served for an image ID.

    Args:
        image_id: Identifier appended to the ``image/download/`` endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes``.

    Raises:
        RuntimeError: If the API answers with a status other than 200.
            Previously an error body was returned as if it were image data;
            this now matches the error handling of ``get_image_by_id``.
        requests.exceptions.RequestException: If the request itself fails
            (for example on timeout).
    """
    response = requests.get(f"{API_BASE_URL}image/download/{image_id}", timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(response.text)
    return response.content

In [11]:
def color_jitter(img: Image.Image, brightness=0.2, contrast=0.2, saturation=0.2) -> Image.Image:
    """Randomly perturb brightness, contrast and colour saturation, in that order.

    Each enhancement factor is ``1 + strength * u`` with ``u`` drawn from
    ``2 * np.random.random() - 1``, one draw per stage.
    """
    stages = (
        (ImageEnhance.Brightness, brightness),
        (ImageEnhance.Contrast, contrast),
        (ImageEnhance.Color, saturation),
    )
    for enhancer_cls, strength in stages:
        enhancer = enhancer_cls(img)
        img = enhancer.enhance(1 + strength * (2 * np.random.random() - 1))
    return img

In [12]:
def vertical_flip(img: Image.Image) -> Image.Image:
    """Return the result of ``ImageOps.flip`` (top and bottom swapped)."""
    flipped = ImageOps.flip(img)
    return flipped

In [13]:
def horizontal_flip(img: Image.Image) -> Image.Image:
    """Return the result of ``ImageOps.mirror`` (left and right swapped)."""
    mirrored = ImageOps.mirror(img)
    return mirrored

In [14]:
# Root of the training split; the augmentation code in this cell reads originals from here
# and writes the augmented copies into the same category folders.
train_directory = './images/train/'


def preprocess_and_save(img_path, image_id, category):
    """Write three augmented variants of one training image.

    The variants (colour-jittered, vertically flipped, horizontally flipped)
    are saved as PNG files named ``<image_id>_<suffix>.png`` inside
    ``train_directory/<category>``.

    Args:
        img_path: Path of the source image file.
        image_id: Identifier used as the stem of the output file names.
        category: Name of the category sub-directory to write into.
    """
    augmentations = (
        ('colorjittered', color_jitter),
        ('vflipped', vertical_flip),
        ('hflipped', horizontal_flip),
    )
    # The context manager closes the source file once every variant has been
    # written; a bare Image.open() kept one handle open per training image.
    with Image.open(img_path) as img:
        for suffix, transform in augmentations:
            out_path = os.path.join(train_directory, category, f'{image_id}_{suffix}.png')
            transform(img).save(out_path, 'PNG')
# Augment every training record whose source PNG is present on disk;
# records without a file are skipped.
for record in train_records:
    image_id, truth_id = record['id'], record['truth_id']
    category = f'Category{id_code_mapping[truth_id]}'
    img_path = os.path.join(train_directory, category, f'{image_id}.png')
    if not os.path.exists(img_path):
        continue
    preprocess_and_save(img_path, image_id, category)
print('Data augmentation for the training set is complete')

Data augmentation for the training set is complete


In [15]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import glob
import os

# Print the TensorFlow version in use so the run can be reproduced.
print(tf.__version__)

# Root directory of each data split (each holds one sub-directory per category).
train_dir = './images/train'
val_dir = './images/val'
test_dir = './images/test'

# Size every image is resized to before entering the network, as (height, width).
img_height, img_width = 218, 175
# Network input shape: height, width and 3 colour channels.
input_shape = (img_height, img_width, 3)
# Number of images per batch for the generators and `build_dataset`.
batch_size = 32

2.8.2


In [16]:
def build_dataset(data_dir, target=None):
    """Build a batched ``tf.data`` dataset of base64-encoded PNG files.

    Args:
        data_dir: Directory containing ``Category0`` and ``Category1``
            sub-directories with ``*.png`` files.
        target: If given (0 or 1), only that category is loaded; otherwise
            both categories are loaded.

    Returns:
        A ``tf.data.Dataset`` yielding ``(encoded_files, labels)`` batches of
        up to ``batch_size`` elements. Each file is the web-safe base64
        encoding of its raw bytes and each label is the category index.
    """
    all_images = []
    all_labels = []
    for i in range(2):
        if target is not None and i != target:
            continue
        # Sorted because glob order depends on the file system; the pattern
        # has no "**", so the former recursive=True flag had no effect.
        images = sorted(glob.glob(os.path.join(data_dir, f'Category{i}', '*.png')))
        all_images.extend(images)
        all_labels.extend([i] * len(images))

    # Explicit dtypes keep an empty directory from producing a float32
    # dataset that read_file() cannot consume.
    ds = tf.data.Dataset.from_tensor_slices((
        tf.constant(all_images, dtype=tf.string),
        tf.constant(all_labels, dtype=tf.int32),
    ))
    # The paths are listed category by category, so a buffer smaller than the
    # dataset would leave batches dominated by one class. Shuffling the cheap
    # path strings before reading lets the buffer span the whole list.
    ds = ds.shuffle(max(len(all_images), 1), seed=123)
    ds = ds.map(lambda x, y: (tf.io.encode_base64(tf.io.read_file(x)), y))
    ds = ds.batch(batch_size)
    return ds

In [31]:
# One generator per split; each multiplies pixel values by 1/255 on load.
train_image_generator = ImageDataGenerator(rescale=1. / 255)
val_image_generator = ImageDataGenerator(rescale=1. / 255)
test_image_generator = ImageDataGenerator(rescale=1. / 255)

# Arguments shared by all three directory iterators.
_flow_options = dict(
    batch_size=batch_size,
    target_size=(img_height, img_width),
    class_mode='binary',
)

train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir, shuffle=True, **_flow_options)

val_data_gen = val_image_generator.flow_from_directory(
    directory=val_dir, **_flow_options)

test_data_gen = test_image_generator.flow_from_directory(
    directory=test_dir, **_flow_options)


def process_base64_image(s):
    """Decode a web-safe base64 PNG string into a resized image scaled by 1/255.

    The PNG is decoded with 3 channels and resized to
    ``(img_height, img_width)`` with antialiasing.
    """
    png_bytes = tf.io.decode_base64(s)
    pixels = tf.io.decode_png(png_bytes, channels=3)
    resized = tf.image.resize(pixels, (img_height, img_width), antialias=True)
    return resized / 255.0


# Binary image classifier: three Conv2D + MaxPooling2D stages, then a dense head
# ending in a single sigmoid unit.
# NOTE(review): the model takes image tensors of shape (img_height, img_width, 3);
# `process_base64_image` is defined in this cell but is not one of its layers, while
# the prediction cell posts base64 strings. Confirm how the served model decodes them.
model = tf.keras.Sequential([

    # CNN Layers: 16 -> 32 -> 64 filters, 3x3 kernels, 'same' padding, GELU activation
    layers.Conv2D(16, 3, padding='same', activation='gelu', input_shape=(img_height, img_width, 3)),
    layers.MaxPooling2D(),

    layers.Conv2D(32, 3, padding='same', activation='gelu'),
    layers.MaxPooling2D(),

    layers.Conv2D(64, 3, padding='same', activation='gelu'),
    layers.MaxPooling2D(),

    # Dense Layers
    layers.Flatten(),
    layers.Dense(256, activation='gelu', name="dense_layer1"),

    # Single sigmoid output, paired with the binary cross-entropy loss below
    layers.Dense(1, activation='sigmoid', name="output_layer")
])


# Adam optimizer with binary cross-entropy loss; accuracy is reported during fit/evaluate.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Found 804 images belonging to 2 classes.
Found 23 images belonging to 2 classes.
Found 96 images belonging to 2 classes.


In [32]:
# Train for 20 epochs on the training generator, validating on the validation generator;
# `history` holds the object returned by fit().
history = model.fit(train_data_gen, epochs=20, validation_data=val_data_gen)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
# NOTE(review): this scores the model on the training generator, i.e. on data it was
# fitted to. `test_data_gen` is created earlier but is not evaluated here; confirm
# whether a held-out evaluation was intended.
model.evaluate(train_data_gen)



[0.0002940577978733927, 1.0]

In [37]:
import pytz
from datetime import datetime

# Version label: current Asia/Shanghai time formatted as YYYYMMDDHHMMSS. It names the
# export directory and is reused in the serving URL built by the prediction cell.
model_version =  datetime.now(pytz.timezone('Asia/Shanghai')).strftime('%Y%m%d%H%M%S')
# Export the trained model to /models/slot1/<model_version>/, overwriting an existing export.
tf.keras.models.save_model(
    model,
    f'/models/slot1/{model_version}/',
    overwrite=True,
)

INFO:tensorflow:Assets written to: /models/slot1/20230916095124/assets


In [35]:
#import base64
#from typing import List


#def image_to_tf_format(img: Image.Image) -> List[List[List[float]]]:
#    img = img.resize((218, 175)).convert('RGB')
#    img_array = np.array(img)
#    img_array = img_array / 255.0
#    return img_array.tolist()
    


#def predict_image(images: List[Image.Image]):
#    data = [image_to_tf_format(img) for img in images]

#    json_data ={
#      'signature_name': 'serving_default',
#      'instances': data
#    }
#    response = requests.post(url=TF_SERVING_BASE_URL+f'v1/models/slot1/versions/{model_version}:predict', # 根据部署地址填写
#                             json=json_data,
#                             headers={"content-type": "application/json"})
#    if response.status_code != 200:
#        raise RuntimeError('Request tf-serving failed: ' + response.text)
#    resp_data = json.loads(response.text)    
#    if 'predictions' not in resp_data:
#        print(f'Unexpected response form TensorFlow Serving: {resp_data}')
#        raise RuntimeError('Invalid response from TensorFlow Serving')
#    return resp_data['predictions']


#def test_image_model(test_dir, code, batch_size=10):    
#    image_paths = list(pathlib.Path(test_dir).joinpath(str(code)).glob('./*.png')) 
    
#    images = [Image.open(img_path) for img_path in image_paths]
#    codes = []
 #   for step in range(math.ceil(len(images)/batch_size)):
 #       outputs = predict_image(images[step*batch_size:(step+1)*batch_size])
 #       for i, o in zip(image_paths, outputs):            
 #           if o != code:
 #               print('错误图片：', i)
 #       codes.extend(outputs)
 #   accuracy = round(codes.count(code) / len(codes), 4)
 #   return accuracy, codes

#accuracy, codes = test_image_model(val_dir, 0)
#print('类别0的准确率', accuracy)
#print('类别0的测试结果', codes)
#accuracy, codes = test_image_model(val_dir, 1)
#print('类别1的准确率', accuracy)
#print('类别1的测试结果', codes)

In [36]:
import base64

def predict_image(images, timeout=60):
    """Classify image files through the TensorFlow Serving REST endpoint.

    Every file is read, encoded as URL-safe base64 and sent in one
    ``inputs`` request to the ``slot1`` model at version ``model_version``.

    Args:
        images: Iterable of paths to image files on disk.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one integer class code per input image. An empty input
        returns ``[]`` without contacting the server.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
        ValueError: If the response has no list-valued ``outputs`` field.
        requests.exceptions.RequestException: If the request itself fails
            (for example on timeout).
    """
    bimages = []
    for image in images:
        with open(image, 'rb') as fimage:
            content = fimage.read()
        bimages.append(base64.urlsafe_b64encode(content).decode())
    if not bimages:
        return []

    response = requests.post(
        # Adjust the URL to match the deployment address.
        TF_SERVING_BASE_URL + f'v1/models/slot1/versions/{model_version}:predict',
        json={'inputs': bimages},
        headers={"content-type": "application/json"},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise RuntimeError('Request tf-serving failed: ' + response.text)
    resp_data = json.loads(response.text)
    if 'outputs' not in resp_data or not isinstance(resp_data['outputs'], list):
        raise ValueError('Malformed tf-serving response')

    scores = np.asarray(resp_data['outputs'], dtype=float)
    if scores.ndim == 1 or scores.shape[-1] == 1:
        # One sigmoid score per image: argmax over a single column is always
        # 0, so threshold at 0.5 instead.
        return (scores.reshape(-1) > 0.5).astype(int).tolist()
    # Several scores per image: pick the highest-scoring class.
    return np.argmax(scores, axis=1).tolist()


def test_image_model(test_dir, code, batch_size=10):    
    images = list(pathlib.Path(test_dir, 'Category0').joinpath(str(code)).glob('./*.png')) 
    codes = []
    for step in range(math.ceil(len(images)/batch_size)):
        outputs = predict_image(images[step*batch_size:(step+1)*batch_size])
        for i, o in zip(images, outputs):            
            if o != code:
                print('错误图片：', i)
        codes.extend(outputs)
    accuracy = round(codes.count(code) / len(codes), 4)
    return accuracy, codes

# Score each class separately on the test split through the served model.
# Runtime messages (Chinese): "accuracy of class N" / "test results of class N".
accuracy, codes = test_image_model(test_dir, 0)
print('类别0的准确率', accuracy)
print('类别0的测试结果', codes)
accuracy, codes = test_image_model(test_dir, 1)
print('类别1的准确率', accuracy)
print('类别1的测试结果', codes)

ZeroDivisionError: division by zero