# Predict dataset

In [15]:
import os
import csv
import cv2
import shutil
import warnings

from tensorflow import keras
from PIL import Image
from PIL.Image import Resampling
import numpy as np

Set logging to errors only

In [16]:
# Level '2' asks TensorFlow's native logger to hide INFO and WARNING messages.
# NOTE(review): tensorflow is already imported above; this variable is normally
# read when TensorFlow loads, so it may need to be set before that import — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

Prediction inputs

In [17]:
# Folder containing the images to classify.
dataset_folder = 'data_set/'
# Saved Keras model file loaded for prediction.
model_path = 'diagrams.h5'
# CSV file the predictions are written to.
result_csv_path = 'csv/results_test.csv'

# Maximum number of directory entries handled per prediction batch.
batch_size = 1000

In [None]:
# Maps the predicted class index stored in the results CSV to a readable
# label; the label is used as the window title while validating predictions.
class_map = {
    1: 'Activity Diagram',
    2: 'Sequence Diagram',
    3: 'Class Diagram',
    4: 'Component Diagram',
    5: 'Use Case Diagram',
    6: 'Cloud',
    # Index 0 is the "no diagram" class; validation skips rows predicted as 0.
    0: 'None',
}

## Load Model

In [19]:
# Load the saved Keras model from model_path; it is reused for every batch.
loaded_model = keras.models.load_model(model_path)

## Read images from directory

In [18]:
def batch_listdir(path: str, size=1):
    """Yield the entries of a directory in lists of at most ``size`` items.

    Every entry returned by ``os.scandir`` appears in exactly one batch, and
    the last batch may be shorter than ``size``. An empty directory yields
    nothing.

    Args:
        path: Directory to scan.
        size: Maximum number of entries per batch. Values below 1 are
            treated as 1 so that a batch always makes progress.

    Yields:
        Lists of ``os.DirEntry`` objects, in the order ``os.scandir``
        produces them.
    """
    size = max(1, size)
    batch_list = []
    # The context manager releases the directory handle even when the caller
    # stops iterating early.
    with os.scandir(path) as entries:
        for element in entries:
            batch_list.append(element)
            if len(batch_list) >= size:
                yield batch_list
                batch_list = []
    # Flush the trailing partial batch so no entry is lost.
    if batch_list:
        yield batch_list

## Prediction

In [20]:
def evaluate_model(dataset_folder_path, model, writer, batch_size=1):
    """Classify every image in a folder and write one row per image.

    The folder is read in batches through ``batch_listdir``. Each entry is
    loaded with ``get_normalize_image``; entries that cannot be loaded are
    skipped. The loaded images are preprocessed with the DenseNet
    ``preprocess_input`` and scored with ``model.predict_on_batch``.

    Args:
        dataset_folder_path: Path of the folder that contains the images.
        model: Object exposing ``predict_on_batch``; it is expected to return
            one row of class scores per input image.
        writer: Object exposing ``writerows`` (for example a ``csv.writer``).
        batch_size: Maximum number of directory entries per batch.

    Each row written has the form
    ``[file name, best class index, score_0, score_1, ...]`` with the scores
    formatted to six decimals.
    """
    batches = batch_listdir(dataset_folder_path, batch_size)
    for n, image_batch in enumerate(batches):
        print(f'\rReading batch #{n}, size: {len(image_batch)}', flush=True, end=' ' * 50)

        # Keep entries and pixel data in lockstep so that every prediction
        # row is labelled with the file it was actually computed from.
        loaded_entries = []
        X_data = []
        for entry in image_batch:
            image = get_normalize_image(entry.path)
            if image is None:
                continue
            loaded_entries.append(entry)
            X_data.append(image)

        # Nothing readable in this batch: there is nothing to predict.
        if not X_data:
            continue

        X_data = np.array(X_data, dtype=np.uint8)
        X_data = keras.applications.densenet.preprocess_input(X_data)
        print(f'\rProcessing batch #{n}, size: {len(X_data)}', flush=True, end=' ' * 50)
        prob = model.predict_on_batch(X_data)
        bests = prob.argmax(axis=-1)

        rows = [
            [entry.name, str(best)] + ['{:.6f}'.format(v) for v in scores]
            for entry, best, scores in zip(loaded_entries, bests, prob)
        ]
        writer.writerows(rows)

In [21]:
def get_normalize_image(path: str):
    """Load the image at ``path`` resized to 224x224 and return it as an array.

    The image is read with ``keras.utils.load_img`` using Lanczos
    interpolation and converted with ``keras.utils.img_to_array``.

    Returns:
        The converted array, or ``None`` when loading or conversion raises
        any ``Exception``.
    """
    try:
        pil_image = keras.utils.load_img(
            path,
            target_size=(224, 224),
            interpolation="lanczos",
        )
        return keras.utils.img_to_array(pil_image)
    except Exception:
        # Best effort: callers treat None as "could not load this file".
        return None

### Create CSV to write the predictions

In [22]:
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' on write end up with blank rows in the file.
csv_file = open(result_csv_path, mode='w', newline='')
csv_writer = csv.writer(csv_file)

### Execute prediction

In [23]:
# Close the CSV even when prediction is interrupted or fails, so the rows
# written so far are flushed to disk and the handle is released.
try:
    evaluate_model(dataset_folder, loaded_model, csv_writer, batch_size)
finally:
    csv_file.close()

Processing batch #1, size: 1000                                                  

KeyboardInterrupt: 

## Validate results

In [None]:
def validate_predictions(csv_path: str, new_csv_path: str, dest_folder: str):
    """Manually review low-confidence predictions from a results CSV.

    Each row of ``csv_path`` is expected to hold
    ``[image name, predicted class index, score_0, score_1, ...]``. Rows whose
    predicted class is not 0 and whose score for that class is below 0.5 are
    shown in an OpenCV window titled with the ``class_map`` label. The image
    is read from the module-level ``dataset_folder``.

    Pressing Enter (key code 13) leaves the image alone. Any other key copies
    the image into ``dest_folder`` and appends ``[image name, chr(key)]`` to
    ``new_csv_path``.

    Args:
        csv_path: Results CSV to read.
        new_csv_path: CSV the reviewed labels are appended to.
        dest_folder: Folder the re-labelled images are copied into; created
            on demand.
    """
    window_shown = False
    try:
        # Both files are context-managed so they are closed on every exit path.
        with open(new_csv_path, 'a', newline='') as new_dataset_file, \
                open(csv_path, mode='r', newline='') as file:
            dataset_extra = csv.writer(new_dataset_file)
            csv_reader = csv.reader(file, delimiter=',')
            counter = 0
            for line in csv_reader:
                # csv.reader yields [] for blank lines; they carry no prediction.
                if not line:
                    continue
                print(f'\rEvaluation #{counter}', flush=True, end=' ' * 50)
                category = int(line[1])
                # Scores start at column 2, one column per class index.
                prob = float(line[category + 2])
                image_name = line[0]
                counter += 1

                if category == 0 or prob >= 0.5:
                    continue

                image_path = os.path.join(dataset_folder, image_name)
                image = cv2.imread(image_path, cv2.IMREAD_ANYCOLOR)
                if image is None:
                    # imread returns None instead of raising on unreadable files.
                    print(f'\nCould not read {image_path}; skipping')
                    continue

                cv2.imshow(str(class_map[category]), image)
                window_shown = True
                key = cv2.waitKey(0) & 0xFF
                if key == 13:
                    continue
                os.makedirs(dest_folder, exist_ok=True)
                shutil.copy(image_path, os.path.join(dest_folder, image_name))
                dataset_extra.writerow([image_name, chr(key)])
    finally:
        # Only touch the GUI layer when a window was actually opened.
        if window_shown:
            cv2.destroyAllWindows()

# Review the predictions stored in the results CSV; reviewed labels go to
# csv/extra_images.csv and the matching images are copied to toProcess/.
validate_predictions(
    csv_path=result_csv_path,
    new_csv_path='csv/extra_images.csv',
    dest_folder='toProcess/',
)

Evaluation #107                                                  

In [None]:
# Re-run of the review step. The output CSV is opened in append mode, so this
# adds rows to csv/extra_images.csv rather than replacing the earlier ones.
validate_predictions(
    csv_path=result_csv_path,
    new_csv_path='csv/extra_images.csv',
    dest_folder='toProcess/',
)