# Predict dataset

In [1]:
import os
import csv
import cv2
import shutil
import warnings

from tensorflow import keras
import numpy as np

Set logging to only error

In [2]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings("ignore")

Prediction inputs

In [3]:
dataset_folder = 'data_set/'
model_path = 'tuned.h5'
result_csv_path = 'csv/results.csv'

batch_size = 1000

In [4]:
class_map = {
    1: 'Activity Diagram',
    2: 'Sequence Diagram',
    3: 'Class Diagram',
    4: 'Component Diagram',
    5: 'Use Case Diagram',
    6: 'Cloud',
    0: 'None',
}

## Load Model

In [5]:
loaded_model = keras.models.load_model(model_path)

Metal device set to: Apple M1 Pro


## Read images from directory

In [6]:
def batch_listdir(path: str, size=1):
    i = 0
    batch_list = []
    for element in os.scandir(path):
        if i < size:
            i += 1
            batch_list.append(element)
        else:
            yield batch_list
            i = 0
            batch_list = []

## Prediction

In [7]:
def evaluate_model(dataset_folder_path, model, writer, batch_size=1):
    """Loads a dataset of images
        - dataset_folder_path is the path of the folder that contains the images
        - csv_path is the path of the CSV file that contains the labels of the images
        Returns: X_data, y_labeled
        - X_data is a numpy.ndarray containing the pixel data of an image X
        - y_labeled is a numpy.ndarray containing an int, the label Y for the image X in that index
    """
    n = 0
    processed_images = 0
    for image_batch in batch_listdir(dataset_folder_path, batch_size):
        X_data = []
        not_loaded = []
        print(f'\rReading batch #{n}, size: {len(image_batch)}, processed images: {processed_images}', flush=True, end=' ' * 50)
        for i in range(len(image_batch)):
            image = get_normalize_image(image_batch[i].path)
            if image is None:
                not_loaded.append(i)
            else:
                processed_images += 1
                X_data.append(image)

        for i in not_loaded:
            image_batch.pop(i)
        X_data = np.array(X_data, dtype=np.uint8)
        X_data = keras.applications.densenet.preprocess_input(X_data)
        print(f'\rProcessing batch #{n}, size: {len(X_data)}, processed images: {processed_images}', flush=True, end=' ' * 50)
        prob = model.predict_on_batch(X_data)
        bests = prob.argmax(axis=-1)
        prob_text = [['{:.6f}'.format(v) for v in p] for p in prob]
        rows = [[image_batch[i].name, str(bests[i])] + prob_text[i] for i in range(len(image_batch))]

        writer.writerows(rows)
        n += 1

In [8]:
def get_normalize_image(path: str):
    try:
        img = keras.utils.load_img(path, target_size=(224, 224), interpolation="lanczos")
        img = keras.utils.img_to_array(img)

        return img
    except Exception as e:
        pass

### Create CSV to write th predictions

In [9]:
csv_file = open(result_csv_path, mode='w')
csv_writer = csv.writer(csv_file)

### Execute prediction

In [10]:
evaluate_model(dataset_folder, loaded_model, csv_writer, batch_size)
csv_file.close()

Processing batch #2465, size: 1000, processed images: 2464274                                                  

## Validate results

In [11]:
def validate_predictions(csv_path: str, new_csv_path: str, dest_folder: str, show: bool):
    new_dataset_file = open(new_csv_path, 'w', newline='')
    dataset_extra = csv.writer(new_dataset_file)
    with open(csv_path, mode='r') as file:
        csv_reader = csv.reader(file, delimiter=',')
        counter = 0
        found = 0
        for line in csv_reader:
            if counter % 1000 == 0:
                print(f'\rEvaluation #{counter}', flush=True, end='')
            category = int(line[1])
            prob = float(line[category + 2])
            image_name = line[0]
            counter += 1

            if prob < 0.3:
                found += 1
                if show:
                    image_path = f'{dataset_folder}{image_name}'
                    image = cv2.imread(image_path, cv2.IMREAD_ANYCOLOR)
                    cv2.imshow(f'{category}: {class_map[category]}, {prob:.3f}', image)
                    key = cv2.waitKey(0) & 0xFF
                    cv2.destroyAllWindows()
                    if ord('0') <= key <= ord('9') :
                        shutil.copy(image_path, os.path.join(dest_folder, image_name))
                        dataset_extra.writerow([image_name, chr(key)])
        print(f'\nFound: {found}')


In [12]:
validate_predictions(result_csv_path, 'csv/extra_images.csv', 'toProcess/', True)

Evaluation #772000



Evaluation #865000



Evaluation #1542000



Evaluation #2212000



Evaluation #2464000
Found: 224
