In [1]:
import numpy as np
import pandas as pd
import os
import math
from glob import glob

import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adamax
from keras.utils import plot_model

import sklearn.metrics
from sklearn.metrics import confusion_matrix, f1_score, classification_report
from sklearn.linear_model import LinearRegression

import shutil
from tqdm import tqdm
import albumentations as A




In [2]:
# Locations of the inputs used by this notebook.
root_path = "d:/ham/"
model_path = "d:/ham/ham_work_folder/saved_model/"
original_test_df_path = root_path + "ham_test_dataframe/df_test_original.csv"
test_df_path = root_path + "ham_test_dataframe/df_test_ita.csv"

# The "labels" column is later passed as y_col to flow_from_dataframe, whose
# values must be of type string, list or tuple, so it is cast to str on load.
original_test_df = pd.read_csv(original_test_df_path).astype({"labels": str})
test_df = pd.read_csv(test_df_path).astype({"labels": str})

In [16]:
# Settings shared by the data generators below.
img_size = (310, 640)  # used as target_size; model.summary() shows a (310, 640, 3) input
batch_size = 16
ycol = 'labels'        # dataframe column holding the class label

# One generator with random horizontal flips, one without any augmentation.
trgen = ImageDataGenerator(horizontal_flip=True)
t_and_v_gen = ImageDataGenerator()

def make_generator(df, xcol):
    """Build a non-shuffled, un-augmented image generator over ``df``.

    The batch size is the largest divisor of ``len(df)`` that does not exceed
    80, so ``len(df)`` is an exact multiple of the batch size.

    Args:
        df: DataFrame containing the image-path column ``xcol`` and the label
            column named by the module-level ``ycol``.
        xcol: Name of the column holding the image file paths.

    Returns:
        The iterator returned by ``t_and_v_gen.flow_from_dataframe`` with
        ``shuffle=False``, ``class_mode='categorical'`` and
        ``target_size=img_size``.

    Raises:
        ValueError: If ``df`` has no rows, since no batch size can be chosen.
    """
    length = len(df)
    if length == 0:
        raise ValueError("make_generator requires a non-empty dataframe")

    # Largest divisor of `length` that is <= 80. 1 always divides `length`,
    # so the candidate set is never empty; integer arithmetic only.
    test_batch_size = max(n for n in range(1, min(length, 80) + 1) if length % n == 0)

    return t_and_v_gen.flow_from_dataframe(df, x_col=xcol, y_col=ycol, target_size=img_size,
                                           class_mode='categorical', color_mode='rgb',
                                           shuffle=False, batch_size=test_batch_size)

# Build one evaluation generator per test dataframe; both read image paths
# from the "filepaths" column.
original_test_gen = make_generator(original_test_df, xcol="filepaths")
test_gen = make_generator(test_df, xcol="filepaths")

Found 259 validated image filenames belonging to 7 classes.
Found 25900 validated image filenames belonging to 7 classes.


In [17]:
def F1_score(y_true, y_pred): #taken from old keras source code
    """F1 metric computed over all elements of the tensors it is given.

    Values are clipped to [0, 1] and rounded before counting, so a prediction
    only counts as positive once it rounds to 1.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor, multiplied element-wise with ``y_true``.

    Returns:
        Scalar tensor ``2 * P * R / (P + R + eps)`` where P is precision and
        R is recall; ``K.epsilon()`` guards every division.
    """
    def _count_ones(tensor):
        # Clip to [0, 1], round, then sum over every element.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = _count_ones(y_true * y_pred)
    actual_positives = _count_ones(y_true)
    flagged_positives = _count_ones(y_pred)

    precision = hits / (flagged_positives + K.epsilon())
    recall = hits / (actual_positives + K.epsilon())
    return 2*(precision*recall)/(precision+recall+K.epsilon())

In [18]:
def print_in_color(txt_msg,fore_tupple=(0,255,255),back_tupple=(100,100,100)):
    """Print ``txt_msg`` with 24-bit ANSI foreground and background colours.

    Args:
        txt_msg: Text to print. It is emitted verbatim, so it may contain
            literal ``{`` and ``}`` characters.
        fore_tupple: Foreground colour as an ``(r, g, b)`` tuple; the default
            is cyan.
        back_tupple: Background colour as an ``(r, g, b)`` tuple; the default
            is gray.

    Returns:
        None. Two lines are written to stdout: the coloured message, then the
        reset sequence that restores the default colours.
    """
    rf, gf, bf = fore_tupple
    rb, gb, bb = back_tupple
    # SGR escape: "38;2;r;g;b" sets the foreground, "48;2;r;g;b" the background.
    color_code = f'\33[38;2;{rf};{gf};{bf};48;2;{rb};{gb};{bb}m'
    # Concatenate rather than str.format(): formatting a template built from
    # txt_msg raised on any message that contained braces.
    print(color_code + txt_msg, flush=True)
    print('\33[0m', flush=True) # restore the default print colours
    return

## Load trained model

In [19]:
# Restore the trained network from model_path, registering F1_score as a
# custom object for deserialization.
model = tf.keras.models.load_model(
    model_path,
    custom_objects={'F1_score': F1_score},
)

# Compile again so that the optimizer uses the learning rate set here.
lr = .001
model.compile(
    Adamax(learning_rate=lr),
    loss='categorical_crossentropy',
    metrics=['accuracy', F1_score, 'AUC'],
)

In [20]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the loaded model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 310, 640, 3)]        0         []                            
                                                                                                  
 rescaling (Rescaling)       (None, 310, 640, 3)          0         ['input_1[0][0]']             
                                                                                                  
 normalization (Normalizati  (None, 310, 640, 3)          7         ['rescaling[0][0]']           
 on)                                                                                              
                                                                                                  
 rescaling_1 (Rescaling)     (None, 310, 640, 3)          0         ['normalization[0][0]']   

In [21]:
# Render the architecture diagram to model_plot.png.
# NOTE: plot_model needs both pydot and graphviz installed; without them it
# only emits an installation hint (see the captured output below).
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [22]:
# Sanity check: display the first weight array of the model's last layer
# (presumably to confirm that trained weights were restored — TODO confirm intent).
model.layers[-1].get_weights()[0]

array([[ 0.05559245, -0.18024282,  0.12250493, ..., -0.0295473 ,
        -0.17020807, -0.15949364],
       [-0.12429732, -0.23267792, -0.21331936, ...,  0.23716791,
         0.23557101, -0.042546  ],
       [ 0.05453651, -0.10753678,  0.13634595, ...,  0.07818402,
        -0.02538554,  0.02428198],
       ...,
       [-0.1847724 , -0.11073647,  0.55943775, ...,  0.05239721,
        -0.440774  , -0.46829966],
       [-0.23853803,  0.51700175, -0.27631506, ..., -0.24088919,
        -0.31163523, -0.02179029],
       [-0.332065  , -0.34693554, -0.32986775, ..., -0.3523678 ,
        -0.49162748,  0.6808193 ]], dtype=float32)

## Prediction

In [23]:
def predictor(test_gen):
    """Run the module-level ``model`` on ``test_gen`` and summarise the errors.

    Prints a coloured one-line accuracy summary via ``print_in_color``.

    Args:
        test_gen: Generator exposing ``labels`` (integer class indices),
            ``class_indices`` and ``filenames``. Predictions are matched to
            labels and filenames by position, so this assumes the generator
            does not shuffle (``make_generator`` uses ``shuffle=False``).

    Returns:
        Tuple ``(errors, tests, error_list, error_pred_list, f1score, y_pred,
        y_true, clr)``:
            errors: Number of misclassified samples.
            tests: Number of predictions made.
            error_list: Filenames of the misclassified samples.
            error_pred_list: Predicted class name for each misclassified sample.
            f1score: Weighted F1 score, as a percentage.
            y_pred: List of predicted class indices, one per sample.
            y_true: ``test_gen.labels``.
            clr: ``classification_report`` output as a dict.
    """
    classes = list(test_gen.class_indices.keys())
    y_true = test_gen.labels
    preds = model.predict(test_gen, verbose=1)
    tests = len(preds)

    # One argmax over the class axis replaces the per-sample Python loop.
    pred_indices = np.argmax(preds, axis=1)
    y_pred = list(pred_indices)

    # Positions where the predicted class differs from the true class.
    wrong = np.flatnonzero(pred_indices != np.asarray(y_true))
    errors = len(wrong)
    error_list = [test_gen.filenames[i] for i in wrong]
    error_pred_list = [classes[pred_indices[i]] for i in wrong]

    acc=( 1-errors/tests) * 100
    msg=f'there were {errors} errors in {tests} tests for an accuracy of {acc:6.2f}'
    print_in_color(msg, (0,255,255), (100,100,100)) # cyan foreground

    f1score = f1_score(np.asarray(y_true), pred_indices, average='weighted') * 100
    # Pass `labels` explicitly: with only `target_names`, the report raises
    # when a class is absent from both y_true and y_pred, because the number
    # of observed classes no longer matches len(target_names).
    clr = classification_report(y_true, y_pred, labels=list(range(len(classes))),
                                target_names=classes, digits=4, output_dict=True)
    return errors, tests, error_list, error_pred_list, f1score, y_pred, y_true, clr

In [24]:
# Original dataset: evaluate the model on the generator built from
# original_test_df; the o_-prefixed names hold this run's results.
o_errors, o_tests, o_error_list, o_error_pred_list, o_f1score, o_y_pred, o_y_true, o_clr = predictor(original_test_gen)

[38;2;0;255;255;48;2;100;100;100mthere were 60 errors in 259 tests for an accuracy of  76.83
[0m


In [30]:
# Display the classification report dict returned by predictor() for the original test set.
o_clr

{'0': {'precision': 0.9459459459459459,
  'recall': 0.7,
  'f1-score': 0.8045977011494254,
  'support': 150.0},
 '1': {'precision': 0.5138888888888888,
  'recall': 0.8409090909090909,
  'f1-score': 0.6379310344827587,
  'support': 44.0},
 '2': {'precision': 0.76,
  'recall': 0.8837209302325582,
  'f1-score': 0.8172043010752689,
  'support': 43.0},
 '3': {'precision': 0.8,
  'recall': 0.8,
  'f1-score': 0.8000000000000002,
  'support': 15.0},
 '4': {'precision': 0.6,
  'recall': 1.0,
  'f1-score': 0.7499999999999999,
  'support': 3.0},
 '5': {'precision': 0.5,
  'recall': 1.0,
  'f1-score': 0.6666666666666666,
  'support': 2.0},
 '6': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 2.0},
 'accuracy': 0.7683397683397684,
 'macro avg': {'precision': 0.7314049764049765,
  'recall': 0.8892328601630927,
  'f1-score': 0.7823428147677314,
  'support': 259.0},
 'weighted avg': {'precision': 0.8261892007837955,
  'recall': 0.7683397683397684,
  'f1-score': 0.77792177208195,
  'supp

In [25]:
# Generated colored dataset: evaluate the model on the generator built from
# test_df (loaded from df_test_ita.csv).
errors, tests, error_list, error_pred_list, f1score, y_pred, y_true, clr = predictor(test_gen)

[38;2;0;255;255;48;2;100;100;100mthere were 12958 errors in 25900 tests for an accuracy of  49.97
[0m


In [31]:
# Attach the predicted class indices to the original test frame.
# NOTE(review): relies on predictions being in dataframe row order (the generator
# uses shuffle=False) and on no rows having been dropped as invalid — confirm.
original_test_df["predictions"] = o_y_pred

In [32]:
# Attach the predicted class indices to the generated test frame.
# NOTE(review): relies on predictions being in dataframe row order (the generator
# uses shuffle=False) and on no rows having been dropped as invalid — confirm.
test_df["predictions"] = y_pred

In [34]:
# Save both test frames under <root_path>/ham_test_results/.
results_dir = root_path + "ham_test_results/"
# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before writing.
os.makedirs(results_dir, exist_ok=True)
# The index is written as well (to_csv default), as in the original export.
original_test_df.to_csv(results_dir + "df_test_original.csv")
test_df.to_csv(results_dir + "df_test_ita.csv")