https://pyimagesearch.com/2020/06/22/turning-any-cnn-image-classifier-into-an-object-detector-with-keras-tensorflow-and-opencv/#pyis-cta-modal

In [2]:
# For Colab: uncomment the lines below to clone the repository and install
# the extra packages this notebook imports.
#!git clone -b master https://github.com/HennFarr/Coins.git
#!pip install keras_tuner
#!pip install shap

In [3]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorboard.plugins.hparams import api as hp
from tensorflow import keras


from keras import layers
import keras_tuner as kt
from keras.preprocessing.image import ImageDataGenerator, img_to_array,load_img
from PIL import Image

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
import os
import shap

import imutils
import argparse
import time
import cv2
from keras.applications.resnet import preprocess_input
from keras.applications import imagenet_utils
from imutils.object_detection import non_max_suppression

In [4]:
# Path of the saved model relative to the working directory (local runs).
local_model_dir = "Models/Tuned/tuned_model"
# Path under /content/Coins for Colab runs; not used by this cell
# (presumably the same model inside the cloned repository -- confirm).
colab_model_dir = "/content/Coins/Models/Tuned/tuned_model"
# Load the saved Keras model from the local path.
model = keras.models.load_model(local_model_dir)

In [5]:
def sliding_window(image, step, ws):
    """Yield every fixed-size window that lies fully inside ``image``.

    Args:
        image: Array indexed as ``image[row, column, ...]``; ``shape[0]`` is
            the height and ``shape[1]`` the width.
        step: Stride, in pixels, between consecutive window origins.
        ws: Window size as ``(width, height)``.

    Yields:
        ``(x, y, window)`` tuples where ``(x, y)`` is the top-left corner of
        the window and ``window`` is ``image[y:y + height, x:x + width]``.
        Nothing is yielded when the image is smaller than the window.
    """
    win_w, win_h = ws[0], ws[1]
    # The last valid origin is (size - window), so the exclusive range bound
    # has to be one past it. Without the +1 a window flush with the bottom or
    # right edge is skipped, and an image exactly the size of the window
    # produces no windows at all.
    for y in range(0, image.shape[0] - win_h + 1, step):
        for x in range(0, image.shape[1] - win_w + 1, step):
            # yield the current window
            yield (x, y, image[y:y + win_h, x:x + win_w])

In [6]:
def image_pyramid(image, scale, minSize):
    """Yield ``image`` followed by progressively smaller resized copies.

    Args:
        image: Image array; ``shape[0]`` is the height and ``shape[1]`` the
            width.
        scale: Factor the width is divided by between two layers. Must be
            greater than 1 so that every layer is smaller than the previous.
        minSize: Smallest accepted layer as ``(width, height)``.

    Yields:
        The original image, then each resized layer, stopping before the
        first layer whose height or width falls below ``minSize``.

    Raises:
        ValueError: If ``scale`` is not greater than 1. Because this is a
            generator, the error is raised when iteration starts.
    """
    # With scale <= 1 the layers never shrink, so the size check below could
    # never stop the loop and the generator would run forever.
    if scale <= 1:
        raise ValueError(
            "scale must be greater than 1, got {!r}".format(scale))
    # yield the original image
    yield image
    # keep looping over the image pyramid
    while True:
        # compute the width of the next layer and resize to it
        w = int(image.shape[1] / scale)
        image = imutils.resize(image, width=w)
        # stop once the resized layer no longer meets the minimum size
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break
        # yield the next image in the pyramid
        yield image

In [7]:
# initialize variables used for the object detection procedure
# WIDTH: width the input image is resized to before the pyramid is built.
WIDTH = 500
# PYR_SCALE: value passed as `scale` to image_pyramid.
PYR_SCALE = 1.5
# WIN_STEP: stride passed to sliding_window.
WIN_STEP = 16
# ROI_SIZE: sliding-window size and minimum pyramid layer size, (width, height).
ROI_SIZE = (50,50) #eval(args["size"])
# INPUT_SIZE: size every window is resized to with cv2.resize before it is
# classified (presumably the model's expected input size -- confirm).
INPUT_SIZE = (200, 200)

In [8]:
# Test image path relative to the working directory (local runs).
local_img="real_test_data/multi/IMG_20220620_105307.jpg"
# Path of the same file name under /content/Coins, for Colab runs.
colab_img="/content/Coins/real_test_data/multi/IMG_20220620_105307.jpg"

In [9]:
# load the input image from disk, resize it such that it has the
# supplied width, and then grab its dimensions
orig = cv2.imread(local_img) #args["image"]
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded; fail here with a clear message rather than with an
# AttributeError inside the resize call below.
if orig is None:
    raise FileNotFoundError(
        "could not read image: {}".format(local_img))
orig = imutils.resize(orig, width=WIDTH)
(H, W) = orig.shape[:2]

In [10]:
# Create the image pyramid. image_pyramid is a generator function, so this
# object is lazy and single-use: once a loop has consumed it, this cell has
# to be re-run before the pyramid can be iterated again.
pyramid = image_pyramid(orig, scale=PYR_SCALE, minSize=ROI_SIZE)
# Two parallel lists filled by the pyramid/sliding-window loop:
# `rois` receives the preprocessed windows and `locs` receives the matching
# (startX, startY, endX, endY) box in original-image coordinates.
rois = []
locs = []
# Timestamp read by a later cell to report how long the pyramid and
# sliding-window loop took.
start = time.time()

In [11]:
# Set to True to step through every sliding window in OpenCV windows.
# Each window waits for a key press, so this takes a very long time.
VISUALIZE = False
# loop over the image pyramid
for image in pyramid:
    # determine the scale factor between the *original* image
    # dimensions and the *current* layer of the pyramid
    scale = W / float(image.shape[1])
    # window size mapped back to original-image pixels; it is the same for
    # every window of this layer, so compute it once per layer
    w = int(ROI_SIZE[0] * scale)
    h = int(ROI_SIZE[1] * scale)
    # for each layer of the image pyramid, loop over the sliding
    # window locations
    for (x, y, roiOrig) in sliding_window(image, WIN_STEP, ROI_SIZE):
        # scale the (x, y)-coordinates of the ROI with respect to the
        # *original* image dimensions
        x = int(x * scale)
        y = int(y * scale)
        # resize the window to the classifier input size and preprocess it
        # so it can be classified later
        roi = cv2.resize(roiOrig, INPUT_SIZE)
        roi = img_to_array(roi)
        roi = preprocess_input(roi)
        # update our list of ROIs and associated coordinates
        rois.append(roi)
        locs.append((x, y, x + w, y + h))
        # optionally show the current window on a copy of the original image
        if VISUALIZE:
            clone = orig.copy()
            cv2.rectangle(clone, (x, y), (x + w, y + h),
                (0, 255, 0), 2)
            # show the visualization and current ROI
            cv2.imshow("Visualization", clone)
            cv2.imshow("ROI", roiOrig)
            cv2.waitKey(0)

In [12]:
# Report the time elapsed since `start` was set, i.e. how long the pyramid
# and sliding-window pass took.
end = time.time()
loop_seconds = end - start
print("[INFO] looping over pyramid/windows took {:.5f} seconds".format(
    loop_seconds))
# Stack the collected ROIs into one float32 NumPy array.
rois = np.array(rois, dtype="float32")
# Run the loaded model over all ROIs and time the call.
print("[INFO] classifying ROIs...")
start = time.time()
preds = model.predict(rois)
end = time.time()
predict_seconds = end - start
print("[INFO] classifying ROIs took {:.5f} seconds".format(
    predict_seconds))
# The ImageNet decoding step stays disabled; predictions are mapped to class
# names elsewhere in the notebook.
#preds = imagenet_utils.decode_predictions(preds, top=1)
# Dictionary mapping class labels (keys) to the ROIs assigned to that label
# (values); starts out empty.
labels = {}

[INFO] looping over pyramid/windows took 0.94716 seconds
[INFO] classifying ROIs...
[INFO] classifying ROIs took 56.81680 seconds


In [13]:
# Class names, indexed by the position of the highest score in a prediction.
class_names = ["1c", "2c", "5c", "10c", "20c", "50c", "1e", "2e"]
# Index of the highest-scoring class for every prediction row.
best_ids = [np.argmax(scores) for scores in preds]
# One entry per prediction: [class index, class name, scores as a list].
preds2 = [
    [best_id, class_names[best_id], list(scores)]
    for (best_id, scores) in zip(best_ids, preds)
]

In [14]:
# Inspect the first 20 entries: [class index, class name, per-class scores].
preds2[0:20]

[[2,
  '5c',
  [0.40225473,
   0.00089046743,
   0.59685487,
   8.3097075e-17,
   4.8150882e-18,
   9.795338e-17,
   1.012653e-17,
   1.1025343e-16]],
 [0,
  '1c',
  [0.5109173,
   0.0011257161,
   0.487957,
   8.0330103e-17,
   6.0127957e-18,
   8.796223e-17,
   1.0351557e-17,
   1.3595924e-16]],
 [0,
  '1c',
  [0.696897,
   0.0019846584,
   0.30111834,
   1.0046117e-15,
   5.433796e-17,
   4.0005993e-16,
   7.812765e-17,
   9.940028e-16]],
 [0,
  '1c',
  [0.68576133,
   0.0019393746,
   0.31229928,
   1.431197e-15,
   5.0966677e-17,
   3.7300128e-16,
   8.632993e-17,
   1.0590328e-15]],
 [0,
  '1c',
  [0.71015054,
   0.0015397576,
   0.28830975,
   4.2290007e-16,
   1.3213907e-17,
   9.496808e-17,
   1.8760484e-17,
   2.5251167e-16]],
 [0,
  '1c',
  [0.6186105,
   0.0008883867,
   0.3805011,
   3.78019e-17,
   1.1797191e-18,
   8.587728e-18,
   2.1940482e-18,
   2.898682e-17]],
 [0,
  '1c',
  [0.6718991,
   0.000908189,
   0.32719272,
   2.875154e-17,
   5.9129455e-19,
   6.3552936e-

In [15]:
# Minimum probability the top class of a window needs to count as a detection.
MIN_CONF = 0.9	#args["min_conf"]
# Start from an empty mapping: this cell only ever appends, so re-running it
# on an already filled `labels` would store every detection twice.
labels = {}
# loop over the predictions
for (i, p) in enumerate(preds2):
    # grab the prediction information for the current ROI
    (coinID, label, prob) = p
    # filter out weak detections by ensuring the predicted probability
    # is at least the minimum probability
    if prob[coinID] >= MIN_CONF:
        # grab the bounding box recorded for this ROI
        box = locs[i]
        # add the bounding box and probability to the list kept for this
        # label, creating the list on first use
        labels.setdefault(label, []).append((box, prob[coinID]))

In [16]:
# Show the kept detections per label:
# {label: [((startX, startY, endX, endY), probability), ...]}.
labels

{'1c': [((336, 48, 386, 98), 0.92388815),
  ((352, 48, 402, 98), 0.9713615),
  ((368, 48, 418, 98), 0.9632732),
  ((256, 64, 306, 114), 0.9082099),
  ((320, 64, 370, 114), 0.92836547),
  ((336, 64, 386, 114), 0.91232353),
  ((96, 80, 146, 130), 0.9443263),
  ((112, 80, 162, 130), 0.95541036),
  ((128, 80, 178, 130), 0.9454711),
  ((240, 80, 290, 130), 0.93182135),
  ((256, 80, 306, 130), 0.95590377),
  ((272, 80, 322, 130), 0.96905375),
  ((288, 80, 338, 130), 0.9450366),
  ((304, 80, 354, 130), 0.9422684),
  ((400, 80, 450, 130), 0.94143707),
  ((288, 96, 338, 146), 0.93727875),
  ((416, 96, 466, 146), 0.95072424),
  ((416, 112, 466, 162), 0.9835857),
  ((416, 128, 466, 178), 0.97244084),
  ((416, 144, 466, 194), 0.918139),
  ((320, 160, 370, 210), 0.9547605),
  ((416, 160, 466, 210), 0.9012621),
  ((432, 160, 482, 210), 0.94649357),
  ((448, 160, 498, 210), 0.9177985),
  ((208, 176, 258, 226), 0.9465751),
  ((224, 176, 274, 226), 0.98427445),
  ((320, 176, 370, 226), 0.9864638),
  ((

In [17]:
# loop over the labels for each of detected objects in the image
for (label, detections) in labels.items():
    # clone the original image so that we can draw on it
    print("[INFO] showing results for '{}'".format(label))
    clone = orig.copy()
    # loop over all bounding boxes for the current label
    for (box, prob) in detections:
        # draw the bounding box on the image
        (startX, startY, endX, endY) = box
        cv2.rectangle(clone, (startX, startY), (endX, endY),
            (0, 255, 0), 2)
    # show the results *before* applying non-maxima suppression, then
    # clone the image again so we can display the results *after*
    # applying non-maxima suppression
    cv2.imshow("Before", clone)
    clone = orig.copy()
    # extract the bounding boxes and associated prediction
    # probabilities, then apply non-maxima suppression
    boxes = np.array([d[0] for d in detections])
    proba = np.array([d[1] for d in detections])
    boxes = non_max_suppression(boxes, proba)
    # loop over all bounding boxes that were kept after applying
    # non-maxima suppression
    for (startX, startY, endX, endY) in boxes:
        # draw the bounding box and label on the image
        cv2.rectangle(clone, (startX, startY), (endX, endY),
            (0, 255, 0), 2)
        # put the label above the box unless that would be too close to the
        # top of the image, in which case put it just below the top edge
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.putText(clone, label, (startX, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
    # show the output after applying non-maxima suppression and wait for a
    # key press before moving on to the next label
    cv2.imshow("After", clone)
    cv2.waitKey(0)
# close the "Before"/"After" windows once every label has been shown, so
# they do not stay open after the cell finishes
cv2.destroyAllWindows()

[INFO] showing results for '1c'
[INFO] showing results for '5c'
[INFO] showing results for '2c'
[INFO] showing results for '20c'
[INFO] showing results for '10c'
