In [1]:
import os
import numpy as np
# initialize the base path to the input documents dataset; a raw string
# keeps the Windows backslashes literal instead of depending on Python
# passing unrecognised escapes such as "\P" through unchanged (which is
# a SyntaxWarning on Python 3.12+)
BASE_PATH = r"C:\Programmes\python\mini_project"
# define the paths to the noisy and the cleaned training directories
TRAIN_PATH = os.path.join(BASE_PATH, "train")
CLEANED_PATH = os.path.join(BASE_PATH, "train_cleaned")

In [None]:
import cv2
def blur_and_threshold(image, eps=1e-7):
    """Approximate the foreground of ``image`` and min/max scale it.

    A median blur (kernel size 5) of the image is subtracted from the
    image itself, every positive difference is replaced by zero, and the
    remaining values are min/max scaled.

    :param image: image array; it is passed straight to
        ``cv2.medianBlur``, so it must be a type that function accepts
    :param eps: small constant added to the denominator of the scaling
        step so that a constant difference image does not divide by zero
    :return: float array holding the scaled foreground approximation
    """
    smoothed = cv2.medianBlur(image, 5)
    # the difference to the blurred image approximates the foreground;
    # only its non-positive part is kept
    diff = np.minimum(image.astype("float") - smoothed, 0.0)
    # min/max scale the differences into the range [0, 1]
    lowest = np.min(diff)
    highest = np.max(diff)
    return (diff - lowest) / (highest - lowest + eps)

In [None]:
# import the necessary packages
from config import denoise_config as config
from pyimagesearch.denoising import blur_and_threshold
from imutils import paths
import progressbar
import random
import cv2

In [None]:
# gather the noisy and the cleaned image paths, each in sorted order
trainPaths = sorted(paths.list_images(config.TRAIN_PATH))
cleanedPaths = sorted(paths.list_images(config.CLEANED_PATH))
# build the progress bar (label, percentage, bar and ETA) sized to the
# number of training images, and start it
widgets = [
	"Creating Features: ",
	progressbar.Percentage(),
	" ",
	progressbar.Bar(),
	" ",
	progressbar.ETA(),
]
pbar = progressbar.ProgressBar(maxval=len(trainPaths), widgets=widgets)
pbar = pbar.start()

In [None]:
# pair every noisy training image with a cleaned image by position in
# the two path lists
imagePaths = zip(trainPaths, cleanedPaths)
# NOTE: this whole loop has to live in a single cell -- each notebook
# cell is compiled on its own, so a loop body cannot continue in the
# next cell. The "with" block closes the CSV file even when processing
# an image raises.
with open(config.FEATURES_PATH, "w") as csv:
	# loop over the training images together
	for (i, (trainPath, cleanedPath)) in enumerate(imagePaths):
		# load the noisy and corresponding gold-standard cleaned images
		# and convert them to grayscale
		trainImage = cv2.imread(trainPath)
		cleanImage = cv2.imread(cleanedPath)
		trainImage = cv2.cvtColor(trainImage, cv2.COLOR_BGR2GRAY)
		cleanImage = cv2.cvtColor(cleanImage, cv2.COLOR_BGR2GRAY)
		# replicate-pad both images by 2 pixels on every side so that a
		# 5x5 window can be centered on each pixel of the unpadded image
		trainImage = cv2.copyMakeBorder(trainImage, 2, 2, 2, 2,
			cv2.BORDER_REPLICATE)
		cleanImage = cv2.copyMakeBorder(cleanImage, 2, 2, 2, 2,
			cv2.BORDER_REPLICATE)
		# blur and threshold the noisy image
		trainImage = blur_and_threshold(trainImage)
		# scale the pixel intensities in the cleaned image from the
		# range [0, 255] to [0, 1] (the noisy image is already in the
		# range [0, 1])
		cleanImage = cleanImage.astype("float") / 255.0
		# slide a 5x5 window across the image
		for y in range(0, trainImage.shape[0]):
			for x in range(0, trainImage.shape[1]):
				# extract the window ROIs for both the train image and
				# clean image, then grab the spatial dimensions of the
				# ROI
				trainROI = trainImage[y:y + 5, x:x + 5]
				cleanROI = cleanImage[y:y + 5, x:x + 5]
				(rH, rW) = trainROI.shape[:2]
				# if the ROI is not 5x5 (window clipped by the image
				# edge), throw it out
				if rW != 5 or rH != 5:
					continue
				# the features are the flattened 5x5=25 noisy pixels and
				# the target is the center pixel of the clean ROI
				features = trainROI.flatten()
				target = cleanROI[2, 2]
				# if we wrote *every* feature/target combination to disk
				# we would end up with millions of rows -- only write a
				# row with probability config.SAMPLE_PROB, thereby
				# reducing the total number of rows in the file
				if random.random() <= config.SAMPLE_PROB:
					# write the target followed by the features as one
					# comma-separated row
					features = [str(p) for p in features]
					row = ",".join([str(target)] + features)
					csv.write("{}\n".format(row))
		# update the progress bar
		pbar.update(i)
# the CSV file is closed by the "with" block; finish the progress bar
pbar.finish()

In [None]:
from config import denoise_config as config
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np
import pickle

In [None]:
print("[INFO] loading dataset...")
features = []
targets = []
# loop over the rows in our features CSV file; the "with" block makes
# sure the file handle is closed once every row has been read (or if a
# row fails to parse)
with open(config.FEATURES_PATH) as featuresFile:
	for row in featuresFile:
		# parse the row and extract (1) the target pixel value to
		# predict along with (2) the remaining values, which serve as
		# our feature vector
		row = row.strip().split(",")
		row = [float(x) for x in row]
		target = row[0]
		pixels = row[1:]
		# update our features and targets lists, respectively
		features.append(pixels)
		targets.append(target)

In [None]:
# turn the accumulated Python lists into float NumPy arrays
features = np.asarray(features, dtype="float")
target = np.asarray(targets, dtype="float")
# hold out 25% of the rows for testing and train on the remaining 75%;
# the fixed random_state keeps the split the same from run to run
trainX, testX, trainY, testY = train_test_split(
	features,
	target,
	test_size=0.25,
	random_state=42,
)

In [None]:
print("[INFO] training model...")
# seed the forest so that training is repeatable, matching the seeded
# train/test split it is fitted on
model = RandomForestRegressor(n_estimators=10, random_state=42)
model.fit(trainX, trainY)
# compute the root mean squared error on the testing set
print("[INFO] evaluating model...")
preds = model.predict(testX)
rmse = np.sqrt(mean_squared_error(testY, preds))
print("[INFO] rmse: {}".format(rmse))
# serialize our random forest regressor to disk; the "with" block closes
# the file even if pickling fails part-way through
with open(config.MODEL_PATH, "wb") as f:
	pickle.dump(model, f)

In [None]:
from imutils import paths
import argparse
import pickle
import random
import cv2

In [None]:
# describe the command line interface: the directory holding the testing
# images (mandatory) and how many of them to sample (10 by default)
# NOTE(review): parse_args() reads sys.argv; inside a Jupyter kernel that
# holds the kernel's own launch arguments rather than -t/-s, so this cell
# presumably only works when the code is run as a script -- confirm how
# the notebook is meant to supply the testing path
ap = argparse.ArgumentParser()
ap.add_argument(
	"-t", "--testing",
	required=True,
	help="path to directory of testing images",
)
ap.add_argument(
	"-s", "--sample",
	type=int,
	default=10,
	help="sample size for testing images",
)
# expose the parsed options as a plain dictionary keyed by option name
parsed = ap.parse_args()
args = vars(parsed)

In [None]:
# load the serialized model from disk, closing the file afterwards
# NOTE: unpickling can execute arbitrary code, so config.MODEL_PATH must
# only ever point at a model file produced by a trusted source
with open(config.MODEL_PATH, "rb") as f:
	model = pickle.load(f)
# grab the paths to all images in the testing directory and then
# randomly sample them: shuffle in place and keep the first
# args["sample"] entries
imagePaths = list(paths.list_images(args["testing"]))
random.shuffle(imagePaths)
imagePaths = imagePaths[:args["sample"]]

In [None]:
for imagePath in imagePaths:
	print("[INFO] processing {}".format(imagePath))
	# read the image as grayscale and keep an untouched copy for display
	gray = cv2.cvtColor(cv2.imread(imagePath), cv2.COLOR_BGR2GRAY)
	orig = gray.copy()
	# replicate-pad by 2 pixels per side, then blur/threshold the result
	padded = cv2.copyMakeBorder(gray, 2, 2, 2, 2, cv2.BORDER_REPLICATE)
	image = blur_and_threshold(padded)
	# collect one flattened 5x5=25 pixel feature vector per window
	# position; windows clipped by the image edge come out smaller than
	# 5x5 and are skipped
	roiFeatures = []
	(imgH, imgW) = image.shape[:2]
	for y in range(imgH):
		for x in range(imgW):
			roi = image[y:y + 5, x:x + 5]
			if roi.shape[:2] == (5, 5):
				roiFeatures.append(roi.flatten())
	# predict one value per window, fold the flat predictions back into
	# the shape of the original (unpadded) image and rescale them from
	# [0, 1] to [0, 255]
	pixels = model.predict(roiFeatures)
	pixels = pixels.reshape(orig.shape)
	output = (pixels * 255).astype("uint8")
	# display the original next to the output and wait for a key press
	cv2.imshow("Original", orig)
	cv2.imshow("Output", output)
	cv2.waitKey(0)
	