# Image Preprocessing Pipeline

In [None]:
"""
Install necessary packages
"""
# !pip install numpy
# !pip install pandas
# !pip install opencv-python
# !pip install scikit-image
# !pip install scipy
# !pip install matplotlib
# !apt-get update
# !apt-get install ffmpeg libsm6 libxext6  -y

In [None]:
"""
Import necessary packages
"""
import os
import sys
from pathlib import Path
from datetime import datetime

import glob
import json

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.colors as colors

from skimage.filters import threshold_local, sobel, gaussian, unsharp_mask
from skimage.io import imsave, imread
from skimage.restoration import (denoise_tv_chambolle, denoise_bilateral,
                                 denoise_wavelet, estimate_sigma)
from skimage.transform import rotate, SimilarityTransform, warp

import imageio

from scipy.ndimage import generic_filter

"""
Imports from preprocessing codebase
"""
from preprocessing.center_finding import find_center
from preprocessing.utils import create_circular_mask
from preprocessing.utils import gen_rotation_line
from preprocessing.utils import get_angle
from preprocessing.denoising import filter_strays
from preprocessing.image_processing import centerize
from preprocessing.image_processing import rotate_image
from preprocessing.image_processing import convert_to_cv2_img
from preprocessing.image_processing import crop_image
from preprocessing.image_processing import quadrant_fold
from preprocessing.preprocess import PreprocessData

In [None]:
# Output style -> name of the preprocessing plan that produces it
OUTPUT_MAP = dict(
    centered_rotated="centerize_rotate",
    quad_folded="quad_fold",
    local_thresh_centered_rotated="local_thresh_centerize_rotate",
    local_thresh_quad_folded="local_thresh_quad_fold",
)

# Preprocessing plan -> output style; built by inverting OUTPUT_MAP so the
# two lookup tables always agree
INVERSE_OUTPUT_MAP = {plan: style for style, plan in OUTPUT_MAP.items()}

In [None]:
# Image size and region-of-interest settings.
# Notes in parentheses record the commented-out alternative values that
# accompanied each setting.
params = dict(
    # Image size
    h=256,
    w=256,
    # Region of interest for beam center on raw images
    # (alternative: rmax_beam=50)
    beam_rmax=25,
    # Annulus region of interest for XRD pattern
    # (alternatives: rmin=30 or rmin=25, rmax=120)
    rmin=22.5,
    rmax=90,
    # Annulus region of interest for 9A feature region
    # (alternatives: reyes_min=40, reyes_max=80)
    eyes_rmin=30,
    eyes_rmax=45,
    # Maximum distance from 9A feature maximum intensity location
    # for blob analysis (alternative: reyes_max_blob=30)
    eyes_blob_rmax=20,
    # Percentile used to analyze 9A features as blobs
    eyes_percentile=99,
    # Local threshold block size
    # (alternative: local_threshold_block_size = 27)
    local_thresh_block_size=21,
)

In [None]:
"""
Preprocess Data Set
"""
# Imported here so this cell stays runnable on its own: `timezone` is needed
# to build an aware UTC timestamp, because datetime.utcnow() is deprecated
# since Python 3.12.
from datetime import timezone

# Set main files directory (input folders are read from here and the
# output folders are created here)
filesdir = ""

# Timestamp shared by the output folders of every classification
timestr = "%Y%m%dT%H%M%S.%f"
timestamp = datetime.now(timezone.utc).strftime(timestr)

# Specify preprocessing plans
plans = [
    "centerize_rotate",
    "quad_fold",
    "local_thresh_centerize_rotate",
    "local_thresh_quad_fold",
]

# Slice bounds applied to each preprocessor's file list;
# (None, None) keeps every file
substart, subend = (None, None)

# Set to True to write the preprocessed data of every plan to disk
save = False

# Preprocess each classification; each value is replaced by the
# PreprocessData instance created for that classification
preprocessors = {"normal": None, "cancer": None}
# preprocessors = {"samples": None,}
for classification in preprocessors:

    output_dir_name = "preprocessed_{}_{}".format(classification, timestamp)

    print("{} output folder: {}".format(classification.capitalize(),
                                        output_dir_name))

    # Create timestamped output directory (raises if it already exists,
    # so an earlier run is never overwritten)
    output_dir = os.path.join(filesdir, output_dir_name)
    os.makedirs(output_dir)

    # Record the parameters used for this run next to its outputs
    params_dir = os.path.join(output_dir, "params")
    os.makedirs(params_dir)
    with open(os.path.join(params_dir, "params.txt"), "w") as paramsfile:
        json.dump(params, paramsfile, indent=4)

    # Count the input files; this listing is only used for the message below
    input_dir = os.path.join(filesdir, classification)
    input_filenames = sorted(glob.glob(os.path.join(input_dir, "*.txt")))

    print("Found {} {} files.".format(len(input_filenames), classification))

    # Run preprocessing
    preprocessor = PreprocessData(input_dir=input_dir, params=params)
    # Store preprocessor so later cells can read its results
    preprocessors[classification] = preprocessor
    # Restrict the run to the selected subset of files
    preprocessor.filenames_fullpaths = (
        preprocessor.filenames_fullpaths[substart:subend])
    preprocessor.preprocess(visualize=False, plans=plans, mask_style="both")

    if save:
        # Save to file, one sub-directory per plan
        for plan in plans:
            # Subscript instead of .get(): an unmapped plan raises a
            # KeyError naming the plan rather than a TypeError caused by
            # passing None on to os.path.join
            output_style = INVERSE_OUTPUT_MAP[plan]
            data_dir = os.path.join(output_dir, output_style)

            preprocessor.save(output_dir=data_dir, output_format="txt",
                              output_style=output_style, rescale=False)
            # Optional PNG export (disabled):
            # image_dir = os.path.join(output_dir, output_style + "_images")
            # preprocessor.save(output_dir=image_dir, output_format="png",
            #                   output_style=output_style, rescale=False)

In [None]:
"""
Display images
"""
def _show_panel(figure, position, image, title, n_panels=5):
    """Draw `image` in grayscale in slot `position` (1-based) of a one-row
    grid of `n_panels` subplots, with a title and no axis ticks."""
    figure.add_subplot(1, n_panels, position)
    plt.imshow(image, cmap="gray")
    plt.title(title)
    plt.xticks(())
    plt.yticks(())


for classification, preprocessor in preprocessors.items():

    n_files = len(preprocessor.filenames_fullpaths)
    # Preview only the first file (if there is one) of each classification
    for idx in range(min(n_files, 1)):
        centered_rotated = preprocessor.cache["centered_rotated"][idx]
        filename = os.path.basename(preprocessor.filenames_fullpaths[idx])

        # One row of five 4x4 inch panels on a white background
        fig = plt.figure(dpi=100)
        fig.set_size_inches(4 * 5, 4 * 1)
        fig.set_facecolor("white")
        # Optional figure title (disabled):
        # fig.suptitle(classification.capitalize() + " " + filename)

        # Panel 1: original image on a log scale; the +1 offset keeps
        # log10 defined for zero-valued pixels
        original = preprocessor.cache["original"][idx]
        _show_panel(fig, 1, 20 * np.log10(original + 1), "Original [dB+1]")
        # Optional centroid overlays (disabled):
        # plt.plot(initial_max_centroid[1],initial_max_centroid[0],marker='o',color='r')
        # plt.plot(eye_max_blob_centroid[1],eye_max_blob_centroid[0],marker='o',color='g')

        # Panel 2: centered and rotated image
        _show_panel(fig, 2, centered_rotated, "Centered and Rotated")

        # Panel 3: quadrant-folded image
        quad_folded = preprocessor.cache["quad_folded"][idx]
        _show_panel(fig, 3, quad_folded, "Quadrant Folded")

        # Panel 4: locally thresholded, centered and rotated image
        local_thresh_centered_rotated = (
            preprocessor.cache["local_thresh_centered_rotated"][idx])
        _show_panel(fig, 4, local_thresh_centered_rotated,
                    "Local Threshold Centered Rotated")

        # Panel 5: locally thresholded, quadrant-folded image
        local_thresh_quad_folded = (
            preprocessor.cache["local_thresh_quad_folded"][idx])
        _show_panel(fig, 5, local_thresh_quad_folded,
                    "Local Threshold Quad Folded")

        plt.show()