Dependencies

In [1]:
import cv2
import os
import numpy as np
from typing import Tuple, Optional
import webbrowser
from pathlib import Path
from typing import Union, List
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim

Initial Processing

In [2]:
def contrast_stretch(img_rgb):
        img_float = img_rgb.astype(np.float32)
        min_val = float(np.min(img_float))
        max_val = float(np.max(img_float))
        if max_val <= min_val + 1e-6:
            return img_rgb.copy()
        stretched = (img_float - min_val) / (max_val - min_val) * 255.0
        return np.clip(stretched, 0, 255).astype(np.uint8)

Denoising

In [3]:
def denoise(img_rgb, h=2.5, hColor=5, templateWindowSize=7, searchWindowSize=21, **kwargs):
        # Ensure valid odd window sizes as required by OpenCV
        templateWindowSize = int(templateWindowSize)
        searchWindowSize = int(searchWindowSize)
        if templateWindowSize % 2 == 0:
            templateWindowSize += 1
        if searchWindowSize % 2 == 0:
            searchWindowSize += 1

        img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
        filtered_bgr = cv2.fastNlMeansDenoisingColored(
            img_bgr, None,
            h=float(h),
            hColor=float(hColor),
            templateWindowSize=templateWindowSize,
            searchWindowSize=searchWindowSize
        )
        return cv2.cvtColor(filtered_bgr, cv2.COLOR_BGR2RGB)

Brightness improvement

In [4]:
def gamma_correction(img_rgb, gamma=2.2):
        normalized = img_rgb.astype(np.float32) / 255.0
        corrected = np.power(normalized, 1.0 / float(gamma))
        return np.clip(corrected * 255.0, 0, 255).astype(np.uint8)

Colour Balancing

In [5]:
def white_balance(img_rgb):
        result = img_rgb.astype(np.float32)
        avg = np.mean(result, axis=(0, 1))  # RGB means
        avg_gray = float(np.mean(avg))
        # avoid division by zero
        scale = np.where(avg <= 1e-6, 1.0, avg_gray / avg)
        result *= scale
        return np.clip(result, 0, 255).astype(np.uint8)

Tone Mapping

In [6]:
def clahe_yuv_and_tone_correction_lab(img_rgb, clipLimit1=1.0, tileGridSize1=(2, 2), clipLimit2=0.1, tileGridSize2=(1, 1)):
    # Convert RGB to BGR for OpenCV
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    
    # First: CLAHE on YUV luminance channel
    yuv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2YUV)
    clahe_yuv = cv2.createCLAHE(clipLimit=clipLimit1, tileGridSize=tileGridSize1)
    yuv[:, :, 0] = clahe_yuv.apply(yuv[:, :, 0])
    enhanced_bgr = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)
    
    # Second: CLAHE on LAB lightness channel
    lab = cv2.cvtColor(enhanced_bgr, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe_lab = cv2.createCLAHE(clipLimit=clipLimit2, tileGridSize=tileGridSize2)
    cl = clahe_lab.apply(l)
    limg = cv2.merge((cl, a, b))
    final_bgr = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)
    
    # Convert back to RGB
    return cv2.cvtColor(final_bgr, cv2.COLOR_BGR2RGB)

Colour Saturation improvement

In [7]:
def saturation_enhance(img_rgb, max_boost=1.6, min_boost=1.1, fixedEnhance=1.36):
        hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
        h, s, v = cv2.split(hsv)
        normalized_s = s.astype(np.float32) / 255.0
        adaptive_scale = float(max_boost) - (float(max_boost) - float(min_boost)) * normalized_s
        s_enhanced = np.clip(s.astype(np.float32) * adaptive_scale * fixedEnhance, 0, 255).astype(np.uint8)
        enhanced_hsv = cv2.merge([h, s_enhanced, v])
        return cv2.cvtColor(enhanced_hsv, cv2.COLOR_HSV2RGB)


Pipeline Stages

In [8]:
def process_pipeline(img):
    # Stage 1: Contrast stretching
    img = contrast_stretch(img)

    # Stage 2: Denoising
    img = denoise(img)

    # Stage 3: Gamma Correction
    img = gamma_correction(img)

    # Stage 4: White Balancing
    img = white_balance(img)

    # Stage 5: CLAHE
    img = clahe_yuv_and_tone_correction_lab(img)

    # Stage 6: HSV saturation enhancement (Adaptive + FIxed)
    img = saturation_enhance(img)

    return img

Batch Processing

In [9]:
input_folder = "C:/Users/gaura/Current_Code_Files/Internship_Project/house_exterior_data - 2/Batch1/Primary_Exterior_Dataset"
output_folder = "C:/Users/gaura/Current_Code_Files/Internship_Project/OutputImages"

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

for filename in os.listdir(input_folder):
    if filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".tiff")):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)

        if img is None:
            print(f"Skipping {filename} (cannot read image)")
            continue

        processed = process_pipeline(img)

        # Split name and extension
        name, ext = os.path.splitext(filename)
        out_filename = f"{name}_output{ext}"

        out_path = os.path.join(output_folder, out_filename)
        print(type(processed), processed.shape if processed is not None else None)
        cv2.imwrite(out_path, processed)
        print(f"Processed and saved: {out_filename}")




<class 'numpy.ndarray'> (1500, 2000, 3)
Processed and saved: 0BYZodoqfSIog8Wq_86Pcw_output.jpg
<class 'numpy.ndarray'> (2448, 3264, 3)
Processed and saved: 0Gorezr4O2NfRAHkEfyZZg_output.jpg
<class 'numpy.ndarray'> (1611, 1611, 3)
Processed and saved: 0hfmiSEJtXFYhxs4XxR7PA_output.jpg
<class 'numpy.ndarray'> (2448, 3264, 3)
Processed and saved: 0tysbnnKZGOk-SyzweASaA_output.jpg
<class 'numpy.ndarray'> (1200, 1600, 3)
Processed and saved: 1a2bb6e318591cc40d280880bf23e259_output.jpg
<class 'numpy.ndarray'> (2448, 3264, 3)
Processed and saved: 1BvLhmNMa6A3aTiKezxSXA_output.jpg
<class 'numpy.ndarray'> (1823, 4032, 3)
Processed and saved: 1QE42YpA12FL6uWezE-d1w_output.jpg
<class 'numpy.ndarray'> (1561, 3264, 3)
Processed and saved: 1_wVzEv_cTx9P00b62WOGg_output.jpg
<class 'numpy.ndarray'> (2448, 3264, 3)
Processed and saved: 2-CkLq8O-FDPSzxodmBCiQ_output.jpg
<class 'numpy.ndarray'> (1060, 1600, 3)
Processed and saved: 2b3f30bd3ea9a4fe2fd1bb556551b621_output.jpg
<class 'numpy.ndarray'> (1453,

In [10]:
input_folder = "C:/Users/gaura/Current_Code_Files/Internship_Project/house_exterior_data - 2/Batch1/Primary_Exterior_Dataset"
output_folder = "C:/Users/gaura/Current_Code_Files/Internship_Project/OutputImages"
html_file = "comparison_01.html"

# Allowed image extensions
IMAGE_EXTS = {".png", ".jpg", ".jpeg"}

def is_image(fname):
    return os.path.splitext(fname)[1].lower() in IMAGE_EXTS

input_images = [f for f in os.listdir(input_folder) if is_image(f)]
output_images = [f for f in os.listdir(output_folder) if is_image(f)]

output_map = {}
for out_img in output_images:
    name, ext = os.path.splitext(out_img)
    if name.lower().endswith("_output"):
        base = name[:-7]
    else:
        base = name
    output_map[base.lower()] = out_img  

# HTML content
html_content = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Image Comparison</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background: #f4f4f4;
            padding: 20px;
        }
        .pair {
            display: flex;
            justify-content: center;
            align-items: flex-start;
            margin-bottom: 30px;
            background: #fff;
            padding: 15px;
            border-radius: 8px;
            box-shadow: 0px 2px 6px rgba(0,0,0,0.2);
        }
        .image-container {
            text-align: center;
            margin: 0 15px;
        }
        img {
            max-width: 400px;
            height: auto;
            border-radius: 6px;
            border: 1px solid #ccc;
        }
        .caption {
            margin-top: 8px;
            font-weight: bold;
        }
        .missing {
            color: red;
            font-weight: bold;
        }
    </style>
</head>
<body>
    <h1>Input vs Output Image Comparison</h1>
"""

for in_img in sorted(input_images, key=str.lower):
    base, _ = os.path.splitext(in_img)
    out_img = output_map.get(base.lower()) 
    html_content += f"""
    <div class="pair">
        <div class="image-container">
            <img src="{input_folder}/{in_img}" alt="Input Image">
            <div class="caption">Input: {in_img}</div>
        </div>
    """
    if out_img:
        html_content += f"""
        <div class="image-container">
            <img src="{output_folder}/{out_img}" alt="Output Image">
            <div class="caption">Output: {out_img}</div>
        </div>
        """
    else:
        html_content += f"""
        <div class="image-container">
            <div class="missing"> No matching output found</div>
        </div>
        """
    html_content += "</div>"

html_content += """
</body>
</html>
"""

# Save HTML file
with open(html_file, "w", encoding="utf-8") as f:
    f.write(html_content)

webbrowser.open("file://" + os.path.abspath(html_file))

print(f"HTML file generated and opened: {html_file}")

HTML file generated and opened: comparison_01.html


Dataset division

In [11]:
import os
import shutil

def split_dataset(input_folder, output_folder, batch_size=200):
    """
    Splits images from input_folder into batches of given size
    and saves them in subfolders inside output_folder.
    
    Example:
        batch_1 -> first 200 images
        batch_2 -> next 200 images
        ...
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # List all files in input folder
    files = [f for f in os.listdir(input_folder) 
             if os.path.isfile(os.path.join(input_folder, f))]

    total_files = len(files)
    print(f"Total files found: {total_files}")

    batch_num = 1
    for i in range(0, total_files, batch_size):
        batch_files = files[i:i+batch_size]
        batch_folder = os.path.join(output_folder, f"batch_{batch_num}")
        os.makedirs(batch_folder, exist_ok=True)

        for file in batch_files:
            src = os.path.join(input_folder, file)
            dst = os.path.join(batch_folder, file)
            shutil.copy(src, dst)

        print(f"Created {batch_folder} with {len(batch_files)} files")
        batch_num += 1

# Example usage
input_folder = r"C:/Users/gaura/Current_Code_Files/Internship_Project/house_exterior_data"
output_folder = r"C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset"
split_dataset(input_folder, output_folder, batch_size=200)


Total files found: 19658
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_1 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_2 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_3 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_4 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_5 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_6 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_7 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_8 with 200 files
Created C:/Users/gaura/Current_Code_Files/Internship_Project/BatchedDataset\batch_9 with 200 files


KeyboardInterrupt: 