# Image Denoiser

In this project, an Autoencoder-based deep learning model is developed to reconstruct and denoise air temperature-related visual data, simulating noisy sensor imagery. All input images are preprocessed and resized to 100×100 pixels to fit the network architecture, and Gaussian noise is added to mimic corrupted input data.

Two autoencoder models are compared:
1. A baseline autoencoder model with the following architecture:

| Layer            | Type          | Output Shape         | Kernel Size | Activation |
|------------------|---------------|-----------------------|-------------|------------|
| Input Layer      | Input          | (None, 100, 100, 3)   | -           | -          |
| Conv2D\_0         | Conv2D         | (None, 100, 100, 32)  | 3×3         | ReLU       |
| MaxPool2D\_0      | MaxPooling2D   | (None, 50, 50, 32)    | 3×3         | ReLU       |
| Conv2D\_1         | Conv2D         | (None, 50, 50, 64)    | 3×3         | ReLU       |
| MaxPool2D\_1      | MaxPooling2D   | (None, 25, 25, 64)    | 3×3         | ReLU       |
| Conv2D\_2         | Conv2D         | (None, 25, 25, 64)    | 3×3         | ReLU       |
| UpSample2D\_0     | UpSampling2D   | (None, 50, 50, 64)    | -           | ReLU       |
| Conv2D\_3         | Conv2D         | (None, 50, 50, 32)    | 3×3         | ReLU       |
| UpSample2D\_1     | UpSampling2D   | (None, 100, 100, 32)  | -           | ReLU       |
| Conv2D\_4         | Conv2D         | (None, 100, 100, 3)   | 3×3         | Sigmoid    |


The optimizer for the baseline Auto Encoder model is Adam with a loss function of MSE.

2. A proposed Auto Encoder model optimized using Optuna for hyperparameter tuning.

All images were resized to 100x100 to match model input requirements. Gaussian noise was added to simulate corrupted data, useful for training the autoencoder to denoise images.


## Import Libraries

In [1]:
# Common python libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import pynvml
import cv2
import os
import glob
from tqdm import tqdm
import pickle
import shutil
import random
import math
import copy
from PIL import ImageFile

# Deep Learning Libraries
import torch
import torch.nn as nn
from torch import optim
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

# Hyper Parameter Tuning
import optuna
from optuna.exceptions import TrialPruned
from optuna.trial import TrialState

# Data preprocessing
from sklearn.model_selection import train_test_split

# Model Evaluation
from sklearn.metrics import classification_report, confusion_matrix

## GPU Check

In [2]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if torch.cuda.is_available():
    print("Pytorch is using the GPU")

    # NVML is only queried when a GPU is present, so the import stays local to
    # this branch.
    import pynvml
    pynvml.nvmlInit()

    # try/finally guarantees NVML is shut down even if one of the queries
    # below raises.
    try:
        num_gpus = pynvml.nvmlDeviceGetCount()

        for i in range(num_gpus):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            gpu_name = pynvml.nvmlDeviceGetName(handle)
            # NOTE(review): some pynvml releases return the name as bytes;
            # decode so the printed name is plain text either way.
            if isinstance(gpu_name, bytes):
                gpu_name = gpu_name.decode("utf-8", errors="replace")
            print("GPU Name:", gpu_name)
    finally:
        pynvml.nvmlShutdown()
else:
    print("Pytorch is not using the GPU")

Pytorch is not using the GPU


In [3]:
# Report how many CUDA devices PyTorch can see.
visible_gpu_count = torch.cuda.device_count()
print("Num GPUs Available: ", visible_gpu_count)

Num GPUs Available:  0


## Importing the Data

In [4]:
def img_to_df(img_path: str) -> pd.DataFrame:
    """
    Scan a folder for JPEG images and summarise them in a DataFrame.

    The folder is searched recursively for ``*.jpg`` files. Each file is read
    with OpenCV; files that cannot be decoded are reported and left out of
    the result, so every row of the returned table describes a readable image.

    Args:
        img_path (str): Path to the folder containing the images.

    Returns:
        pd.DataFrame: One row per readable image with the columns
            ``image_path``, ``width``, ``height``, ``channel_info``,
            ``img_avg_red``, ``img_avg_green`` and ``img_avg_blue``.

    Raises:
        TypeError: If ``img_path`` is not a string.
        FileNotFoundError: If ``img_path`` does not exist.
        NotADirectoryError: If ``img_path`` is not a directory.
    """
    # Validate the argument before touching the file system any further.
    if not isinstance(img_path, str):
        raise TypeError("img_path must be a string.")
    if not os.path.exists(img_path):
        raise FileNotFoundError(f"The path {img_path} does not exist.")
    if not os.path.isdir(img_path):
        raise NotADirectoryError(f"The path {img_path} is not a directory.")

    # Get the list of all image files in the directory
    glob_path = os.path.join(img_path, "**", "*.jpg")
    print(f"glob_path: {glob_path}")

    # recursive=True lets "**" descend into subdirectories as well.
    image_path = glob.glob(glob_path, recursive=True)
    print(f"image_path: {image_path}")

    # Per-image columns. Paths are collected alongside the measurements so
    # that a skipped (unreadable) file cannot leave the columns with
    # different lengths, which would make the DataFrame constructor fail.
    valid_paths = []
    width = []
    height = []
    channel_info = []
    avg_red = []
    avg_green = []
    avg_blue = []

    for path in tqdm(image_path, desc="Processing images", unit="image"):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping corrupted image: {path}")
            continue

        h, w, c = img.shape
        # Mean over rows, then over columns -> one average per channel.
        average = img.mean(axis=0).mean(axis=0)

        valid_paths.append(path)
        height.append(h)
        width.append(w)
        channel_info.append(c)
        # OpenCV stores channels in BGR order.
        avg_blue.append(average[0])
        avg_green.append(average[1])
        avg_red.append(average[2])

    # Combine into one dataframe
    df = pd.DataFrame({
        "image_path": valid_paths,
        "width": width,
        "height": height,
        "channel_info": channel_info,
        "img_avg_red": avg_red,
        "img_avg_green": avg_green,
        "img_avg_blue": avg_blue
    })

    return df

### Caching Function to Load the Data Faster

In [5]:
def save_cache(data, filename):
    """Serialise ``data`` with pickle and write it to ``filename``.

    Args:
        data: The (picklable) object to store.
        filename: Path of the file to write; it is opened in binary write
            mode, so existing content is overwritten.
    """
    with open(filename, mode='wb') as cache_file:
        writer = pickle.Pickler(cache_file)
        writer.dump(data)
    print(f"Data saved to {filename}")

def load_cache(filename):
    """Load and return an object previously stored with pickle.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The object unpickled from ``filename``.
    """
    # NOTE: unpickling can execute arbitrary code, so only load cache files
    # that this project wrote itself.
    with open(filename, 'rb') as f:
        data = pickle.load(f)
    # Report success before returning; a print placed after the return
    # statement would never run.
    print(f"Data loaded from {filename}")
    return data

In [6]:
# Folder holding the input images, and folder used for cached artefacts.
data_path = "./img_data/"
write_path = "./Cache/"

# exist_ok=True makes this a no-op when the folder is already present, so no
# separate existence check is needed.
os.makedirs(write_path, exist_ok=True)

csv_path = os.path.join(write_path, "path_dataset.csv")

# We will cache the data so that it will load faster.
# NOTE: the cache is never invalidated here; delete the CSV after changing the
# contents of data_path to force a rescan.
if os.path.exists(csv_path):
    print("Loading cached dataset...")
    img_df = pd.read_csv(csv_path)
    print("Cached dataset loaded")
else:
    print("Creating and caching dataset...")
    img_df = img_to_df(data_path)
    img_df.to_csv(csv_path, index=False)

Creating and caching dataset...
glob_path: ./img_data/**/*.jpg
image_path: ['./img_data/kirmizi 680.jpg', './img_data/kirmizi 986.jpg', './img_data/kirmizi 1000.jpg', './img_data/kirmizi 1001.jpg', './img_data/kirmizi 1002.jpg', './img_data/kirmizi 1003.jpg', './img_data/kirmizi 1004.jpg', './img_data/kirmizi 1005.jpg', './img_data/kirmizi 1006.jpg', './img_data/kirmizi 1007.jpg', './img_data/kirmizi 1008.jpg', './img_data/kirmizi 1009.jpg', './img_data/kirmizi 1010.jpg', './img_data/kirmizi 1011.jpg', './img_data/kirmizi 1012.jpg', './img_data/kirmizi 1013.jpg', './img_data/kirmizi 1014.jpg', './img_data/kirmizi 1015.jpg', './img_data/kirmizi 1017.jpg', './img_data/kirmizi 1018.jpg', './img_data/kirmizi 1019.jpg', './img_data/kirmizi 1020.jpg', './img_data/kirmizi 1021.jpg', './img_data/kirmizi 1022.jpg', './img_data/kirmizi 1023.jpg', './img_data/kirmizi 1024.jpg', './img_data/kirmizi 1025.jpg', './img_data/kirmizi 1026.jpg', './img_data/kirmizi 1027.jpg', './img_data/kirmizi 1028.jp

Processing images: 100%|██████████| 1074/1074 [00:00<00:00, 1179.11image/s]
