In [1]:
# The images are downloaded into one folder; we then preprocess them (resize)
# and save the results in a separate folder.
from dataclasses import dataclass, field
from pathlib import Path


In [2]:
@dataclass(frozen=True)
class DownloadDataConfig:
    """Immutable settings for the Google Drive download stage.

    Field order is part of the interface: instances may be built positionally.
    """

    # Folder that holds the dataset.
    data_directory: Path
    # Shareable Google Drive folder URL the images are listed from.
    source_gdrive_url: str
    # API key sent with the Drive listing request. Excluded from the generated
    # repr so that printing or logging the config object does not expose it.
    gdrive_api_key: str = field(repr=False)
    # Folder the downloaded images are written to.
    download_image_folder: Path


@dataclass(frozen=True)
class PreprocessDataConfig:
    """Immutable folder settings for the resize (preprocess) stage."""

    # Folder the stage reads the downloaded images from.
    download_image_folder: Path
    # Folder the stage writes the resized images to.
    processed_image_folder: Path

@dataclass(frozen=True)
class PrepareBlurImageConfig:
    """Immutable folder settings for the blur-generation stage."""

    # Folder the stage reads the resized images from.
    processed_image_folder: Path
    # Folder the stage writes the blurred images to.
    blur_image_folder: Path

In [3]:
from autoencoder.constants import filepath
from autoencoder import logger
from autoencoder.utils.util_functions import read_yaml, create_dir

In [4]:
class ConfigurationManager:
    """Read the project's YAML files and build one config object per pipeline stage.

    Construction loads the config, params and secret YAML files and creates the
    folder named by ``config.root``.  Each ``get_*`` method creates the folders
    its stage writes to and returns the matching frozen dataclass, populated
    from ``config.data_paths``.
    """

    def __init__(
        self,
        config_filepath = filepath.CONFIG_FILE_PATH,
        param_filepath = filepath.PARAMS_FILE_PATH,
        secret_filepath = filepath.SECRET_FILE_PATH):
        """Load the three YAML files and create the root folder.

        Args:
            config_filepath: Path of the main config YAML.
            param_filepath: Path of the params YAML.
            secret_filepath: Path of the secrets YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(param_filepath)
        # NOTE(review): `self.secret` is loaded but never read in this class;
        # the Drive API key is taken from `config.data_paths.gdrive_api_key`
        # below. If the key is meant to live in the secrets file, switch the
        # lookup once that file's schema is confirmed.
        self.secret = read_yaml(secret_filepath)
        # Create the root output folder that every stage writes under.
        create_dir([self.config.root])

    def get_download_data_config(self) -> DownloadDataConfig:
        """Create the dataset and download folders and return the download settings."""
        logger.info('getting download data configuration')
        config = self.config.data_paths

        logger.info('Creating dataset and orignal_image folder Inside artifacts folder')
        create_dir([config.data_directory, config.download_image_folder])

        return DownloadDataConfig(
            data_directory=config.data_directory,
            source_gdrive_url=config.source_gdrive_url,
            gdrive_api_key=config.gdrive_api_key,
            download_image_folder=config.download_image_folder,
        )

    def get_preprocess_data_config(self) -> PreprocessDataConfig:
        """Create the processed-image folder and return the preprocess settings."""
        logger.info('getting preprocess data configuration')
        config = self.config.data_paths

        logger.info('Creating processed_image folder')
        create_dir([config.processed_image_folder])

        return PreprocessDataConfig(
            download_image_folder=config.download_image_folder,
            processed_image_folder=config.processed_image_folder,
        )

    def get_blurimage_data_config(self) -> PrepareBlurImageConfig:
        """Create the blur-image folder and return the blur-generation settings."""
        logger.info('getting blur image data configuration')
        config = self.config.data_paths

        logger.info('Creating blur_image folder')
        create_dir([config.blur_image_folder])

        return PrepareBlurImageConfig(
            processed_image_folder=config.processed_image_folder,
            blur_image_folder=config.blur_image_folder,
        )



In [5]:
import requests
import gdown
import os
import cv2
from keras.preprocessing import image
from PIL import Image

In [6]:
class DataIngestionPreparation:
    """Download the source images from Google Drive, resize them, and write blurred copies.

    The stages are separate methods meant to be run in order:
    ``download_data`` -> ``preprocess_data`` -> ``generate_blur_images``.
    """

    # Drive v3 endpoint used to list the files whose parent is the shared folder.
    _DRIVE_FILES_ENDPOINT = "https://www.googleapis.com/drive/v3/files"
    # Seconds to wait on each listing request; without a timeout a stalled
    # connection would block the notebook indefinitely.
    _REQUEST_TIMEOUT = 30

    def __init__(self, download_config: DownloadDataConfig,
                  preprocess_config: PreprocessDataConfig,
                  blurimage_config : PrepareBlurImageConfig):
        """Store the per-stage configuration objects.

        Args:
            download_config: Settings read by ``download_data``.
            preprocess_config: Settings read by ``preprocess_data``.
            blurimage_config: Settings read by ``generate_blur_images``.
        """
        self.download_config = download_config
        self.preprocess_config = preprocess_config
        self.blurimage_config = blurimage_config

    @staticmethod
    def _extract_folder_id(source_url: str) -> str:
        """Return the last path segment of ``source_url``, ignoring any query string or trailing slash."""
        # Drop the query first so a "/" inside it cannot be mistaken for a path separator.
        return source_url.split("?")[0].rstrip("/").split("/")[-1]

    def download_data(self) -> None:
        """List every file in the configured Drive folder and download each one with gdown.

        The listing is read page by page until Drive stops returning a
        ``nextPageToken``.  If any listing request does not return HTTP 200,
        an error is logged and nothing is downloaded.

        Raises:
            requests.RequestException: If a listing request fails or times out.
        """
        folder_id = self._extract_folder_id(self.download_config.source_gdrive_url)
        target_folder = self.download_config.download_image_folder
        os.makedirs(target_folder, exist_ok=True)

        # Log the endpoint and folder id only: the full request URL carries the API key.
        logger.info(
            f"Getting Response from Gdrive api _url : {self._DRIVE_FILES_ENDPOINT} (folder id: {folder_id})"
        )

        # Passing the query via `params` lets requests handle the URL encoding.
        query = {
            "q": f"'{folder_id}' in parents",
            "key": self.download_config.gdrive_api_key,
        }
        files = []
        while True:
            response = requests.get(
                self._DRIVE_FILES_ENDPOINT, params=query, timeout=self._REQUEST_TIMEOUT
            )
            logger.info(f'response : {response.status_code}')
            if response.status_code != 200:
                logger.error("Unable to Downlaod the data")
                return

            payload = response.json()
            files.extend(payload.get('files', []))

            next_page_token = payload.get('nextPageToken')
            if not next_page_token:
                break
            query["pageToken"] = next_page_token

        logger.info("Response 200 OK")
        logger.info(f'Downloading files in folder : {target_folder}')
        for entry in files:
            download_url = f"https://drive.google.com/uc?id={entry['id']}"
            # Download the file using gdown
            gdown.download(download_url, os.path.join(target_folder, entry['name']), quiet=False)
        logger.info("**** Images Downloaded Ready for Preprocess ****")

    def preprocess_data(self, image_size=(128, 128)) -> None:
        """Resize every file in the download folder and save it under the same name.

        Args:
            image_size: Target ``(width, height)`` passed to ``Image.resize``.
                Defaults to ``(128, 128)``.

        Raises:
            Exception: Whatever PIL raises when a file cannot be opened or
                saved; the run stops at the first such file.
        """
        image_size = tuple(image_size)
        source_folder = self.preprocess_config.download_image_folder
        destination_folder = self.preprocess_config.processed_image_folder
        os.makedirs(destination_folder, exist_ok=True)

        # Only regular files are processed; sub-directories are ignored.
        file_names = [
            name for name in os.listdir(source_folder)
            if os.path.isfile(os.path.join(source_folder, name))
        ]
        logger.info(f'Resizing the Images to {image_size}')
        # The loop variable is deliberately not called `image`, which is also
        # the name of the keras module imported by this notebook.
        for file_name in file_names:
            src = os.path.join(source_folder, file_name)
            dst = os.path.join(destination_folder, file_name)
            with Image.open(src) as img:
                resized = img.resize(image_size)
                resized.save(dst)
        logger.info(f'All Images are resized to {image_size} and saved in {destination_folder}')

    def generate_blur_images(self, kernel_size=(7, 7)) -> None:
        """Write a Gaussian-blurred copy of every readable image in the processed folder.

        Entries that ``cv2.imread`` cannot decode are skipped with a warning.

        Args:
            kernel_size: Kernel ``(width, height)`` passed to
                ``cv2.GaussianBlur``; OpenCV expects odd, positive values.
                Defaults to ``(7, 7)``.
        """
        kernel_size = tuple(kernel_size)
        source_folder = self.blurimage_config.processed_image_folder
        destination_folder = self.blurimage_config.blur_image_folder
        os.makedirs(destination_folder, exist_ok=True)

        logger.info("Generating Blur Images for Model Input")
        logger.info(f'Kernal Size {kernel_size}')
        for img_name in os.listdir(source_folder):
            img_path = os.path.join(source_folder, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # imread returns None instead of raising when it cannot read the file.
                logger.warning(f'Skipping unreadable image file : {img_path}')
                continue
            # Sigma 0 lets OpenCV derive the standard deviation from the kernel size.
            blurred_img = cv2.GaussianBlur(img, kernel_size, 0)
            cv2.imwrite(os.path.join(destination_folder, img_name), blurred_img)
        logger.info(f'Generated Blur Images and saved in {destination_folder}')

In [7]:
# Build the per-stage configurations, then run the pipeline end to end:
# download -> resize -> blur.  The previous try/except wrapper only re-raised
# the exception it caught, so it is dropped; errors propagate exactly as before.
config_maneger = ConfigurationManager()
download_config = config_maneger.get_download_data_config()
process_config = config_maneger.get_preprocess_data_config()
blurimage_config = config_maneger.get_blurimage_data_config()
downloadandprocess_data = DataIngestionPreparation(download_config, process_config, blurimage_config)

downloadandprocess_data.download_data()
downloadandprocess_data.preprocess_data()
downloadandprocess_data.generate_blur_images()


[2024-09-23 17:29:46,843:INFO:util_functions:yaml file: ..\config\config.yaml loaded successfully]
[2024-09-23 17:29:46,844:INFO:util_functions:yaml file: ..\params.yaml loaded successfully]
[2024-09-23 17:29:46,848:INFO:util_functions:yaml file: ..\secret\secrets.yaml loaded successfully]
[2024-09-23 17:29:46,850:INFO:util_functions:Created directory at : ../artifacts]
[2024-09-23 17:29:46,851:INFO:1850321610:getting download data configuration]
[2024-09-23 17:29:46,852:INFO:1850321610:Creating dataset and orignal_image folder Inside artifacts folder]
[2024-09-23 17:29:46,854:INFO:util_functions:Created directory at : ../artifacts/dataset]
[2024-09-23 17:29:46,855:INFO:util_functions:Created directory at : ../artifacts/dataset/orignal_images]
[2024-09-23 17:29:46,855:INFO:1850321610:getting download data configuration]
[2024-09-23 17:29:46,856:INFO:1850321610:Creating Data Directory Folder]
[2024-09-23 17:29:46,858:INFO:util_functions:Created directory at : ../artifacts/dataset/proces

Downloading...
From: https://drive.google.com/uc?id=1WHXYajr9DQi6O3GAER_dn1Nrtgy2sj7u
To: d:\SONU\folder c\Desktop\Portfolio Github Projects\Deblur-Image-autoencoder\artifacts\dataset\orignal_images\SFHQ_pt2_00009960.jpg
100%|██████████| 138k/138k [00:00<00:00, 586kB/s]
Downloading...
From: https://drive.google.com/uc?id=1STCQQbXJjZ1D2NX1zhNzj_Tar66aEDQT
To: d:\SONU\folder c\Desktop\Portfolio Github Projects\Deblur-Image-autoencoder\artifacts\dataset\orignal_images\SFHQ_pt2_00009961.jpg
100%|██████████| 130k/130k [00:00<00:00, 792kB/s]
Downloading...
From: https://drive.google.com/uc?id=1YEShGHUDcDU2aym7MdAzQ_LGLdQM3F4r
To: d:\SONU\folder c\Desktop\Portfolio Github Projects\Deblur-Image-autoencoder\artifacts\dataset\orignal_images\SFHQ_pt2_00009963.jpg
100%|██████████| 130k/130k [00:00<00:00, 640kB/s]
Downloading...
From: https://drive.google.com/uc?id=1e9I1ifTLkCbWlTRjxIrn6yZkJALPStPp
To: d:\SONU\folder c\Desktop\Portfolio Github Projects\Deblur-Image-autoencoder\artifacts\dataset\ori

[2024-09-23 17:30:07,837:INFO:4197627254:**** Images Downloaded Ready for Preprocess ****]
[2024-09-23 17:30:07,837:INFO:4197627254:Resizing the Images to (128, 128)]
[2024-09-23 17:30:07,949:INFO:4197627254:All Images are resized to (128, 128) and saved in ../artifacts/dataset/processed_images]
[2024-09-23 17:30:07,950:INFO:4197627254:Generating Blur Images for Model Input]
[2024-09-23 17:30:07,951:INFO:4197627254:Kernal Size (7, 7)]
[2024-09-23 17:30:07,953:INFO:4197627254:Generated Blur Images and saved in ../artifacts/dataset/blur_images]



