In [1]:
import os

In [2]:
%pwd

'c:\\Users\\satya\\Documents\\data_science_roadmap\\Deep_Learning\\Project\\Kidney-Disease-Classification-MLFlow-DVC\\research'

In [3]:
# Move from the notebook's research/ folder up to the project root so the
# relative paths used below (config, params, artifacts) resolve correctly.
# Guarded on the current folder name so that re-running this cell does not
# climb one more directory each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\satya\\Documents\\data_science_roadmap\\Deep_Learning\\Project\\Kidney-Disease-Classification-MLFlow-DVC'

In [5]:
#Entity 
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings consumed by the data-transformation stage.

    Instances are built by ConfigurationManager.get_data_transformation_config
    from the YAML config and params files.
    """
    # Output directory of the stage; processed 'Normal' and 'Tumor' images are written beneath it.
    root_dir: Path
    # Source dataset directory (built as <data_ingestion.unzip_dir>/dataset); read per class folder.
    data: Path
    # Destination to which the generated 'Train_Test_Folder' is moved after splitting.
    split: Path
    # Value of params.TRAIN, forwarded as `train=` to python_splitter.split_from_folder.
    train: float
    # Value of params.TEST, forwarded as `test=` to python_splitter.split_from_folder.
    test: float
    
    # train_data_path: Path
    # test_data_path: Path
    
   

In [6]:
#Config Manager
from src.cnnClassifier.constants import *  # Provides the CONFIG_FILE_PATH and PARAMS_FILE_PATH constants used below.

from src.cnnClassifier.utils.common import read_yaml, create_directories  # Shared helpers from utils: read_yaml and create_directories.


In [7]:
class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Path of the config YAML; defaults to CONFIG_FILE_PATH.
            params_filepath: Path of the params YAML; defaults to PARAMS_FILE_PATH.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes beneath the artifacts root, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Assemble the DataTransformationConfig for the transformation stage.

        Reads the `data_transformation` and `data_ingestion` sections of the
        config plus the TRAIN/TEST params, and creates the stage's root
        directory as a side effect.

        Returns:
            DataTransformationConfig: the populated, frozen stage configuration.
        """
        transformation_cfg = self.config.data_transformation
        ingestion_cfg = self.config.data_ingestion
        hyperparams = self.params

        # The images to transform live in the "dataset" folder under the unzip directory.
        dataset_dir = os.path.join(ingestion_cfg.unzip_dir, "dataset")

        stage_root = Path(transformation_cfg.root_dir)
        create_directories([stage_root])

        return DataTransformationConfig(
            root_dir=stage_root,
            data=Path(dataset_dir),
            split=Path(transformation_cfg.split),
            train=hyperparams.TRAIN,
            test=hyperparams.TEST,
        )

In [None]:
import os
import cv2 as cv
import numpy as np
import python_splitter
import shutil
import random
import imgaug.augmenters as iaa

class DataTransformation:
    """Watershed-process the dataset, split it into train/test, and augment tumor training images.

    The stage reads class folders ('Normal', 'Tumor') from `config.data`, writes
    the processed images under `config.root_dir`, splits that output with
    python_splitter, moves the split to `config.split`, and finally augments
    the images in `<split>/train/Tumor`.
    """

    # File extensions treated as images (compared against the lower-cased name).
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
    # Class sub-folders expected in the source dataset and recreated in the output.
    CLASS_NAMES = ('Normal', 'Tumor')
    # Prefix given to generated files; used to recognise earlier augmentations on re-runs.
    AUGMENTED_PREFIX = "aug_"
    # Folder this stage looks for in the current working directory after splitting.
    # NOTE(review): presumably created by python_splitter.split_from_folder - confirm.
    SPLIT_OUTPUT_DIR = 'Train_Test_Folder'

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration and unpack the values used by the methods.

        Args:
            config: Settings for this stage (paths and train/test values).
        """
        self.config = config
        self.data = self.config.data
        self.split = self.config.split
        self.train = self.config.train
        self.test = self.config.test

    @staticmethod
    def watershed(image_path):
        """Run marker-based watershed segmentation on one image and mark the boundaries.

        Args:
            image_path: Path of the image file to read with OpenCV.

        Returns:
            The image with watershed boundary pixels set to [255, 0, 0]
            (blue in OpenCV's BGR channel order), or None if the image could
            not be read.
        """
        img = cv.imread(image_path)
        if img is None:
            print(f"Warning: Unable to read image at {image_path}")
            return None

        gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        # Inverted binary mask with the threshold chosen by Otsu's method.
        _, thresh = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)
        kernel = np.ones((3, 3), np.uint8)
        # Morphological opening removes small specks from the mask.
        opening = cv.morphologyEx(thresh, cv.MORPH_OPEN, kernel, iterations=2)
        # Dilating the mask gives the region treated as sure background.
        sure_bg = cv.dilate(opening, kernel, iterations=3)
        # Sure foreground: pixels whose distance-transform value exceeds 0.1% of the maximum.
        dist_transform = cv.distanceTransform(opening, cv.DIST_L2, 5)
        _, sure_fg = cv.threshold(dist_transform, 0.001 * dist_transform.max(), 255, 0)
        sure_fg = np.uint8(sure_fg)
        # Pixels in the dilated mask but not in the sure foreground are undecided.
        unknown = cv.subtract(sure_bg, sure_fg)
        _, markers = cv.connectedComponents(sure_fg)
        # Shift labels by one so that 0 can be reserved for the undecided region.
        markers = markers + 1
        markers[unknown == 255] = 0
        markers = cv.watershed(img, markers)
        # cv.watershed labels boundary pixels with -1; paint them onto the image.
        img[markers == -1] = [255, 0, 0]
        return img

    @staticmethod
    def augment_and_save_images(folder_path):
        """Write three randomly augmented copies of every original image in a folder.

        Files whose name starts with ``aug_`` are treated as the output of an
        earlier run and are not augmented again, so re-running this method
        overwrites the previous augmentations instead of multiplying them.

        Args:
            folder_path: Directory holding the images; augmented copies are
                saved into the same directory as ``aug_<i>_<original name>``.
        """
        image_files = [
            name for name in os.listdir(folder_path)
            if name.lower().endswith(DataTransformation.IMAGE_EXTENSIONS)
            and not name.startswith(DataTransformation.AUGMENTED_PREFIX)
        ]

        if not image_files:
            print("No images found in the specified folder.")
            return

        print(f"Found {len(image_files)} images. Augmenting...")

        # The pool of candidate augmenters does not depend on the image, so build it once.
        augmenters = [
            iaa.Affine(rotate=(-15, 15)),
            iaa.Fliplr(0.5),
            iaa.Flipud(0.2),
            iaa.Affine(translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}),
            iaa.Affine(shear=(-8, 8)),
            iaa.Affine(scale=(0.9, 1.1)),
            iaa.Multiply((0.8, 1.2)),
            iaa.LinearContrast((0.8, 1.2)),
            iaa.GaussianBlur(sigma=(0, 0.5))
        ]

        for image_file in image_files:
            image_path = os.path.join(folder_path, image_file)
            image = cv.imread(image_path)
            if image is None:
                print(f"Error: Unable to read {image_file}")
                continue

            for i in range(3):  # Create 3 augmented versions
                # Each version applies a random subset of 3 to 5 augmenters.
                selected = random.sample(augmenters, random.randint(3, 5))
                seq = iaa.Sequential(selected)
                augmented = seq.augment_image(image)
                aug_path = os.path.join(
                    folder_path, f"{DataTransformation.AUGMENTED_PREFIX}{i}_{image_file}"
                )
                # cv.imwrite reports failure through its return value rather than raising.
                if cv.imwrite(aug_path, augmented):
                    print(f"Saved: {aug_path}")
                else:
                    print(f"Error: Unable to write {aug_path}")

        print("Augmentation complete.")

    def process_and_save_images(self):
        """Run the whole stage: watershed-process, split, relocate the split, augment.

        Reads images from ``<data>/Normal`` and ``<data>/Tumor``, writes the
        processed versions to the same class folders under ``root_dir``, splits
        ``root_dir`` with python_splitter, places the result at ``split``, and
        augments the images in ``<split>/train/Tumor`` if that folder exists.
        """
        src_dir = str(self.data)
        dest_dir = str(self.config.root_dir)

        # Create destination folders
        for class_name in self.CLASS_NAMES:
            os.makedirs(os.path.join(dest_dir, class_name), exist_ok=True)

        # Process and save original images
        for class_name in self.CLASS_NAMES:
            folder_path = os.path.join(src_dir, class_name)
            output_folder = os.path.join(dest_dir, class_name)

            for filename in os.listdir(folder_path):
                file_path = os.path.join(folder_path, filename)
                if not os.path.isfile(file_path):
                    continue
                if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue

                processed_img = DataTransformation.watershed(file_path)
                if processed_img is None:
                    continue

                output_path = os.path.join(output_folder, filename)
                if cv.imwrite(output_path, processed_img):
                    print(f"Processed and saved: {output_path}")
                else:
                    print(f"Warning: Unable to write image at {output_path}")

        # Perform train-test split
        python_splitter.split_from_folder(dest_dir, train=self.train, test=self.test)

        # Move the split folder if needed
        self._relocate_split(self.SPLIT_OUTPUT_DIR, str(self.split))

        # Augment tumor training images (after split)
        tumor_train_path = os.path.join(str(self.split), 'train', 'Tumor')
        if os.path.exists(tumor_train_path):
            # Static method, so it is called through the class name.
            DataTransformation.augment_and_save_images(tumor_train_path)

    @staticmethod
    def _relocate_split(produced_dir, split_dir):
        """Place the contents of the freshly produced split folder at `split_dir`.

        shutil.move puts the source *inside* the destination when the
        destination is an existing directory, which on a re-run would yield
        ``<split>/Train_Test_Folder/train`` instead of ``<split>/train``.
        When `split_dir` already exists, the children of `produced_dir` are
        therefore moved in one by one, replacing same-named stale entries and
        leaving anything else in `split_dir` untouched.
        """
        if not os.path.exists(produced_dir):
            return

        if not os.path.exists(split_dir):
            shutil.move(produced_dir, split_dir)
            return

        # Nothing to do when the configured destination is the produced folder itself.
        if os.path.samefile(produced_dir, split_dir):
            return

        for entry in os.listdir(produced_dir):
            target = os.path.join(split_dir, entry)
            if os.path.isdir(target) and not os.path.islink(target):
                shutil.rmtree(target)
            elif os.path.lexists(target):
                os.remove(target)
            shutil.move(os.path.join(produced_dir, entry), target)
        os.rmdir(produced_dir)

In [9]:
# Run the data-transformation stage end to end: load the configuration, build
# the stage object, then process, split and augment the images.
# Any exception propagates unchanged; the former `except Exception as e: raise e`
# wrapper only re-raised the same error, so it has been dropped.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
data_transformation.process_and_save_images()  # No parameters needed

[2025-04-04 13:39:57,665: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-04 13:39:57,670: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-04 13:39:57,672: INFO: common: created directory at: artifacts]
[2025-04-04 13:39:57,673: INFO: common: created directory at: artifacts\data_transformation]
Processed and saved: artifacts\data_transformation\Normal\Normal- (2074).jpg
Processed and saved: artifacts\data_transformation\Normal\Normal- (2075).jpg
Processed and saved: artifacts\data_transformation\Normal\Normal- (2076).jpg
Processed and saved: artifacts\data_transformation\Normal\Normal- (2077).jpg
Processed and saved: artifacts\data_transformation\Normal\Normal- (2078).jpg
Processed and saved: artifacts\data_transformation\Normal\Normal- (2079).jpg
Processed and saved: artifacts\data_transformation\Normal\Normal- (2080).jpg
Processed and saved: artifacts\data_transformation\Normal\Normal- (2081).jpg
Processed and saved: artifacts\data_transfo