In [24]:
import kagglehub
# Fetch the most recent published version of the dataset and report
# the local folder it was placed in.
path = kagglehub.dataset_download(
    "adityasinghsengar122/analytica-main-problem"
)
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'analytica-main-problem' dataset.
Path to dataset files: /kaggle/input/analytica-main-problem


In [25]:
import os
# List every folder below the dataset root (file names are not printed).
base_dir = "/kaggle/input/analytica-main-problem"
for root, _subdirs, _filenames in os.walk(base_dir):
    print(root)

/kaggle/input/analytica-main-problem
/kaggle/input/analytica-main-problem/test
/kaggle/input/analytica-main-problem/test/tumors
/kaggle/input/analytica-main-problem/test/cysts
/kaggle/input/analytica-main-problem/test/normal
/kaggle/input/analytica-main-problem/test/stones
/kaggle/input/analytica-main-problem/train
/kaggle/input/analytica-main-problem/train/tumors
/kaggle/input/analytica-main-problem/train/cysts
/kaggle/input/analytica-main-problem/train/normal
/kaggle/input/analytica-main-problem/train/stones


In [26]:
# Input/output paths: raw train/test folders of the dataset, and the
# folders that receive the processed copies.
input_train, input_test = (
    "/kaggle/input/analytica-main-problem/train",
    "/kaggle/input/analytica-main-problem/test",
)
output_train, output_test = (
    "/kaggle/working/processed_data/train",
    "/kaggle/working/processed_data/test",
)


In [29]:
import cv2
import numpy as np
import os
from tqdm import tqdm

# ============================
# 🔹 Step 1: Preprocessing functions
# ============================

def preprocess_ultrasound(img):
    """
    Preprocessing pipeline for ultrasound kidney images.

    Steps, in order: BGR -> grayscale, 3x3 median blur, bilateral filter,
    CLAHE contrast enhancement, min-max scaling.

    Parameters
    ----------
    img : BGR image array (as returned by ``cv2.imread``).

    Returns
    -------
    Single-channel float32 image min-max scaled to the 0-1 range.
    """
    mono = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Speckle suppression: median filter first, then a bilateral filter
    # (diameter 5, sigmaColor 75, sigmaSpace 75) on its result.
    smoothed = cv2.bilateralFilter(cv2.medianBlur(mono, 3), 5, 75, 75)

    # Local contrast enhancement (CLAHE) on 8x8 tiles with clip limit 2.0.
    contrast_op = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    boosted = contrast_op.apply(smoothed)

    # Min-max scale to 0-1 and switch to float32 in one call.
    return cv2.normalize(boosted, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)


def preprocess_ct(img):
    """
    Preprocessing pipeline for CT kidney images.

    Steps, in order: BGR -> grayscale, 3x3 Gaussian blur, global histogram
    equalization, min-max scaling.

    Parameters
    ----------
    img : BGR image array (as returned by ``cv2.imread``).

    Returns
    -------
    Single-channel float32 image min-max scaled to the 0-1 range.
    """
    mono = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Light 3x3 Gaussian smoothing (sigma derived from the kernel size),
    # followed by histogram equalization for consistent contrast.
    flattened = cv2.equalizeHist(cv2.GaussianBlur(mono, (3,3), 0))

    # Min-max scale to 0-1 and switch to float32 in one call.
    return cv2.normalize(flattened, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)


# ============================
# 🔹 Step 2: Simple modality detector (CT vs Ultrasound)
# ============================

def detect_modality(img, var_threshold=500, mean_threshold=100):
    """
    Guess whether an image is an ultrasound or a CT scan.

    Rough heuristic:
    Ultrasound images -> darker backgrounds, higher noise variance
    CT images -> brighter, smoother texture

    Parameters
    ----------
    img : numpy.ndarray
        Image as a 2-D grayscale array, an (H, W, 1) array, or a
        multi-channel BGR array such as ``cv2.imread`` returns.
    var_threshold : float, optional
        Pixel variance that must be exceeded for 'ultrasound'.
    mean_threshold : float, optional
        Mean intensity the image must stay below for 'ultrasound'.

    Returns
    -------
    str
        'ultrasound' when variance > ``var_threshold`` and mean intensity
        < ``mean_threshold``; otherwise 'ct'.
    """
    # cv2.cvtColor(..., COLOR_BGR2GRAY) rejects single-channel input, so only
    # convert when there are colour channels to collapse.
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 1:
        gray = img[:, :, 0]
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    var = np.var(gray)
    mean_intensity = np.mean(gray)

    if var > var_threshold and mean_intensity < mean_threshold:
        return 'ultrasound'
    return 'ct'


# ============================
# 🔹 Step 3: Full dataset preprocessing function
# ============================

def preprocess_dataset(input_dir, output_dir,
                       categories=('cysts', 'normal', 'stones', 'tumors')):
    """
    Applies modality-specific preprocessing to all images in a dataset.

    Folder structure (one sub-folder per category):
        input_dir/
            cysts/
            normal/
            stones/
            tumors/

    The same category sub-folders are created under ``output_dir`` and each
    processed image is written there under its original file name.

    Parameters
    ----------
    input_dir : str
        Root folder containing one sub-folder per category.
    output_dir : str
        Root folder for the processed images; created if missing.
    categories : iterable of str, optional
        Sub-folder names to process. Defaults to
        'cysts', 'normal', 'stones', 'tumors'.

    Returns
    -------
    None
        Progress and a summary of skipped/failed files are printed.

    Raises
    ------
    OSError
        If a category folder under ``input_dir`` cannot be listed.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')
    os.makedirs(output_dir, exist_ok=True)

    unreadable = []   # files cv2.imread could not decode
    unwritten = []    # destinations cv2.imwrite reported as failed

    for category in categories:
        src_path = os.path.join(input_dir, category)
        dst_path = os.path.join(output_dir, category)
        os.makedirs(dst_path, exist_ok=True)

        print(f"\n🔧 Processing category: {category}")

        # Sorted so the processing order does not depend on the file system.
        for file in tqdm(sorted(os.listdir(src_path))):
            if not file.lower().endswith(image_exts):
                continue

            img_path = os.path.join(src_path, file)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports failure by returning None; remember the
                # file so the skip shows up in the final summary.
                unreadable.append(img_path)
                continue

            # Decide modality and preprocess
            if detect_modality(img) == 'ultrasound':
                processed = preprocess_ultrasound(img)
            else:
                processed = preprocess_ct(img)

            # Convert back to 8-bit for saving. Round instead of truncating:
            # float32 error can leave e.g. 254.99997, which a bare cast
            # would turn into 254.
            processed_uint8 = np.clip(np.rint(processed * 255), 0, 255).astype(np.uint8)

            save_path = os.path.join(dst_path, file)
            # cv2.imwrite returns False on failure rather than raising.
            if not cv2.imwrite(save_path, processed_uint8):
                unwritten.append(save_path)

    if unreadable:
        print(f"\nWarning: skipped {len(unreadable)} unreadable image(s), "
              f"first: {unreadable[0]}")

    if unwritten:
        print(f"\nWarning: failed to write {len(unwritten)} image(s), "
              f"first: {unwritten[0]}")
    else:
        print("\n✅ Preprocessing completed successfully!")



In [30]:

# Run the preprocessing over the train split first, then the test split.
for src_dir, dst_dir in ((input_train, output_train), (input_test, output_test)):
    preprocess_dataset(src_dir, dst_dir)



🔧 Processing category: cysts


100%|██████████| 2528/2528 [01:01<00:00, 41.09it/s]



🔧 Processing category: normal


100%|██████████| 4499/4499 [01:37<00:00, 46.26it/s]



🔧 Processing category: stones


100%|██████████| 3708/3708 [01:22<00:00, 45.13it/s]



🔧 Processing category: tumors


100%|██████████| 2353/2353 [00:56<00:00, 41.68it/s]



✅ Preprocessing completed successfully!

🔧 Processing category: cysts


100%|██████████| 496/496 [00:02<00:00, 191.95it/s]



🔧 Processing category: normal


100%|██████████| 861/861 [00:04<00:00, 199.46it/s]



🔧 Processing category: stones


100%|██████████| 715/715 [00:03<00:00, 195.02it/s]



🔧 Processing category: tumors


100%|██████████| 444/444 [00:02<00:00, 185.28it/s]


✅ Preprocessing completed successfully!



