# Spliting the DataSets According to the model

In [3]:
import numpy as np
import pandas as pd
import keras
from numpy import random
import tensorflow as tf
import tensorflow_datasets as tfds
from matplotlib import pyplot as plt
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D
from keras.layers import MaxPooling2D, BatchNormalization
from keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from keras.applications import MobileNet, VGG16, ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.metrics import classification_report ,confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models
import seaborn as sns
import os

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import os
import shutil
import random

In [7]:
os.chdir('../')

In [10]:
%pwd

'd:\\coding\\MLProject\\KindneyDiseaseClassification'

In [13]:
original_data_dir = 'artifacts/data_ingestion/content/data/CT KIDNEY DATASET Normal, CYST, TUMOR and STONE'
output_base_dir = 'artifacts/data_ingestion/kidney_dataset_split'

In [11]:
# Set up output directories
train_val_dir = os.path.join(output_base_dir, 'train_val')
test_dir = os.path.join(output_base_dir, 'test')
val_dir = os.path.join(output_base_dir, 'val')  # Validation directory
os.makedirs(train_val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

In [12]:
# Desired counts for each dataset
num_train = 800
num_val = 220
num_test = 275

In [14]:
# Process each class folder
for class_folder in os.listdir(original_data_dir):
    class_path = os.path.join(original_data_dir, class_folder)
    if os.path.isdir(class_path):
        class_train_val_dir = os.path.join(train_val_dir, class_folder)
        class_test_dir = os.path.join(test_dir, class_folder)
        class_val_dir = os.path.join(val_dir, class_folder)
        
        # Ensure class folders exist
        os.makedirs(class_train_val_dir, exist_ok=True)
        os.makedirs(class_test_dir, exist_ok=True)
        os.makedirs(class_val_dir, exist_ok=True)

        # List images in the class folder
        images = [f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
        print(f"Found {len(images)} images in '{class_folder}': {images[:5]}...")  # Debug statement (show first 5)

        # Check if there are enough images
        total_needed = num_train + num_val + num_test
        if len(images) < total_needed:
            print(f"Not enough images in '{class_folder}'. Available: {len(images)}. Required: {total_needed}.")
            continue

        random.shuffle(images)  # Shuffle images to randomize selection

        # Select images for each dataset
        train_images = images[:num_train]
        val_images = images[num_train:num_train + num_val]
        test_images = images[num_train + num_val:num_train + num_val + num_test]

        # Move test images
        for img in test_images:
            src = os.path.join(class_path, img)
            dst = os.path.join(class_test_dir, img)
            try:
                shutil.move(src, dst)  # Move the test images
                print(f"Moved {src} to {dst}")
            except Exception as e:
                print(f"Error moving {src} to {dst}: {e}")

        # Move validation images
        for img in val_images:
            src = os.path.join(class_path, img)
            dst = os.path.join(class_val_dir, img)
            try:
                shutil.move(src, dst)  # Move the validation images
                print(f"Moved {src} to {dst}")
            except Exception as e:
                print(f"Error moving {src} to {dst}: {e}")

        # Move training images
        for img in train_images:
            src = os.path.join(class_path, img)
            dst = os.path.join(class_train_val_dir, img)
            try:
                shutil.move(src, dst)  # Move the training images
                print(f"Moved {src} to {dst}")
            except Exception as e:
                print(f"Error moving {src} to {dst}: {e}")

print("Data split into train, validation, and test folders successfully.")

Found 3709 images in 'CYST': ['Cyst- (1).jpg', 'Cyst- (10).jpg', 'Cyst- (100).jpg', 'Cyst- (1000).jpg', 'Cyst- (1001).jpg']...
Moved artifacts/data_ingestion/content/data/CT KIDNEY DATASET Normal, CYST, TUMOR and STONE\CYST\Cyst- (2343).jpg to artifacts/data_ingestion/kidney_dataset_split\test\CYST\Cyst- (2343).jpg
Moved artifacts/data_ingestion/content/data/CT KIDNEY DATASET Normal, CYST, TUMOR and STONE\CYST\Cyst- (213).jpg to artifacts/data_ingestion/kidney_dataset_split\test\CYST\Cyst- (213).jpg
Moved artifacts/data_ingestion/content/data/CT KIDNEY DATASET Normal, CYST, TUMOR and STONE\CYST\Cyst- (215).jpg to artifacts/data_ingestion/kidney_dataset_split\test\CYST\Cyst- (215).jpg
Moved artifacts/data_ingestion/content/data/CT KIDNEY DATASET Normal, CYST, TUMOR and STONE\CYST\Cyst- (939).jpg to artifacts/data_ingestion/kidney_dataset_split\test\CYST\Cyst- (939).jpg
Moved artifacts/data_ingestion/content/data/CT KIDNEY DATASET Normal, CYST, TUMOR and STONE\CYST\Cyst- (2574).jpg to ar

In [15]:
import os

# Define the base directory for the split dataset
base_dir = 'artifacts/data_ingestion/kidney_dataset_split'

# Function to count images in class subfolders within the main directories
def count_images_in_class_folders(main_directory):
    for class_folder in os.listdir(main_directory):
        class_path = os.path.join(main_directory, class_folder)
        if os.path.isdir(class_path):
            # Count image files in each class subfolder
            image_files = [
                f for f in os.listdir(class_path)
                if os.path.isfile(os.path.join(class_path, f)) and f.lower().endswith(('.png', '.jpg', '.jpeg'))
            ]
            print(f"Folder '{class_folder}' contains {len(image_files)} images.")

# Check image counts in train, validation, and test folders
print("Train Folder Image Counts:")
count_images_in_class_folders(os.path.join(base_dir, 'train_val'))

print("\nValidation Folder Image Counts:")
count_images_in_class_folders(os.path.join(base_dir, 'val'))

print("\nTest Folder Image Counts:")
count_images_in_class_folders(os.path.join(base_dir, 'test'))

# Check names for duplication
# Define the base directories for train/validation and test datasets
train_val_dir = os.path.join(base_dir, 'train_val')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Function to get image names from a directory
def get_image_names(directory):
    image_names = set()  # Use a set to avoid duplicates within the same directory
    for class_folder in os.listdir(directory):
        class_path = os.path.join(directory, class_folder)
        if os.path.isdir(class_path):
            # Add image names to the set
            image_names.update(f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f)))
    return image_names

# Get image names from train/validation, validation, and test folders
train_val_images = get_image_names(train_val_dir)
val_images = get_image_names(val_dir)
test_images = get_image_names(test_dir)

# Check for duplicates between train/validation and test, and between train/validation and validation
duplicates_train_val_test = train_val_images.intersection(test_images)
duplicates_train_val_val = train_val_images.intersection(val_images)
duplicates_val_test = val_images.intersection(test_images)

# Output results
if duplicates_train_val_test:
    print("Duplicate image names found between train/validation and test folders:")
    for img in duplicates_train_val_test:
        print(img)
else:
    print("\nNo duplicate image names found between train/validation and test folders.")

if duplicates_train_val_val:
    print("Duplicate image names found between train/validation and validation folders:")
    for img in duplicates_train_val_val:
        print(img)
else:
    print("\nNo duplicate image names found between train/validation and validation folders.")

if duplicates_val_test:
    print("Duplicate image names found between validation and test folders:")
    for img in duplicates_val_test:
        print(img)
else:
    print("\nNo duplicate image names found between validation and test folders.")

Train Folder Image Counts:
Folder 'CYST' contains 800 images.
Folder 'NORMAL' contains 800 images.
Folder 'STONE' contains 800 images.
Folder 'TUMOR' contains 800 images.

Validation Folder Image Counts:
Folder 'CYST' contains 220 images.
Folder 'NORMAL' contains 220 images.
Folder 'STONE' contains 220 images.
Folder 'TUMOR' contains 220 images.

Test Folder Image Counts:
Folder 'CYST' contains 275 images.
Folder 'NORMAL' contains 275 images.
Folder 'STONE' contains 275 images.
Folder 'TUMOR' contains 275 images.

No duplicate image names found between train/validation and test folders.

No duplicate image names found between train/validation and validation folders.

No duplicate image names found between validation and test folders.


In [17]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, regularizers, Sequential
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report


base_dir = 'artifacts/data_ingestion/kidney_dataset_split'
train_dir = os.path.join(base_dir, 'train_val')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

train_datagen = ImageDataGenerator(
    rescale=1./255,                 # Normalize pixel values to [0, 1]
    rotation_range=20,               # Small rotation, as large rotations might not be realistic
    width_shift_range=0.2,          # Slight horizontal shift (10% of width)
    height_shift_range=0.2,         # Slight vertical shift (10% of height)
    zoom_range=0.1,                 # Small zoom range, as excessive zoom could distort anatomical features
    fill_mode='nearest'             # Filling missing pixels from shifts
)

# Define the validation and test data generators (without augmentation)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Example of how to create the generators
train_generator = train_datagen.flow_from_directory(
    train_val_dir,                  # Directory for training data
    target_size=(244, 244),         # Resize images to this size
    batch_size=16,                  # Number of images to be yielded from the generator per iteration
    class_mode='categorical'        # Type of label arrays that are returned: categorical
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(244, 244),
    batch_size=16,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(244, 244),
    batch_size=16,
    class_mode='categorical',
    shuffle = False
)

Found 3200 images belonging to 4 classes.
Found 880 images belonging to 4 classes.
Found 1100 images belonging to 4 classes.


# Generate the base model

In [23]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2


model = models.Sequential()

# Input layer
model.add(layers.Input(shape=(224, 224, 3)))

# Block 1
model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.5))

# Block 2
model.add(layers.Conv2D(128, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(128, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.5))

# Block 3
model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.5))

# Block 4
model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.5))

# Block 5
model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.5))

model.add(layers.Flatten())

# Dense Layers
model.add(layers.Dense(64, activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='relu', kernel_regularizer=l2(0.02)))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(4, activation='softmax'))  # Adjust output for 4 classes

# Model Summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_13 (Conv2D)          (None, 224, 224, 64)      1792      
                                                                 
 batch_normalization_15 (Bat  (None, 224, 224, 64)     256       
 chNormalization)                                                
                                                                 
 conv2d_14 (Conv2D)          (None, 224, 224, 64)      36928     
                                                                 
 batch_normalization_16 (Bat  (None, 224, 224, 64)     256       
 chNormalization)                                                
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 112, 112, 64)     0         
 2D)                                                             
                                                      

In [None]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer='SGD', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])  # Only include 'accuracy' here

# Checkpoint to save the best model
checkpoint_path = "best_model.keras"
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=False,   # Save the full model (architecture + weights)
    monitor='val_accuracy',    # Monitor the validation accuracy
    save_best_only=True,       # Save only when the validation accuracy improves
    verbose=1,
    initial_value_threshold=0.85
)

# Define other callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

# Fit the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=100,
    callbacks=[early_stopping, reduce_lr, checkpoint_callback]
)

# Test the model

In [1]:
import os
os.chdir("../")

In [2]:
!python --version

Python 3.10.12


In [3]:
%pwd

'd:\\coding\\MLProject\\KindneyDiseaseClassification'

In [4]:
import numpy as np

# Print the version of NumPy
print(f"NumPy version: {np.__version__}")

NumPy version: 1.26.4


In [11]:
from tensorflow.keras.preprocessing import image
import numpy as np
import tensorflow as tf

# Check TensorFlow and Keras versions
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tf.keras.__version__}")

# Load the model from the .h5 file
model_path = 'model/test/model.h5'
loaded_model = tf.keras.models.load_model(model_path)

# Display the model summary to verify it has been loaded correctly
# loaded_model.summary()

# Example prediction for a single image
# Replace 'path_to_your_image.jpg' with the path to your actual image
img_path = 'artifacts/data_ingestion/content/data/CT KIDNEY DATASET Normal, CYST, TUMOR and STONE/TUMOR/Tumor- (15).jpg'
img = image.load_img(img_path, target_size=(224, 224))
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)
img_array = img_array / 255.0  # Normalize the image

# Make a prediction
predictions = model.predict(img_array)
predicted_class = np.argmax(predictions, axis=1)

# Map the predicted class index to the class label
class_labels = {0: 'CYST', 1: 'NORMAL', 2: 'STONE', 3: 'TUMOR'}
predicted_label = class_labels[predicted_class[0]]

print(f"Predicted class: {predicted_label}")

TensorFlow version: 2.17.0
Keras version: 3.4.1




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Predicted class: TUMOR
