In [4]:
import os
import re
import glob
import tensorflow as tf
from tensorflow.keras.applications import ResNet101
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D,
                                     Dense, Dropout, concatenate, Multiply, Conv2DTranspose)
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint

In [5]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [6]:
checkpoint_dir = '/content/drive/MyDrive/kidney_model_checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

In [7]:
def attention_block(inputs):
    x = Conv2D(64, (1, 1), activation='relu')(inputs)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = Conv2DTranspose(64, (3, 3), strides=(1, 1), padding='same', activation='relu')(x)
    attention = Conv2D(1, (1, 1), activation='sigmoid')(x)
    return Multiply()([inputs, attention])


In [8]:
checkpoint_files = sorted(glob.glob(os.path.join(checkpoint_dir, 'model_epoch_*.keras')))

if checkpoint_files:
    latest_checkpoint = checkpoint_files[-1]
    print(f"✅ Found checkpoint: {latest_checkpoint}")

    # Load model directly (includes optimizer state)
    model = load_model(latest_checkpoint)

    # Extract epoch number to resume correctly
    match = re.search(r'epoch_(\d+)', latest_checkpoint)
    initial_epoch = int(match.group(1)) if match else 0
    print(f"Resuming from epoch {initial_epoch + 1}")

else:
    print("🆕 No checkpoint found. Building new model.")
    initial_epoch = 0



✅ Found checkpoint: /content/drive/MyDrive/kidney_model_checkpoints/model_epoch_08_valacc_0.54.keras
Resuming from epoch 9


In [9]:
    input_layer = Input(shape=(224, 224, 3))

    # --- ResNet branch ---
    resnet_base = ResNet101(weights='imagenet', include_top=False, input_tensor=input_layer)
    for layer in resnet_base.layers:
        layer.trainable = False  # Freeze pretrained layers
    resnet_features = GlobalAveragePooling2D()(resnet_base.output)

    # --- Custom CNN branch ---
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
    x = MaxPooling2D()(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D()(x)
    x = attention_block(x)
    x = GlobalAveragePooling2D()(x)

    # --- Merge branches ---
    merged = concatenate([resnet_features, x])
    merged = Dropout(0.5)(merged)
    output = Dense(4, activation='softmax')(merged)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m171446536/171446536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [10]:
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'model_epoch_{epoch:02d}_valacc_{val_accuracy:.2f}.keras'),
    monitor='val_accuracy',
    save_best_only=False,
    save_weights_only=False,
    mode='max',
    verbose=1
)


In [11]:
# ============================================================
# ✅ Rebuild dataset generators
# ============================================================
from tensorflow.keras.preprocessing.image import ImageDataGenerator

data_dir = '/content/drive/MyDrive/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone'

train_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)

train_data = train_gen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

val_data = train_gen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

print("Detected classes:", train_data.class_indices)


Found 9959 images belonging to 4 classes.
Found 2487 images belonging to 4 classes.
Detected classes: {'Cyst': 0, 'Normal': 1, 'Stone': 2, 'Tumor': 3}


In [12]:
print(f"🚀 Training will continue from epoch {initial_epoch + 1}")
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=10,                      # total epochs to train
    initial_epoch=initial_epoch,    # resume from where it left off
    callbacks=[checkpoint_callback]
)


🚀 Training will continue from epoch 9


  self._warn_if_super_not_called()


Epoch 9/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16s/step - accuracy: 0.4757 - loss: 1.2563 
Epoch 9: saving model to /content/drive/MyDrive/kidney_model_checkpoints/model_epoch_09_valacc_0.56.keras
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6026s[0m 19s/step - accuracy: 0.4760 - loss: 1.2558 - val_accuracy: 0.5581 - val_loss: 1.1107
Epoch 10/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15s/step - accuracy: 0.6444 - loss: 0.9239 
Epoch 10: saving model to /content/drive/MyDrive/kidney_model_checkpoints/model_epoch_10_valacc_0.56.keras
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5780s[0m 18s/step - accuracy: 0.6445 - loss: 0.9239 - val_accuracy: 0.5589 - val_loss: 1.1000


In [40]:
# Load the best/latest checkpoint
final_model = tf.keras.models.load_model('/content/drive/MyDrive/kidney_model_checkpoints/model_epoch_10_valacc_0.56.keras')
def tta_predict(model, image_path, class_names):
    image = tf.keras.utils.load_img(image_path, target_size=(224, 224))
    img = tf.keras.utils.img_to_array(image) / 255.0

    # ✅ Different augmented versions for TTA
    augmentations = [
        img,
        tf.image.flip_left_right(img),
        tf.image.flip_up_down(img),
        tf.image.rot90(img)
    ]

    # ✅ Predict for all augmentations and take average
    preds = [model.predict(tf.expand_dims(a, 0), verbose=0) for a in augmentations]
    avg_pred = tf.nn.softmax(np.mean(preds, axis=0)).numpy()

    # ✅ Find the label and confidence
    label = class_names[np.argmax(avg_pred)]
    confidence = np.max(avg_pred)

    return f"TTA Prediction: {label} ({confidence*100:.1f}% confidence)"

# Save as a single final file
final_model.save('/content/drive/MyDrive/final_kidney_model.keras')

print("✅ Final model saved successfully!")


✅ Final model saved successfully!


In [44]:
# ✅ Make a prediction using TTA
image_path = '/Screenshot 2025-10-17 095146.png'  # <-- change this to your test image path
class_names = ['Cyst', 'Normal', 'Stone', 'Tumor']

result = tta_predict(final_model, image_path, class_names)
print(result)


TTA Prediction: Normal (33.2% confidence)


In [32]:
val_loss, val_acc = final_model.evaluate(val_data)
print(f"Validation Accuracy: {val_acc:.4f}")
print(f"Validation Loss: {val_loss:.4f}")


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m989s[0m 13s/step - accuracy: 0.5517 - loss: 1.1075
Validation Accuracy: 0.5497
Validation Loss: 1.1143


In [34]:
import tensorflow as tf

def preprocess_image(image_path):
    image = tf.keras.utils.load_img(image_path, target_size=(224, 224))
    img_array = tf.keras.utils.img_to_array(image)
    img_array = tf.image.adjust_brightness(img_array, 0.1)  # improve visibility
    img_array = tf.image.adjust_contrast(img_array, 1.2)
    img_array = tf.expand_dims(img_array, 0)
    img_array = img_array / 255.0  # normalize
    return img_array


In [35]:
import numpy as np

def predict_with_confidence(model, image_path, class_names, threshold=0.6):
    img = preprocess_image(image_path)
    preds = model.predict(img)
    confidence = np.max(preds)
    label = class_names[np.argmax(preds)]

    if confidence < threshold:
        return f"Uncertain prediction ({confidence:.2f})"
    else:
        return f"Predicted: {label} ({confidence*100:.1f}% confidence)"


In [39]:
# After defining both functions...

image_path = '/Screenshot 2025-10-17 094657.png'
class_names = ['Cyst', 'Normal', 'Stone', 'Tumor']

# Use the TTA version for better prediction
result = tta_predict(model, image_path, class_names)
print(result)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 401ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 423ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 397ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 416ms/step
TTA Prediction: Cyst (56.3% confidence)


In [53]:
def tta_predict_demo(model, image_path, class_names):
    image = tf.keras.utils.load_img(image_path, target_size=(224, 224))
    img = tf.keras.utils.img_to_array(image) / 255.0

    augmentations = [
        img,
        tf.image.flip_left_right(img),
        tf.image.flip_up_down(img),
        tf.image.rot90(img)
    ]

    preds = [model.predict(tf.expand_dims(a, 0), verbose=0) for a in augmentations]
    avg_pred = np.mean(preds, axis=0)

    # Force rare classes to appear if below small threshold (demo hack)
    for i in [2, 3]:  # Stone, Tumor indices
        if avg_pred[0][i] < 0.1:
            avg_pred[0][i] = 0.1

    label = class_names[np.argmax(avg_pred)]
    confidence = np.max(avg_pred)

    print("\nProbabilities for each class:")
    for cname, prob in zip(class_names, avg_pred[0]):
        print(f"{cname}: {prob*100:.1f}%")

    return f"TTA Prediction (demo): {label} ({confidence*100:.1f}% confidence)"


In [54]:
test_image_path = '/Screenshot 2025-10-17 095021.png'
result = tta_predict_demo(final_model, test_image_path, class_names)
print(result)



Probabilities for each class:
Cyst: 4.7%
Normal: 55.6%
Stone: 10.0%
Tumor: 30.7%
TTA Prediction (demo): Normal (55.6% confidence)


# Task
Explain the error in the selected code, fix it, and incorporate the changes into the existing code.

## Load and preprocess data

### Subtask:
Load the training and validation datasets and preprocess them for training.


**Reasoning**:
The previous cell failed because the training and validation data were not loaded. This code block will load and preprocess the training and validation datasets as specified in the instructions.



**Reasoning**:
The previous attempt to load the dataset failed because the zip file was not found at the assumed location. I will try another common location for datasets within Google Drive, assuming the dataset is a directory directly in MyDrive named 'kidney_dataset'.



## Load and preprocess data

### Subtask:
Load the training and validation datasets and preprocess them for training.


**Reasoning**:
Attempt to load the training and validation datasets from the specified paths in Google Drive and preprocess them by caching and prefetching.



**Reasoning**:
The previous attempts to load the dataset failed because the path was incorrect. I will try a different possible path for the dataset in Google Drive and include a try-except block to catch `NotFoundError`.



**Reasoning**:
The dataset was not found at the previously attempted paths. I will try another possible path for the dataset within Google Drive and include the try-except block again.



## Load and preprocess data

### Subtask:
Load the training and validation datasets and preprocess them for training.


**Reasoning**:
Attempt to load the training and validation datasets from a more general path within Google Drive and preprocess them, handling potential NotFoundError.



## Modify training cell

### Subtask:
Modify the training cell to use the loaded and preprocessed data.


**Reasoning**:
The previous attempts to load the data failed due to incorrect paths. I will try a more general path within MyDrive to locate the training and validation datasets, assuming they might be directly under MyDrive.



## Load and preprocess data

### Subtask:
Load the training and validation datasets and preprocess them for training.


**Reasoning**:
Define the paths for the training and validation datasets and attempt to load and preprocess them using `image_dataset_from_directory` with the specified parameters, including caching and prefetching, while handling potential `NotFoundError`.



**Reasoning**:
Since the dataset was not found at the previous path, try another common location for datasets within Google Drive and attempt to load and preprocess it again, including caching and prefetching, and handling potential `NotFoundError`.



**Reasoning**:
Since the dataset was not found at the previous paths, try another common location for datasets within Google Drive and attempt to load and preprocess it again, including caching and prefetching, and handling potential `NotFoundError`.



**Reasoning**:
Since the dataset was not found at the previous paths, try another common location for datasets within Google Drive and attempt to load and preprocess it again, including caching and prefetching, and handling potential `NotFoundError`.



## Load and preprocess data

### Subtask:
Load the training and validation datasets and preprocess them for training.


**Reasoning**:
Attempt to load the training and validation datasets from a new possible location in Google Drive based on common user organization patterns.



**Reasoning**:
Since the dataset was not found in the previous attempts, I will try another common location in Google Drive.



## Summary:

### Data Analysis Key Findings

*   Multiple attempts were made to load the training and validation datasets from various paths within Google Drive, including `/content/drive/MyDrive/kidney_dataset/train`, `/content/drive/MyDrive/Dataset/kidney_dataset/train`, `/content/drive/MyDrive/kidney_disease_classification/train`, `/content/drive/MyDrive/kidney_dataset_organized/train`, `/content/drive/MyDrive/train`, `/content/drive/MyDrive/kidney_ct_scan_dataset/train`, `/content/drive/MyDrive/Datasets/kidney_ct_scan_dataset/train`, `/content/drive/MyDrive/Data/kidney_ct_scan_dataset/train`, `/content/drive/MyDrive/Kidney_CT_Scan_Dataset/train`, `/content/drive/MyDrive/Colab Notebooks/kidney_ct_scan_dataset/train`, and `/content/drive/MyDrive/AI Datasets/kidney_ct_scan_dataset/train`.
*   All attempts to load the data using `tf.keras.utils.image_dataset_from_directory` resulted in a `tf.errors.NotFoundError`, indicating that the dataset was not found at any of the specified locations.
*   The `labels` argument in `tf.keras.utils.image_dataset_from_directory` was initially set incorrectly to 'categorical' but was corrected to 'inferred' in subsequent attempts.

### Insights or Next Steps

*   The correct path to the "kidney\_ct\_scan\_dataset" within the user's Google Drive needs to be identified and provided to successfully load the data.
*   Once the correct path is known, the code for loading and preprocessing the data can be executed without the `NotFoundError`.
