# Import Libraries

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd

import cv2
import matplotlib
import matplotlib.pylab as plt
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import shuffle
#   DataGenerator to read images and rescale images
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#   count each class samples
from collections import Counter
import random
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score 
from sklearn.metrics import accuracy_score, classification_report
 
# Different layers
from keras.models import Sequential
from keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import ReLU
from tensorflow.keras.layers import Softmax
from keras.initializers import GlorotUniformV2




# Data Augmentation and Data Loading 

In [None]:
# Root folder of the image dataset; it is handed to flow_from_directory below.
# Set this path before running the notebook.
WORKING_DIRECTORY = "/kaggle/input/kidney-diseases-recognition/dataset"

# Class names, looked up by label index when titling the sample images.
CLASSES = ["Cyst", "Normal", "Stone", "Tumor"]

In [None]:
# Augmentation applied on the fly while images are read from disk:
# pixel values are multiplied by 1/255, images may be flipped horizontally
# and shifted horizontally by up to 20% of their width.
datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    horizontal_flip=True,
    width_shift_range=0.2,
)

# Read images from WORKING_DIRECTORY, resized to 224x224, with one-hot labels
# ("categorical" is the default class_mode; it is spelled out because the
# later cells rely on one-hot labels). The seed makes the shuffling and the
# random transformations repeatable across runs.
train_dataset = datagen.flow_from_directory(
    WORKING_DIRECTORY,
    target_size=(224, 224),
    batch_size=200,
    class_mode="categorical",
    shuffle=True,
    seed=42,
)

# Pull one batch out of the generator and keep it as in-memory arrays.
# The builtin next() is used instead of the iterator's .next() method, which
# is not available on the iterator shipped with newer Keras releases.
# NOTE(review): only this single batch (at most 200 images) is used as the
# whole dataset by the rest of the notebook -- confirm that this is intended.
X, y = next(train_dataset)


In [None]:
# Record how many images were loaded, then report the array shapes.
samples_before = X.shape[0]
for caption, array in (("Images shape :\t", X), ("Labels shape :\t", y)):
    print(caption, array.shape)

# Balancing Class Data 


In [None]:
# Balance the classes with SMOTE.
# fit_resample is given one flat feature row per image, so the images are
# flattened first and restored to their original shape afterwards. The image
# shape is taken from the data itself rather than hard-coded, so it stays in
# sync with the target_size used when loading.
# NOTE(review): resampling happens before the train/test split further down,
# so synthetic samples derived from test images can end up in the training
# set (and vice versa) -- consider resampling the training portion only.
image_shape = X.shape[1:]
X = X.reshape(len(X), -1)
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)
X = X.reshape(-1, *image_shape)

print(X.shape)
print(y.shape)


In [None]:
# Count samples per class: each one-hot label row is reduced to its class index first.
class_ids = np.argmax(y, axis=1)
samples_per_class = sorted(Counter(class_ids).items())
print("Number of samples in each class:\t", samples_per_class)
# Mapping from class name to label index, as exposed by the data iterator.
print("Classes Names according to index:\t", train_dataset.class_indices)

# Visualization Data

In [None]:
# Show a 4x4 grid of randomly picked images, each titled with its class name.
rows, columns = 4, 4
fig = plt.figure(figsize=(10, 8))

for position in range(1, rows * columns + 1):
    axis = fig.add_subplot(rows, columns, position)
    num = random.randint(0, len(X) - 1)  # randint is inclusive on both ends
    axis.imshow(X[num])
    axis.axis('off')
    # The label row is one-hot; its argmax is the index into CLASSES.
    axis.set_title(CLASSES[np.argmax(y[num])], fontsize=8)
plt.axis('off')
plt.show()

#  Splitting Data 

In [None]:
# Hold out 30% of the samples as the test set; the remaining 70% form the
# training set. random_state is fixed so the split is identical on every run.
X_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Only two subsets are produced here; the CNN carves its own validation
# share out of the training data when it is fitted.
print("Number of samples after splitting into Training & Test set\n")
print("Train     \t", sorted(Counter(np.argmax(y_train, axis=1)).items()))
print("Test      \t", sorted(Counter(np.argmax(y_test, axis=1)).items()))

# Models

* **RandomForest**

In [None]:
# Random Forest baseline trained directly on flattened pixel values.
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
x_test_flattened = x_test.reshape(x_test.shape[0], -1)

# The labels are one-hot encoded. Passing a 2-D indicator matrix to the forest
# makes scikit-learn treat the task as multi-output, and accuracy_score then
# scores exact row matches only. Converting to class indices gives an ordinary
# multiclass problem whose accuracy is comparable with the CNN's.
y_train_labels = np.argmax(y_train, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

rf_classifier = RandomForestClassifier(n_estimators=25, random_state=42)
rf_classifier.fit(X_train_flattened, y_train_labels)
y_pred = rf_classifier.predict(x_test_flattened)

accuracy = accuracy_score(y_test_labels, y_pred)
print(f"Accuracy: {accuracy:.2f}")


* **CNN**

In [None]:
# Plain CNN classifier: five stages of (5x5 convolution -> ReLU -> 2x2 average
# pooling) with 16, 32, 64, 128 and 256 filters, followed by a 256-unit dense
# layer and a 4-way softmax output.
init = GlorotUniformV2
model = Sequential()
model.add(Input(shape=(224, 224, 3)))

# Convolutional stages
for n_filters in (16, 32, 64, 128, 256):
    model.add(Convolution2D(n_filters, 5, kernel_initializer=init))
    model.add(ReLU())
    model.add(AveragePooling2D(pool_size=(2, 2)))
model.add(Dropout(0.01))

# Classification head
model.add(Flatten())
model.add(Dense(256, kernel_initializer=init))
model.add(ReLU())
model.add(Dropout(0.03))
model.add(Dense(4, kernel_initializer=init))
model.add(Softmax())

model.summary()

In [None]:
# Compile with categorical cross-entropy and the Adam optimizer, then train
# for 20 epochs in batches of 16, holding out 10% of the training data for
# validation.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=20,
    validation_split=0.1,
)

In [None]:
# Score the CNN on the held-out test set.
# evaluate() yields the loss followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.2f}", f"Test Accuracy: {accuracy:.2f}", sep="\n")

* **CNN&RF**

In [None]:
# CNN used as a feature extractor: same convolutional layout as the classifier
# (five 5x5 convolution -> ReLU -> 2x2 average pooling stages), ending in a
# 256-unit dense layer instead of a softmax output.
init = GlorotUniformV2
input_shape = (224, 224, 3)

# (filters, convolution layer name, pooling layer name) for every stage.
# NOTE(review): 'layer_43' breaks the numbering and looks like a typo for
# 'layer_3'; the name is kept unchanged here.
conv_stages = [
    (16, 'layer_1', 'layer_2'),
    (32, 'layer_43', 'layer_4'),
    (64, 'layer_5', 'layer_6'),
    (128, 'layer_7', 'layer_8'),
    (256, 'layer_9', 'layer_10'),
]

# Build the layer graph
img_imput = Input(shape=input_shape, name='img_imput')
x = img_imput
for n_filters, conv_name, pool_name in conv_stages:
    x = Convolution2D(n_filters, 5, kernel_initializer=init, name=conv_name)(x)
    x = ReLU()(x)
    x = AveragePooling2D(pool_size=(2, 2), name=pool_name)(x)
x = Dropout(0.25, name='layer_11')(x)

# Dense feature head
x = Flatten(name='fc_1')(x)
x = Dense(256, kernel_initializer=init, name='layer_12')(x)
x = ReLU()(x)
x = Dropout(0.25)(x)

print('feature:', x)

In [None]:
# Wrap the layer graph into a model whose output is the dense feature tensor `x`.
# NOTE(review): in the cells shown here this network is never compiled or
# fitted before predict() is called on it, so the extracted features would come
# from randomly initialised weights -- confirm whether that is intended.
# NOTE(review): this rebinds `model`, replacing the CNN classifier trained above.
model = Model(inputs=img_imput, outputs=x, name='multi_classification')
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

In [None]:
# Run the train and test images through the CNN and keep one flat feature row
# per image (train first, then test).
featuresTrain, featuresVal = (
    model.predict(images).reshape(len(images), -1)
    for images in (X_train, x_test)
)

In [None]:
# Random Forest trained on the CNN-extracted features.
# The labels are one-hot encoded. Passing a 2-D indicator matrix to the forest
# makes scikit-learn treat the task as multi-output, and accuracy_score then
# scores exact row matches only. Converting to class indices gives an ordinary
# multiclass problem whose accuracy is comparable with the CNN's.
y_train_labels = np.argmax(y_train, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(featuresTrain, y_train_labels)
y_pred_rf = rf_classifier.predict(featuresVal)

accuracy_rf = accuracy_score(y_test_labels, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")

# Performance Comparison of CNN, CNN+RF, RF 

The accuracy results indicate that the **CNN** model performed best, with an accuracy of **0.85**, highlighting its effectiveness in image classification tasks. The **CNN+Random Forest** hybrid approach achieved a noticeably lower accuracy of **0.67**; it combines deep feature learning (CNN) with ensemble-based decision-making (Random Forest), and although it did not match the end-to-end CNN here, it still outperformed the plain Random Forest. The **Random Forest** model trained on raw pixels lagged behind with an accuracy of **0.61**, suggesting it may not be the most suitable choice for this particular classification task.
The CNN model demonstrated superior performance, while the hybrid approach showed promise.