In [None]:
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from PIL import Image
from sklearn.naive_bayes import GaussianNB
import shutil

In [None]:
# Define function to load and preprocess images
def load_and_preprocess_images(image_paths, target_size=(224, 224)):
    """Load images from disk and return them as a 2D feature matrix.

    Every image is converted to 3-channel RGB, resized to ``target_size`` and
    scaled to the [0, 1] range, then flattened to a single row.

    Args:
        image_paths: Iterable of image file paths.
        target_size: ``(width, height)`` every image is resized to.

    Returns:
        numpy.ndarray of shape ``(num_images, width * height * 3)``.
    """
    width, height = target_size
    n_features = width * height * 3
    image_paths = list(image_paths)
    if not image_paths:
        # reshape(0, -1) cannot infer a width for an empty array, so return
        # an explicitly shaped empty matrix instead.
        return np.empty((0, n_features))

    preprocessed_images = []
    for image_path in image_paths:
        # The context manager closes the underlying file once pixels are read.
        with Image.open(image_path) as source_image:
            # Forcing RGB keeps grayscale / RGBA / palette images the same
            # shape as the rest, so the stack below is never ragged.
            resized = source_image.convert('RGB').resize(target_size)
        preprocessed_images.append(np.asarray(resized) / 255.0)  # Normalize pixel values

    # (num_samples, height, width, channels) -> (num_samples, height * width * channels)
    return np.stack(preprocessed_images).reshape(len(preprocessed_images), -1)

In [None]:
def count_images(directory):
    """Count image files under ``directory``, including all sub-directories.

    A file counts as an image when its name ends with one of the known image
    extensions; the comparison ignores case so ``photo.JPG`` is counted too.

    Args:
        directory: Root directory to walk.

    Returns:
        int: Number of matching files (0 if the directory does not exist).
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    return sum(
        name.lower().endswith(image_suffixes)
        for _root, _dirs, names in os.walk(directory)
        for name in names
    )
# Report how many images each dataset split contains.
# Each entry pairs the split's folder name with the wording used in the message.
for split_folder, split_label in (('test', 'test'), ('train', 'training'), ('val', 'val')):
    split_dir = f'/kaggle/input/cotton-disease-dataset/Cotton Disease/{split_folder}'
    num_images = count_images(split_dir)
    print(f'Total number of images in the {split_label} dataset: {num_images}')

In [None]:
# Define function for local model training
def local_train(X_local, y_local):
    """Fit a linear-kernel SVC (probability estimates on, seed 42) and return it.

    Args:
        X_local: Training feature matrix.
        y_local: Training labels.

    Returns:
        The fitted ``SVC`` instance.
    """
    svm_options = {'kernel': 'linear', 'probability': True, 'random_state': 42}
    # fit() returns the estimator itself, so it can be returned directly.
    return SVC(**svm_options).fit(X_local, y_local)


In [None]:
# Define function for model evaluation
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on held-out data.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Test feature matrix.
        y_test: True test labels.

    Returns:
        Tuple ``(accuracy, predicted_labels, predicted_probabilities)``.
    """
    predicted_labels = model.predict(X_test)
    # Tuple elements are evaluated left to right: accuracy first, then probabilities.
    return (
        accuracy_score(y_test, predicted_labels),
        predicted_labels,
        model.predict_proba(X_test),
    )


In [None]:
# Root folder of the Kaggle dataset
data = "/kaggle/input/cotton-disease-dataset/Cotton Disease"
# The train, test, and val splits live directly under the root
train_path, test_path, val_path = (f"{data}/{split}" for split in ('train', 'test', 'val'))

# Writable location where the three splits are merged
combined_path = '/kaggle/working/combined_dataset'
# exist_ok=True makes re-running the cell a no-op
os.makedirs(combined_path, exist_ok=True)
# Function to copy images from source directory to destination directory
def copy_images(source_dir, dest_dir, verbose=True):
    """Copy the files of every class folder in ``source_dir`` into ``dest_dir``.

    ``source_dir`` is expected to contain one sub-directory per class. Each
    regular file found directly inside a class folder is copied to the folder
    of the same name under ``dest_dir`` (created on demand). A file that
    already exists at the destination is overwritten, which keeps re-runs
    idempotent. Copy failures are reported and skipped.

    Args:
        source_dir: Directory holding the class sub-directories.
        dest_dir: Directory receiving the copies.
        verbose: When True (default) print one line per copied file.

    Returns:
        int: Number of files copied successfully.
    """
    copied = 0
    for subdir in os.listdir(source_dir):
        subdir_path = os.path.join(source_dir, subdir)
        if not os.path.isdir(subdir_path):
            continue
        dest_subdir_path = os.path.join(dest_dir, subdir)
        dest_ready = False  # create the destination folder once, and only if a file needs it
        for file in os.listdir(subdir_path):
            file_path = os.path.join(subdir_path, file)
            if not os.path.isfile(file_path):
                continue
            if not dest_ready:
                os.makedirs(dest_subdir_path, exist_ok=True)
                dest_ready = True
            try:
                shutil.copy(file_path, os.path.join(dest_subdir_path, file))
            except OSError as e:
                # shutil's own errors (e.g. SameFileError) derive from OSError too.
                print(f"Error copying file: {file_path}, Error: {e}")
                continue
            copied += 1
            if verbose:
                print(f"Successfully copied: {file_path}")
    return copied
# Merge the train, test, and val splits (in that order) into the combined directory
for split_dir in (train_path, test_path, val_path):
    copy_images(split_dir, combined_path)

print("Images combined successfully.")


In [None]:
# Directory containing the dataset: one sub-directory per class.
# This is the same literal path as combined_path, i.e. the merged copy of all splits.
dataset_dir = "/kaggle/working/combined_dataset"

In [None]:
# List of classes (subdirectories) in the dataset directory.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# sample order (and any seeded split built from it) reproducible, and keep
# only directories so a stray file is never treated as a class.
classes = sorted(
    entry for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)


In [None]:
# Gather every image path (X) together with the name of its class folder (y)
X, y = [], []
for class_name in classes:
    class_dir = os.path.join(dataset_dir, class_name)
    class_image_paths = [
        os.path.join(class_dir, image_file) for image_file in os.listdir(class_dir)
    ]
    X.extend(class_image_paths)
    # One label per path, in the same order
    y.extend([class_name] * len(class_image_paths))


In [None]:
# Convert the class-name strings collected in y into integer labels.
# The fitted encoder is kept in label_encoder so the integer codes can be
# mapped back to class names later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [None]:
# Split the image paths and labels into training (70%) and testing (30%) sets.
# random_state=42 fixes the shuffle so the split repeats for the same input order.
# NOTE(review): the split is not stratified (no stratify=y) — confirm the class
# balance in both parts is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [None]:
# Load and preprocess images for the training set.
# X_train holds file paths; the result has one flattened pixel row per image.
X_train_processed = load_and_preprocess_images(X_train)


In [None]:
# Train the local model: local_train fits a linear-kernel SVC with probability
# estimates enabled on the flattened training images.
local_model = local_train(X_train_processed, y_train)


In [None]:
# Load and preprocess images for the testing set, using the same
# preprocessing function as the training set so the features line up.
X_test_processed = load_and_preprocess_images(X_test)


In [None]:
# Define function for local model training using Naive Bayes
def local_train_nb(X_local, y_local):
    """Fit a Gaussian Naive Bayes classifier on the given data and return it.

    Args:
        X_local: Training feature matrix.
        y_local: Training labels.

    Returns:
        The fitted ``GaussianNB`` instance.
    """
    # fit() returns the estimator itself.
    return GaussianNB().fit(X_local, y_local)

In [None]:
# Train the local model using Naive Bayes (GaussianNB via local_train_nb)
# on the same flattened training images.
local_model_nb = local_train_nb(X_train_processed, y_train)


In [None]:
# Score the Naive Bayes model on the held-out test images
accuracy_nb, y_pred_nb, y_pred_proba_nb = evaluate_model(
    local_model_nb, X_test_processed, y_test
)
print(f"Local Model Accuracy (Naive Bayes): {accuracy_nb * 100:.2f}%")

# Macro-averaged one-vs-rest ROC AUC from the predicted class probabilities
roc_auc_nb = roc_auc_score(y_test, y_pred_proba_nb, multi_class='ovr', average='macro')
print("ROC AUC Score (Naive Bayes):", roc_auc_nb)

# Per-class precision / recall / F1
print(
    "Classification Report (Naive Bayes):",
    classification_report(y_test, y_pred_nb),
    sep="\n",
)


In [None]:
# Confusion matrix for Naive Bayes (rows: true class, columns: predicted class)
print("Confusion Matrix (Naive Bayes):", confusion_matrix(y_test, y_pred_nb), sep="\n")

In [None]:
# Plot one-vs-rest ROC curves for Naive Bayes.
# The class count is taken from the probability matrix (one column per class)
# rather than from the directory listing, so every probability column is
# paired with a binarized label column.
n_classes = y_pred_proba_nb.shape[1]
y_test_binarized = label_binarize(y_test, classes=np.arange(n_classes))
fpr_nb = dict()
tpr_nb = dict()
# Per-class AUCs get their own name so the scalar roc_auc_nb is not overwritten.
roc_auc_nb_per_class = dict()
for i in range(n_classes):
    fpr_nb[i], tpr_nb[i], _ = roc_curve(y_test_binarized[:, i], y_pred_proba_nb[:, i])
    roc_auc_nb_per_class[i] = auc(fpr_nb[i], tpr_nb[i])

plt.figure()
colors = ['blue', 'red', 'green', 'yellow']
for i in range(n_classes):
    # Cycle through the palette so classes beyond the fourth are still drawn
    # (zip() with the colour list would silently drop them).
    plt.plot(
        fpr_nb[i],
        tpr_nb[i],
        color=colors[i % len(colors)],
        lw=2,
        label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc_nb_per_class[i]),
    )

# Diagonal = performance of random guessing
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) - Naive Bayes')
plt.legend(loc="lower right")
plt.show()

# Naive Bayes

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.naive_bayes import GaussianNB
import seaborn as sns  # For heatmap plotting

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order at every level, with a
    sub-directory's files placed where the sub-directory sorts. os.listdir
    alone returns entries in arbitrary order, which would make any seeded
    split of the result non-reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: Full paths of every non-directory entry found.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of rebuilding the list each time
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset splits
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (val, then train, then test)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images loaded successfully
skipped = []  # Paths OpenCV could not decode

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (no exception) for unreadable or non-image
        # files; passing that to cv2.resize would crash the whole cell.
        skipped.append(image)
        continue

    # The class label is the name of the folder that directly contains the file.
    # It is recorded only after a successful load so data and labels stay aligned.
    label = os.path.basename(os.path.dirname(image))
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

# Turn the Python lists into arrays and encode the class names as integers
data = np.array(data)
le = LabelEncoder()
labels = le.fit_transform(np.array(labels))

# Naive Bayes needs one flat feature row per image
data_flattened = data.reshape(len(data), -1)

# 70% training / 30% testing, seeded for repeatability
trainX, testX, trainY, testY = train_test_split(
    data_flattened, labels, test_size=0.30, random_state=42
)

# Build and fit the Gaussian Naive Bayes model in one step (fit returns the model)
nb_model = GaussianNB().fit(trainX, trainY)

# Predicted class for every test image
predictions = nb_model.predict(testX)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion matrix is:", conf_matrix, sep="\n")

# One-vs-rest ROC curves and AUC per class
# Class probabilities for the test images
y_prob = nb_model.predict_proba(testX)

# One indicator column per class for the true labels
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Per-class ROC containers, keyed by class index
tpr, fpr, roc_auc = {}, {}, {}
for class_idx in range(len(le.classes_)):
    fpr[class_idx], tpr[class_idx], _ = roc_curve(y_bin[:, class_idx], y_prob[:, class_idx])
    roc_auc[class_idx] = auc(fpr[class_idx], tpr[class_idx])

# Draw every class on a single square figure
plt.figure(figsize=(5, 5))
for class_idx, class_name in enumerate(le.classes_):
    plt.plot(
        fpr[class_idx],
        tpr[class_idx],
        label=f'Class {class_name} (AUC = {roc_auc[class_idx]:.2f})',
    )

# Diagonal reference line = random guessing
plt.plot([0, 1], [0, 1], 'k--')

# Axes, title and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


In [None]:
import os
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from skimage.feature import hog
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize

# Function to load and preprocess images with HOG features
def load_and_preprocess_images_hog(image_paths):
    """Load images and describe each one by its HOG feature vector.

    Each image is converted to grayscale, resized to 128x128 and scaled to
    [0, 1] before HOG features are extracted with 16x16-pixel cells and
    2x2-cell blocks.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        numpy.ndarray with one HOG feature row per image (an empty array when
        ``image_paths`` is empty).
    """
    hog_features = []
    for image_path in image_paths:
        # The context manager closes the file once the pixels have been read.
        with Image.open(image_path) as source_image:
            gray = source_image.convert('L').resize((128, 128))
        pixels = np.array(gray) / 255.0  # Normalize pixel values
        # Only the feature vector is needed, so the HOG visualisation image
        # (visualize=True) is no longer computed and thrown away.
        features = hog(pixels, pixels_per_cell=(16, 16), cells_per_block=(2, 2))
        hog_features.append(features)
    return np.array(hog_features)

# Load images and labels
def load_data(data_dir):
    """Collect image paths and class names from a one-folder-per-class directory.

    Class folders and the files inside them are visited in sorted order so the
    result is reproducible (os.listdir order is arbitrary). Entries of
    ``data_dir`` that are not directories are ignored instead of crashing the
    inner listing.

    Args:
        data_dir: Directory whose sub-directories are the classes.

    Returns:
        Tuple ``(X, y)``: list of file paths and the parallel list of class names.
    """
    X, y = [], []
    for class_name in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        for image_file in sorted(os.listdir(class_dir)):
            X.append(os.path.join(class_dir, image_file))
            y.append(class_name)
    return X, y

# Imported here so this block is self-contained; scikit-learn is already a dependency.
from sklearn.pipeline import make_pipeline

# Define paths to the train and test directories
train_path = '/kaggle/input/cotton-disease-dataset/Cotton Disease/train'
test_path = '/kaggle/input/cotton-disease-dataset/Cotton Disease/test'

# Collect image paths and class names from the training split
X_train_paths, y_train = load_data(train_path)

# Encode labels to numeric values
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)

# Extract HOG features for the training images
X_train = load_and_preprocess_images_hog(X_train_paths)

# PCA (keep 95% of variance) followed by one-vs-rest Gaussian Naive Bayes.
# PCA sits inside the pipeline so it is re-fitted on the training part of each
# fold; fitting it once on all data would leak each validation fold into the
# projection and inflate the scores.
nb_model = make_pipeline(
    PCA(n_components=0.95),
    OneVsRestClassifier(GaussianNB()),
)

# Perform Stratified K-Fold Cross-Validation
cv = StratifiedKFold(n_splits=5)  # 5-fold cross-validation
accuracies = cross_val_score(nb_model, X_train, y_train, cv=cv, scoring='accuracy')

# Accuracy of each cross-validation fold, plotted against the fold number
fold_numbers = range(1, len(accuracies) + 1)
plt.figure()
plt.plot(fold_numbers, accuracies, marker='o', color='b', label='Accuracy per fold')
plt.xlabel('Fold')
plt.ylabel('Accuracy')
plt.title('Cross-Validation Accuracy Curve')
plt.legend()
plt.grid(True)
plt.show()

# Per-fold accuracies followed by their mean
for fold_number, acc in enumerate(accuracies, start=1):
    print(f"Fold {fold_number} Accuracy: {acc * 100:.2f}%")

mean_accuracy = np.mean(accuracies)
print(f"Mean Cross-Validation Accuracy: {mean_accuracy * 100:.2f}%")


# KNN code

In [None]:
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from PIL import Image
import shutil

# Define function to load and preprocess images
def load_and_preprocess_images(image_paths, target_size=(224, 224)):
    """Load images from disk and return them as a 2D feature matrix.

    Every image is converted to 3-channel RGB, resized to ``target_size`` and
    scaled to the [0, 1] range, then flattened to a single row.

    Args:
        image_paths: Iterable of image file paths.
        target_size: ``(width, height)`` every image is resized to.

    Returns:
        numpy.ndarray of shape ``(num_images, width * height * 3)``.
    """
    width, height = target_size
    n_features = width * height * 3
    image_paths = list(image_paths)
    if not image_paths:
        # reshape(0, -1) cannot infer a width for an empty array, so return
        # an explicitly shaped empty matrix instead.
        return np.empty((0, n_features))

    preprocessed_images = []
    for image_path in image_paths:
        # The context manager closes the underlying file once pixels are read.
        with Image.open(image_path) as source_image:
            # Forcing RGB keeps grayscale / RGBA / palette images the same
            # shape as the rest, so the stack below is never ragged.
            resized = source_image.convert('RGB').resize(target_size)
        preprocessed_images.append(np.asarray(resized) / 255.0)  # Normalize pixel values

    # (num_samples, height, width, channels) -> (num_samples, height * width * channels)
    return np.stack(preprocessed_images).reshape(len(preprocessed_images), -1)

# Root folder of the Kaggle dataset
data = "/kaggle/input/cotton-disease-dataset/Cotton Disease"

# The train, test, and val splits live directly under the root
train_path, test_path, val_path = (f"{data}/{split}" for split in ('train', 'test', 'val'))

# Writable location where the three splits are merged
combined_path = '/kaggle/working/combined_dataset'

# exist_ok=True makes re-running the cell a no-op
os.makedirs(combined_path, exist_ok=True)

# Function to copy images from source directory to destination directory
def copy_images(source_dir, dest_dir, verbose=True):
    """Copy the files of every class folder in ``source_dir`` into ``dest_dir``.

    ``source_dir`` is expected to contain one sub-directory per class. Each
    regular file found directly inside a class folder is copied to the folder
    of the same name under ``dest_dir`` (created on demand). A file that
    already exists at the destination is overwritten, which keeps re-runs
    idempotent. Copy failures are reported and skipped.

    Args:
        source_dir: Directory holding the class sub-directories.
        dest_dir: Directory receiving the copies.
        verbose: When True (default) print one line per copied file.

    Returns:
        int: Number of files copied successfully.
    """
    copied = 0
    for subdir in os.listdir(source_dir):
        subdir_path = os.path.join(source_dir, subdir)
        if not os.path.isdir(subdir_path):
            continue
        dest_subdir_path = os.path.join(dest_dir, subdir)
        dest_ready = False  # create the destination folder once, and only if a file needs it
        for file in os.listdir(subdir_path):
            file_path = os.path.join(subdir_path, file)
            if not os.path.isfile(file_path):
                continue
            if not dest_ready:
                os.makedirs(dest_subdir_path, exist_ok=True)
                dest_ready = True
            try:
                shutil.copy(file_path, os.path.join(dest_subdir_path, file))
            except OSError as e:
                # shutil's own errors (e.g. SameFileError) derive from OSError too.
                print(f"Error copying file: {file_path}, Error: {e}")
                continue
            copied += 1
            if verbose:
                print(f"Successfully copied: {file_path}")
    return copied

# Copy images from train, test, and val directories (in that order)
for split_dir in (train_path, test_path, val_path):
    copy_images(split_dir, combined_path)

print("Images combined successfully.")

# Directory containing the dataset
dataset_dir = "/kaggle/working/combined_dataset"

# List of classes (subdirectories) in the dataset directory.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# sample order (and the seeded split below) reproducible, and keep only
# directories so a stray file is never treated as a class.
classes = sorted(
    entry for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)

# Combine all data from different classes: image paths in X, class names in y
X, y = [], []
for class_name in classes:
    class_dir = os.path.join(dataset_dir, class_name)
    image_files = os.listdir(class_dir)
    for image_file in image_files:
        image_path = os.path.join(class_dir, image_file)
        X.append(image_path)
        y.append(class_name)

# Convert class labels to numeric labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data into training (70%) and testing (30%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Load and preprocess images for the training set
X_train_processed = load_and_preprocess_images(X_train)

# Define function for local model training using KNN
def local_train_knn(X_local, y_local, n_neighbors=3):
    """Fit a k-nearest-neighbours classifier and return it.

    Args:
        X_local: Training feature matrix.
        y_local: Training labels.
        n_neighbors: Number of neighbours consulted per prediction.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    # fit() returns the estimator itself.
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(X_local, y_local)

# Train the local model using KNN (local_train_knn's default n_neighbors=3)
# on the flattened training images.
local_model_knn = local_train_knn(X_train_processed, y_train)

# Load and preprocess images for the testing set, using the same
# preprocessing function as the training set so the features line up.
X_test_processed = load_and_preprocess_images(X_test)

# Define function to evaluate the model
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on held-out data.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Test feature matrix.
        y_test: True test labels.

    Returns:
        Tuple ``(accuracy, predicted_labels, predicted_probabilities)``.
    """
    # Hard predictions first, then class probabilities
    predicted_labels, predicted_probabilities = (
        model.predict(X_test),
        model.predict_proba(X_test),
    )
    return accuracy_score(y_test, predicted_labels), predicted_labels, predicted_probabilities

# Score the KNN model on the held-out test images
accuracy_knn, y_pred_knn, y_pred_proba_knn = evaluate_model(
    local_model_knn, X_test_processed, y_test
)
print(f"Local Model Accuracy (KNN): {accuracy_knn * 100:.2f}%")

# Macro-averaged one-vs-rest ROC AUC from the predicted class probabilities
roc_auc_knn = roc_auc_score(y_test, y_pred_proba_knn, multi_class='ovr', average='macro')
print("ROC AUC Score (KNN):", roc_auc_knn)

# Per-class precision / recall / F1
print("Classification Report (KNN):", classification_report(y_test, y_pred_knn), sep="\n")

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred_knn)
print("Confusion Matrix (KNN):\n", cm)

# # Plot Confusion Matrix
# plt.figure(figsize=(8,6))
# plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
# plt.title('Confusion Matrix (KNN)')
# plt.colorbar()
# tick_marks = np.arange(len(classes))
# plt.xticks(tick_marks, classes, rotation=45)
# plt.yticks(tick_marks, classes)
# plt.tight_layout()
# plt.ylabel('True label')
# plt.xlabel('Predicted label')
# plt.show()

# Plot one-vs-rest ROC curves for KNN.
# The class count is taken from the probability matrix (one column per class)
# rather than from the directory listing, so every probability column is
# paired with a binarized label column.
n_classes = y_pred_proba_knn.shape[1]
y_test_binarized = label_binarize(y_test, classes=[i for i in range(n_classes)])

# Per-class ROC containers, keyed by class index
fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_binarized[:, i], y_pred_proba_knn[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot each class's ROC curve
plt.figure()
for i in range(n_classes):
    plt.plot(fpr[i], tpr[i], label=f'ROC curve of class {i} (area = {roc_auc[i]:.2f})')

# Diagonal = performance of random guessing
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve (KNN)')
plt.legend(loc='lower right')
plt.show()


In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_curve, auc
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns  # Import seaborn for heatmap plotting

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order at every level, with a
    sub-directory's files placed where the sub-directory sorts. os.listdir
    alone returns entries in arbitrary order, which would make any seeded
    split of the result non-reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: Full paths of every non-directory entry found.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of rebuilding the list each time
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset splits
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (val, then train, then test)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images loaded successfully
skipped = []  # Paths OpenCV could not decode

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (no exception) for unreadable or non-image
        # files; passing that to cv2.resize would crash the whole cell.
        skipped.append(image)
        continue

    # The class label is the name of the folder that directly contains the file.
    # It is recorded only after a successful load so data and labels stay aligned.
    label = os.path.basename(os.path.dirname(image))
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

# Turn the Python lists into arrays and encode the class names as integers
data = np.array(data)
le = LabelEncoder()
labels = le.fit_transform(np.array(labels))

# 70% training / 30% testing, seeded for repeatability
trainX, testX, trainY, testY = train_test_split(
    data, labels, test_size=0.30, random_state=42
)

# Scale pixel values from [0, 255] to [0, 1]
trainX, testX = (split.astype('float32') / 255.0 for split in (trainX, testX))

# KNN needs one flat feature row per image: (num_samples, height * width * channels)
trainX_flat = trainX.reshape(len(trainX), -1)
testX_flat = testX.reshape(len(testX), -1)

# 5-nearest-neighbours classifier (n_neighbors can be tuned)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(trainX_flat, trainY)

# Predicted class for every test image
predictions = knn.predict(testX_flat)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion matrix is:", conf_matrix, sep="\n")

# ROC curve and AUC calculation
# Binarize the true labels: one indicator column per class
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Class probabilities are the same for every class index, so compute them once
# instead of re-running the neighbour search in each loop iteration.
y_prob = knn.predict_proba(testX_flat)

# Variables for ROC calculation, keyed by class index
tpr = dict()
fpr = dict()
roc_auc = dict()

# Calculate ROC for each class
for i in range(len(le.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC curve for each class
plt.figure(figsize=(5, 5))

for i in range(len(le.classes_)):
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal line for random guessing
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# CNN Code without Data Augmentation

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
import seaborn as sns  # Make sure to import seaborn for heatmap plotting


# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order at every level, with a
    sub-directory's files placed where the sub-directory sorts. os.listdir
    alone returns entries in arbitrary order, which would make any seeded
    split of the result non-reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: Full paths of every non-directory entry found.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of rebuilding the list each time
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset splits
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (val, then train, then test)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images loaded successfully
skipped = []  # Paths OpenCV could not decode

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (no exception) for unreadable or non-image
        # files; passing that to cv2.resize would crash the whole cell.
        skipped.append(image)
        continue

    # The class label is the name of the folder that directly contains the file.
    # It is recorded only after a successful load so data and labels stay aligned.
    label = os.path.basename(os.path.dirname(image))
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

# Turn the image list into an array
data = np.array(data)

# Encode class names as integers, then one-hot encode them for the softmax output
le = LabelEncoder()
labels = to_categorical(le.fit_transform(np.array(labels)))

# 70% training / 30% testing, seeded for repeatability
trainX, testX, trainY, testY = train_test_split(
    data, labels, test_size=0.30, random_state=42
)

# Scale pixel values from [0, 255] to [0, 1]
trainX, testX = (split.astype('float32') / 255.0 for split in (trainX, testX))

# The output layer needs one unit per one-hot label column
num_classes = labels.shape[1]

# CNN: three conv + max-pool stages, then two dense layers and a softmax output
model = Sequential([
    Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    # Fully connected (dense) layers
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# Adam optimizer with categorical cross-entropy, tracking accuracy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 10 epochs; the held-out test split is used as validation data
history = model.fit(
    trainX, trainY,
    epochs=10,
    batch_size=32,
    validation_data=(testX, testY),
)

# Training vs. validation curves, one panel per metric (accuracy left, loss right)
plt.figure(figsize=(12, 5))

for panel, (metric, title) in enumerate((('accuracy', 'Accuracy'), ('loss', 'Loss')), start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[metric], label=f'Training {title}')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
    plt.title(f'{title} Curve')
    plt.xlabel('Epochs')
    plt.ylabel(title)
    plt.legend()

plt.tight_layout()
plt.show()

# Class probabilities for every test image
predictions = model.predict(testX)
# Most probable class vs. the class encoded in the one-hot test labels
predicted_labels = predictions.argmax(axis=1)
true_labels = testY.argmax(axis=1)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# One-vs-rest ROC curves and AUC per class
# One indicator column per class for the true labels
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Per-class ROC containers, keyed by class index
tpr, fpr, roc_auc = {}, {}, {}
for class_idx in range(num_classes):
    fpr[class_idx], tpr[class_idx], _ = roc_curve(y_bin[:, class_idx], predictions[:, class_idx])
    roc_auc[class_idx] = auc(fpr[class_idx], tpr[class_idx])

# Draw every class on a single square figure
plt.figure(figsize=(5, 5))
for class_idx in range(num_classes):
    plt.plot(
        fpr[class_idx],
        tpr[class_idx],
        label=f'Class {le.classes_[class_idx]} (AUC = {roc_auc[class_idx]:.2f})',
    )

# Diagonal reference line = random guessing
plt.plot([0, 1], [0, 1], 'k--')

# Axes, title and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# CNN Code with Data Augmentation

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import seaborn as sns  # Make sure to import seaborn for heatmap plotting

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the list (and the seeded
    ``train_test_split`` built from it) could differ between runs or machines.

    Args:
        dirName: Path of the directory to traverse.

    Returns:
        list[str]: Path of every non-directory entry found, depth-first, each
        prefixed with ``dirName``.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of re-copying the whole list
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's three split folders
# (variable suffixes: _fp = test, _fl = train, _dl = val)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list; the split is redone below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Load each image, resize it to 32x32, and label it with its parent folder name
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip before recording a label so that data and
        # labels stay aligned, instead of crashing inside cv2.resize.
        print(f"Skipping unreadable file: {image}")
        continue

    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Stack the per-image lists into arrays
data = np.array(data)
labels = np.array(labels)

# Encode the folder-name labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Convert labels to categorical (one-hot encoding)
labels = to_categorical(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Scale pixel values by 1/255 (optional but improves performance)
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# On-the-fly augmentation: random rotation, horizontal/vertical shifts, shear,
# zoom and horizontal flips; pixels exposed by a transform are filled from the
# nearest valid pixel
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.15, zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Fit the generator to the training images
datagen.fit(trainX)

# One softmax unit per one-hot label column
num_classes = labels.shape[1]

# Three conv + max-pool stages (32, 64, 128 filters) on 32x32x3 inputs,
# followed by two dense layers and the softmax classifier
model = Sequential([
    Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# Adam + categorical cross-entropy, tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for 10 epochs on augmented batches of 32; the held-out split is passed
# as validation data
history = model.fit(
    datagen.flow(trainX, trainY, batch_size=32),
    validation_data=(testX, testY),
    epochs=10,
)

# Class probabilities for the held-out images; argmax over the class axis
# gives the predicted ids and decodes the one-hot ground truth
predictions = model.predict(testX)
predicted_labels = predictions.argmax(axis=1)
true_labels = testY.argmax(axis=1)

# Per-class precision, recall and F1
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy, reported as a percentage
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix of true vs. predicted class ids
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# Training curves: accuracy on the left, loss on the right
plt.figure(figsize=(12, 4))

acc_ax = plt.subplot(1, 2, 1)
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax = plt.subplot(1, 2, 2)
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

plt.tight_layout()
plt.show()

# One-vs-rest ROC: one indicator column of the true labels per class,
# scored against the matching probability column
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Curve points and areas, keyed by class index
tpr, fpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves share one square figure
plt.figure(figsize=(5, 5))
roc_ax = plt.gca()
for i in range(num_classes):
    roc_ax.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
roc_ax.plot([0, 1], [0, 1], 'k--')

# Axis ranges, labels, title and legend
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate (FPR)')
roc_ax.set_ylabel('True Positive Rate (TPR)')
roc_ax.set_title('ROC Curves for Each Class')
roc_ax.legend(loc="lower right")
plt.show()


In [None]:
# CNN with VGG16 using DA, and similarly without DA
# power point of results - monday - done
# include ROC curve with four classes - done
# KNN using federated learning

# CNN with VGG16 without data augmentation

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the list (and the seeded
    ``train_test_split`` built from it) could differ between runs or machines.

    Args:
        dirName: Path of the directory to traverse.

    Returns:
        list[str]: Path of every non-directory entry found, depth-first, each
        prefixed with ``dirName``.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of re-copying the whole list
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's three split folders
# (variable suffixes: _fp = test, _fl = train, _dl = val)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list; the split is redone below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Load each image, resize it, and label it with its parent folder name
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip before recording a label so that data and
        # labels stay aligned, instead of crashing inside cv2.resize.
        print(f"Skipping unreadable file: {image}")
        continue

    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # VGG16 input size is 224x224
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Stack the per-image lists into arrays
data = np.array(data)
labels = np.array(labels)

# Encode the folder-name labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Convert labels to categorical (one-hot encoding)
labels = to_categorical(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Scale pixel values by 1/255 (optional but improves performance)
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# ImageNet-pretrained VGG16 convolutional base, without its dense classifier head
vgg_base = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Freeze every base layer so only the new head is trained
for layer in vgg_base.layers:
    layer.trainable = False

# One softmax unit per one-hot label column
num_classes = labels.shape[1]

# Frozen base -> flatten -> two dense layers, each followed by 50% dropout
# to reduce overfitting -> softmax classifier
model = Sequential([
    vgg_base,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Adam with a 1e-4 learning rate, categorical cross-entropy, tracking accuracy
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Train without augmentation: 10 epochs, batches of 32, with the held-out
# split passed as validation data
history = model.fit(
    trainX, trainY,
    validation_data=(testX, testY),
    epochs=10,
    batch_size=32,
)

# Class probabilities for the held-out images; argmax over the class axis
# gives the predicted ids and decodes the one-hot ground truth
predictions = model.predict(testX)
predicted_labels = predictions.argmax(axis=1)
true_labels = testY.argmax(axis=1)

# Per-class precision, recall and F1
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy, reported as a percentage
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix of true vs. predicted class ids
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# Training curves: accuracy on the left, loss on the right
plt.figure(figsize=(12, 4))

acc_ax = plt.subplot(1, 2, 1)
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax = plt.subplot(1, 2, 2)
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

plt.tight_layout()
plt.show()

# One-vs-rest ROC: one indicator column of the true labels per class,
# scored against the matching probability column
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Curve points and areas, keyed by class index
tpr, fpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves share one square figure
plt.figure(figsize=(5, 5))
roc_ax = plt.gca()
for i in range(num_classes):
    roc_ax.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
roc_ax.plot([0, 1], [0, 1], 'k--')

# Axis ranges, labels, title and legend
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate (FPR)')
roc_ax.set_ylabel('True Positive Rate (TPR)')
roc_ax.set_title('ROC Curves for Each Class')
roc_ax.legend(loc="lower right")
plt.show()


# CNN using VGG16 with Data augmentation

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.applications import VGG16
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the list (and the seeded
    ``train_test_split`` built from it) could differ between runs or machines.

    Args:
        dirName: Path of the directory to traverse.

    Returns:
        list[str]: Path of every non-directory entry found, depth-first, each
        prefixed with ``dirName``.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of re-copying the whole list
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's three split folders
# (variable suffixes: _fp = test, _fl = train, _dl = val)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list; the split is redone below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []

# Load each image, resize it, and label it with its parent folder name
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip before recording a label so that data and
        # labels stay aligned, instead of crashing inside cv2.resize.
        print(f"Skipping unreadable file: {image}")
        continue

    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # VGG16 input size is 224x224
    data.append(img)

# Stack the per-image lists into arrays
data = np.array(data)
labels = np.array(labels)

# Encode the folder-name labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Convert labels to categorical (one-hot encoding)
labels = to_categorical(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Scale pixel values by 1/255 (optional but improves performance)
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# On-the-fly augmentation: random rotation, horizontal/vertical shifts, shear,
# zoom and horizontal flips; pixels exposed by a transform are filled from the
# nearest valid pixel
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.15, zoom_range=0.15,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Fit the generator to the training images
datagen.fit(trainX)

# ImageNet-pretrained VGG16 convolutional base, without its dense classifier head
vgg_base = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Freeze every base layer so only the new head is trained
for layer in vgg_base.layers:
    layer.trainable = False

# One softmax unit per one-hot label column
num_classes = labels.shape[1]

# Frozen base -> flatten -> two dense layers, each followed by 50% dropout
# to reduce overfitting -> softmax classifier
model = Sequential([
    vgg_base,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Adam with a 1e-4 learning rate, categorical cross-entropy, tracking accuracy
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Train for 10 epochs on augmented batches of 32; the held-out split is passed
# as validation data
history = model.fit(
    datagen.flow(trainX, trainY, batch_size=32),
    validation_data=(testX, testY),
    epochs=10,
)

# Class probabilities for the held-out images; argmax over the class axis
# gives the predicted ids and decodes the one-hot ground truth
predictions = model.predict(testX)
predicted_labels = predictions.argmax(axis=1)
true_labels = testY.argmax(axis=1)

# Per-class precision, recall and F1
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy, reported as a percentage
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix of true vs. predicted class ids
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# Training curves: accuracy on the left, loss on the right
plt.figure(figsize=(12, 4))

acc_ax = plt.subplot(1, 2, 1)
acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax = plt.subplot(1, 2, 2)
loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

plt.tight_layout()
plt.show()

# One-vs-rest ROC: one indicator column of the true labels per class,
# scored against the matching probability column
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Curve points and areas, keyed by class index
tpr, fpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves share one square figure
plt.figure(figsize=(5, 5))
roc_ax = plt.gca()
for i in range(num_classes):
    roc_ax.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
roc_ax.plot([0, 1], [0, 1], 'k--')

# Axis ranges, labels, title and legend
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate (FPR)')
roc_ax.set_ylabel('True Positive Rate (TPR)')
roc_ax.set_title('ROC Curves for Each Class')
roc_ax.legend(loc="lower right")
plt.show()


Update the Word paper (discussion); KNN using federated learning

In [None]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score

# Helper function to load images
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the list (and the seeded
    ``train_test_split`` built from it) could differ between runs or machines.

    Args:
        dirName: Path of the directory to traverse.

    Returns:
        list[str]: Path of every non-directory entry found, depth-first, each
        prefixed with ``dirName``.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of re-copying the whole list
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's three split folders
# (variable suffixes: _fp = test, _fl = train, _dl = val)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list; the split is redone below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []

# Load each image, resize it to 224x224, and label it with its parent folder name
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip before recording a label so that data and
        # labels stay aligned, instead of crashing inside cv2.resize.
        print(f"Skipping unreadable file: {image}")
        continue

    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
    data.append(img)

# Stack the per-image lists into arrays
data = np.array(data)
labels = np.array(labels)

# Encode the folder-name labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Scale pixel values by 1/255
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# PyTorch dataset definition
class ImageDataset(Dataset):
    """Index-aligned (image, label) pairs with an optional per-image transform."""

    def __init__(self, images, labels, transform=None):
        """Store the image and label sequences and the optional transform callable."""
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` at ``idx``; the image is transformed if a transform is set."""
        sample, target = self.images[idx], self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

# Training-time augmentation pipeline: to PIL image, random horizontal flip,
# random rotation (RandomRotation(20)), then back to a tensor
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
    ]
)

# Wrap both splits; only the training split gets the augmentation pipeline
train_dataset = ImageDataset(images=trainX, labels=trainY, transform=transform)
test_dataset = ImageDataset(images=testX, labels=testY, transform=transforms.ToTensor())

# Batches of 32; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Partition the data to simulate clients in Federated Learning
def create_clients(data, labels, num_clients=5):
    """Partition ``data`` / ``labels`` into consecutive shards, one per simulated client.

    Every sample is assigned to exactly one client. When ``len(data)`` is not
    a multiple of ``num_clients`` the first ``len(data) % num_clients`` clients
    receive one extra sample each, so no trailing samples are dropped.

    Args:
        data: Sliceable sequence of samples (e.g. a NumPy array).
        labels: Sliceable sequence of labels, index-aligned with ``data``.
        num_clients: Number of shards to create.

    Returns:
        list[tuple]: ``num_clients`` pairs of ``(data_shard, labels_shard)``,
        in original sample order.
    """
    base_size, remainder = divmod(len(data), num_clients)
    client_data = []
    start = 0
    for i in range(num_clients):
        # The first `remainder` shards absorb the leftover samples
        stop = start + base_size + (1 if i < remainder else 0)
        client_data.append((data[start:stop], labels[start:stop]))
        start = stop
    return client_data

# Partition the training arrays into 5 simulated clients; each entry of
# `clients` is an (images, labels) pair
clients = create_clients(trainX, trainY, num_clients=5)

# Federated Learning process with KNN on each client
def client_knn_train(client_data, n_neighbors=3):
    """Fit a k-nearest-neighbours classifier on one client's shard.

    Args:
        client_data: ``(images, labels)`` pair; each image is flattened to a
            single feature vector before fitting.
        n_neighbors: Number of neighbours used by the classifier.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    features, targets = client_data
    flat_features = features.reshape(features.shape[0], -1)
    # fit() returns the estimator itself
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(flat_features, targets)

# Fit one KNN classifier per client shard and collect them in client order.
# Note: the loop rebinds the notebook-level names `model` and `client_data`;
# afterwards `model` refers to the last client's classifier.
client_models = []
for client_data in clients:
    model = client_knn_train(client_data)
    client_models.append(model)

# Aggregate predictions from clients
def federated_knn_predict(client_models, X_test):
    """Predict labels by majority vote across the per-client models.

    Args:
        client_models: Fitted classifiers exposing ``predict``.
        X_test: Test images; each one is flattened to a feature vector first.

    Returns:
        np.ndarray: One label per test sample, the most frequent prediction
        across clients (``argmax`` picks the lowest label on ties).
    """
    flat_test = X_test.reshape(X_test.shape[0], -1)

    # Row k holds client k's prediction for every test sample
    votes = np.array([clf.predict(flat_test) for clf in client_models])

    def _most_common(sample_votes):
        # bincount requires non-negative integer labels
        return np.bincount(sample_votes).argmax()

    # Reduce over the client axis: one winning label per sample
    return np.apply_along_axis(_most_common, 0, votes)

# Test the federated KNN model
predicted_labels = federated_knn_predict(client_models, testX)

# Calculate accuracy
accuracy = accuracy_score(testY, predicted_labels)
print(f"Federated KNN Accuracy: {accuracy * 100:.2f}%")

# Print classification report
print(classification_report(testY, predicted_labels, target_names=le.classes_))

# Confusion matrix
conf_matrix = confusion_matrix(testY, predicted_labels)
print("Confusion matrix is:")
print(conf_matrix)

# Binarize the labels for ROC calculation (one-vs-rest). The class list is
# taken from the fitted encoder rather than a hard-coded [0, 1, 2, 3], so the
# columns always line up with the encoded labels.
n_classes = len(le.classes_)
class_ids = list(range(n_classes))
y_test_bin = label_binarize(testY, classes=class_ids)
y_pred_bin = label_binarize(predicted_labels, classes=class_ids)

# Compute ROC curve and ROC area for each class.
# NOTE: the scores here are hard 0/1 votes, not probabilities, so each
# "curve" has a single operating point between (0, 0) and (1, 1).
fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_pred_bin[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot the ROC curves for all classes
plt.figure()
colors = ['blue', 'red', 'green', 'orange']

for i in range(n_classes):
    # Cycle through the palette so classes beyond the fourth are still drawn
    # (zip() with the colour list used to drop them silently)
    plt.plot(fpr[i], tpr[i], color=colors[i % len(colors)], lw=2,
             label=f'Class {i} (area = {roc_auc[i]:0.2f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) for Multi-Class')
plt.legend(loc="lower right")
plt.show()

# Print ROC AUC score for overall model
roc_auc_score_value = roc_auc_score(y_test_bin, y_pred_bin, average='macro')
print(f"Overall ROC AUC Score: {roc_auc_score_value:.2f}")


# kNN Federated Learning

In [None]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_curve, auc
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import matplotlib.pyplot as plt

# Helper function to load images
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the list (and the seeded
    ``train_test_split`` built from it) could differ between runs or machines.

    Args:
        dirName: Path of the directory to traverse.

    Returns:
        list[str]: Path of every non-directory entry found, depth-first, each
        prefixed with ``dirName``.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of re-copying the whole list
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's three split folders
# (variable suffixes: _fp = test, _fl = train, _dl = val)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list; the split is redone below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []

# Load each image, resize it to 224x224, and label it with its parent folder name
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip before recording a label so that data and
        # labels stay aligned, instead of crashing inside cv2.resize.
        print(f"Skipping unreadable file: {image}")
        continue

    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
    data.append(img)

# Stack the per-image lists into arrays
data = np.array(data)
labels = np.array(labels)

# Encode the folder-name labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Scale pixel values by 1/255
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# PyTorch dataset definition
class ImageDataset(Dataset):
    """Index-aligned (image, label) pairs with an optional per-image transform."""

    def __init__(self, images, labels, transform=None):
        """Store the image and label sequences and the optional transform callable."""
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` at ``idx``; the image is transformed if a transform is set."""
        sample, target = self.images[idx], self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

# Training-time augmentation pipeline: to PIL image, random horizontal flip,
# random rotation (RandomRotation(20)), then back to a tensor
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
    ]
)

# Wrap both splits; only the training split gets the augmentation pipeline
train_dataset = ImageDataset(images=trainX, labels=trainY, transform=transform)
test_dataset = ImageDataset(images=testX, labels=testY, transform=transforms.ToTensor())

# Batches of 32; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Partition the data to simulate clients in Federated Learning
def create_clients(data, labels, num_clients=5):
    """Partition ``data`` / ``labels`` into consecutive shards, one per simulated client.

    Every sample is assigned to exactly one client. When ``len(data)`` is not
    a multiple of ``num_clients`` the first ``len(data) % num_clients`` clients
    receive one extra sample each, so no trailing samples are dropped.

    Args:
        data: Sliceable sequence of samples (e.g. a NumPy array).
        labels: Sliceable sequence of labels, index-aligned with ``data``.
        num_clients: Number of shards to create.

    Returns:
        list[tuple]: ``num_clients`` pairs of ``(data_shard, labels_shard)``,
        in original sample order.
    """
    base_size, remainder = divmod(len(data), num_clients)
    client_data = []
    start = 0
    for i in range(num_clients):
        # The first `remainder` shards absorb the leftover samples
        stop = start + base_size + (1 if i < remainder else 0)
        client_data.append((data[start:stop], labels[start:stop]))
        start = stop
    return client_data

# Partition the training arrays into 5 simulated clients; each entry of
# `clients` is an (images, labels) pair
clients = create_clients(trainX, trainY, num_clients=5)

# Federated Learning process with KNN on each client
def client_knn_train(client_data, n_neighbors=3):
    """Fit a k-nearest-neighbours classifier on one client's shard.

    Args:
        client_data: ``(images, labels)`` pair; each image is flattened to a
            single feature vector before fitting.
        n_neighbors: Number of neighbours used by the classifier.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    features, targets = client_data
    flat_features = features.reshape(features.shape[0], -1)
    # fit() returns the estimator itself
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(flat_features, targets)

# Fit one KNN classifier per client shard and collect them in client order.
# Note: the loop rebinds the notebook-level names `model` and `client_data`;
# afterwards `model` refers to the last client's classifier.
client_models = []
for client_data in clients:
    model = client_knn_train(client_data)
    client_models.append(model)

# Aggregate predictions from clients
def federated_knn_predict(client_models, X_test):
    """Combine per-client predictions by majority vote and averaged probabilities.

    Each model's ``predict_proba`` columns follow that model's own
    ``classes_``. A client whose shard lacked some class returns fewer
    columns, so the outputs are first placed into a common layout where
    column ``j`` is the probability of class id ``j`` (zero where a client
    never saw that class). Labels are assumed to be non-negative integer
    class ids, which the ``np.bincount`` vote already requires.

    Args:
        client_models: Fitted classifiers exposing ``predict`` and
            ``predict_proba`` (and normally ``classes_``).
        X_test: Test images; each one is flattened to a feature vector first.

    Returns:
        tuple: ``(labels, probabilities)`` where ``labels`` is the per-sample
        majority vote (lowest label wins ties) and ``probabilities`` has shape
        ``(n_samples, max_class_id + 1)``, the mean over clients.
    """
    X_test = X_test.reshape(X_test.shape[0], -1)
    predictions = []
    client_probas = []
    client_classes = []

    for model in client_models:
        predictions.append(model.predict(X_test))
        proba = np.asarray(model.predict_proba(X_test))
        classes = getattr(model, "classes_", None)
        if classes is None:
            # No class list available: treat the columns as already aligned
            classes = np.arange(proba.shape[1])
        client_probas.append(proba)
        client_classes.append(np.asarray(classes, dtype=int))

    predictions = np.array(predictions)
    aggregated_predictions = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=predictions)

    # Scatter every client's columns into the shared class-id layout
    n_classes = max(int(classes.max()) for classes in client_classes) + 1
    probabilities = np.zeros((len(client_probas), X_test.shape[0], n_classes))
    for k, (proba, classes) in enumerate(zip(client_probas, client_classes)):
        probabilities[k][:, classes] = proba

    return aggregated_predictions, probabilities.mean(axis=0)  # Average probabilities

# Majority-vote labels plus client-averaged class probabilities for the test set
predicted_labels, predicted_probs = federated_knn_predict(client_models, testX)

# Overall accuracy, reported as a percentage
accuracy = accuracy_score(testY, predicted_labels)
print(f"Federated KNN Accuracy: {accuracy * 100:.2f}%")

# Per-class precision, recall and F1
print(classification_report(testY, predicted_labels, target_names=le.classes_))

# Confusion matrix of true vs. predicted class ids
conf_matrix = confusion_matrix(testY, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# One-vs-rest ROC per class: "is class i" against column i of the averaged
# probabilities; curve points and areas are keyed by class index
n_classes = len(le.classes_)
fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(testY == i, predicted_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves on one figure
plt.figure()
roc_ax = plt.gca()
for i in range(n_classes):
    roc_ax.plot(fpr[i], tpr[i], label=f'ROC curve (area = {roc_auc[i]:.2f}) for class {le.classes_[i]}')

# Chance-level diagonal for reference
roc_ax.plot([0, 1], [0, 1], 'k--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve')
roc_ax.legend(loc='lower right')
plt.show()


# Random Forest Using Federated Learning

In [None]:
import os
import numpy as np
import cv2
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import matplotlib.pyplot as plt

# Helper function to load images
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order. ``os.listdir`` returns names in
    arbitrary order, so without sorting the list (and the seeded
    ``train_test_split`` built from it) could differ between runs or machines.

    Args:
        dirName: Path of the directory to traverse.

    Returns:
        list[str]: Path of every non-directory entry found, depth-first, each
        prefixed with ``dirName``.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place instead of re-copying the whole list
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's three split folders
# (variable suffixes: _fp = test, _fl = train, _dl = val)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list; the split is redone below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []

# Load each image, resize it to 224x224, and label it with its parent folder name
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip before recording a label so that data and
        # labels stay aligned, instead of crashing inside cv2.resize.
        print(f"Skipping unreadable file: {image}")
        continue

    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)
    data.append(img)

# Stack the per-image lists into arrays
data = np.array(data)
labels = np.array(labels)

# Encode the folder-name labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Scale pixel values by 1/255
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# PyTorch dataset definition
class ImageDataset(Dataset):
    """Index-aligned (image, label) pairs with an optional per-image transform."""

    def __init__(self, images, labels, transform=None):
        """Store the image and label sequences and the optional transform callable."""
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` at ``idx``; the image is transformed if a transform is set."""
        sample, target = self.images[idx], self.labels[idx]
        if not self.transform:
            return sample, target
        return self.transform(sample), target

# Training-time augmentation pipeline: to PIL image, random horizontal flip,
# random rotation (RandomRotation(20)), then back to a tensor
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(20),
        transforms.ToTensor(),
    ]
)

# Wrap both splits; only the training split gets the augmentation pipeline
train_dataset = ImageDataset(images=trainX, labels=trainY, transform=transform)
test_dataset = ImageDataset(images=testX, labels=testY, transform=transforms.ToTensor())

# Batches of 32; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Partition the data to simulate clients in Federated Learning
def create_clients(data, labels, num_clients=5):
    """Partition a dataset into contiguous shards, one per simulated client.

    Every sample is assigned to exactly one client. When ``len(data)`` is not
    a multiple of ``num_clients`` the first ``len(data) % num_clients`` clients
    receive one extra sample, so no trailing samples are dropped. When the
    length divides evenly the shards are plain equal-sized slices.

    Args:
        data: Sliceable sequence/array of samples.
        labels: Sliceable sequence/array of labels, parallel to ``data``.
        num_clients: Number of shards to create (must be positive).

    Returns:
        list[tuple]: ``num_clients`` pairs ``(data_shard, label_shard)`` in
        original order.

    Raises:
        ValueError: If ``num_clients`` is not positive.
    """
    if num_clients <= 0:
        raise ValueError("num_clients must be a positive integer")

    base_size, remainder = divmod(len(data), num_clients)
    client_data = []
    start = 0
    for i in range(num_clients):
        # The first `remainder` clients absorb the leftover samples.
        stop = start + base_size + (1 if i < remainder else 0)
        client_data.append((data[start:stop], labels[start:stop]))
        start = stop
    return client_data

# Create 5 clients
clients = create_clients(trainX, trainY, num_clients=5)

# Federated Learning process with Random Forest on each client
def client_rf_train(client_data, n_estimators=100, random_state=None):
    """Fit a Random Forest on a single client's local shard.

    Args:
        client_data: Pair ``(X_train, y_train)``; ``X_train`` must support
            ``.shape`` / ``.reshape`` and is flattened to one row per sample.
        n_estimators: Number of trees in the forest.
        random_state: Seed forwarded to ``RandomForestClassifier``; ``None``
            (the default) leaves the forest unseeded.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # Unpack client data
    X_train, y_train = client_data
    rf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    # Flatten every sample to a 1-D feature vector before fitting
    rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
    return rf

# Train one Random Forest per client and store the models. Each client gets
# its own fixed seed so the federated results are reproducible run to run.
client_models = []
for client_id, client_data in enumerate(clients):
    model = client_rf_train(client_data, random_state=42 + client_id)
    client_models.append(model)

# Aggregate predictions from clients
def federated_rf_predict(client_models, X_test):
    """Predict labels by majority vote over all client models.

    ``X_test`` is flattened to one row per sample, every model in
    ``client_models`` predicts on it, and for each sample the most frequent
    predicted label wins (``argmax`` picks the smallest label on a tie).
    Predictions must be non-negative integers because ``np.bincount`` is used.
    """
    flat_inputs = X_test.reshape(X_test.shape[0], -1)

    def most_common(votes):
        # Count occurrences of each label and return the most frequent one.
        return np.bincount(votes).argmax()

    # Row k holds the predictions of client k for every test sample.
    votes_per_client = np.array([clf.predict(flat_inputs) for clf in client_models])

    # Vote column by column, i.e. once per test sample.
    return np.apply_along_axis(most_common, axis=0, arr=votes_per_client)

# Majority-vote predictions of the federated ensemble on the held-out split
predicted_labels = federated_rf_predict(client_models, testX)

# Overall accuracy
accuracy = accuracy_score(testY, predicted_labels)
print(f"Federated Random Forest Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(testY, predicted_labels, target_names=le.classes_))

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(testY, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# Containers for the per-class ROC curves, keyed by encoded class index
n_classes = len(le.classes_)
fpr, tpr, roc_auc = {}, {}, {}

# For Random Forest, we can get the probabilities for ROC calculation
def federated_rf_predict_proba(client_models, X_test, classes=None):
    """Average class probabilities over all client models.

    Each model's ``predict_proba`` columns are placed according to that model's
    own ``classes_`` attribute, so a client whose shard lacked some class
    contributes probability 0 for it instead of shifting (or breaking) the
    column layout. When every model saw the same classes the result equals the
    plain mean of the stacked probability matrices.

    Args:
        client_models: Non-empty sequence of fitted classifiers exposing
            ``classes_`` and ``predict_proba``.
        X_test: Array of test samples; flattened to one row per sample.
        classes: Optional full list of class labels fixing the output columns.
            Defaults to the sorted union of all models' ``classes_``.

    Returns:
        numpy.ndarray of shape ``(n_samples, n_classes)`` with the averaged
        probabilities; column ``j`` corresponds to ``classes[j]``.

    Raises:
        ValueError: If ``client_models`` is empty.
        KeyError: If a model knows a class that is missing from ``classes``.
    """
    if len(client_models) == 0:
        raise ValueError("client_models must contain at least one model")

    X_test = X_test.reshape(X_test.shape[0], -1)

    if classes is None:
        classes = np.unique(np.concatenate([np.asarray(m.classes_) for m in client_models]))
    else:
        classes = np.asarray(classes)
    column_of = {label: j for j, label in enumerate(classes.tolist())}

    total = np.zeros((X_test.shape[0], len(classes)), dtype=float)
    for model in client_models:
        probs = np.asarray(model.predict_proba(X_test))
        # Map this model's local column order onto the global class columns.
        cols = [column_of[label] for label in np.asarray(model.classes_).tolist()]
        total[:, cols] += probs

    return total / len(client_models)  # Average probabilities

# Averaged class probabilities from all client models
predicted_probs = federated_rf_predict_proba(client_models, testX)

# One-vs-rest ROC per class: the positives for class i are the test samples
# whose encoded label equals i
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(testY == i, predicted_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Draw every class curve on one figure
plt.figure()
for i in range(n_classes):
    plt.plot(
        fpr[i],
        tpr[i],
        label=f'ROC curve (area = {roc_auc[i]:.2f}) for class {le.classes_[i]}',
    )

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axes, labels and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()


Work on MobileNet, ResNet. Update the Word doc without federated learning: **Random Forest, SVM (without federated learning).**

# Random Forest

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import seaborn as sns  # Make sure to import seaborn for heatmap plotting


# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted order so the returned list is the same on
    every run and filesystem (``os.listdir`` order is arbitrary); a seeded
    split of the resulting list is therefore reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry names; a sub-directory's files appear at
        that sub-directory's sorted position.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset folders (pooled and re-split below)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising. Skip it before touching `labels` so cv2.resize
        # does not crash and data/labels stay index-aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # The class label is the name of the folder that directly contains the file
    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Stack into arrays and encode the string labels as integers
data = np.array(data)
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Flatten the image data for Random Forest (one feature row per image)
data_flattened = data.reshape((data.shape[0], -1))

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data_flattened, labels, test_size=0.30, random_state=42)

# Fit a 100-tree Random Forest (seeded) on the training split
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(trainX, trainY)

# Hard label predictions for the held-out split
predictions = rf_model.predict(testX)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion matrix is:", conf_matrix, sep="\n")

# Class probabilities for the held-out split (input to the ROC curves)
y_prob = rf_model.predict_proba(testX)

# One indicator column per class for the one-vs-rest ROC
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Per-class curve points and AUC, keyed by encoded class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(len(le.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# One curve per class on a single square figure
plt.figure(figsize=(5, 5))
for i in roc_auc:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axes, labels and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# SVM

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.svm import SVC
from sklearn import metrics
import seaborn as sns  # Make sure to import seaborn for heatmap plotting


# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted order so the returned list is the same on
    every run and filesystem (``os.listdir`` order is arbitrary); a seeded
    split of the resulting list is therefore reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry names; a sub-directory's files appear at
        that sub-directory's sorted position.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset folders (pooled and re-split below)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising. Skip it before touching `labels` so cv2.resize
        # does not crash and data/labels stay index-aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # The class label is the name of the folder that directly contains the file
    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA)
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Stack into arrays and encode the string labels as integers
data = np.array(data)
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Flatten the image data for SVM (one feature row per image)
data_flattened = data.reshape((data.shape[0], -1))

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data_flattened, labels, test_size=0.30, random_state=42)

# Normalize image data for SVM (scaling improves performance)
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# Fit a linear-kernel SVM (seeded); probability=True enables predict_proba,
# which the ROC section needs
svm_model = SVC(kernel='linear', probability=True, random_state=42).fit(trainX, trainY)

# Hard label predictions for the held-out split
predictions = svm_model.predict(testX)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion matrix is:", conf_matrix, sep="\n")

# Class probabilities for the held-out split (input to the ROC curves)
y_prob = svm_model.predict_proba(testX)

# One indicator column per class for the one-vs-rest ROC
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Per-class curve points and AUC, keyed by encoded class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(len(le.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# One curve per class on a single square figure
plt.figure(figsize=(5, 5))
for i in roc_auc:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axes, labels and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# MobileNet without Data Augmentation

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
import seaborn as sns

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted order so the returned list is the same on
    every run and filesystem (``os.listdir`` order is arbitrary); a seeded
    split of the resulting list is therefore reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry names; a sub-directory's files appear at
        that sub-directory's sorted position.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset folders (pooled and re-split below)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising. Skip it before touching `labels` so cv2.resize
        # does not crash and data/labels stay index-aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # The class label is the name of the folder that directly contains the file
    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    # OpenCV decodes to BGR; the ImageNet-pretrained weights expect RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # Resize to 224x224 for MobileNet
    img = img_to_array(img)
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Apply MobileNetV2's own input preprocessing (scales pixels to [-1, 1]) so the
# frozen ImageNet features see inputs in the range they were trained on
data = tf.keras.applications.mobilenet_v2.preprocess_input(np.array(data, dtype="float32"))
labels = np.array(labels)

# Encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Convert labels to categorical (one-hot encoding)
labels = to_categorical(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# ImageNet-pretrained MobileNetV2 backbone without its classification head
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights="imagenet")

# Custom head: pool the feature maps to a vector, then two dense layers
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)

# Softmax output with one unit per class (width of the one-hot labels)
num_classes = labels.shape[1]
predictions = Dense(num_classes, activation='softmax')(x)

# Full model: backbone input -> custom head output
model = Model(inputs=base_model.input, outputs=predictions)

# Keep the backbone weights fixed; only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Adam + categorical cross-entropy, tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs, batch size 32; the held-out split is evaluated after each epoch
history = model.fit(
    trainX,
    trainY,
    validation_data=(testX, testY),
    batch_size=32,
    epochs=10,
)

# Training history: accuracy (left panel) and loss (right panel),
# each showing the training and validation series per epoch
plt.figure(figsize=(12, 5))

# Left panel: accuracy
plt.subplot(121)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy Curve')
plt.legend()

# Right panel: loss
plt.subplot(122)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.legend()

plt.tight_layout()
plt.show()

# Class probabilities for the held-out images, reduced to hard labels;
# the one-hot test labels are decoded the same way
predictions = model.predict(testX)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(testY, axis=1)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# One indicator column per class for the one-vs-rest ROC
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Per-class curve points and AUC, keyed by encoded class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# One curve per class on a single square figure
plt.figure(figsize=(5, 5))
for i in roc_auc:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axes, labels and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# MobileNet with Data Augmentation

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import seaborn as sns

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted order so the returned list is the same on
    every run and filesystem (``os.listdir`` order is arbitrary); a seeded
    split of the resulting list is therefore reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry names; a sub-directory's files appear at
        that sub-directory's sorted position.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset folders (pooled and re-split below)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising. Skip it before touching `labels` so cv2.resize
        # does not crash and data/labels stay index-aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # The class label is the name of the folder that directly contains the file
    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    # OpenCV decodes to BGR; the ImageNet-pretrained weights expect RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # Resize to 224x224 for MobileNet
    img = img_to_array(img)
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Apply MobileNetV2's own input preprocessing (scales pixels to [-1, 1]) so the
# frozen ImageNet features see inputs in the range they were trained on
data = tf.keras.applications.mobilenet_v2.preprocess_input(np.array(data, dtype="float32"))
labels = np.array(labels)

# Encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Convert labels to categorical (one-hot encoding)
labels = to_categorical(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Random geometric augmentation applied on the fly to training batches
datagen = ImageDataGenerator(
    horizontal_flip=True,    # Random horizontal flip
    rotation_range=40,       # Random rotation
    width_shift_range=0.2,   # Random horizontal shift
    height_shift_range=0.2,  # Random vertical shift
    shear_range=0.2,         # Random shear
    zoom_range=0.2,          # Random zoom
    fill_mode='nearest',     # Fill in new pixels
)

# Fit the generator to the training data
datagen.fit(trainX)

# ImageNet-pretrained MobileNetV2 backbone without its classification head
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights="imagenet")

# Custom head: pool the feature maps to a vector, then two dense layers
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)

# Softmax output with one unit per class (width of the one-hot labels)
num_classes = labels.shape[1]
predictions = Dense(num_classes, activation='softmax')(x)

# Full model: backbone input -> custom head output
model = Model(inputs=base_model.input, outputs=predictions)

# Keep the backbone weights fixed; only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Adam + categorical cross-entropy, tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on augmented batches of 32 for 10 epochs, using only whole batches
# per epoch; the un-augmented held-out split is evaluated after each epoch
history = model.fit(
    datagen.flow(trainX, trainY, batch_size=32),
    steps_per_epoch=len(trainX) // 32,
    validation_data=(testX, testY),
    epochs=10,
)

# Training history: accuracy (left panel) and loss (right panel),
# each showing the training and validation series per epoch
plt.figure(figsize=(12, 5))

# Left panel: accuracy
plt.subplot(121)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy Curve')
plt.legend()

# Right panel: loss
plt.subplot(122)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.legend()

plt.tight_layout()
plt.show()

# Class probabilities for the held-out images, reduced to hard labels;
# the one-hot test labels are decoded the same way
predictions = model.predict(testX)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(testY, axis=1)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# One indicator column per class for the one-vs-rest ROC
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Per-class curve points and AUC, keyed by encoded class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# One curve per class on a single square figure
plt.figure(figsize=(5, 5))
for i in roc_auc:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axes, labels and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# ResNet50 without Data Augmentation

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
import seaborn as sns

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted order so the returned list is the same on
    every run and filesystem (``os.listdir`` order is arbitrary); a seeded
    split of the resulting list is therefore reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry names; a sub-directory's files appear at
        that sub-directory's sorted position.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset folders (pooled and re-split below)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising. Skip it before touching `labels` so cv2.resize
        # does not crash and data/labels stay index-aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # The class label is the name of the folder that directly contains the file
    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    # OpenCV decodes to BGR; hand RGB to preprocess_input, which performs the
    # channel reordering the pretrained weights expect
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # Resize to 224x224 for ResNet
    img = img_to_array(img)
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Apply ResNet50's own input preprocessing (RGB -> BGR and ImageNet mean
# subtraction on 0-255 pixels) instead of a plain /255 scaling, so the frozen
# ImageNet features see inputs in the range they were trained on
data = tf.keras.applications.resnet50.preprocess_input(np.array(data, dtype="float32"))
labels = np.array(labels)

# Encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Convert labels to categorical (one-hot encoding)
labels = to_categorical(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# ImageNet-pretrained ResNet50 backbone without its classification head
base_model = ResNet50(input_shape=(224, 224, 3), include_top=False, weights="imagenet")

# Custom head: pool the feature maps to a vector, then two dense layers
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)

# Softmax output with one unit per class (width of the one-hot labels)
num_classes = labels.shape[1]
predictions = Dense(num_classes, activation='softmax')(x)

# Full model: backbone input -> custom head output
model = Model(inputs=base_model.input, outputs=predictions)

# Keep the backbone weights fixed; only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Adam + categorical cross-entropy, tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs, batch size 32; the held-out split is evaluated after each epoch
history = model.fit(
    trainX,
    trainY,
    validation_data=(testX, testY),
    batch_size=32,
    epochs=10,
)

# Training history: accuracy (left panel) and loss (right panel),
# each showing the training and validation series per epoch
plt.figure(figsize=(12, 5))

# Left panel: accuracy
plt.subplot(121)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy Curve')
plt.legend()

# Right panel: loss
plt.subplot(122)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.legend()

plt.tight_layout()
plt.show()

# Class probabilities for the held-out images, reduced to hard labels;
# the one-hot test labels are decoded the same way
predictions = model.predict(testX)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(testY, axis=1)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# One indicator column per class for the one-vs-rest ROC
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Per-class curve points and AUC, keyed by encoded class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# One curve per class on a single square figure
plt.figure(figsize=(5, 5))
for i in roc_auc:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axes, labels and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


Update the doc file (Results & Discussion). Code: CNN - {MobileNet, ResNet50} (with data augmentation)

# ResNet50 With Data Augmentation

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import seaborn as sns

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted order so the returned list is the same on
    every run and filesystem (``os.listdir`` order is arbitrary); a seeded
    split of the resulting list is therefore reproducible.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry names; a sub-directory's files appear at
        that sub-directory's sorted position.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the three dataset folders (pooled and re-split below)
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Loop through each image, load it, and preprocess it
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # instead of raising. Skip it before touching `labels` so cv2.resize
        # does not crash and data/labels stay index-aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # The class label is the name of the folder that directly contains the file
    label = os.path.split(os.path.split(image)[0])[1]
    labels.append(label)

    # OpenCV decodes to BGR; hand RGB to preprocess_input, which performs the
    # channel reordering the pretrained weights expect
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # Resize to 224x224 for ResNet
    img = img_to_array(img)
    data.append(img)
    c += 1
print(f"Processed {c} images")

# Apply ResNet50's own input preprocessing (RGB -> BGR and ImageNet mean
# subtraction on 0-255 pixels) instead of a plain /255 scaling, so the frozen
# ImageNet features see inputs in the range they were trained on
data = tf.keras.applications.resnet50.preprocess_input(np.array(data, dtype="float32"))
labels = np.array(labels)

# Encode the labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# Convert labels to categorical (one-hot encoding)
labels = to_categorical(labels)

# Split dataset into 70% training and 30% testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Random geometric augmentation applied on the fly to training batches:
# rotation up to 20 degrees, shifts / shear / zoom of 0.2, horizontal flips;
# pixels exposed by a transform are filled from the nearest edge
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
)

# Fit the generator on the training data
train_datagen.fit(trainX)

# ImageNet-pretrained ResNet50 backbone without its classification head
base_model = ResNet50(input_shape=(224, 224, 3), include_top=False, weights="imagenet")

# Custom head: pool the feature maps to a vector, then two dense layers
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)

# Softmax output with one unit per class (width of the one-hot labels)
num_classes = labels.shape[1]
predictions = Dense(num_classes, activation='softmax')(x)

# Full model: backbone input -> custom head output
model = Model(inputs=base_model.input, outputs=predictions)

# Keep the backbone weights fixed; only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Adam + categorical cross-entropy, tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on augmented batches of 32 for 10 epochs; the un-augmented held-out
# split is evaluated after each epoch
history = model.fit(
    train_datagen.flow(trainX, trainY, batch_size=32),
    validation_data=(testX, testY),
    epochs=10,
)

# Training history: accuracy (left panel) and loss (right panel),
# each showing the training and validation series per epoch
plt.figure(figsize=(12, 5))

# Left panel: accuracy
plt.subplot(121)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy Curve')
plt.legend()

# Right panel: loss
plt.subplot(122)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.legend()

plt.tight_layout()
plt.show()

# Class probabilities for the held-out images, reduced to hard labels;
# the one-hot test labels are decoded the same way
predictions = model.predict(testX)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(testY, axis=1)

# Per-class precision / recall / F1, labelled with the original class names
print(classification_report(true_labels, predicted_labels, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion matrix is:", conf_matrix, sep="\n")

# One indicator column per class for the one-vs-rest ROC
y_bin = label_binarize(true_labels, classes=list(range(num_classes)))

# Per-class curve points and AUC, keyed by encoded class index
fpr, tpr, roc_auc = {}, {}, {}
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# One curve per class on a single square figure
plt.figure(figsize=(5, 5))
for i in roc_auc:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Chance-level diagonal for reference
plt.plot([0, 1], [0, 1], 'k--')

# Axes, labels and legend
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# FEDERATED LEARNING

In [None]:
pip install --upgrade opencv-python


# Naive Bayes -  Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.naive_bayes import GaussianNB
import seaborn as sns  # For heatmap visualization

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry's relative location, in sorted
        depth-first order.

    Entries are visited in sorted order because ``os.listdir`` returns names
    in arbitrary order; a stable order keeps the seeded ``train_test_split``
    and the client sharding later in this cell reproducible across runs.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it on
            # every recursion as ``allFiles = allFiles + ...`` did.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the test / train / val folders of the dataset
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all three folders; the train/test split is redone further down
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
skipped = []  # files OpenCV could not decode

# Load each image, shrink it to 32x32 and record its class label
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip them so cv2.resize does not crash and so
        # `labels` never gets an entry without a matching image.
        skipped.append(image)
        continue
    # The class label is the name of the image's parent directory
    labels.append(os.path.split(os.path.split(image)[0])[1])
    data.append(cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA))
c = len(data)  # number of images actually loaded
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

data = np.array(data)
labels = np.array(labels)

# Encode the string labels as integer class ids
le = LabelEncoder()
labels = le.fit_transform(labels)

# Flatten each image into one feature vector for Naïve Bayes
data_flattened = data.reshape((data.shape[0], -1))

# Hold out 30% of the samples for testing (fixed seed)
(trainX, testX, trainY, testY) = train_test_split(data_flattened, labels, test_size=0.30, random_state=42)

# Scale pixel values from [0, 255] to [0, 1]
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# Number of simulated federated clients
num_clients = 4

# Give each client a consecutive, near-equal shard of the training set
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Train one local Naïve Bayes model per client, on that client's shard only
client_models = []
for i in range(num_clients):
    model = GaussianNB()
    model.fit(client_data[i], client_labels[i])
    client_models.append(model)
    print(f"Client {i+1} trained.")

# NOTE(review): this estimator is never fitted or used in this cell; the
# federated predictions come from averaging the client models below.
global_model = GaussianNB()
# Compute average probabilities from all client models
def aggregate_models(client_models, testX, classes=None):
    """Soft-vote over the client models by averaging class probabilities.

    Args:
        client_models: Non-empty sequence of fitted estimators exposing
            ``predict_proba`` and ``classes_``.
        testX: Samples to classify, passed unchanged to ``predict_proba``.
        classes: Optional collection of every class label. Defaults to the
            union of the labels the clients were trained on.

    Returns:
        tuple: ``(labels, avg_probs)`` where ``avg_probs`` has one column per
        sorted class label and ``labels`` holds the label with the highest
        averaged probability for each sample.

    Raises:
        ValueError: If ``client_models`` is empty or a client knows a label
            that is missing from ``classes``.
    """
    if not client_models:
        raise ValueError("aggregate_models needs at least one client model")
    if classes is None:
        classes = np.concatenate([np.asarray(model.classes_) for model in client_models])
    # np.unique sorts and de-duplicates, which np.searchsorted below relies on
    classes = np.unique(classes)

    aligned_probs = []
    for model in client_models:
        seen = np.asarray(model.classes_)
        if not np.isin(seen, classes).all():
            raise ValueError("client model was trained on a label missing from `classes`")
        probs = np.asarray(model.predict_proba(testX))
        # predict_proba has one column per label in model.classes_, so a
        # client whose shard lacked a class returns fewer columns. Scatter
        # them into the shared layout; unseen classes keep probability 0.
        aligned = np.zeros((probs.shape[0], len(classes)), dtype=probs.dtype)
        aligned[:, np.searchsorted(classes, seen)] = probs
        aligned_probs.append(aligned)

    avg_probs = np.mean(aligned_probs, axis=0)  # Averaging predicted probabilities
    return classes[np.argmax(avg_probs, axis=1)], avg_probs

# Soft-voting predictions and averaged class probabilities from the clients
predictions, y_prob = aggregate_models(client_models, testX)

# Per-class precision / recall / F1
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated Naïve Bayes Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# One-vs-rest ROC: one indicator column per class id
class_ids = list(range(len(le.classes_)))
y_bin = label_binarize(testY, classes=class_ids)

# Per-class false/true positive rates and the area under each curve
fpr, tpr, roc_auc = {}, {}, {}
for i in class_ids:
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves share one axes
fig, ax = plt.subplots(figsize=(6, 6))
for i, class_name in enumerate(le.classes_):
    ax.plot(fpr[i], tpr[i], label=f'Class {class_name} (AUC = {roc_auc[i]:.2f})')

# Chance-level reference diagonal
ax.plot([0, 1], [0, 1], 'k--')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate (FPR)')
ax.set_ylabel('True Positive Rate (TPR)')
ax.set_title('ROC Curves for Each Class')
ax.legend(loc="lower right")
plt.show()



# Troubleshooting, time taken to run, and code to show how long it took

# KNN - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns  # For heatmap visualization

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry's relative location, in sorted
        depth-first order.

    Entries are visited in sorted order because ``os.listdir`` returns names
    in arbitrary order; a stable order keeps the seeded ``train_test_split``
    and the client sharding later in this cell reproducible across runs.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it on
            # every recursion as ``allFiles = allFiles + ...`` did.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the test / train / val folders of the dataset
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all three folders; the train/test split is redone further down
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
skipped = []  # files OpenCV could not decode

# Load each image, shrink it to 32x32 and record its class label
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip them so cv2.resize does not crash and so
        # `labels` never gets an entry without a matching image.
        skipped.append(image)
        continue
    # The class label is the name of the image's parent directory
    labels.append(os.path.split(os.path.split(image)[0])[1])
    data.append(cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA))
c = len(data)  # number of images actually loaded
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

data = np.array(data)
labels = np.array(labels)

# Encode the string labels as integer class ids
le = LabelEncoder()
labels = le.fit_transform(labels)

# Flatten each image into one feature vector for KNN
data_flattened = data.reshape((data.shape[0], -1))

# Hold out 30% of the samples for testing (fixed seed)
(trainX, testX, trainY, testY) = train_test_split(data_flattened, labels, test_size=0.30, random_state=42)

# Scale pixel values from [0, 255] to [0, 1]
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# Number of simulated federated clients
num_clients = 4

# Give each client a consecutive, near-equal shard of the training set
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Train one local 5-nearest-neighbours model per client, on its shard only
client_models = []
for i in range(num_clients):
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(client_data[i], client_labels[i])
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX, classes=None):
    """Soft-vote over the client models by averaging class probabilities.

    Args:
        client_models: Non-empty sequence of fitted estimators exposing
            ``predict_proba`` and ``classes_``.
        testX: Samples to classify, passed unchanged to ``predict_proba``.
        classes: Optional collection of every class label. Defaults to the
            union of the labels the clients were trained on.

    Returns:
        tuple: ``(labels, avg_probs)`` where ``avg_probs`` has one column per
        sorted class label and ``labels`` holds the label with the highest
        averaged probability for each sample.

    Raises:
        ValueError: If ``client_models`` is empty or a client knows a label
            that is missing from ``classes``.
    """
    if not client_models:
        raise ValueError("aggregate_models needs at least one client model")
    if classes is None:
        classes = np.concatenate([np.asarray(model.classes_) for model in client_models])
    # np.unique sorts and de-duplicates, which np.searchsorted below relies on
    classes = np.unique(classes)

    aligned_probs = []
    for model in client_models:
        seen = np.asarray(model.classes_)
        if not np.isin(seen, classes).all():
            raise ValueError("client model was trained on a label missing from `classes`")
        probs = np.asarray(model.predict_proba(testX))
        # predict_proba has one column per label in model.classes_, so a
        # client whose shard lacked a class returns fewer columns. Scatter
        # them into the shared layout; unseen classes keep probability 0.
        aligned = np.zeros((probs.shape[0], len(classes)), dtype=probs.dtype)
        aligned[:, np.searchsorted(classes, seen)] = probs
        aligned_probs.append(aligned)

    avg_probs = np.mean(aligned_probs, axis=0)  # Averaging predicted probabilities
    return classes[np.argmax(avg_probs, axis=1)], avg_probs

# Soft-voting predictions and averaged class probabilities from the clients
predictions, y_prob = aggregate_models(client_models, testX)

# Per-class precision / recall / F1
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated KNN Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# One-vs-rest ROC: one indicator column per class id
class_ids = list(range(len(le.classes_)))
y_bin = label_binarize(testY, classes=class_ids)

# Per-class false/true positive rates and the area under each curve
fpr, tpr, roc_auc = {}, {}, {}
for i in class_ids:
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves share one axes
fig, ax = plt.subplots(figsize=(6, 6))
for i, class_name in enumerate(le.classes_):
    ax.plot(fpr[i], tpr[i], label=f'Class {class_name} (AUC = {roc_auc[i]:.2f})')

# Chance-level reference diagonal
ax.plot([0, 1], [0, 1], 'k--')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate (FPR)')
ax.set_ylabel('True Positive Rate (TPR)')
ax.set_title('ROC Curves for Each Class')
ax.legend(loc="lower right")
plt.show()


# Random Forest - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns  # For heatmap visualization

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry's relative location, in sorted
        depth-first order.

    Entries are visited in sorted order because ``os.listdir`` returns names
    in arbitrary order; a stable order keeps the seeded ``train_test_split``
    and the client sharding later in this cell reproducible across runs.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it on
            # every recursion as ``allFiles = allFiles + ...`` did.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the test / train / val folders of the dataset
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all three folders; the train/test split is redone further down
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
skipped = []  # files OpenCV could not decode

# Load each image, shrink it to 32x32 and record its class label
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip them so cv2.resize does not crash and so
        # `labels` never gets an entry without a matching image.
        skipped.append(image)
        continue
    # The class label is the name of the image's parent directory
    labels.append(os.path.split(os.path.split(image)[0])[1])
    data.append(cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA))
c = len(data)  # number of images actually loaded
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

data = np.array(data)
labels = np.array(labels)

# Encode the string labels as integer class ids
le = LabelEncoder()
labels = le.fit_transform(labels)

# Flatten each image into one feature vector for Random Forest
data_flattened = data.reshape((data.shape[0], -1))

# Hold out 30% of the samples for testing (fixed seed)
(trainX, testX, trainY, testY) = train_test_split(data_flattened, labels, test_size=0.30, random_state=42)

# Scale pixel values from [0, 255] to [0, 1]
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# Number of simulated federated clients
num_clients = 4

# Give each client a consecutive, near-equal shard of the training set
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Train one local 100-tree Random Forest per client, on its shard only
client_models = []
for i in range(num_clients):
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(client_data[i], client_labels[i])
    client_models.append(model)
    print(f"Client {i+1} trained.")

# NOTE(review): this estimator is never fitted or used in this cell; the
# federated predictions come from averaging the client models below.
global_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Compute average probabilities from all client models
def aggregate_models(client_models, testX, classes=None):
    """Soft-vote over the client models by averaging class probabilities.

    Args:
        client_models: Non-empty sequence of fitted estimators exposing
            ``predict_proba`` and ``classes_``.
        testX: Samples to classify, passed unchanged to ``predict_proba``.
        classes: Optional collection of every class label. Defaults to the
            union of the labels the clients were trained on.

    Returns:
        tuple: ``(labels, avg_probs)`` where ``avg_probs`` has one column per
        sorted class label and ``labels`` holds the label with the highest
        averaged probability for each sample.

    Raises:
        ValueError: If ``client_models`` is empty or a client knows a label
            that is missing from ``classes``.
    """
    if not client_models:
        raise ValueError("aggregate_models needs at least one client model")
    if classes is None:
        classes = np.concatenate([np.asarray(model.classes_) for model in client_models])
    # np.unique sorts and de-duplicates, which np.searchsorted below relies on
    classes = np.unique(classes)

    aligned_probs = []
    for model in client_models:
        seen = np.asarray(model.classes_)
        if not np.isin(seen, classes).all():
            raise ValueError("client model was trained on a label missing from `classes`")
        probs = np.asarray(model.predict_proba(testX))
        # predict_proba has one column per label in model.classes_, so a
        # client whose shard lacked a class returns fewer columns. Scatter
        # them into the shared layout; unseen classes keep probability 0.
        aligned = np.zeros((probs.shape[0], len(classes)), dtype=probs.dtype)
        aligned[:, np.searchsorted(classes, seen)] = probs
        aligned_probs.append(aligned)

    avg_probs = np.mean(aligned_probs, axis=0)  # Averaging predicted probabilities
    return classes[np.argmax(avg_probs, axis=1)], avg_probs

# Soft-voting predictions and averaged class probabilities from the clients
predictions, y_prob = aggregate_models(client_models, testX)

# Per-class precision / recall / F1
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated Random Forest Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# One-vs-rest ROC: one indicator column per class id
class_ids = list(range(len(le.classes_)))
y_bin = label_binarize(testY, classes=class_ids)

# Per-class false/true positive rates and the area under each curve
fpr, tpr, roc_auc = {}, {}, {}
for i in class_ids:
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves share one axes
fig, ax = plt.subplots(figsize=(6, 6))
for i, class_name in enumerate(le.classes_):
    ax.plot(fpr[i], tpr[i], label=f'Class {class_name} (AUC = {roc_auc[i]:.2f})')

# Chance-level reference diagonal
ax.plot([0, 1], [0, 1], 'k--')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate (FPR)')
ax.set_ylabel('True Positive Rate (TPR)')
ax.set_title('ROC Curves for Each Class')
ax.legend(loc="lower right")
plt.show()


# SVM - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
from sklearn.svm import SVC
import seaborn as sns  # For heatmap visualization

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry's relative location, in sorted
        depth-first order.

    Entries are visited in sorted order because ``os.listdir`` returns names
    in arbitrary order; a stable order keeps the seeded ``train_test_split``
    and the client sharding later in this cell reproducible across runs.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it on
            # every recursion as ``allFiles = allFiles + ...`` did.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the test / train / val folders of the dataset
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all three folders; the train/test split is redone further down
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
skipped = []  # files OpenCV could not decode

# Load each image, shrink it to 32x32 and record its class label
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip them so cv2.resize does not crash and so
        # `labels` never gets an entry without a matching image.
        skipped.append(image)
        continue
    # The class label is the name of the image's parent directory
    labels.append(os.path.split(os.path.split(image)[0])[1])
    data.append(cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA))
c = len(data)  # number of images actually loaded
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

data = np.array(data)
labels = np.array(labels)

# Encode the string labels as integer class ids
le = LabelEncoder()
labels = le.fit_transform(labels)

# Flatten each image into one feature vector for SVM
data_flattened = data.reshape((data.shape[0], -1))

# Hold out 30% of the samples for testing (fixed seed)
(trainX, testX, trainY, testY) = train_test_split(data_flattened, labels, test_size=0.30, random_state=42)

# Scale pixel values from [0, 255] to [0, 1]
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# Number of simulated federated clients
num_clients = 4

# Give each client a consecutive, near-equal shard of the training set
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Train one local linear SVM per client, on its shard only.
# probability=True is required so the models expose predict_proba.
client_models = []
for i in range(num_clients):
    model = SVC(kernel='linear', probability=True, random_state=42)
    model.fit(client_data[i], client_labels[i])
    client_models.append(model)
    print(f"Client {i+1} trained.")

# NOTE(review): this estimator is never fitted or used in this cell; the
# federated predictions come from averaging the client models below.
global_model = SVC(kernel='linear', probability=True, random_state=42)

# Compute average probabilities from all client models
def aggregate_models(client_models, testX, classes=None):
    """Soft-vote over the client models by averaging class probabilities.

    Args:
        client_models: Non-empty sequence of fitted estimators exposing
            ``predict_proba`` and ``classes_``.
        testX: Samples to classify, passed unchanged to ``predict_proba``.
        classes: Optional collection of every class label. Defaults to the
            union of the labels the clients were trained on.

    Returns:
        tuple: ``(labels, avg_probs)`` where ``avg_probs`` has one column per
        sorted class label and ``labels`` holds the label with the highest
        averaged probability for each sample.

    Raises:
        ValueError: If ``client_models`` is empty or a client knows a label
            that is missing from ``classes``.
    """
    if not client_models:
        raise ValueError("aggregate_models needs at least one client model")
    if classes is None:
        classes = np.concatenate([np.asarray(model.classes_) for model in client_models])
    # np.unique sorts and de-duplicates, which np.searchsorted below relies on
    classes = np.unique(classes)

    aligned_probs = []
    for model in client_models:
        seen = np.asarray(model.classes_)
        if not np.isin(seen, classes).all():
            raise ValueError("client model was trained on a label missing from `classes`")
        probs = np.asarray(model.predict_proba(testX))
        # predict_proba has one column per label in model.classes_, so a
        # client whose shard lacked a class returns fewer columns. Scatter
        # them into the shared layout; unseen classes keep probability 0.
        aligned = np.zeros((probs.shape[0], len(classes)), dtype=probs.dtype)
        aligned[:, np.searchsorted(classes, seen)] = probs
        aligned_probs.append(aligned)

    avg_probs = np.mean(aligned_probs, axis=0)  # Averaging predicted probabilities
    return classes[np.argmax(avg_probs, axis=1)], avg_probs

# Soft-voting predictions and averaged class probabilities from the clients
predictions, y_prob = aggregate_models(client_models, testX)

# Per-class precision / recall / F1
print(classification_report(testY, predictions, target_names=le.classes_))

# Overall accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated SVM Accuracy: {accuracy * 100:.2f}%")

# Confusion matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# One-vs-rest ROC: one indicator column per class id
class_ids = list(range(len(le.classes_)))
y_bin = label_binarize(testY, classes=class_ids)

# Per-class false/true positive rates and the area under each curve
fpr, tpr, roc_auc = {}, {}, {}
for i in class_ids:
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# All class curves share one axes
fig, ax = plt.subplots(figsize=(6, 6))
for i, class_name in enumerate(le.classes_):
    ax.plot(fpr[i], tpr[i], label=f'Class {class_name} (AUC = {roc_auc[i]:.2f})')

# Chance-level reference diagonal
ax.plot([0, 1], [0, 1], 'k--')

ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate (FPR)')
ax.set_ylabel('True Positive Rate (TPR)')
ax.set_title('ROC Curves for Each Class')
ax.legend(loc="lower right")
plt.show()


# CNN without Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry's relative location, in sorted
        depth-first order.

    Entries are visited in sorted order because ``os.listdir`` returns names
    in arbitrary order; a stable order keeps the seeded ``train_test_split``
    and the client sharding later in this cell reproducible across runs.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it on
            # every recursion as ``allFiles = allFiles + ...`` did.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the test / train / val folders of the dataset
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all three folders; the train/test split is redone further down
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
skipped = []  # files OpenCV could not decode

# Load each image, shrink it to 32x32 and record its class label
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip them so cv2.resize does not crash and so
        # `labels` never gets an entry without a matching image.
        skipped.append(image)
        continue
    # The class label is the name of the image's parent directory
    labels.append(os.path.split(os.path.split(image)[0])[1])
    data.append(cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA))
c = len(data)  # number of images actually loaded
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

data = np.array(data)
labels = np.array(labels)

# Encode the string labels as integer class ids
le = LabelEncoder()
labels = le.fit_transform(labels)

# Scale pixel values from [0, 255] to [0, 1]
data = data.astype('float32') / 255.0

# Hold out 30% of the samples for testing (fixed seed)
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance: 'balanced' weights computed from the training
# labels, stored as {class id: weight} for Keras' class_weight argument
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

# Define the CNN model
def create_cnn_model():
    """Build and compile the small CNN that each client trains.

    Architecture for 32x32x3 inputs: three 3x3 ReLU convolutions with 32, 64
    and 128 filters (2x2 max pooling after the first two), a flatten layer,
    a 128-unit ReLU dense layer and a softmax output with one unit per class
    in ``le.classes_``.

    Returns:
        The model compiled with Adam (default settings), sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    n_outputs = len(le.classes_)
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(n_outputs, activation='softmax'),  # multi-class output
    ])
    # Sparse loss: the labels are integer class ids, not one-hot vectors
    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Number of simulated federated clients
num_clients = 4

# Shard the training set into consecutive, near-equal pieces, one per client
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Fit an independent CNN on every client's shard
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_cnn_model()
    model.fit(
        shard_x,
        shard_y,
        epochs=10,
        batch_size=32,
        class_weight=class_weights_dict,
        verbose=1,
    )
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Return the ensemble's predicted class index for every test sample.

    Each client model's ``predict(testX)`` output is averaged element-wise
    across clients, and the column with the highest averaged value is
    returned per row.
    """
    stacked = np.asarray([net.predict(testX) for net in client_models])
    return stacked.mean(axis=0).argmax(axis=1)

# Hard-label predictions from the federated (probability-averaged) ensemble
predictions = aggregate_models(client_models, testX)

# Classification Report
print(classification_report(testY, predictions, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated CNN Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation (one-vs-rest): one indicator column per class
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Average the clients' class probabilities ONCE. The previous loop re-ran
# model.predict(testX) on every client for every class, repeating the same
# inference len(le.classes_) times.
y_prob = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in range(len(le.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in range(len(le.classes_)):
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line (chance level)
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()



# CNN with Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry's relative location, in sorted
        depth-first order.

    Entries are visited in sorted order because ``os.listdir`` returns names
    in arbitrary order; a stable order keeps the seeded ``train_test_split``
    and the client sharding later in this cell reproducible across runs.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it on
            # every recursion as ``allFiles = allFiles + ...`` did.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the test / train / val folders of the dataset
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all three folders; the train/test split is redone further down
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
skipped = []  # files OpenCV could not decode

# Load each image, shrink it to 32x32 and record its class label
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip them so cv2.resize does not crash and so
        # `labels` never gets an entry without a matching image.
        skipped.append(image)
        continue
    # The class label is the name of the image's parent directory
    labels.append(os.path.split(os.path.split(image)[0])[1])
    data.append(cv2.resize(img, (32, 32), interpolation=cv2.INTER_AREA))
c = len(data)  # number of images actually loaded
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

data = np.array(data)
labels = np.array(labels)

# Encode the string labels as integer class ids
le = LabelEncoder()
labels = le.fit_transform(labels)

# Scale pixel values from [0, 255] to [0, 1]
data = data.astype('float32') / 255.0

# Hold out 30% of the samples for testing (fixed seed)
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance: 'balanced' weights computed from the training
# labels, stored as {class id: weight} for Keras' class_weight argument
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

# Create ImageDataGenerator for data augmentation: random rotations,
# shifts, shear, zoom and horizontal flips applied to training batches
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Define the CNN model
def create_cnn_model():
    """Build and compile the small CNN that each client trains.

    Architecture for 32x32x3 inputs: three 3x3 ReLU convolutions with 32, 64
    and 128 filters (2x2 max pooling after the first two), a flatten layer,
    a 128-unit ReLU dense layer and a softmax output with one unit per class
    in ``le.classes_``.

    Returns:
        The model compiled with Adam (default settings), sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    n_outputs = len(le.classes_)
    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(n_outputs, activation='softmax'),  # multi-class output
    ])
    # Sparse loss: the labels are integer class ids, not one-hot vectors
    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Number of simulated federated clients
num_clients = 4

# Shard the training set into consecutive, near-equal pieces, one per client
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Fit an independent CNN on augmented batches drawn from every client's shard
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_cnn_model()
    augmented_batches = train_datagen.flow(shard_x, shard_y, batch_size=32)
    model.fit(
        augmented_batches,
        epochs=10,
        class_weight=class_weights_dict,
        verbose=1,
    )
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Return the ensemble's predicted class index for every test sample.

    Each client model's ``predict(testX)`` output is averaged element-wise
    across clients, and the column with the highest averaged value is
    returned per row.
    """
    stacked = np.asarray([net.predict(testX) for net in client_models])
    return stacked.mean(axis=0).argmax(axis=1)

# Hard-label predictions from the federated (probability-averaged) ensemble
predictions = aggregate_models(client_models, testX)

# Classification Report
print(classification_report(testY, predictions, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated CNN Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation (one-vs-rest): one indicator column per class
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Average the clients' class probabilities ONCE. The previous loop re-ran
# model.predict(testX) on every client for every class, repeating the same
# inference len(le.classes_) times.
y_prob = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in range(len(le.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], y_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in range(len(le.classes_)):
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line (chance level)
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# CNN Via VGG16 with Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list[str]: One path per non-directory entry, built by joining
        ``dirName`` with the entry's relative location, in sorted
        depth-first order.

    Entries are visited in sorted order because ``os.listdir`` returns names
    in arbitrary order; a stable order keeps the seeded ``train_test_split``
    and the client sharding later in this cell reproducible across runs.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place instead of rebuilding it on
            # every recursion as ``allFiles = allFiles + ...`` did.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the test / train / val folders of the dataset
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all three folders; the train/test split is redone further down
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
skipped = []  # files OpenCV could not decode

# Load each image, resize it to 224x224 and record its class label
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files. Skip them so cv2.resize does not crash and so
        # `labels` never gets an entry without a matching image.
        skipped.append(image)
        continue
    # The class label is the name of the image's parent directory
    labels.append(os.path.split(os.path.split(image)[0])[1])
    data.append(cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA))  # VGG16 input size
c = len(data)  # number of images actually loaded
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable files")

data = np.array(data)
labels = np.array(labels)

# Encode the string labels as integer class ids
le = LabelEncoder()
labels = le.fit_transform(labels)

# Scale pixel values from [0, 255] to [0, 1]
data = data.astype('float32') / 255.0

# Hold out 30% of the samples for testing (fixed seed)
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance: 'balanced' weights computed from the training
# labels, stored as {class id: weight} for Keras' class_weight argument
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

# Create ImageDataGenerator for data augmentation: random rotations,
# shifts, shear, zoom and horizontal flips applied to training batches
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

def create_vgg16_cnn_model():
    """Build and compile the per-client classifier on a frozen VGG16 base.

    The ImageNet-pretrained VGG16 convolutional stack (no top layers,
    224x224x3 input) is frozen. On top of it come three 3x3 'same'-padded
    ReLU convolutions with 32, 64 and 128 filters (2x2 max pooling after the
    first two), global average pooling, a 128-unit ReLU dense layer and a
    softmax output with one unit per class in ``le.classes_``.

    Returns:
        The model compiled with Adam (default settings), sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # keep the pre-trained weights fixed

    model = models.Sequential()
    model.add(backbone)
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    # No pooling after the last convolution; global average pooling
    # collapses whatever spatial extent is left.
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(len(le.classes_), activation='softmax'))  # multi-class output

    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


# Number of simulated federated clients
num_clients = 4

# Shard the training set into consecutive, near-equal pieces, one per client
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Fit an independent VGG16-backed CNN on augmented batches from every shard
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_vgg16_cnn_model()
    augmented_batches = train_datagen.flow(shard_x, shard_y, batch_size=32)
    model.fit(
        augmented_batches,
        epochs=10,
        class_weight=class_weights_dict,
        verbose=1,
    )
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Combine the client models by averaging their predicted probabilities.

    Args:
        client_models: Iterable of objects exposing ``predict(testX)``.
        testX: Inputs forwarded unchanged to every model's ``predict``.

    Returns:
        Array with, for each sample, the index of the class whose averaged
        probability is highest (assumes each ``predict`` result is a 2-D
        ``(samples, classes)`` array of equal shape).
    """
    per_client = np.array([member.predict(testX) for member in client_models])
    # Axis 0 indexes the clients; averaging over it gives the ensemble scores.
    return per_client.mean(axis=0).argmax(axis=1)

# Get predictions from federated model
predictions = aggregate_models(client_models, testX)

# Score against every encoded class index so the report rows, the confusion
# matrix and the ROC columns stay aligned with le.classes_ even when a class
# is missing from testY or from the predictions.
class_ids = list(range(len(le.classes_)))

# Classification Report
print(classification_report(testY, predictions, labels=class_ids, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated CNN with VGG16 Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix (rows = true class, columns = predicted class, in class_ids order)
conf_matrix = confusion_matrix(testY, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation
y_bin = label_binarize(testY, classes=class_ids)

# Average the clients' predicted probabilities ONCE. Calling model.predict
# inside the per-class loop repeated inference over the whole test set for
# every (class, client) pair.
avg_probs = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute one-vs-rest ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in class_ids:
    probs = avg_probs[:, i]  # ensemble score for class i
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs)
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in class_ids:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# CNN via VGG16 without Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order so the result is identical on
    every run; ``os.listdir`` alone returns entries in arbitrary order, which
    would make any later seeded shuffle of the list irreproducible.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        list[str]: Paths (built with ``os.path.join``) of every non-directory
        entry, depth-first; an empty directory yields an empty list.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``dirName`` cannot be read.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place; ``list + list`` rebuilt the
            # whole accumulated list for every sub-directory.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from different subdirectories
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (a fresh train/test split is drawn below)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded
skipped = []  # Paths OpenCV could not decode

# Load images and preprocess
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread reports an unreadable / non-image file by returning None
        # rather than raising; handing that to cv2.resize would abort the cell.
        skipped.append(image)
        continue

    # The class name is the name of the image's parent directory. It is stored
    # only after a successful load so data and labels stay index-aligned.
    label = os.path.split(os.path.split(image)[0])[1]
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # VGG16 input size
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

if not data:
    raise RuntimeError("No readable images were found in the dataset directories")

# Encode labels
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Normalize image data to [0, 1]. Converting straight to float32 and dividing
# in place avoids holding an extra full-size copy of the dataset in memory.
# NOTE(review): cv2.imread yields BGR channel order and Keras ships a
# model-specific preprocess_input for each pretrained backbone — confirm that
# plain /255 scaling is what the frozen ImageNet weights should receive.
data = np.asarray(data, dtype='float32')
data /= 255.0

# Split dataset into training and testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance by computing class weights
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

# Remove ImageDataGenerator (no augmentation now)
# Create the model function
def create_vgg16_cnn_model():
    """Build and compile a classifier: frozen VGG16 base plus a small CNN head.

    The VGG16 base is loaded with ImageNet weights, without its top layers,
    for 224x224x3 inputs, and is marked non-trainable. The output layer has
    one softmax unit per class of the module-level label encoder ``le``.

    Returns:
        The compiled ``models.Sequential`` (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # keep the pre-trained weights fixed

    # Custom layers stacked on top of the VGG16 feature maps.
    head_layers = [
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        # No pooling after the last convolution (it caused a dimension issue);
        # global average pooling reduces the remaining map to one vector.
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dense(len(le.classes_), activation='softmax'),  # one unit per class
    ]

    classifier = models.Sequential([backbone] + head_layers)
    classifier.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


# Number of simulated clients
num_clients = 4

# Cut the training set into num_clients contiguous shards (images and labels
# are split the same way, so shard k of each belongs together).
client_data, client_labels = (
    np.array_split(arr, num_clients) for arr in (trainX, trainY)
)

# Settings shared by every client's training run
fit_options = dict(batch_size=32, epochs=10, class_weight=class_weights_dict, verbose=1)

# Each client trains its own, independently initialised model on its raw
# (non-augmented) shard; no weights are exchanged during training.
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_vgg16_cnn_model()
    model.fit(shard_x, shard_y, **fit_options)
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Combine the client models by averaging their predicted probabilities.

    Args:
        client_models: Iterable of objects exposing ``predict(testX)``.
        testX: Inputs forwarded unchanged to every model's ``predict``.

    Returns:
        Array with, for each sample, the index of the class whose averaged
        probability is highest (assumes each ``predict`` result is a 2-D
        ``(samples, classes)`` array of equal shape).
    """
    per_client = np.array([member.predict(testX) for member in client_models])
    # Axis 0 indexes the clients; averaging over it gives the ensemble scores.
    return per_client.mean(axis=0).argmax(axis=1)

# Get predictions from federated model
predictions = aggregate_models(client_models, testX)

# Score against every encoded class index so the report rows, the confusion
# matrix and the ROC columns stay aligned with le.classes_ even when a class
# is missing from testY or from the predictions.
class_ids = list(range(len(le.classes_)))

# Classification Report
print(classification_report(testY, predictions, labels=class_ids, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated CNN with VGG16 Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix (rows = true class, columns = predicted class, in class_ids order)
conf_matrix = confusion_matrix(testY, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation
y_bin = label_binarize(testY, classes=class_ids)

# Average the clients' predicted probabilities ONCE. Calling model.predict
# inside the per-class loop repeated inference over the whole test set for
# every (class, client) pair.
avg_probs = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute one-vs-rest ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in class_ids:
    probs = avg_probs[:, i]  # ensemble score for class i
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs)
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in class_ids:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# MobileNet with Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order so the result is identical on
    every run; ``os.listdir`` alone returns entries in arbitrary order, which
    would make any later seeded shuffle of the list irreproducible.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        list[str]: Paths (built with ``os.path.join``) of every non-directory
        entry, depth-first; an empty directory yields an empty list.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``dirName`` cannot be read.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place; ``list + list`` rebuilt the
            # whole accumulated list for every sub-directory.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from different subdirectories
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (a fresh train/test split is drawn below)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded
skipped = []  # Paths OpenCV could not decode

# Load images and preprocess
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread reports an unreadable / non-image file by returning None
        # rather than raising; handing that to cv2.resize would abort the cell.
        skipped.append(image)
        continue

    # The class name is the name of the image's parent directory. It is stored
    # only after a successful load so data and labels stay index-aligned.
    label = os.path.split(os.path.split(image)[0])[1]
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # MobileNet input size
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

if not data:
    raise RuntimeError("No readable images were found in the dataset directories")

# Encode labels
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Normalize image data to [0, 1]. Converting straight to float32 and dividing
# in place avoids holding an extra full-size copy of the dataset in memory.
# NOTE(review): cv2.imread yields BGR channel order and Keras ships a
# model-specific preprocess_input for each pretrained backbone — confirm that
# plain /255 scaling is what the frozen ImageNet weights should receive.
data = np.asarray(data, dtype='float32')
data /= 255.0

# Split dataset into training and testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance by computing class weights
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

# Create ImageDataGenerator for data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Create the model function using MobileNet
def create_mobilenet_model():
    """Build and compile a classifier on top of a frozen MobileNet base.

    The MobileNet base is loaded with ImageNet weights, without its top
    layers, for 224x224x3 inputs, and is marked non-trainable. The output
    layer has one softmax unit per class of the module-level encoder ``le``.

    Returns:
        The compiled ``models.Sequential`` (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    backbone = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # keep the pre-trained weights fixed

    # Classification head: pool the feature maps to a vector, then two dense layers.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dense(len(le.classes_), activation='softmax'),  # one unit per class
    ]

    classifier = models.Sequential([backbone] + head_layers)
    classifier.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


# Number of simulated clients
num_clients = 4

# Cut the training set into num_clients contiguous shards (images and labels
# are split the same way, so shard k of each belongs together).
client_data, client_labels = (
    np.array_split(arr, num_clients) for arr in (trainX, trainY)
)

# Each client trains its own, independently initialised MobileNet model on its
# shard using augmented batches; no weights are exchanged during training.
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_mobilenet_model()
    augmented_batches = train_datagen.flow(shard_x, shard_y, batch_size=32)
    model.fit(
        augmented_batches,
        epochs=10,
        class_weight=class_weights_dict,
        verbose=1,
    )
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Combine the client models by averaging their predicted probabilities.

    Args:
        client_models: Iterable of objects exposing ``predict(testX)``.
        testX: Inputs forwarded unchanged to every model's ``predict``.

    Returns:
        Array with, for each sample, the index of the class whose averaged
        probability is highest (assumes each ``predict`` result is a 2-D
        ``(samples, classes)`` array of equal shape).
    """
    per_client = np.array([member.predict(testX) for member in client_models])
    # Axis 0 indexes the clients; averaging over it gives the ensemble scores.
    return per_client.mean(axis=0).argmax(axis=1)

# Get predictions from federated model
predictions = aggregate_models(client_models, testX)

# Score against every encoded class index so the report rows, the confusion
# matrix and the ROC columns stay aligned with le.classes_ even when a class
# is missing from testY or from the predictions.
class_ids = list(range(len(le.classes_)))

# Classification Report
print(classification_report(testY, predictions, labels=class_ids, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated MobileNet Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix (rows = true class, columns = predicted class, in class_ids order)
conf_matrix = confusion_matrix(testY, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation
y_bin = label_binarize(testY, classes=class_ids)

# Average the clients' predicted probabilities ONCE. Calling model.predict
# inside the per-class loop repeated inference over the whole test set for
# every (class, client) pair.
avg_probs = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute one-vs-rest ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in class_ids:
    probs = avg_probs[:, i]  # ensemble score for class i
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs)
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in class_ids:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# MobileNet without Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order so the result is identical on
    every run; ``os.listdir`` alone returns entries in arbitrary order, which
    would make any later seeded shuffle of the list irreproducible.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        list[str]: Paths (built with ``os.path.join``) of every non-directory
        entry, depth-first; an empty directory yields an empty list.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``dirName`` cannot be read.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place; ``list + list`` rebuilt the
            # whole accumulated list for every sub-directory.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from different subdirectories
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (a fresh train/test split is drawn below)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded
skipped = []  # Paths OpenCV could not decode

# Load images and preprocess
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread reports an unreadable / non-image file by returning None
        # rather than raising; handing that to cv2.resize would abort the cell.
        skipped.append(image)
        continue

    # The class name is the name of the image's parent directory. It is stored
    # only after a successful load so data and labels stay index-aligned.
    label = os.path.split(os.path.split(image)[0])[1]
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # MobileNet input size
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

if not data:
    raise RuntimeError("No readable images were found in the dataset directories")

# Encode labels
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Normalize image data to [0, 1]. Converting straight to float32 and dividing
# in place avoids holding an extra full-size copy of the dataset in memory.
# NOTE(review): cv2.imread yields BGR channel order and Keras ships a
# model-specific preprocess_input for each pretrained backbone — confirm that
# plain /255 scaling is what the frozen ImageNet weights should receive.
data = np.asarray(data, dtype='float32')
data /= 255.0

# Split dataset into training and testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance by computing class weights
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

# Create the model function using MobileNet
def create_mobilenet_model():
    """Build and compile a classifier on top of a frozen MobileNet base.

    The MobileNet base is loaded with ImageNet weights, without its top
    layers, for 224x224x3 inputs, and is marked non-trainable. The output
    layer has one softmax unit per class of the module-level encoder ``le``.

    Returns:
        The compiled ``models.Sequential`` (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    backbone = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # keep the pre-trained weights fixed

    # Classification head: pool the feature maps to a vector, then two dense layers.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dense(len(le.classes_), activation='softmax'),  # one unit per class
    ]

    classifier = models.Sequential([backbone] + head_layers)
    classifier.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


# Number of simulated clients
num_clients = 4

# Cut the training set into num_clients contiguous shards (images and labels
# are split the same way, so shard k of each belongs together).
client_data, client_labels = (
    np.array_split(arr, num_clients) for arr in (trainX, trainY)
)

# Settings shared by every client's training run
fit_options = dict(batch_size=32, epochs=10, class_weight=class_weights_dict, verbose=1)

# Each client trains its own, independently initialised MobileNet model on its
# raw (non-augmented) shard; no weights are exchanged during training.
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_mobilenet_model()
    model.fit(shard_x, shard_y, **fit_options)
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Combine the client models by averaging their predicted probabilities.

    Args:
        client_models: Iterable of objects exposing ``predict(testX)``.
        testX: Inputs forwarded unchanged to every model's ``predict``.

    Returns:
        Array with, for each sample, the index of the class whose averaged
        probability is highest (assumes each ``predict`` result is a 2-D
        ``(samples, classes)`` array of equal shape).
    """
    per_client = np.array([member.predict(testX) for member in client_models])
    # Axis 0 indexes the clients; averaging over it gives the ensemble scores.
    return per_client.mean(axis=0).argmax(axis=1)

# Get predictions from federated model
predictions = aggregate_models(client_models, testX)

# Score against every encoded class index so the report rows, the confusion
# matrix and the ROC columns stay aligned with le.classes_ even when a class
# is missing from testY or from the predictions.
class_ids = list(range(len(le.classes_)))

# Classification Report
print(classification_report(testY, predictions, labels=class_ids, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated MobileNet Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix (rows = true class, columns = predicted class, in class_ids order)
conf_matrix = confusion_matrix(testY, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation
y_bin = label_binarize(testY, classes=class_ids)

# Average the clients' predicted probabilities ONCE. Calling model.predict
# inside the per-class loop repeated inference over the whole test set for
# every (class, client) pair.
avg_probs = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute one-vs-rest ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in class_ids:
    probs = avg_probs[:, i]  # ensemble score for class i
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs)
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in class_ids:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# ResNet50 with Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order so the result is identical on
    every run; ``os.listdir`` alone returns entries in arbitrary order, which
    would make any later seeded shuffle of the list irreproducible.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        list[str]: Paths (built with ``os.path.join``) of every non-directory
        entry, depth-first; an empty directory yields an empty list.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``dirName`` cannot be read.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place; ``list + list`` rebuilt the
            # whole accumulated list for every sub-directory.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from different subdirectories
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (a fresh train/test split is drawn below)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded
skipped = []  # Paths OpenCV could not decode

# Load images and preprocess
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread reports an unreadable / non-image file by returning None
        # rather than raising; handing that to cv2.resize would abort the cell.
        skipped.append(image)
        continue

    # The class name is the name of the image's parent directory. It is stored
    # only after a successful load so data and labels stay index-aligned.
    label = os.path.split(os.path.split(image)[0])[1]
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # ResNet50 input size
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

if not data:
    raise RuntimeError("No readable images were found in the dataset directories")

# Encode labels
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Normalize image data to [0, 1]. Converting straight to float32 and dividing
# in place avoids holding an extra full-size copy of the dataset in memory.
# NOTE(review): cv2.imread yields BGR channel order and Keras ships a
# model-specific preprocess_input for each pretrained backbone — confirm that
# plain /255 scaling is what the frozen ImageNet weights should receive.
data = np.asarray(data, dtype='float32')
data /= 255.0

# Split dataset into training and testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance by computing class weights
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

# Create ImageDataGenerator for data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Create the model function using ResNet50
def create_resnet_model():
    """Build and compile a classifier on top of a frozen ResNet50 base.

    The ResNet50 base is loaded with ImageNet weights, without its top
    layers, for 224x224x3 inputs, and is marked non-trainable. The output
    layer has one softmax unit per class of the module-level encoder ``le``.

    Returns:
        The compiled ``models.Sequential`` (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # keep the pre-trained weights fixed

    # Classification head: pool the feature maps to a vector, then two dense layers.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dense(len(le.classes_), activation='softmax'),  # one unit per class
    ]

    classifier = models.Sequential([backbone] + head_layers)
    classifier.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Number of simulated clients
num_clients = 4

# Cut the training set into num_clients contiguous shards (images and labels
# are split the same way, so shard k of each belongs together).
client_data, client_labels = (
    np.array_split(arr, num_clients) for arr in (trainX, trainY)
)

# Each client trains its own, independently initialised ResNet50 model on its
# shard using augmented batches; no weights are exchanged during training.
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_resnet_model()
    augmented_batches = train_datagen.flow(shard_x, shard_y, batch_size=32)
    model.fit(
        augmented_batches,
        epochs=10,
        class_weight=class_weights_dict,
        verbose=1,
    )
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Combine the client models by averaging their predicted probabilities.

    Args:
        client_models: Iterable of objects exposing ``predict(testX)``.
        testX: Inputs forwarded unchanged to every model's ``predict``.

    Returns:
        Array with, for each sample, the index of the class whose averaged
        probability is highest (assumes each ``predict`` result is a 2-D
        ``(samples, classes)`` array of equal shape).
    """
    per_client = np.array([member.predict(testX) for member in client_models])
    # Axis 0 indexes the clients; averaging over it gives the ensemble scores.
    return per_client.mean(axis=0).argmax(axis=1)

# Get predictions from federated model
predictions = aggregate_models(client_models, testX)

# Score against every encoded class index so the report rows, the confusion
# matrix and the ROC columns stay aligned with le.classes_ even when a class
# is missing from testY or from the predictions.
class_ids = list(range(len(le.classes_)))

# Classification Report
print(classification_report(testY, predictions, labels=class_ids, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated ResNet50 Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix (rows = true class, columns = predicted class, in class_ids order)
conf_matrix = confusion_matrix(testY, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation
y_bin = label_binarize(testY, classes=class_ids)

# Average the clients' predicted probabilities ONCE. Calling model.predict
# inside the per-class loop repeated inference over the whole test set for
# every (class, client) pair.
avg_probs = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute one-vs-rest ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in class_ids:
    probs = avg_probs[:, i]  # ensemble score for class i
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs)
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in class_ids:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Diagonal reference line, then axis limits and labels
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()

# ResNet50 without Data Augmentation - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted name order so the result is identical on
    every run; ``os.listdir`` alone returns entries in arbitrary order, which
    would make any later seeded shuffle of the list irreproducible.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        list[str]: Paths (built with ``os.path.join``) of every non-directory
        entry, depth-first; an empty directory yields an empty list.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``dirName`` cannot be read.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place; ``list + list`` rebuilt the
            # whole accumulated list for every sub-directory.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from different subdirectories
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Combine all image paths into a single list (a fresh train/test split is drawn below)
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded
skipped = []  # Paths OpenCV could not decode

# Load images and preprocess
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread reports an unreadable / non-image file by returning None
        # rather than raising; handing that to cv2.resize would abort the cell.
        skipped.append(image)
        continue

    # The class name is the name of the image's parent directory. It is stored
    # only after a successful load so data and labels stay index-aligned.
    label = os.path.split(os.path.split(image)[0])[1]
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # ResNet50 input size
    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")
if skipped:
    print(f"Skipped {len(skipped)} unreadable file(s), e.g. {skipped[0]}")

if not data:
    raise RuntimeError("No readable images were found in the dataset directories")

# Encode labels
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Normalize image data to [0, 1]. Converting straight to float32 and dividing
# in place avoids holding an extra full-size copy of the dataset in memory.
# NOTE(review): cv2.imread yields BGR channel order and Keras ships a
# model-specific preprocess_input for each pretrained backbone — confirm that
# plain /255 scaling is what the frozen ImageNet weights should receive.
data = np.asarray(data, dtype='float32')
data /= 255.0

# Split dataset into training and testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance by computing class weights
class_weights = compute_class_weight('balanced', classes=np.unique(trainY), y=trainY)
class_weights_dict = dict(zip(np.unique(trainY), class_weights))

def create_resnet50_model():
    """Build and compile a classifier on top of a frozen ResNet50 base.

    The ResNet50 base is loaded with ImageNet weights, without its top
    layers, for 224x224x3 inputs, and is marked non-trainable. The output
    layer has one softmax unit per class of the module-level encoder ``le``.

    Returns:
        The compiled ``models.Sequential`` (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # keep the pre-trained weights fixed

    # Classification head: pool the feature maps to a vector, then two dense layers.
    head_layers = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dense(len(le.classes_), activation='softmax'),  # one unit per class
    ]

    classifier = models.Sequential([backbone] + head_layers)
    classifier.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


# Number of simulated clients
num_clients = 4

# Cut the training set into num_clients contiguous shards (images and labels
# are split the same way, so shard k of each belongs together).
client_data, client_labels = (
    np.array_split(arr, num_clients) for arr in (trainX, trainY)
)

# Settings shared by every client's training run
fit_options = dict(batch_size=32, epochs=10, class_weight=class_weights_dict, verbose=1)

# Each client trains its own, independently initialised ResNet50 model on its
# raw (non-augmented) shard; no weights are exchanged during training.
client_models = []
for i, (shard_x, shard_y) in enumerate(zip(client_data, client_labels)):
    model = create_resnet50_model()
    model.fit(shard_x, shard_y, **fit_options)
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Combine the client models by averaging their predicted probabilities.

    Args:
        client_models: Iterable of objects exposing ``predict(testX)``.
        testX: Inputs forwarded unchanged to every model's ``predict``.

    Returns:
        Array with, for each sample, the index of the class whose averaged
        probability is highest (assumes each ``predict`` result is a 2-D
        ``(samples, classes)`` array of equal shape).
    """
    per_client = np.array([member.predict(testX) for member in client_models])
    # Axis 0 indexes the clients; averaging over it gives the ensemble scores.
    return per_client.mean(axis=0).argmax(axis=1)

# Get predictions from federated model
predictions = aggregate_models(client_models, testX)

# Score against every encoded class index so the report rows, the confusion
# matrix and the ROC columns stay aligned with le.classes_ even when a class
# is missing from testY or from the predictions.
class_ids = list(range(len(le.classes_)))

# Classification Report
print(classification_report(testY, predictions, labels=class_ids, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated ResNet50 Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix (rows = true class, columns = predicted class, in class_ids order)
conf_matrix = confusion_matrix(testY, predictions, labels=class_ids)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation
y_bin = label_binarize(testY, classes=class_ids)

# Average the clients' predicted probabilities ONCE. Calling model.predict
# inside the per-class loop repeated inference over the whole test set for
# every (class, client) pair.
avg_probs = np.mean([model.predict(testX) for model in client_models], axis=0)

# Compute one-vs-rest ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in class_ids:
    probs = avg_probs[:, i]  # ensemble score for class i
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], probs)
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in class_ids:
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# Challenges while working with GoogleNet (Inception V3)

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import label_binarize

# Configure GPU visibility and on-demand memory allocation.
# NOTE: TensorFlow is already imported at the top of this cell, so this runs
# after the import. The RuntimeError handler below covers the case where
# memory growth can no longer be changed — NOTE(review): presumably because
# the GPUs were already initialised; confirm against the TensorFlow GPU guide.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Optional: specify GPU if multiple GPUs are available
gpus = tf.config.list_physical_devices('GPU')
try:
    # With no GPU present the list is empty and the loop body never runs.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
        # Optional: set memory limit if needed
        # tf.config.set_logical_device_configuration(gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=4096)])
except RuntimeError as e:
    print("Error setting GPU memory growth:", e)

# Build the model function
def build_model(input_shape=(224, 224, 3), num_classes=10):
    """Build and compile a small two-block convolutional classifier.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, accuracy metric).
        The loss is ``categorical_crossentropy``, so targets are expected to
        be one-hot encoded.
    """
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(stack)
    network.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return network

# Simulated data loading function (replace with actual data loading)
def load_data(n_train=2310, n_test=500, image_shape=(224, 224, 3), num_classes=10, seed=None):
    """Generate synthetic image data as a stand-in for a real dataset.

    Called with no arguments this behaves as before: 2310 training and 500 test
    "images" of shape (224, 224, 3) with uniform values in [0, 1) and integer
    labels in [0, 10), drawn from NumPy's global random state.

    Args:
        n_train: Number of training samples.
        n_test: Number of test samples.
        image_shape: Shape of a single image (height, width, channels).
        num_classes: Labels are drawn uniformly from ``range(num_classes)``.
        seed: If given, a private ``RandomState(seed)`` is used so repeated calls
            return identical data. If None, the global ``np.random`` state is used.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays.
    """
    # np.random (module) and RandomState expose the same sampling methods, so the
    # unseeded path draws from exactly the same stream as the original code.
    rng = np.random if seed is None else np.random.RandomState(seed)
    image_shape = tuple(image_shape)

    X_train = rng.random_sample((n_train,) + image_shape)  # Random images (replace with real images)
    y_train = rng.randint(0, num_classes, n_train)  # Random labels (replace with real labels)
    X_test = rng.random_sample((n_test,) + image_shape)  # Random test data
    y_test = rng.randint(0, num_classes, n_test)  # Random test labels
    return X_train, y_train, X_test, y_test

# End-to-end demo on the first GPU: synthetic data -> train -> evaluate -> ROC plot.
# The label count is stated once here and reused for one-hot encoding, the model
# head and the ROC loop instead of repeating the literal 10.
num_classes = 10
with tf.device('/GPU:0'):  # Use the first GPU
    # load_data() returns randomly generated images and labels
    X_train, y_train, X_test, y_test = load_data()

    # One-hot encode the labels (build_model compiles with categorical_crossentropy)
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

    # Initialize the model
    model = build_model(num_classes=num_classes)

    # Train the model
    model.fit(X_train, y_train, epochs=5, batch_size=32)

    # Evaluate the model on test data
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f"Test Loss: {test_loss}, Test Accuracy: {test_acc}")

    # Per-class probabilities for every test sample
    predictions = model.predict(X_test)

    # Collapse probabilities / one-hot rows back to integer class ids
    y_pred = np.argmax(predictions, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

    # Accuracy
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    # Confusion Matrix
    conf_matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)

    # ROC Curve & AUC Calculation (one-vs-rest, one curve per class)
    y_bin = label_binarize(y_true, classes=range(num_classes))  # Binarize the labels
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], predictions[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Plot ROC Curve for each class
    plt.figure(figsize=(10, 8))

    for i in range(num_classes):
        plt.plot(fpr[i], tpr[i], label=f'Class {i} (AUC = {roc_auc[i]:.2f})')

    # Plot diagonal reference line
    plt.plot([0, 1], [0, 1], 'k--')

    # Set plot limits and labels
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate (FPR)')
    plt.ylabel('True Positive Rate (TPR)')
    plt.title('ROC Curves for Each Class')
    plt.legend(loc="lower right")
    plt.show()


In [None]:
# Request on-demand GPU memory allocation for every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # An empty device list makes this loop a no-op (CPU-only machines).
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # NOTE(review): presumably raised when the GPUs were already initialised by an
    # earlier cell; the message is printed and execution continues.
    print(err)


# Inception V3 (GoogleNet)

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, confusion_matrix
import seaborn as sns  # For heatmap visualization
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Directory entries are visited in sorted order. ``os.listdir`` returns entries
    in arbitrary, filesystem-dependent order, so sorting makes the result (and
    any later seeded shuffle or split of it) reproducible across runs and machines.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        List of file paths (each joined onto ``dirName``); the files of a
        sub-directory appear at that sub-directory's position in the sorted listing.

    Raises:
        OSError: Propagated from ``os.listdir`` if a directory cannot be read.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() appends in place; `allFiles = allFiles + ...` re-copied the list each time
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's test / train / val folders
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all image paths into a single list; the data is re-split further below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Load images and preprocess
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # (no exception); skip it so cv2.resize does not crash and so that
        # `data` and `labels` stay aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # OpenCV loads channels as BGR; the ImageNet-pretrained backbone used below
    # was trained on RGB images.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Resize to the 224x224 input shape that create_inceptionv3_model uses by default
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)

    # The class label is the name of the folder that directly contains the image
    label = os.path.split(os.path.split(image)[0])[1]

    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")

# Encode labels
data = np.array(data)
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Normalize image data to [0, 1]
# NOTE(review): Keras' inception_v3.preprocess_input scales pixels to [-1, 1];
# the /255 scaling is kept here to preserve existing behaviour -- confirm which is intended.
data = data.astype('float32') / 255.0

# Split dataset into training and testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Handle class imbalance by computing class weights (class id -> weight)
train_classes = np.unique(trainY)
class_weights = compute_class_weight('balanced', classes=train_classes, y=trainY)
class_weights_dict = dict(zip(train_classes, class_weights))

# Define the InceptionV3 model with fine-tuning
def create_inceptionv3_model(input_shape=(224, 224, 3), num_classes=10):
    """Build and compile an InceptionV3-based classifier with a frozen backbone.

    The ImageNet-pretrained InceptionV3 (without its top classifier) is used as a
    fixed feature extractor: every backbone layer is marked non-trainable, so
    only the new pooling/dense head is updated during training.

    Args:
        input_shape: Shape of one input image passed to InceptionV3.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras Model compiled with Adam, sparse categorical cross-entropy
        (integer labels) and an accuracy metric.
    """
    backbone = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze every pretrained layer before compiling
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False

    # Classification head: global average pool -> 1024 ReLU units -> softmax
    pooled = layers.GlobalAveragePooling2D()(backbone.output)
    hidden = layers.Dense(1024, activation='relu')(pooled)
    class_probs = layers.Dense(num_classes, activation='softmax')(hidden)

    classifier = models.Model(inputs=backbone.input, outputs=class_probs)
    classifier.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return classifier

# Number of clients for federated learning
num_clients = 4

# Split the training data into num_clients contiguous, near-equal shards
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Size the softmax head from the encoded labels instead of relying on the
# builder's default of 10, so the model output width matches le.classes_,
# which the evaluation code uses for target names and ROC curves.
num_classes = len(le.classes_)

# Train one local InceptionV3 model per client on that client's shard
client_models = []
for i in range(num_clients):
    model = create_inceptionv3_model(num_classes=num_classes)
    model.fit(client_data[i], client_labels[i], epochs=10, batch_size=32, class_weight=class_weights_dict, verbose=1)
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Federated Aggregation: Combine client models into a global model
def aggregate_models(client_models, testX):
    """Combine client models by averaging their predicted class probabilities.

    Args:
        client_models: Iterable of models exposing ``predict(testX)`` that returns
            an array of per-class scores with one row per sample.
        testX: Input samples forwarded unchanged to each model's ``predict``.

    Returns:
        1-D array with, for each sample, the index of the class whose mean
        score across all client models is highest.
    """
    per_client_probs = [client.predict(testX) for client in client_models]
    # Mean over the client axis (axis 0) leaves a (samples, classes) array
    ensemble_probs = np.asarray(per_client_probs).mean(axis=0)
    return np.argmax(ensemble_probs, axis=1)

# Get predicted class ids from the ensemble of client models
predictions = aggregate_models(client_models, testX)

# Classification Report
print(classification_report(testY, predictions, target_names=le.classes_))

# Accuracy
accuracy = accuracy_score(testY, predictions)
print(f"Federated InceptionV3 Accuracy: {accuracy * 100:.2f}%")

# Confusion Matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation (one-vs-rest)
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Average the client probabilities ONCE. Previously every client model re-ran
# predict() on the whole test set for each class inside the loop below.
avg_probs = np.mean([client.predict(testX) for client in client_models], axis=0)

# Compute ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in range(len(le.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], avg_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in range(len(le.classes_)):
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()


# DenseNet - Federated Learning

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_curve, auc
import seaborn as sns  # For heatmap visualization
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Function to get a list of all files in a directory and its subdirectories
def getListOfFiles(dirName):
    """Recursively collect the paths of all files under ``dirName``.

    Entries are visited in sorted order because ``os.listdir`` returns them in
    arbitrary, filesystem-dependent order; sorting keeps the result (and any
    later seeded split of it) reproducible.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        List of file paths (each joined onto ``dirName``); a sub-directory's
        files appear at that sub-directory's position in the sorted listing.

    Raises:
        OSError: Propagated from ``os.listdir`` if a directory cannot be read.
    """
    allFiles = []
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # In-place extend avoids re-copying the accumulated list on every directory
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

# Fetch image paths from the dataset's test / train / val folders
imagePaths_fp = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/test")
imagePaths_fl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/train")
imagePaths_dl = getListOfFiles("/kaggle/input/cotton-disease-dataset/Cotton Disease/val")

# Pool all image paths into a single list; the data is re-split further below
imagePaths = imagePaths_dl + imagePaths_fl + imagePaths_fp

data = []
labels = []
c = 0  # Number of images successfully loaded

# Load images and preprocess
for image in imagePaths:
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None (no exception) for unreadable / non-image files;
        # skip them so cv2.resize does not crash and data/labels stay aligned.
        print(f"Skipping unreadable file: {image}")
        continue

    # OpenCV loads channels as BGR; the ImageNet-pretrained DenseNet expects RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA)  # Resize to 224x224 for DenseNet

    # The class label is the name of the folder that directly contains the image
    label = os.path.split(os.path.split(image)[0])[1]

    data.append(img)
    labels.append(label)
    c += 1
print(f"Processed {c} images")

# Encode labels
data = np.array(data)
labels = np.array(labels)

le = LabelEncoder()
labels = le.fit_transform(labels)

# Normalize image data to [0, 1]
# NOTE(review): Keras' densenet.preprocess_input additionally applies ImageNet
# mean/std normalisation; plain /255 is kept to preserve existing behaviour -- confirm.
data = data.astype('float32') / 255.0

# Split dataset into training and testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.30, random_state=42)

# Number of clients for federated learning
num_clients = 4

# Split the training data into num_clients contiguous, near-equal shards
client_data = np.array_split(trainX, num_clients)
client_labels = np.array_split(trainY, num_clients)

# Build DenseNet model function
def build_densenet_model():
    """Build and compile a DenseNet121-based classifier for 224x224 RGB images.

    The ImageNet-pretrained DenseNet121 (without its top classifier) feeds a
    global-average-pool -> Dense(512, ReLU) -> softmax head. The number of
    output classes is read from the module-level ``trainY`` array.

    Returns:
        A Keras Model compiled with Adam, sparse categorical cross-entropy
        (integer labels) and an accuracy metric.
    """
    n_classes = len(np.unique(trainY))  # Number of classes

    image_input = Input(shape=(224, 224, 3))  # DenseNet input shape
    backbone = DenseNet121(weights='imagenet', include_top=False, input_tensor=image_input)

    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(512, activation='relu')(features)
    class_probs = Dense(n_classes, activation='softmax')(features)

    densenet = Model(inputs=image_input, outputs=class_probs)
    densenet.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return densenet

# Federated Learning: train one local DenseNet per client on that client's shard.
# Every client starts from the SAME initial weights. Each build_densenet_model()
# call creates freshly (randomly) initialised dense-head layers; if clients
# trained from different initialisations, the coordinate-wise weight averaging
# performed afterwards would mix unrelated hidden units.
initial_weights = build_densenet_model().get_weights()

client_models = []
for i in range(num_clients):
    model = build_densenet_model()
    model.set_weights(initial_weights)  # broadcast the shared starting point
    model.fit(client_data[i], client_labels[i], epochs=10, validation_split=0.1, batch_size=32, verbose=1)
    client_models.append(model)
    print(f"Client {i+1} trained.")

# Aggregation function: Averaging weights of client models
def aggregate_models(client_models, model_builder=None):
    """Average the weights of the client models into a new global model.

    Args:
        client_models: Non-empty sequence of models with identical architecture,
            each exposing ``get_weights()`` (a list of arrays).
        model_builder: Zero-argument callable returning a fresh model with the
            same architecture and a ``set_weights`` method. Defaults to
            ``build_densenet_model``.

    Returns:
        The model returned by ``model_builder()`` after its weights were set to
        the element-wise mean of the clients' weights.

    Raises:
        ValueError: If ``client_models`` is empty or the models do not hold the
            same number of weight arrays.
    """
    if not client_models:
        raise ValueError("aggregate_models requires at least one client model")
    if model_builder is None:
        model_builder = build_densenet_model

    # Fetch each model's weights exactly once: get_weights() materialises every
    # weight array, so calling it per weight index (as before) repeated that
    # full copy hundreds of times for a network the size of DenseNet121.
    all_client_weights = [model.get_weights() for model in client_models]

    n_arrays = len(all_client_weights[0])
    if any(len(weights) != n_arrays for weights in all_client_weights):
        raise ValueError("client models do not share the same architecture")

    # zip(*) groups the i-th weight array of every client; average across clients
    aggregated_weights = [
        np.mean(np.array(same_position), axis=0)
        for same_position in zip(*all_client_weights)
    ]

    # Create a new model with the same architecture and load the averaged weights
    global_model = model_builder()
    global_model.set_weights(aggregated_weights)

    return global_model

# Aggregate the weights from the client models into a global model
global_model = aggregate_models(client_models)

# Evaluate the global model on the test set
test_loss, test_acc = global_model.evaluate(testX, testY)
print(f"Accuracy of DenseNet model is: {test_acc * 100:.2f}%")

# Run inference ONCE and reuse the probabilities for the report and every ROC
# curve. Previously predict() was re-run on the full test set for each class.
test_probs = global_model.predict(testX)

# Classification Report
predictions = np.argmax(test_probs, axis=1)
print(classification_report(testY, predictions, target_names=le.classes_))

# Confusion Matrix
conf_matrix = confusion_matrix(testY, predictions)
print("Confusion Matrix:")
print(conf_matrix)

# ROC Curve & AUC Calculation (one-vs-rest)
y_bin = label_binarize(testY, classes=list(range(len(le.classes_))))

# Compute ROC for each class
tpr = dict()
fpr = dict()
roc_auc = dict()

for i in range(len(le.classes_)):
    fpr[i], tpr[i], _ = roc_curve(y_bin[:, i], test_probs[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot ROC Curve for each class
plt.figure(figsize=(6, 6))

for i in range(len(le.classes_)):
    plt.plot(fpr[i], tpr[i], label=f'Class {le.classes_[i]} (AUC = {roc_auc[i]:.2f})')

# Plot diagonal reference line
plt.plot([0, 1], [0, 1], 'k--')

# Set plot limits and labels
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('ROC Curves for Each Class')
plt.legend(loc="lower right")
plt.show()
