In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Efficient-Net-B4 - 96% accuracy**

In [None]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch.backends.cudnn as cudnn
import torch.nn.functional as F

# Enable the cuDNN benchmark flag for the rest of the session.
# NOTE(review): this is generally only helpful when input shapes stay constant
# between iterations - confirm that holds for the loaders used here.
cudnn.benchmark = True

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------------------------
# 1. Dataset Directory Setup
# ---------------------------
# Root folder that is scanned for class sub-folders by get_image_paths_and_labels.
data_dir = "/kaggle/input/cotton-leaf/Enhanced-cotton-leaf-disease"

# ---------------------------
# 2. Image & Label Extraction
# ---------------------------
def get_image_paths_and_labels(root_dir):
    """Collect image paths and integer class labels from a class-per-folder tree.

    Every sub-directory of ``root_dir`` is treated as one class. Classes are
    numbered by their position in the alphabetically sorted list of
    sub-directory names.

    Args:
        root_dir: Directory whose immediate sub-directories each hold the
            images of one class.

    Returns:
        A tuple ``(image_paths, labels, class_names)`` where ``image_paths``
        and ``labels`` are parallel lists and ``class_names[label]`` is the
        folder name of that label.
    """
    image_exts = ('.png', '.jpg', '.jpeg')

    # Only directories can be classes; stray files next to the class folders
    # (README, metadata, ...) would otherwise make os.listdir() fail below.
    class_names = sorted(
        entry for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
    )

    image_paths, labels = [], []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(root_dir, class_name)
        # os.listdir() order is arbitrary; sorting keeps the sample order (and
        # therefore any seeded train/validation split) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            # Compare case-insensitively so files such as "leaf.JPG" are kept.
            if file_name.lower().endswith(image_exts):
                image_paths.append(os.path.join(class_dir, file_name))
                labels.append(label)
    return image_paths, labels, class_names

# Gather every image path with its integer class label, then hold out 20% of
# the samples for validation. The split is stratified on the labels and uses a
# fixed random_state.
image_paths, labels, classes = get_image_paths_and_labels(data_dir)
train_images, val_images, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, stratify=labels, random_state=42)

# ---------------------------
# 3. Data Augmentations
# ---------------------------
IMG_SIZE = 380


def _to_model_input():
    """Return fresh normalize + tensor-conversion steps shared by both pipelines."""
    return [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]


# Random augmentations applied to training images only, after resizing.
_train_augmentations = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.2),
    A.RandomBrightnessContrast(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=30, p=0.5),
    A.GridDistortion(p=0.3),
    A.CLAHE(p=0.2),
    A.CoarseDropout(max_holes=1, max_height=30, max_width=30, p=0.5),
]

# Training pipeline: resize -> random augmentations -> normalize -> tensor.
train_transform = A.Compose(
    [A.Resize(IMG_SIZE, IMG_SIZE), *_train_augmentations, *_to_model_input()]
)

# Evaluation pipeline: resize -> normalize -> tensor (no randomness).
test_transform = A.Compose(
    [A.Resize(IMG_SIZE, IMG_SIZE), *_to_model_input()]
)

# ---------------------------
# 4. Dataset Class
# ---------------------------
class CustomDataset(Dataset):
    """Dataset that loads images from disk with OpenCV and applies a transform.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, indexed in parallel with ``image_paths``.
        transform: Callable invoked as ``transform(image=...)`` that returns a
            mapping with an ``"image"`` entry (as the albumentations pipelines
            in this file do).
    """

    def __init__(self, image_paths, labels, transform):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(transformed_image, label)``.

        Raises:
            OSError: If the image file cannot be read or decoded.
        """
        path = self.image_paths[idx]
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise OSError(f"Failed to read image: {path}")
        # OpenCV decodes to BGR channel order; convert to RGB before transforming.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = self.labels[idx]
        image = self.transform(image=image)["image"]
        return image, label

# ---------------------------
# 5. Weighted Sampler
# ---------------------------
def get_weighted_sampler(labels):
    """Build a sampler that draws each sample with weight 1 / (its class count).

    Args:
        labels: Sequence of non-negative integer class labels, one per sample.

    Returns:
        A ``WeightedRandomSampler`` that draws ``len(labels)`` indices with
        replacement.
    """
    # Clamp counts to at least 1 so label ids that never occur do not divide by zero.
    occurrences = np.maximum(np.bincount(labels), 1)
    inverse_frequency = 1. / occurrences
    sample_weights = list(map(inverse_frequency.__getitem__, labels))
    return WeightedRandomSampler(
        sample_weights, num_samples=len(sample_weights), replacement=True
    )

# ---------------------------
# 6. Model Definition
# ---------------------------
def get_model(num_classes):
    """Create an ImageNet-pretrained EfficientNet-B4 with a new classifier head.

    The stock classifier is replaced by two hidden blocks
    (Linear -> ReLU -> LayerNorm -> Dropout) of widths 1024 and 512, followed
    by a final Linear layer producing ``num_classes`` outputs.

    Args:
        num_classes: Number of output classes.

    Returns:
        The model, moved to the module-level ``device``.
    """
    net = efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1)

    # Width of the features feeding the original classifier's Linear layer.
    width_in = net.classifier[1].in_features

    head = []
    for width_out, drop_p in ((1024, 0.4), (512, 0.3)):
        head += [
            nn.Linear(width_in, width_out),
            nn.ReLU(),
            nn.LayerNorm(width_out),
            nn.Dropout(drop_p),
        ]
        width_in = width_out
    head.append(nn.Linear(width_in, num_classes))

    net.classifier = nn.Sequential(*head)
    return net.to(device)

# ---------------------------
# 7. Label Smoothing Loss
# ---------------------------
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against label-smoothed one-hot targets.

    Each one-hot target is scaled by ``1 - smoothing`` and ``smoothing /
    n_classes`` is added to every class before the cross-entropy is taken.
    """

    def __init__(self, smoothing=0.05):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, preds, targets):
        """Return the mean smoothed cross-entropy over the batch.

        Args:
            preds: Logits; the class count is read from ``preds.size(1)``.
            targets: Integer class indices accepted by ``F.one_hot``.
        """
        num_classes = preds.size(1)
        eps = self.smoothing

        soft_targets = F.one_hot(targets, num_classes).float()
        soft_targets = soft_targets * (1 - eps) + eps / num_classes

        log_probs = F.log_softmax(preds, dim=-1)
        per_sample = torch.sum(-soft_targets * log_probs, dim=-1)
        return torch.mean(per_sample)

# ---------------------------
# 8. Optimizer and Scheduler
# ---------------------------
def get_optimizer(model, num_epochs=70):
    """Create the AdamW optimizer and its cosine-annealing LR scheduler.

    Args:
        model: Module whose parameters are optimized.
        num_epochs: Value passed as ``T_max`` to the cosine schedule.

    Returns:
        A tuple ``(optimizer, scheduler)``.
    """
    adamw = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)
    cosine = optim.lr_scheduler.CosineAnnealingLR(adamw, T_max=num_epochs)
    return adamw, cosine

# ---------------------------
# 9. Training Function
# ---------------------------
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=70, patience=10,
                checkpoint_path="/kaggle/working/best_model.pth"):
    """Train ``model`` with mixed precision, checkpointing and early stopping.

    After every epoch the model is scored with ``evaluate_model`` on
    ``val_loader``. Whenever validation accuracy improves, the weights are
    saved to ``checkpoint_path``. Training stops early once ``patience``
    consecutive epochs bring no improvement.

    Args:
        model: Network to train; expected to already live on ``device``.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Loader handed to ``evaluate_model`` after each epoch.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: LR scheduler, stepped once per epoch.
        num_epochs: Maximum number of epochs.
        patience: Number of non-improving epochs tolerated before stopping.
        checkpoint_path: File the best weights are written to.

    Returns:
        A tuple ``(model, history)``. ``model`` carries the best checkpoint's
        weights if one was written during this call. ``history`` has the
        per-epoch lists ``"train_loss"`` and ``"val_acc"``.
    """
    # Mixed precision is only meaningful on CUDA; on the CPU fallback both the
    # scaler and autocast are disabled and training runs in full precision.
    use_amp = device.type == "cuda"
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
    history = {"train_loss": [], "val_acc": []}
    best_acc, counter = 0, 0
    checkpoint_saved = False

    for epoch in range(num_epochs):
        model.train()
        running_loss, total = 0.0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Weight the batch-mean loss by batch size to get a per-sample average.
            running_loss += loss.item() * images.size(0)
            total += labels.size(0)

        scheduler.step()
        val_acc = evaluate_model(model, val_loader, show_cm=False)
        # Guard against an empty loader (e.g. drop_last on a tiny dataset).
        train_loss = running_loss / max(total, 1)
        history["train_loss"].append(train_loss)
        history["val_acc"].append(val_acc)

        # Report before the early-stopping check so the final epoch is logged too.
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered!")
                break

    # Hand back the best-scoring weights rather than whatever the last epoch
    # produced; only reload a checkpoint that this call itself wrote.
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    return model, history

# ---------------------------
# 10. Evaluation Function
# ---------------------------
def evaluate_model(model, loader, show_cm=True):
    """Compute top-1 accuracy of ``model`` on ``loader``, optionally plotting a confusion matrix.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        show_cm: When true, draw a confusion-matrix heatmap labelled with the
            module-level ``classes``.

    Returns:
        Accuracy as a percentage.
    """
    model.eval()
    predicted, actual = [], []

    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_images)
            predicted.extend(logits.argmax(dim=1).cpu().numpy())
            actual.extend(batch_labels.cpu().numpy())

    hits = np.array(predicted) == np.array(actual)
    accuracy = 100 * np.mean(hits)

    if not show_cm:
        return accuracy

    # Rows are the true classes, columns the predicted ones.
    matrix = confusion_matrix(actual, predicted)
    plt.figure(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.show()

    return accuracy

# ---------------------------
# 11. Main Training Script
# ---------------------------
if __name__ == "__main__":
    # Datasets: augmented pipeline for training, deterministic one for validation.
    train_dataset = CustomDataset(train_images, train_labels, train_transform)
    val_dataset = CustomDataset(val_images, val_labels, test_transform)

    # The training loader draws indices from the class-weighted sampler.
    train_loader = DataLoader(
        train_dataset,
        batch_size=16,
        sampler=get_weighted_sampler(train_labels),
        num_workers=2,
        drop_last=True,
    )
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2)

    # Build, train and evaluate.
    model = get_model(len(classes))
    optimizer, scheduler = get_optimizer(model)
    model, history = train_model(
        model, train_loader, val_loader, LabelSmoothingLoss(), optimizer, scheduler
    )

    print("\nFinal Evaluation on Validation Set:")
    final_acc = evaluate_model(model, val_loader, show_cm=True)
    print(f"Final Validation Accuracy: {final_acc:.2f}%")

    # One panel per recorded metric: (history key, legend label, color, y label, title).
    panels = (
        ("train_loss", "Train Loss", "blue", "Loss", "Training Loss over Epochs"),
        ("val_acc", "Validation Accuracy", "green", "Accuracy (%)", "Validation Accuracy over Epochs"),
    )

    plt.figure(figsize=(12, 5))
    for position, (key, legend_label, color, y_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history[key], label=legend_label, color=color, marker='o')
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:


# ✅ Step 2: Set dataset path
# Root directory passed to flow_from_directory for both the training and the
# validation subset.
dataset_path = '/kaggle/input/cotton-leaf/Enhanced-cotton-leaf-disease'

# ✅ Step 3: Import Libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# ✅ Step 4: Prepare Data Generators with stronger augmentation
IMAGE_SIZE = 256
BATCH_SIZE = 16

# Training generator: rescale to [0, 1] and apply random augmentation.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=40,
    zoom_range=0.4,
    shear_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation generator: rescale only. Drawing the validation subset from the
# augmenting generator would randomly rotate/zoom/shift the validation images,
# making val_loss/val_accuracy (and the final report) noisy and pessimistic.
# It uses the same validation_split as the training generator so that both
# partition the directory at the same fraction.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_data = datagen.flow_from_directory(
    dataset_path,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# shuffle=False keeps batch order aligned with val_data.classes, which the
# confusion-matrix step relies on.
val_data = val_datagen.flow_from_directory(
    dataset_path,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Class names in the order of the generator's class_indices mapping.
class_names = list(train_data.class_indices.keys())

# ✅ Step 5: Build Model with DenseNet121
# ImageNet-pretrained DenseNet121 backbone without its classification top,
# frozen for the first training stage.
base_model = DenseNet121(
    include_top=False,
    weights='imagenet',
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
)
base_model.trainable = False

# New head: global average pooling -> dropout -> L2-regularized dense -> dropout.
x = base_model.output
for head_layer in (
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    Dropout(0.5),
):
    x = head_layer(x)

# Softmax output with one unit per class found by the training generator.
outputs = Dense(train_data.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=outputs)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# ✅ Step 6: Initial Training
# First stage: fit the model for a fixed number of epochs with no callbacks.
# The returned History object is reused later for plotting and for resuming
# the epoch count during fine-tuning.
initial_epochs = 20
history = model.fit(
    train_data,
    epochs=initial_epochs,
    validation_data=val_data
)

# ✅ Step 7: Fine-Tuning
# Unfreeze the backbone, then re-freeze its first `fine_tune_at` layers so that
# only the layers from index `fine_tune_at` upward are fine-tuned.
base_model.trainable = True
fine_tune_at = 200  # Index of the first backbone layer left trainable
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
# NOTE(review): unfrozen BatchNormalization layers will update their statistics
# during fine-tuning; consider keeping them frozen - confirm against the Keras
# transfer-learning guidance.

# Recompile so the trainable changes take effect, with a much smaller learning
# rate than the default used for the first stage.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

checkpoint_path = '/kaggle/working/best_densenet_model_improved.keras'
callbacks = [
    # Stop when val_loss stalls and roll back to the best weights seen.
    EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True),
    # Persist the model with the highest val_accuracy.
    ModelCheckpoint(checkpoint_path, monitor='val_accuracy', save_best_only=True, verbose=1),
    # Shrink the learning rate by 5x when val_loss plateaus.
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, min_lr=1e-7)
]

fine_tune_epochs = 20
total_epochs = initial_epochs + fine_tune_epochs

# Resume the epoch counter right after the last completed epoch.
# history.epoch holds 0-based indices, so history.epoch[-1] would repeat the
# final initial epoch and run one fine-tuning epoch more than intended.
history_fine = model.fit(
    train_data,
    epochs=total_epochs,
    validation_data=val_data,
    initial_epoch=len(history.epoch),
    callbacks=callbacks
)

# ✅ Step 8: Plot Accuracy & Loss
def plot_metrics(hist1, hist2):
    """Plot accuracy and loss curves of two consecutive training runs side by side.

    Args:
        hist1: History-like object of the first run; its ``history`` dict must
            contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
        hist2: Same, for the run that followed; its values are appended to
            those of ``hist1``.
    """
    def joined(key):
        # Concatenate both runs' per-epoch values for one metric.
        return hist1.history[key] + hist2.history[key]

    acc = joined('accuracy')
    val_acc = joined('val_accuracy')
    loss = joined('loss')
    val_loss = joined('val_loss')
    epochs_range = range(len(acc))

    # (panel title, [(values, legend label), ...]) in left-to-right order.
    panels = (
        ('Accuracy', ((acc, 'Training Accuracy'), (val_acc, 'Validation Accuracy'))),
        ('Loss', ((loss, 'Training Loss'), (val_loss, 'Validation Loss'))),
    )

    plt.figure(figsize=(14, 6))
    for position, (title, curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for values, legend_label in curves:
            plt.plot(epochs_range, values, label=legend_label)
        plt.legend()
        plt.title(title)

    plt.show()

plot_metrics(history, history_fine)

# ✅ Step 9: Confusion Matrix & Final Accuracy
# Rewind the generator before predicting so the predictions are produced in the
# same order as val_data.classes (the generator was created with shuffle=False).
val_data.reset()
Y_pred = model.predict(val_data, verbose=1)
y_pred = np.argmax(Y_pred, axis=1)
y_true = val_data.classes

cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
# Draw into an explicitly sized axes: without `ax`, disp.plot() opens its own
# default-sized figure and a separately created plt.figure() stays empty.
fig, ax = plt.subplots(figsize=(10, 8))
disp.plot(ax=ax, cmap=plt.cm.Blues, xticks_rotation=45)
ax.set_title("Confusion Matrix")
plt.show()

# ✅ Classification Report + Final Accuracy
from sklearn.metrics import accuracy_score
final_acc = accuracy_score(y_true, y_pred)

print("Classification Report:\n")
print(classification_report(y_true, y_pred, target_names=class_names))
print(f"✅ Final Validation Accuracy: {final_acc * 100:.2f}%")
