# Importing Libraries

In [None]:
# General-purpose libraries
import numpy as np
import pandas as pd
import os
import cv2

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# TensorFlow and Keras for deep learning
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import EfficientNetV2L
from tensorflow.keras.regularizers import l2
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input

# Sklearn utilities for evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load & Read Data

In [None]:
# Kaggle input locations: the label table and the directory holding the image files
labels_csv = '/kaggle/input/computer-vision-xm/train.csv'
image_dir = '/kaggle/input/computer-vision-xm/images/kaggle/working/Reorganized_Data/images/'

# Read the label table; later cells use its 'Images' and 'Labels' columns
labels_df = pd.read_csv(filepath_or_buffer=labels_csv)
# Preview the first rows (shown as the cell output when run in a notebook)
labels_df.head()


# Preprocessing

In [None]:
# Side length (in pixels) of the square images fed to the network, and the mini-batch size
IMG_SIZE = 224
BATCH_SIZE = 32


# Preprocess Images
def load_and_preprocess_image(image_path):
    """Read an image from disk, resize it to IMG_SIZE x IMG_SIZE and preprocess it.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The resized image array after being passed through ``preprocess_input``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None rather
        # than raising; fail here with the offending path instead of letting
        # cv2.resize crash with an unrelated-looking error.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    # NOTE(review): preprocess_input is imported from efficientnet_v2, while the model
    # built later in this file is ConvNeXtXLarge. Per the Keras docs this function is
    # presumably a pass-through (scaling lives inside the model) - confirm.
    # NOTE(review): cv2.imread yields BGR channel order, whereas ImageNet-pretrained
    # Keras models are normally fed RGB. Left unchanged so that training and test
    # preprocessing stay identical - confirm whether both loaders should convert.
    image = preprocess_input(image)
    return image

# Load every image listed in the label table; images and labels stay aligned by row
images = [
    load_and_preprocess_image(os.path.join(image_dir, file_name))
    for file_name in labels_df['Images']
]
labels = labels_df['Labels'].tolist()

# Stack the per-sample lists into numpy arrays
X = np.asarray(images)
y = np.asarray(labels)

# Split The Data

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state makes the
# split repeatable between runs
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split

print(f"Training data: {X_train.shape}, Validation data: {X_val.shape}")

# Data Augmentation

In [None]:
# Random geometric augmentation settings, used for the training generator only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**augmentation_options)
# The validation generator is created without any augmentation options
val_datagen = ImageDataGenerator()

# Batch iterators over the in-memory arrays
val_generator = val_datagen.flow(X_val, y_val, batch_size=BATCH_SIZE)
train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)

# Building Model

In [None]:
from tensorflow.keras.applications import ConvNeXtXLarge
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Load the ImageNet-pretrained ConvNeXtXLarge backbone without its classification head.
# The input shape is derived from IMG_SIZE so the model always matches the size the
# images were resized to during preprocessing.
base_model = ConvNeXtXLarge(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
base_model.trainable = False  # Freeze the backbone for the first training phase

# Stack a small classification head on top of the backbone
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),  # Collapse the spatial dimensions into one feature vector
    Dense(256, activation='relu', kernel_regularizer=l2(0.01)),  # Hidden layer with L2 penalty
    Dropout(0.5),  # Drop half of the activations during training
    Dense(1, activation='sigmoid')  # Single sigmoid unit for binary classification
])

# Compile for binary classification
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer/parameter overview
model.summary()

# Training Initial Model

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Stop once validation loss has gone 5 epochs without improving, and roll the
# model back to the best weights seen
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Halve the learning rate after 3 epochs without validation-loss improvement
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

# First training phase: up to 15 epochs with the callbacks above
training_callbacks = [early_stopping, lr_scheduler]
history = model.fit(
    train_generator,
    epochs=15,
    validation_data=val_generator,
    callbacks=training_callbacks,
    verbose=1,
)

# Fine-Tune Model

In [None]:
# Second training phase: mark the backbone layers as trainable too
base_model.trainable = True

# Recompile with a smaller learning rate (1e-5) for fine-tuning
fine_tune_optimizer = Adam(learning_rate=1e-5)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Continue training for up to 10 more epochs, reusing the same callbacks
fine_tuning_history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[early_stopping, lr_scheduler],
    verbose=1,
)


# Save The Model

In [None]:
# Write the trained model to an .h5 file in the current working directory
model_path = 'leaf_disease_classifier.h5'
model.save(model_path)

# Evaluating The Model

In [None]:
# Predicted probabilities for the validation images
y_pred = model.predict(X_val, batch_size=BATCH_SIZE)
# Threshold at 0.5 and flatten to a 1-D label vector, the same way the submission
# cell does, so the predictions have the same shape as y_val instead of relying on
# scikit-learn to coerce an (N, 1) column vector.
y_pred_classes = (y_pred > 0.5).astype(int).flatten()

# Evaluate performance
print(f"Accuracy: {accuracy_score(y_val, y_pred_classes)}")
print("Classification Report:")
print(classification_report(y_val, y_pred_classes))

# Confusion matrix drawn as an annotated heatmap (integer cell counts)
cm = confusion_matrix(y_val, y_pred_classes)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Loss-epoch Graph

In [None]:
# Plot training & validation loss over all epochs of both training phases.
# The fine-tuning epochs are appended after the initial ones so the curve shows
# the whole training run rather than stopping at the end of the first phase.
initial_epochs = len(history.history['loss'])
train_loss = list(history.history['loss']) + list(fine_tuning_history.history['loss'])
val_loss = list(history.history['val_loss']) + list(fine_tuning_history.history['val_loss'])

plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
# Dashed line between the last initial epoch and the first fine-tuning epoch
plt.axvline(initial_epochs - 0.5, linestyle='--', color='gray', label='Start Fine-Tuning')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Make Predictions For Submission

In [None]:
# Read the test manifest
test_csv = '/kaggle/input/computer-vision-xm/test.csv'
test_df = pd.read_csv(test_csv)

# Keep only the entries ending in .jpg (case-insensitive) and resolve them
# against image_dir
image_filenames = test_df['Images'].tolist()
jpg_filenames = [name for name in image_filenames if name.lower().endswith('.jpg')]
image_paths = [os.path.join(image_dir, name) for name in jpg_filenames]

# Preprocess Test Images
def preprocess_test_image(image_path):
    """Read a test image, resize it to IMG_SIZE x IMG_SIZE and preprocess it.

    Applies the same steps as the training loader (read, resize, ``preprocess_input``)
    and uses IMG_SIZE rather than a literal so both stay the same size.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The resized image array after being passed through ``preprocess_input``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for a missing or unreadable file; report the
        # path explicitly instead of failing inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    image = preprocess_input(image)  # Same preprocessing call as the training loader
    return image

# Stack all preprocessed test images into one array
test_images = np.array(list(map(preprocess_test_image, image_paths)))

# Predict probabilities, then threshold at 0.5 to get flat 0/1 labels
predictions = model.predict(test_images, batch_size=BATCH_SIZE)
predicted_labels = (predictions > 0.5).astype(int).flatten()

# Build the submission table: one row per predicted image, keyed by file name
submission_filenames = list(map(os.path.basename, image_paths))
submission_df = pd.DataFrame({
    'Images': submission_filenames,
    'Labels': predicted_labels,
})
submission_df.to_csv('/kaggle/working/submission.csv', index=False)

print('Submission file saved.')