# Final

### Computer Vision 

In [3]:
import os
import cv2
import numpy as np
from skimage.filters import frangi
import matplotlib.pyplot as plt
from glob import glob

def preprocess_leaf_image(image_path, output_size=(224, 224)):
    """
    Preprocess a leaf image to extract RGB, venation map, and edge map
    
    Args:
        image_path (str): Path to the leaf image
        output_size (tuple): Size to resize images to (height, width)
        
    Returns:
        tuple: (RGB image, venation map, edge map)
    """
    # Read the original image
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise ValueError(f"Could not read image at {image_path}")
    
    # 1. RGB Image Extraction
    rgb_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    rgb_image = cv2.resize(rgb_image, output_size)
    
    # 2. Venation Map Extraction
    # Convert to grayscale
    gray_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
    gray_image = cv2.resize(gray_image, output_size)
    
    # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_image = clahe.apply(gray_image)
    
    # Apply Frangi filter to enhance vein-like structures
    # Updated parameters to be compatible with current scikit-image version
    venation_map = frangi(clahe_image, 
                         scale_range=(1, 3), 
                         scale_step=0.5,
                         beta=15,  # Using beta instead of beta1/beta2
                         black_ridges=False)
    
    # Normalize the venation map
    venation_map = cv2.normalize(venation_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    
    # 3. Edge Map Extraction using Canny edge detection
    edge_map = cv2.Canny(gray_image, 50, 150)
    
    return rgb_image, venation_map, edge_map

def process_dataset(dataset_path, output_dir):
    """
    Process an entire dataset of leaf images
    
    Args:
        dataset_path (str): Path to the dataset directory
        output_dir (str): Directory to save processed images
    """
    # Create output directories if they don't exist
    os.makedirs(os.path.join(output_dir, 'rgb'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'venation'), exist_ok=True)
    os.makedirs(os.path.join(output_dir, 'edge'), exist_ok=True)
    
    # Get all image files
    image_extensions = ['*.jpg', '*.jpeg', '*.png']
    image_files = []
    for ext in image_extensions:
        image_files.extend(glob(os.path.join(dataset_path, '**', ext), recursive=True))
    
    print(f"Found {len(image_files)} images to process")
    
    for idx, image_path in enumerate(image_files):
        try:
            # Extract class from path (assuming dataset structure: dataset/class/image.jpg)
            # Handle paths with spaces correctly
            path_parts = os.path.normpath(image_path).split(os.sep)
            class_name = path_parts[-2]  # Assuming last directory is the class name
            file_name = os.path.basename(image_path)
            
            # Create class directories if they don't exist
            os.makedirs(os.path.join(output_dir, 'rgb', class_name), exist_ok=True)
            os.makedirs(os.path.join(output_dir, 'venation', class_name), exist_ok=True)
            os.makedirs(os.path.join(output_dir, 'edge', class_name), exist_ok=True)
            
            # Process the image
            rgb_image, venation_map, edge_map = preprocess_leaf_image(image_path)
            
            # Save processed images
            cv2.imwrite(os.path.join(output_dir, 'rgb', class_name, file_name), 
                        cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR))
            cv2.imwrite(os.path.join(output_dir, 'venation', class_name, file_name), venation_map)
            cv2.imwrite(os.path.join(output_dir, 'edge', class_name, file_name), edge_map)
            
            if idx % 100 == 0:
                print(f"Processed {idx}/{len(image_files)} images")
                
        except Exception as e:
            print(f"Error processing {image_path}: {e}")

def visualize_preprocessing(image_path):
    """
    Visualize the preprocessing steps for a single image
    
    Args:
        image_path (str): Path to the leaf image
    """
    rgb_image, venation_map, edge_map = preprocess_leaf_image(image_path)
    
    plt.figure(figsize=(15, 5))
    
    plt.subplot(1, 3, 1)
    plt.imshow(rgb_image)
    plt.title('RGB Image')
    plt.axis('off')
    
    plt.subplot(1, 3, 2)
    plt.imshow(venation_map, cmap='gray')
    plt.title('Venation Map')
    plt.axis('off')
    
    plt.subplot(1, 3, 3)
    plt.imshow(edge_map, cmap='gray')
    plt.title('Edge Map')
    plt.axis('off')
    
    plt.tight_layout()
    plt.show()

# Function to check scikit-image version and available parameters
def check_frangi_parameters():
    """
    Print the available parameters for the frangi function
    """
    import inspect
    from skimage import __version__ as skimage_version
    
    print(f"scikit-image version: {skimage_version}")
    print("Frangi function parameters:")
    print(inspect.signature(frangi))
    print("For more details, refer to the documentation at:")
    print("https://scikit-image.org/docs/stable/api/skimage.filters.html#skimage.filters.frangi")

# Example usage
if __name__ == "__main__":
    # Check frangi parameters to diagnose issues
    check_frangi_parameters()
    
    # Path to the Mendeley leaf dataset
    dataset_path = "/Users/arnavkarnik/Documents/Recognition-of-Medicinal-Plant-Species-Deep-Learning-Project--Sem6/Medicinal Leaf Dataset/Segmented Medicinal Leaf Images"
    output_dir = "/Users/arnavkarnik/Documents/Recognition-of-Medicinal-Plant-Species-Deep-Learning-Project--Sem6/Output"
    
    # Process the entire dataset
    process_dataset(dataset_path, output_dir)
    
    # Alternatively, visualize preprocessing for a single image
    # sample_image = "/Users/arnavkarnik/Documents/Recognition-of-Medicinal-Plant-Species-Deep-Learning-Project--Sem6/Medicinal Leaf Dataset/Segmented Medicinal Leaf Images/Punica Granatum (Pomegranate)/PG-S-022.jpg"
    # visualize_preprocessing(sample_image)

scikit-image version: 0.24.0
Frangi function parameters:
(image, sigmas=range(1, 10, 2), scale_range=None, scale_step=None, alpha=0.5, beta=0.5, gamma=None, black_ridges=True, mode='reflect', cval=0)
For more details, refer to the documentation at:
https://scikit-image.org/docs/stable/api/skimage.filters.html#skimage.filters.frangi
Found 1835 images to process
Processed 0/1835 images


  venation_map = frangi(clahe_image,


Processed 100/1835 images
Processed 200/1835 images
Processed 300/1835 images
Processed 400/1835 images
Processed 500/1835 images
Processed 600/1835 images
Processed 700/1835 images
Processed 800/1835 images
Processed 900/1835 images
Processed 1000/1835 images
Processed 1100/1835 images
Processed 1200/1835 images
Processed 1300/1835 images
Processed 1400/1835 images
Processed 1500/1835 images
Processed 1600/1835 images
Processed 1700/1835 images
Processed 1800/1835 images


# Model 1

In [None]:
import gc
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Input, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Enable GPU memory growth
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# Paths to the preprocessed data
base_dir = "/Users/arnavkarnik/Documents/Recognition-of-Medicinal-Plant-Species-Deep-Learning-Project--Sem6/Output"
rgb_dir = os.path.join(base_dir, "rgb")
venation_dir = os.path.join(base_dir, "venation")
edge_dir = os.path.join(base_dir, "edge")

# Image dimensions
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32

# Custom data generator for multi-input model
class MultiInputDataGenerator:
    def __init__(self, rgb_generator, venation_generator, edge_generator):
        self.rgb_generator = rgb_generator
        self.venation_generator = venation_generator
        self.edge_generator = edge_generator
        self.n = len(self.rgb_generator)
        self.class_indices = self.rgb_generator.class_indices
        self.classes = self.rgb_generator.classes
        self.num_classes = len(self.class_indices)
        
    def __len__(self):
        return self.n
    
    def __getitem__(self, idx):
        rgb_batch = self.rgb_generator[idx]
        venation_batch = self.venation_generator[idx]
        edge_batch = self.edge_generator[idx]
        
        # Get the images and labels
        x_rgb = rgb_batch[0]
        x_venation = venation_batch[0]
        # Convert grayscale to 3-channel (required for ResNet input)
        x_venation = np.stack([x_venation[:,:,:,0]]*3, axis=-1)
        
        x_edge = edge_batch[0]
        # Convert grayscale to 3-channel
        x_edge = np.stack([x_edge[:,:,:,0]]*3, axis=-1)
        
        y = rgb_batch[1]  # Labels are the same for all generators
        
        return [x_rgb, x_venation, x_edge], y
    
    def reset(self):
        self.rgb_generator.reset()
        self.venation_generator.reset()
        self.edge_generator.reset()

# Data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Only rescaling for validation data
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# RGB data generators
train_rgb_generator = train_datagen.flow_from_directory(
    rgb_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=42
)

val_rgb_generator = val_datagen.flow_from_directory(
    rgb_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=42
)

# Venation data generators
train_venation_generator = train_datagen.flow_from_directory(
    venation_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='grayscale',
    subset='training',
    shuffle=True,
    seed=42
)

val_venation_generator = val_datagen.flow_from_directory(
    venation_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='grayscale',
    subset='validation',
    shuffle=False,
    seed=42
)

# Edge data generators
train_edge_generator = train_datagen.flow_from_directory(
    edge_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='grayscale',
    subset='training',
    shuffle=True,
    seed=42
)

val_edge_generator = val_datagen.flow_from_directory(
    edge_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='grayscale',
    subset='validation',
    shuffle=False,
    seed=42
)

# Create combined generators
train_generator = MultiInputDataGenerator(train_rgb_generator, train_venation_generator, train_edge_generator)
val_generator = MultiInputDataGenerator(val_rgb_generator, val_venation_generator, val_edge_generator)

# Build the three-stream model
def build_multi_stream_resnet50_model(num_classes):
    # RGB stream
    rgb_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name="rgb_input")
    rgb_base = ResNet50(weights='imagenet', include_top=False, input_tensor=rgb_input)
    
    # Freeze early layers of ResNet
    for layer in rgb_base.layers[:-30]:  # Freeze all but the last 30 layers
        layer.trainable = False
    
    rgb_features = GlobalAveragePooling2D()(rgb_base.output)
    
    # Venation stream
    venation_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name="venation_input")
    venation_base = ResNet50(weights='imagenet', include_top=False, input_tensor=venation_input)
    
    # Freeze early layers of ResNet
    for layer in venation_base.layers[:-30]:
        layer.trainable = False
        
    venation_features = GlobalAveragePooling2D()(venation_base.output)
    
    # Edge stream
    edge_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name="edge_input")
    edge_base = ResNet50(weights='imagenet', include_top=False, input_tensor=edge_input)
    
    # Freeze early layers of ResNet
    for layer in edge_base.layers[:-30]:
        layer.trainable = False
        
    edge_features = GlobalAveragePooling2D()(edge_base.output)
    
    # Combine features from all streams
    combined_features = Concatenate()([rgb_features, venation_features, edge_features])
    
    # Classification head
    x = Dense(512, activation='relu')(combined_features)
    x = Dense(256, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    
    # Create the model
    model = Model(inputs=[rgb_input, venation_input, edge_input], outputs=outputs)
    
    # Compile the model
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    return model

# Build model
num_classes = len(train_rgb_generator.class_indices)
model = build_multi_stream_resnet50_model(num_classes)

# Print model summary
model.summary()

# Callbacks
checkpoint_path = "best_model.h5"
model_checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    mode='max',
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-6,
    verbose=1
)

# Train the model
EPOCHS = 50
steps_per_epoch = len(train_generator)
validation_steps = len(val_generator)

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=[model_checkpoint, early_stopping, reduce_lr],
    verbose=1
)

# Save the final model
model.save("final_venation_model.h5")

# Plot training results
def plot_training_results(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    
    # Accuracy plot
    ax1.plot(history.history['accuracy'], label='Training Accuracy')
    ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax1.grid(True)
    
    # Loss plot
    ax2.plot(history.history['loss'], label='Training Loss')
    ax2.plot(history.history['val_loss'], label='Validation Loss')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    ax2.grid(True)
    
    plt.tight_layout()
    plt.savefig('training_results.png', dpi=300)
    plt.show()

plot_training_results(history)

# Evaluate the model
def evaluate_model(model, generator, class_names):
    # Get predictions
    y_true = []
    y_pred = []
    
    generator.reset()
    for i in range(len(generator)):
        x, y = generator[i]
        batch_pred = model.predict(x)
        batch_pred_classes = np.argmax(batch_pred, axis=1)
        batch_true_classes = np.argmax(y, axis=1)
        
        y_true.extend(batch_true_classes)
        y_pred.extend(batch_pred_classes)
    
    # Classification report
    print("\nClassification Report:")
    report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
    print(classification_report(y_true, y_pred, target_names=class_names))
    
    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png', dpi=300)
    plt.show()
    
    return report

# Get class names
class_names = list(train_rgb_generator.class_indices.keys())

# Evaluate the model
report = evaluate_model(model, val_generator, class_names)

# Save classification report
import json
with open('classification_report.json', 'w') as f:
    json.dump(report, f)

print(f"Training complete. Model and results saved.")

# Function to visualize the attention to venation patterns
def visualize_feature_importance(model, sample_image_path, output_dir="feature_importance"):
    """
    Visualize which parts of the leaf the model pays attention to, especially venation patterns
    """
    import cv2
    from tensorflow.keras.preprocessing import image
    from tf_keras_vis.gradcam import Gradcam
    from tf_keras_vis.utils import normalize
    
    os.makedirs(output_dir, exist_ok=True)
    
    # Load and preprocess a sample image
    rgb_img = cv2.imread(os.path.join(rgb_dir, sample_image_path))
    rgb_img = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2RGB)
    rgb_img = cv2.resize(rgb_img, (IMG_HEIGHT, IMG_WIDTH))
    rgb_img_array = np.expand_dims(rgb_img/255.0, axis=0)
    
    venation_img = cv2.imread(os.path.join(venation_dir, sample_image_path), cv2.IMREAD_GRAYSCALE)
    venation_img = cv2.resize(venation_img, (IMG_HEIGHT, IMG_WIDTH))
    venation_img_array = np.stack([venation_img/255.0]*3, axis=-1)
    venation_img_array = np.expand_dims(venation_img_array, axis=0)
    
    edge_img = cv2.imread(os.path.join(edge_dir, sample_image_path), cv2.IMREAD_GRAYSCALE)
    edge_img = cv2.resize(edge_img, (IMG_HEIGHT, IMG_WIDTH))
    edge_img_array = np.stack([edge_img/255.0]*3, axis=-1)
    edge_img_array = np.expand_dims(edge_img_array, axis=0)
    
    # Create a GradCAM instance
    gradcam = Gradcam(model)
    
    # Generate gradcam for each input
    # Note: This is a simplified approach - you may need to adapt this for multi-input models
    
    # Display the results
    plt.figure(figsize=(15, 10))
    
    plt.subplot(3, 2, 1)
    plt.imshow(rgb_img)
    plt.title('Original RGB Image')
    plt.axis('off')
    
    plt.subplot(3, 2, 3)
    plt.imshow(venation_img, cmap='gray')
    plt.title('Venation Map')
    plt.axis('off')
    
    plt.subplot(3, 2, 5)
    plt.imshow(edge_img, cmap='gray')
    plt.title('Edge Map')
    plt.axis('off')
    
    # Save the visualization
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"feature_importance_{os.path.basename(sample_image_path)}"), dpi=300)
    plt.show()

# Uncomment to visualize feature importance for a sample image
# sample_image_path = "class_name/image_filename.jpg"  # Replace with an actual image path
# visualize_feature_importance(model, sample_image_path)

SyntaxError: unterminated string literal (detected at line 25) (1539707832.py, line 25)