In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
import shutil
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Augmentation pipeline used as the first stage of the classifier.
# Every factor below is the layer's jitter magnitude; the closing centre crop
# fixes the spatial size handed to the convolutional stack at 200x200.
augmentation_steps = [
    # Mirror images along both axes.
    layers.RandomFlip(mode="horizontal_and_vertical"),
    # Geometric jitter: rotation, zoom and shifts.
    layers.RandomRotation(factor=0.35),
    layers.RandomZoom(height_factor=0.35),
    # Photometric jitter. NOTE(review): these run before any Rescaling layer,
    # so pixel values are presumably still in the 0-255 range here - confirm.
    layers.RandomContrast(factor=0.45),
    layers.RandomBrightness(factor=0.35),
    layers.RandomTranslation(height_factor=0.25, width_factor=0.25),
    # Deterministic crop to the 200x200 size used by the network.
    layers.CenterCrop(height=200, width=200),
]
data_augmentation = tf.keras.Sequential(augmentation_steps)

# Squeeze-and-Excitation (SE) channel attention, Functional-API style.
def attention_block(x, filters, reduction=16):
    """Re-weight the channels of ``x`` with a Squeeze-and-Excitation gate.

    The feature map is globally average-pooled, passed through a two-layer
    bottleneck (ReLU, then sigmoid) and the resulting per-channel gate is
    multiplied back onto ``x``.

    Every call creates NEW ``Dense`` layers, so call this once per location
    while wiring a Functional model on symbolic tensors. Do not wrap it in
    ``layers.Lambda``: weights created inside a Lambda are not tracked by the
    model (and therefore not trained or saved).

    Args:
        x: 4-D feature map whose last axis has ``filters`` channels.
        filters: Number of channels in ``x``.
        reduction: Bottleneck ratio; the hidden layer has
            ``filters // reduction`` units, but never fewer than one.

    Returns:
        A tensor shaped like ``x`` with each channel scaled by its gate.

    Raises:
        ValueError: If ``reduction`` is smaller than 1.
    """
    if reduction < 1:
        raise ValueError(f"reduction must be >= 1, got {reduction}")

    # Clamp to one unit so narrow feature maps (filters < reduction) do not
    # request a zero-width Dense layer.
    bottleneck_units = max(1, filters // reduction)

    se = layers.GlobalAveragePooling2D()(x)
    se = layers.Dense(bottleneck_units, activation='relu')(se)
    se = layers.Dense(filters, activation='sigmoid')(se)
    # Restore the spatial axes so the gate broadcasts over height and width.
    se = layers.Reshape((1, 1, filters))(se)
    return layers.Multiply()([x, se])

In [None]:
# CNN definition: augmentation -> rescaling/noise -> five conv blocks
# (Squeeze-and-Excitation attention on blocks 2-5) -> dense classifier head.


class SqueezeExcite(layers.Layer):
    """Squeeze-and-Excitation channel attention as a real Keras layer.

    The attention used to be built inside ``layers.Lambda(lambda x: ...)``.
    Dense layers created inside a Lambda are not tracked by the model, so
    their weights are not trained or saved, and a model containing Python
    lambdas cannot be reloaded safely from a ``.keras`` file. Owning the
    sub-layers here fixes both problems.

    To reload a saved model pass
    ``custom_objects={'SqueezeExcite': SqueezeExcite}`` to ``load_model``.

    Args:
        reduction: Bottleneck ratio; the hidden Dense layer has
            ``channels // reduction`` units (at least one).
    """

    def __init__(self, reduction=16, **kwargs):
        super().__init__(**kwargs)
        self.reduction = reduction
        self.pool = layers.GlobalAveragePooling2D()

    def build(self, input_shape):
        channels = int(input_shape[-1])
        bottleneck = max(1, channels // self.reduction)
        self.squeeze = layers.Dense(bottleneck, activation='relu')
        self.excite = layers.Dense(channels, activation='sigmoid')
        self.to_map = layers.Reshape((1, 1, channels))
        # Create the Dense weights now so they exist, and are tracked by this
        # layer, before the first forward pass.
        self.squeeze.build((None, channels))
        self.excite.build((None, bottleneck))
        super().build(input_shape)

    def call(self, inputs):
        gate = self.pool(inputs)       # (batch, channels)
        gate = self.squeeze(gate)      # (batch, channels // reduction)
        gate = self.excite(gate)       # (batch, channels), values in (0, 1)
        gate = self.to_map(gate)       # (batch, 1, 1, channels)
        return inputs * gate           # broadcast over height and width

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = super().get_config()
        config.update({'reduction': self.reduction})
        return config


cnn = tf.keras.models.Sequential()

# Input stage: augmentation (ends with a 200x200 centre crop), scaling to
# [0, 1], then additive Gaussian noise.
cnn.add(data_augmentation)
cnn.add(layers.Rescaling(1./255))
cnn.add(layers.GaussianNoise(0.15))

# One row per convolutional block: (filters, use SE attention, dropout rate).
# Each block is Conv(same) -> BN -> Conv(valid) -> BN -> [SE] -> MaxPool -> Dropout.
# `input_shape` is no longer passed to the first Conv2D: it is only honoured on
# the first layer of a Sequential model, and that is `data_augmentation`.
CONV_BLOCKS = [
    (32, False, 0.10),
    (64, True, 0.15),
    (128, True, 0.20),
    (256, True, 0.25),
    (512, True, 0.35),
]

for block_filters, use_attention, dropout_rate in CONV_BLOCKS:
    cnn.add(layers.Conv2D(block_filters, 3, padding='same', activation='relu'))
    cnn.add(layers.BatchNormalization())
    cnn.add(layers.Conv2D(block_filters, 3, activation='relu'))
    cnn.add(layers.BatchNormalization())
    if use_attention:
        cnn.add(SqueezeExcite(reduction=16))
    cnn.add(layers.MaxPooling2D(2, 2))
    cnn.add(layers.Dropout(dropout_rate))

# Classifier head.
cnn.add(layers.GlobalAveragePooling2D())
cnn.add(layers.Dense(1024, activation='relu'))
cnn.add(layers.Dropout(0.5))
cnn.add(layers.Dense(256, activation='relu'))
cnn.add(layers.Dropout(0.3))
# NOTE(review): `num_classes` is not defined in the visible cells; it must come
# from an earlier cell (presumably the dataset-loading one) - confirm.
cnn.add(layers.Dense(num_classes, activation='softmax'))

# NOTE(review): the model has no explicit Input, so depending on the Keras
# version this may report unbuilt layers or require a prior build - confirm.
cnn.summary()

In [None]:
# Optimisation setup: AdamW driven by a cosine schedule with warm restarts.

# Schedule hyper-parameters are kept in one place so they are easy to tune.
# `first_decay_steps` is counted in optimizer steps (batches), not epochs.
cosine_restart_config = {
    'initial_learning_rate': 1.5e-3,
    'first_decay_steps': 1000,
    't_mul': 2.0,
    'm_mul': 0.9,
    'alpha': 1e-6,
}
lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(**cosine_restart_config)

# AdamW = Adam with decoupled weight decay. Because the learning rate is a
# schedule object, it cannot be overwritten by callbacks during training.
optimizer = tf.keras.optimizers.AdamW(
    learning_rate=lr_schedule,
    beta_1=0.9,
    beta_2=0.999,
    weight_decay=2e-4,
)

# Focal loss factory (element-wise binary form applied to one-hot targets).
def improved_focal_loss(alpha=0.3, gamma=2.5):
    """Build a focal-loss function for one-hot encoded targets.

    Each class entry is treated as a binary problem: entries where
    ``y_true == 1`` are weighted by ``alpha`` and use ``p_t = y_pred``; all
    other entries are weighted by ``1 - alpha`` and use ``p_t = 1 - y_pred``.
    The ``(1 - p_t) ** gamma`` factor down-weights well-classified entries.

    Args:
        alpha: Weight for the positive (``y_true == 1``) entries.
        gamma: Focusing exponent; ``0`` gives alpha-weighted cross-entropy.

    Returns:
        A callable ``loss(y_true, y_pred)`` that returns ONE loss value per
        sample (the class axis is summed). Keras averages these over the
        batch, so the unweighted result equals the previous batch mean, and
        per-sample ``sample_weight`` / ``class_weight`` values are now applied
        to the right samples instead of to an already-averaged scalar.
    """
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        # Integer one-hot labels would otherwise fail on the float arithmetic.
        y_true = tf.cast(y_true, y_pred.dtype)

        # Keep probabilities away from 0 and 1 so the log stays finite.
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)

        is_positive = tf.equal(y_true, 1)
        p_t = tf.where(is_positive, y_pred, 1 - y_pred)
        alpha_factor = tf.ones_like(y_pred) * alpha
        alpha_t = tf.where(is_positive, alpha_factor, 1 - alpha_factor)

        cross_entropy = -tf.math.log(p_t)
        weight = alpha_t * tf.pow((1 - p_t), gamma)
        loss = weight * cross_entropy
        # Sum over the class axis only; the batch reduction is left to Keras.
        return tf.reduce_sum(loss, axis=-1)
    return focal_loss_fixed

# Wire the model to the focal loss (default alpha/gamma) and the optimizer.
# Tracked metrics: plain accuracy plus top-k accuracy.
# NOTE(review): the string form of top-k uses Keras' default k (5 per the
# Keras docs); with few classes this metric is close to 1.0 - confirm k.
cnn.compile(
    loss=improved_focal_loss(),
    optimizer=optimizer,
    metrics=['accuracy', 'top_k_categorical_accuracy'],
)

In [None]:
# Balanced class weights derived from the label distribution of a dataset.
def get_class_weights(dataset):
    """Compute 'balanced' class weights keyed by the actual class index.

    Iterates once over ``dataset`` (yielding ``(features, labels)`` pairs whose
    labels expose ``.numpy()``), collects the class index of every sample and
    passes them to scikit-learn's ``compute_class_weight('balanced', ...)``.

    Label batches with two or more dimensions are treated as one-hot (or
    probability) rows and reduced with ``argmax`` over axis 1; 1-D batches are
    taken to be integer class ids already.

    Args:
        dataset: Iterable of ``(features, label_batch)`` pairs.

    Returns:
        ``dict`` mapping each class id PRESENT in the data to its weight.
        Keys are the real class ids, so a class with no samples is simply
        absent instead of shifting the weights of every class after it.
    """
    labels = []
    for _, label_batch in dataset:
        batch = np.asarray(label_batch.numpy())
        if batch.ndim >= 2:
            batch = np.argmax(batch, axis=1)
        labels.extend(int(value) for value in np.ravel(batch))

    classes = np.unique(labels)
    weights = compute_class_weight(
        class_weight='balanced',
        classes=classes,
        y=np.asarray(labels)
    )
    # Pair each weight with its own class id; enumerate() would key by
    # position in `classes`, which is wrong whenever a class id is missing.
    return {int(cls): float(weight) for cls, weight in zip(classes, weights)}

# Compute the weights from the training split (this iterates the whole
# dataset once) and print them for inspection.
class_weights = get_class_weights(train_ds)
print(f"Class weights: {class_weights}")

In [None]:
# Training callbacks: early stopping, best-model checkpointing, LR logging.
#
# The two ReduceLROnPlateau callbacks were removed. The optimizer is built
# with a LearningRateSchedule (CosineDecayRestarts), and a scheduled learning
# rate cannot be overwritten by a callback, so the first plateau would either
# raise or be silently ignored. The cosine schedule alone now controls the LR.
# The second one was also labelled "learning rate logging", which it did not
# do; the small logger below provides that.


class LearningRateLogger(tf.keras.callbacks.Callback):
    """Record the optimizer's current learning rate in the epoch logs as 'lr'."""

    def on_epoch_end(self, epoch, logs=None):
        if logs is None:
            return
        optimizer = self.model.optimizer
        current_lr = optimizer.learning_rate
        # Some optimizer versions expose the schedule object itself; evaluate
        # it at the current step to get a number.
        if callable(current_lr):
            current_lr = current_lr(optimizer.iterations)
        logs['lr'] = float(np.asarray(current_lr))


callbacks = [
    # Stop when validation accuracy has not improved by at least `min_delta`
    # for `patience` epochs, and roll back to the best weights seen.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=25,
        restore_best_weights=True,
        verbose=1,
        min_delta=0.001
    ),
    # Keep the full model (not just weights) with the best validation accuracy.
    tf.keras.callbacks.ModelCheckpoint(
        'ImprovedEnhancedRiceModel.keras',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1,
        save_weights_only=False
    ),
    # Adds 'lr' to the training history so the schedule can be plotted.
    LearningRateLogger(),
]

In [None]:
# Input pipeline tuning and the training run.
AUTOTUNE = tf.data.AUTOTUNE

# cache() stores elements exactly as first produced, so any shuffling done
# upstream is frozen after the first epoch. Shuffling again AFTER the cache
# restores a different element order every epoch (shuffle reshuffles on each
# iteration by default).
# NOTE(review): `train_ds` is created outside the visible cells and looks
# batched, so this shuffles batch order rather than samples - confirm.
SHUFFLE_BUFFER = 1000
train_ds = train_ds.cache().shuffle(SHUFFLE_BUFFER).prefetch(buffer_size=AUTOTUNE)
# Validation order does not matter; cache and prefetch only.
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Upper bound on epochs; the EarlyStopping callback may end training sooner.
EPOCHS = 75

print("Starting improved enhanced training...")
history = cnn.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=callbacks,
    class_weight=class_weights,  # per-class weighting of the training loss
    verbose=1
)

In [None]:
# Training-curve visualisation (2x2 grid).
def plot_improved_training_history(history):
    """Plot accuracy, loss, top-k accuracy and learning rate per epoch.

    Args:
        history: Object with a ``history`` dict mapping metric names to
            per-epoch value lists (as returned by ``model.fit``). The keys
            ``'accuracy'`` and ``'loss'`` are required. Validation curves
            (``'val_<name>'``), ``'top_k_categorical_accuracy'`` and the
            learning rate are drawn only when present.

    The learning rate is read from ``'lr'`` or, failing that, from
    ``'learning_rate'`` (the key used by newer Keras versions). Panels with
    nothing to show are hidden instead of being left as empty axes.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    metrics = history.history
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

    def _draw_curves(ax, key, train_label, val_label, title, ylabel):
        # Training curve, plus the validation curve when it was recorded.
        ax.plot(metrics[key], label=train_label, linewidth=2)
        val_key = 'val_' + key
        if val_key in metrics:
            ax.plot(metrics[val_key], label=val_label, linewidth=2)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Accuracy
    _draw_curves(ax1, 'accuracy', 'Training Accuracy', 'Validation Accuracy',
                 'Model Accuracy', 'Accuracy')

    # Loss
    _draw_curves(ax2, 'loss', 'Training Loss', 'Validation Loss',
                 'Model Loss', 'Loss')

    # Top-K Accuracy
    if 'top_k_categorical_accuracy' in metrics:
        _draw_curves(ax3, 'top_k_categorical_accuracy', 'Training Top-K',
                     'Validation Top-K', 'Top-K Accuracy', 'Top-K Accuracy')
    else:
        ax3.set_visible(False)

    # Learning Rate: accept either history key.
    lr_key = next((key for key in ('lr', 'learning_rate') if key in metrics), None)
    if lr_key is not None:
        ax4.plot(metrics[lr_key], linewidth=2, color='red')
        ax4.set_title('Learning Rate Schedule', fontsize=14, fontweight='bold')
        ax4.set_xlabel('Epoch')
        ax4.set_ylabel('Learning Rate')
        ax4.set_yscale('log')
        ax4.grid(True, alpha=0.3)
    else:
        ax4.set_visible(False)

    plt.tight_layout()
    plt.show()

# Render the curves for the training run stored in `history`.
plot_improved_training_history(history)

In [None]:
# Final evaluation, then persist the model and its training curves.
import pickle

# Scores are computed on the validation split (no separate test set is used
# in this cell). evaluate() returns the loss followed by the compiled metrics
# in compile order: accuracy, then top-k accuracy.
print("Evaluating improved enhanced model...")
evaluation = cnn.evaluate(val_ds, verbose=0)
test_loss, test_acc, test_top_k = evaluation
print(f"Improved Model Accuracy: {test_acc:.4f}")
print(f"Improved Model Top-K Accuracy: {test_top_k:.4f}")
print(f"Improved Model Loss: {test_loss:.4f}")

# Write the model in its current state to disk.
cnn.save('ImprovedEnhancedCNN.keras')
print("Improved model saved as 'ImprovedEnhancedCNN.keras'")

# Pickle the per-epoch metric lists so the curves can be re-plotted later.
with open('improved_training_history.pkl', 'wb') as history_file:
    pickle.dump(history.history, history_file)
print("Training history saved as 'improved_training_history.pkl'")