## 1. Setup and Check GPU

In [4]:
import tensorflow as tf
import os

# Check TensorFlow version and GPU availability
print("TensorFlow version:", tf.__version__)

# Check for GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"\n{'='*60}")
        print(f"GPU ENABLED: {len(gpus)} GPU(s) detected")
        for i, gpu in enumerate(gpus):
            print(f"  GPU {i}: {gpu.name}")
        print(f"{'='*60}\n")
    except RuntimeError as e:
        print(f"GPU configuration error: {e}")
else:
    print("\nNo GPU detected. Running on CPU.")
    print("In Colab: Runtime > Change runtime type > Hardware accelerator > GPU")

TensorFlow version: 2.20.0

No GPU detected. Running on CPU.
In Colab: Runtime > Change runtime type > Hardware accelerator > GPU


## 2. Install Required Libraries



In [2]:
# AUTOMATIC INSTALLATION - Run this cell to install all required packages
# This cell will install packages automatically for a blank Python environment

import sys
import subprocess

def install_packages():
    packages = [
        'tensorflow>=2.10.0',  # TensorFlow with GPU support
        'numpy>=1.21.0',
        'pillow>=9.0.0',
        'tqdm>=4.64.0',
        'matplotlib>=3.5.0',
    ]
    
    print("🔧 Installing required packages for blank environment...")
    print("="*60)
    
    for package in packages:
        print(f"\n📦 Installing {package}...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except subprocess.CalledProcessError as e:
            print(f"⚠️ Error installing {package}: {e}")
            print("Continuing with next package...")
    
    print("\n" + "="*60)
    print("✅ Package installation completed!")
    print("="*60)
    print("\n⚠️ IMPORTANT: For GPU support, ensure you have:")
    print("  1. NVIDIA GPU with CUDA Compute Capability 3.5+")
    print("  2. CUDA Toolkit 11.2 or higher")
    print("  3. cuDNN 8.1 or higher")
    print("\n💡 After installation, restart the kernel if needed.")
    print("   Kernel > Restart Kernel")

# Run installation automatically
print("Starting automatic package installation...\n")
install_packages()

Starting automatic package installation...

🔧 Installing required packages for blank environment...

📦 Installing tensorflow>=2.10.0...

📦 Installing numpy>=1.21.0...

📦 Installing pillow>=9.0.0...

📦 Installing tqdm>=4.64.0...

📦 Installing matplotlib>=3.5.0...

✅ Package installation completed!

⚠️ IMPORTANT: For GPU support, ensure you have:
  1. NVIDIA GPU with CUDA Compute Capability 3.5+
  2. CUDA Toolkit 11.2 or higher
  3. cuDNN 8.1 or higher

💡 After installation, restart the kernel if needed.
   Kernel > Restart Kernel


In [3]:
# Create requirements.txt file for easy package management
requirements_content = """# Image Captioning Project Requirements
# Install all with: pip install -r requirements.txt

tensorflow>=2.10.0
numpy>=1.21.0
pillow>=9.0.0
tqdm>=4.64.0
matplotlib>=3.5.0

# Optional: For Kaggle dataset download
# kaggle>=1.5.12
"""

with open('requirements.txt', 'w') as f:
    f.write(requirements_content)

print("✓ requirements.txt created!")
print("\nTo install all packages, run in terminal:")
print("  pip install -r requirements.txt")
print("\nFor GPU support, ensure CUDA and cuDNN are installed.")

✓ requirements.txt created!

To install all packages, run in terminal:
  pip install -r requirements.txt

For GPU support, ensure CUDA and cuDNN are installed.


## 3. Import Libraries

In [5]:
import string
import numpy as np
from PIL import Image
from pickle import dump, load
import time

from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import add, concatenate, multiply, RepeatVector, Reshape, BatchNormalization
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tqdm import tqdm

print("All libraries imported successfully!")

All libraries imported successfully!


## 4. Configure Local Dataset Paths

Set up paths to your local Flickr8k dataset

In [6]:
# LOCAL SETUP: Configure your dataset paths here
# Download Flickr8k from: https://www.kaggle.com/datasets/adityajn105/flickr8k

# Option 1: Set your local paths (RECOMMENDED)
base_path = r"path to your dataset"
dataset_text = os.path.join(base_path, "Flickr8k_text")
dataset_images = os.path.join(base_path, "Flicker8k_Dataset")

# Option 2: Or use relative paths if dataset is in the same directory
# base_path = "Flickr8k"
# dataset_text = os.path.join(base_path, "Flickr8k_text")
# dataset_images = os.path.join(base_path, "Flicker8k_Dataset")

print("Dataset Configuration:")
print(f"  Text files: {dataset_text}")
print(f"  Images: {dataset_images}")

# Verify paths exist
if os.path.exists(dataset_text) and os.path.exists(dataset_images):
    print("\n✓ Dataset paths found!")
    print(f"  Images found: {len([f for f in os.listdir(dataset_images) if f.endswith(('.jpg', '.png'))])} files")
else:
    print("\n⚠ WARNING: Dataset paths not found!")
    print("  Please update the paths above to point to your Flickr8k dataset location.")

Dataset Configuration:
  Text files: C:\Users\jebar\OneDrive\Bureau\Deep Learning\Flickr8k\Flickr8k_text
  Images: C:\Users\jebar\OneDrive\Bureau\Deep Learning\Flickr8k\Flicker8k_Dataset

✓ Dataset paths found!
  Images found: 8091 files


## 5. Download Dataset (If Needed)

If you don't have the Flickr8k dataset yet, download it from Kaggle

## 6. Data Loading Functions

In [7]:
def load_doc(filename):
    """Load a text file into memory"""
    with open(filename, 'r') as file:
        text = file.read()
    return text

def all_img_captions(filename):
    """Extract all image captions from the token file"""
    file = load_doc(filename)
    captions = file.split('\n')
    descriptions = {}
    for caption in captions[:-1]:
        img, caption = caption.split('\t')
        if img[:-2] not in descriptions:
            descriptions[img[:-2]] = [caption]
        else:
            descriptions[img[:-2]].append(caption)
    return descriptions

print("Data loading functions defined!")

Data loading functions defined!


## 7. Text Cleaning Functions

In [8]:
def cleaning_text(captions):
    """Clean and preprocess text captions"""
    table = str.maketrans('', '', string.punctuation)
    for img, caps in captions.items():
        for i, img_caption in enumerate(caps):
            img_caption = img_caption.replace("-", " ")
            desc = img_caption.split()
            desc = [word.lower() for word in desc]
            desc = [word.translate(table) for word in desc]
            desc = [word for word in desc if len(word) > 1]
            desc = [word for word in desc if word.isalpha()]
            img_caption = ' '.join(desc)
            captions[img][i] = img_caption
    return captions

def text_vocabulary(descriptions):
    """Build vocabulary of all unique words"""
    vocab = set()
    for key in descriptions.keys():
        [vocab.update(d.split()) for d in descriptions[key]]
    return vocab

def save_descriptions(descriptions, filename):
    """Save descriptions to file"""
    lines = []
    for key, desc_list in descriptions.items():
        for desc in desc_list:
            lines.append(key + '\t' + desc)
    data = "\n".join(lines)
    with open(filename, "w") as file:
        file.write(data)

print("Text cleaning functions defined!")

Text cleaning functions defined!


## 8. Prepare Text Data

In [9]:
if os.path.exists("descriptions.txt"):
    print("descriptions.txt already exists. Skipping preprocessing...")
else:
    print("Processing captions...")
    filename = os.path.join(dataset_text, "Flickr8k.token.txt")
    
    # Load all captions
    descriptions = all_img_captions(filename)
    print(f"Loaded {len(descriptions)} images with captions")
    
    # Clean descriptions
    clean_descriptions = cleaning_text(descriptions)
    
    # Build vocabulary
    vocabulary = text_vocabulary(clean_descriptions)
    print(f"Vocabulary size: {len(vocabulary)} words")
    
    # Save to file
    save_descriptions(clean_descriptions, "descriptions.txt")
    print("descriptions.txt created successfully!")

Processing captions...
Loaded 8092 images with captions
Vocabulary size: 8422 words
descriptions.txt created successfully!


## 9. Feature Extraction with Xception

In [10]:
def extract_features(directory, batch_size=16):
    """Extract features from images using Xception model"""
    # Load Xception model
    model = Xception(include_top=False, pooling='avg', weights='imagenet')
    print("Xception model loaded!")
    
    features = {}
    valid_images = ['.jpg', '.jpeg', '.png']
    
    # Get all image paths
    image_paths = []
    for img in os.listdir(directory):
        ext = os.path.splitext(img)[1].lower()
        if ext in valid_images:
            image_paths.append(img)
    
    print(f"Found {len(image_paths)} images to process")
    
    # Process in batches for efficiency on GPU
    for i in tqdm(range(0, len(image_paths), batch_size), desc="Extracting features"):
        batch_paths = image_paths[i:i + batch_size]
        batch_images = []
        valid_batch_names = []
        
        for img in batch_paths:
            filepath = os.path.join(directory, img)
            try:
                image = Image.open(filepath)
                image = image.resize((299, 299))
                image = np.array(image)
                
                # Handle grayscale
                if len(image.shape) == 2:
                    image = np.stack([image] * 3, axis=-1)
                # Handle RGBA
                elif image.shape[2] == 4:
                    image = image[:, :, :3]
                
                image = preprocess_input(image)
                batch_images.append(image)
                valid_batch_names.append(img)
            except Exception as e:
                print(f"Error processing {img}: {e}")
                continue
        
        if batch_images:
            batch_array = np.array(batch_images)
            batch_features = model.predict(batch_array, verbose=0)
            
            for idx, img_name in enumerate(valid_batch_names):
                features[img_name] = batch_features[idx].reshape(1, -1)
    
    return features

print("Feature extraction function defined!")

Feature extraction function defined!


## 10. Extract and Save Features

In [11]:
if os.path.exists("features.pkl"):
    print("features.pkl already exists. Loading features...")
    features = load(open("features.pkl", "rb"))
    print(f"Loaded features for {len(features)} images")
else:
    print("Extracting features from images...")
    print("This may take several minutes with GPU...")
    
    features = extract_features(dataset_images, batch_size=16)
    
    # Save features
    dump(features, open("features.pkl", "wb"))
    print(f"Features extracted and saved for {len(features)} images!")

Extracting features from images...
This may take several minutes with GPU...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step
Xception model loaded!
Found 8091 images to process


Extracting features: 100%|███████████████████████████████████████████████████████████| 506/506 [26:00<00:00,  3.08s/it]


Features extracted and saved for 8091 images!


## 11. Load Training Data

In [12]:
def load_photos(filename):
    """Load list of photo identifiers"""
    file = load_doc(filename)
    photos = file.split("\n")[:-1]
    return photos

def load_clean_descriptions(filename, photos):
    """Load clean descriptions for photos"""
    file = load_doc(filename)
    descriptions = {}
    for line in file.split("\n"):
        words = line.split()
        if len(words) < 1:
            continue
        image, image_caption = words[0], words[1:]
        if image in photos:
            if image not in descriptions:
                descriptions[image] = []
            desc = '<start> ' + " ".join(image_caption) + ' <end>'
            descriptions[image].append(desc)
    return descriptions

def load_features(photos, all_features):
    """Load features for specific photos"""
    return {k: all_features[k] for k in photos if k in all_features}

# Load training set
train_filename = os.path.join(dataset_text, "Flickr_8k.trainImages.txt")
train_imgs = load_photos(train_filename)
train_descriptions = load_clean_descriptions("descriptions.txt", train_imgs)
train_features = load_features(train_imgs, features)

print(f"Training set: {len(train_imgs)} images")
print(f"Loaded descriptions for {len(train_descriptions)} images")
print(f"Loaded features for {len(train_features)} images")

Training set: 6000 images
Loaded descriptions for 6000 images
Loaded features for 6000 images


## 12. Create Tokenizer

In [13]:
def dict_to_list(descriptions):
    """Convert dictionary to list of descriptions"""
    all_desc = []
    for key in descriptions.keys():
        [all_desc.append(d) for d in descriptions[key]]
    return all_desc

def create_tokenizer(descriptions):
    """Create tokenizer for text"""
    desc_list = dict_to_list(descriptions)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(desc_list)
    return tokenizer

# Create and save tokenizer
if os.path.exists('tokenizer.pkl'):
    print("Loading existing tokenizer...")
    tokenizer = load(open('tokenizer.pkl', 'rb'))
else:
    print("Creating tokenizer...")
    tokenizer = create_tokenizer(train_descriptions)
    dump(tokenizer, open('tokenizer.pkl', 'wb'))
    print("Tokenizer created and saved!")

vocab_size = len(tokenizer.word_index) + 1
print(f"Vocabulary size: {vocab_size}")

Creating tokenizer...
Tokenizer created and saved!
Vocabulary size: 7318


## 13. Calculate Maximum Sequence Length

In [14]:
def max_length(descriptions):
    """Calculate maximum length of descriptions"""
    desc_list = dict_to_list(descriptions)
    return max(len(d.split()) for d in desc_list)

max_length = max_length(train_descriptions)
print(f"Maximum sequence length: {max_length}")

Maximum sequence length: 35


## 14. Data Generator



In [15]:
def create_sequences(tokenizer, max_length, desc_list, feature, vocab_size, label_smoothing=0.1):
    """Create input-output sequence pairs with optional label smoothing"""
    X1, X2, y = [], [], []
    for desc in desc_list:
        seq = tokenizer.texts_to_sequences([desc])[0]
        for i in range(1, len(seq)):
            in_seq, out_seq = seq[:i], seq[i]
            # CRITICAL FIX: Use padding='post' for right-padding (required for cuDNN LSTM on GPU)
            in_seq = pad_sequences([in_seq], maxlen=max_length, padding='post')[0]
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            
            # Apply label smoothing for better generalization
            if label_smoothing > 0:
                out_seq = out_seq * (1 - label_smoothing) + (label_smoothing / vocab_size)
            
            X1.append(feature)
            X2.append(in_seq)
            y.append(out_seq)
    return np.array(X1), np.array(X2), np.array(y)

def data_generator(descriptions, features, tokenizer, max_length, vocab_size, batch_size=32, label_smoothing=0.1):
    """Generate batches of data for training with label smoothing"""
    def generator():
        while True:
            for key, description_list in descriptions.items():
                if key not in features:
                    continue
                feature = features[key][0]
                input_image, input_sequence, output_word = create_sequences(
                    tokenizer, max_length, description_list, feature, vocab_size, label_smoothing
                )
                for i in range(len(input_image)):
                    yield {'input_1': input_image[i], 'input_2': input_sequence[i]}, output_word[i]
    
    output_signature = (
        {
            'input_1': tf.TensorSpec(shape=(2048,), dtype=tf.float32),
            'input_2': tf.TensorSpec(shape=(max_length,), dtype=tf.int32)
        },
        tf.TensorSpec(shape=(vocab_size,), dtype=tf.float32)
    )
    
    dataset = tf.data.Dataset.from_generator(generator, output_signature=output_signature)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

print("Enhanced data generator with label smoothing defined!")

Enhanced data generator with label smoothing defined!


## 15. Define Model

In [16]:
def define_model(vocab_size, max_length):
    """Define the enhanced image captioning model with attention-like mechanism"""
    # Image feature input - Deeper processing with residual connection
    inputs1 = Input(shape=(2048,), name='input_1')
    fe1 = Dense(512, activation='relu')(inputs1)
    fe2 = BatchNormalization()(fe1)
    fe3 = Dropout(0.4)(fe2)
    fe4 = Dense(512, activation='relu')(fe3)
    fe5 = BatchNormalization()(fe4)
    # Residual connection
    fe_residual = add([fe1, fe4])
    fe6 = Dense(256, activation='relu')(fe_residual)
    
    # Sequence input - Deeper LSTM with more capacity
    inputs2 = Input(shape=(max_length,), name='input_2')
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.4)(se1)
    # First LSTM layer with return_sequences
    se3 = LSTM(512, return_sequences=True, recurrent_dropout=0.2)(se2)
    se4 = BatchNormalization()(se3)
    # Second LSTM layer
    se5 = LSTM(512, return_sequences=False, recurrent_dropout=0.2)(se4)
    se6 = BatchNormalization()(se5)
    se7 = Dense(256, activation='relu')(se6)
    
    # Advanced decoder with gating mechanism (attention-like)
    # Create a gating vector to weight image and text features
    combined = concatenate([fe6, se7])
    
    # Gating mechanism - learns to balance image vs text importance
    gate = Dense(256, activation='sigmoid', name='gate')(combined)
    gated_image = multiply([fe6, gate])
    gated_text = multiply([se7, gate])
    
    # Combine gated features
    decoder1 = concatenate([gated_image, gated_text])
    decoder2 = Dense(512, activation='relu')(decoder1)
    decoder3 = BatchNormalization()(decoder2)
    decoder4 = Dropout(0.5)(decoder3)
    decoder5 = Dense(512, activation='relu')(decoder4)
    decoder6 = BatchNormalization()(decoder5)
    decoder7 = Dropout(0.5)(decoder6)
    decoder8 = Dense(256, activation='relu')(decoder7)
    outputs = Dense(vocab_size, activation='softmax')(decoder8)
    
    # Create model
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    
    # Use a more sophisticated optimizer configuration
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0003,  # Slightly higher initial LR
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7
    )
    
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy']
    )
    
    return model

print("Enhanced model architecture with gating mechanism defined!")

Enhanced model architecture with gating mechanism defined!


## 16. Create Model and View Summary

In [17]:
model = define_model(vocab_size, max_length)
print(model.summary())

# Calculate training steps
def get_steps_per_epoch(descriptions, batch_size=32):
    total_sequences = 0
    for img_captions in descriptions.values():
        for caption in img_captions:
            words = caption.split()
            total_sequences += len(words) - 1
    return max(1, total_sequences // batch_size)

batch_size = 32
steps_per_epoch = get_steps_per_epoch(train_descriptions, batch_size)
print(f"\nSteps per epoch: {steps_per_epoch}")

None

Steps per epoch: 9596


## 17. Setup Training Callbacks

In [18]:
# Create models directory
os.makedirs("models", exist_ok=True)

# Define callbacks
checkpoint = ModelCheckpoint(
    filepath='models/best_model.keras',
    monitor='loss',
    save_best_only=True,
    mode='min',
    verbose=1
)

# More aggressive learning rate schedule
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.3,  # Reduce by 70%
    patience=2,
    min_lr=0.00000001,
    verbose=1
)

# Add early stopping
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(
    monitor='loss',
    patience=6,  # More patience for better convergence
    restore_best_weights=True,
    verbose=1
)

# Add learning rate warmup using custom callback
class WarmUpLearningRate(tf.keras.callbacks.Callback):
    def __init__(self, warmup_epochs=3, initial_lr=0.00003, target_lr=0.0003):
        super(WarmUpLearningRate, self).__init__()
        self.warmup_epochs = warmup_epochs
        self.initial_lr = initial_lr
        self.target_lr = target_lr
        self.current_epoch = 0
    
    def on_epoch_begin(self, epoch, logs=None):
        self.current_epoch = epoch
        if epoch < self.warmup_epochs:
            # Gradually increase learning rate
            lr = self.initial_lr + (self.target_lr - self.initial_lr) * (epoch / self.warmup_epochs)
            # Fix for newer TensorFlow versions - use learning_rate instead of lr
            self.model.optimizer.learning_rate.assign(lr)
            print(f"Warmup: Setting learning rate to {lr:.6f}")

warmup = WarmUpLearningRate(warmup_epochs=3, initial_lr=0.00003, target_lr=0.0003)

print("Enhanced callbacks configured with learning rate warmup!")

Enhanced callbacks configured with learning rate warmup!


## 18. Train the Model

In [19]:
# Training configuration
epochs = 25  # More epochs with early stopping
batch_size = 64  # Larger batch size for more stable gradients

# Recalculate steps with new batch size
steps_per_epoch = get_steps_per_epoch(train_descriptions, batch_size)

print(f"\nEnhanced Training Configuration:")
print(f"  Epochs: {epochs}")
print(f"  Batch size: {batch_size} (increased for stability)")
print(f"  Steps per epoch: {steps_per_epoch}")
print(f"  Vocabulary size: {vocab_size}")
print(f"  Max sequence length: {max_length}")
print(f"\nOptimizations:")
print(f"  ✓ Gating mechanism for feature fusion")
print(f"  ✓ Residual connections in encoder")
print(f"  ✓ Deeper LSTM with recurrent dropout")
print(f"  ✓ Learning rate warmup (3 epochs)")
print(f"  ✓ Aggressive LR reduction on plateau")
print(f"\nStarting training...\n")

# Train the model
best_loss = float('inf')
patience_counter = 0

for epoch in range(epochs):
    print(f"\n{'='*60}")
    print(f"Epoch {epoch + 1}/{epochs}")
    print(f"{'='*60}")
    
    # Create fresh dataset for each epoch
    dataset = data_generator(
        train_descriptions,
        train_features,
        tokenizer,
        max_length,
        vocab_size,
        batch_size
    )
    
    # Train for one epoch with all callbacks
    history = model.fit(
        dataset,
        epochs=1,
        steps_per_epoch=steps_per_epoch,
        callbacks=[checkpoint, reduce_lr, early_stop, warmup],
        verbose=1
    )
    
    # Save model after each epoch
    model_path = f"models/model_epoch_{epoch + 1}.keras"
    model.save(model_path)
    print(f"Model saved: {model_path}")
    
    # Save as latest
    model.save("models/latest_model.keras")
    
    # Print metrics
    loss = history.history['loss'][0]
    acc = history.history['accuracy'][0]
    # Fix for newer TensorFlow versions - use learning_rate instead of lr
    current_lr = float(model.optimizer.learning_rate.numpy())
    
    print(f"\nEpoch {epoch + 1} Results:")
    print(f"  Loss: {loss:.4f}")
    print(f"  Accuracy: {acc:.4f} ({acc*100:.2f}%)")
    print(f"  Learning Rate: {current_lr:.8f}")
    
    # Track improvement
    if loss < best_loss:
        improvement = best_loss - loss
        best_loss = loss
        patience_counter = 0
        print(f"  ✓ Improvement: {improvement:.4f}")
    else:
        patience_counter += 1
        print(f"  ⚠ No improvement (patience: {patience_counter}/6)")
    
    # Check if early stopping triggered
    if early_stop.stopped_epoch > 0:
        print(f"\n{'='*60}")
        print(f"Early stopping triggered at epoch {epoch + 1}")
        print(f"Best loss achieved: {best_loss:.4f}")
        print(f"{'='*60}")
        break

print("\n" + "="*60)
print("Training completed!")
print(f"Final best loss: {best_loss:.4f}")
print("="*60)


Enhanced Training Configuration:
  Epochs: 25
  Batch size: 64 (increased for stability)
  Steps per epoch: 4798
  Vocabulary size: 7318
  Max sequence length: 35

Optimizations:
  ✓ Gating mechanism for feature fusion
  ✓ Residual connections in encoder
  ✓ Deeper LSTM with recurrent dropout
  ✓ Learning rate warmup (3 epochs)
  ✓ Aggressive LR reduction on plateau

Starting training...


Epoch 1/25
Warmup: Setting learning rate to 0.000030
[1m4798/4798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1093 - loss: 6.7520
Epoch 1: loss improved from None to 6.20228, saving model to models/best_model.keras
[1m4798/4798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6330s[0m 1s/step - accuracy: 0.1347 - loss: 6.2023 - learning_rate: 3.0000e-05
Restoring model weights from the end of the best epoch: 1.
Model saved: models/model_epoch_1.keras

Epoch 1 Results:
  Loss: 6.2023
  Accuracy: 0.1347 (13.47%)
  Learning Rate: 0.00003000
  ✓ Improvement: inf

Epoch 2

KeyboardInterrupt: 