In [62]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.regularizers import l2

In [63]:
CONFIG = {
    'data_path': '../dataset/',
    'image_size': (160, 160),# downlgaded from 224,224 to 160,160
    'batch_size': 32,  # Increased batch size
    'epochs': 30,      # Reduced epochs
    'initial_learning_rate': 0.0005,  # Lower initial learning rate
    'num_classes': 10,
    'validation_split': 0.2,
    'weight_decay': 0.001,
    'dropout_rate': 0.2,
    'early_stopping_patience': 5,  # Reduced patience
    'min_epochs': 10   # Reduced minimum epochs
}

In [64]:
def display_sample_images(data_path, num_samples=2):
    """
    Display sample images from each category
    """
    categories = os.listdir(data_path)
    plt.figure(figsize=(15, 10))
    
    for idx, category in enumerate(categories):
        category_path = os.path.join(data_path, category)
        images = os.listdir(category_path)[:num_samples]
        
        for i, image in enumerate(images):
            plt.subplot(len(categories), num_samples, idx * num_samples + i + 1)
            img = Image.open(os.path.join(category_path, image))
            plt.imshow(img)
            plt.title(f'{category} - Sample {i+1}')
            plt.axis('off')
    
    plt.tight_layout()
    plt.show()

def dataset_statistics(data_path):
    """
    Display dataset statistics
    """
    categories = os.listdir(data_path)
    stats = {}
    
    for category in categories:
        category_path = os.path.join(data_path, category)
        num_images = len(os.listdir(category_path))
        stats[category] = num_images
    
    # Plot statistics
    plt.figure(figsize=(12, 6))
    plt.bar(stats.keys(), stats.values())
    plt.xticks(rotation=45)
    plt.title('Number of Images per Category')
    plt.xlabel('Category')
    plt.ylabel('Number of Images')
    plt.tight_layout()
    plt.show()
    
    return pd.DataFrame.from_dict(stats, orient='index', columns=['Count'])

In [65]:
def create_optimized_generators(config):
    """
    Enhanced data generators with better augmentation
    """
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        validation_split=config['validation_split']
    )
    
    train_generator = train_datagen.flow_from_directory(
        config['data_path'],
        target_size=config['image_size'],
        batch_size=config['batch_size'],
        class_mode='categorical',
        subset='training',
        shuffle=True
    )
    
    val_generator = train_datagen.flow_from_directory(
        config['data_path'],
        target_size=config['image_size'],
        batch_size=config['batch_size'],
        class_mode='categorical',
        subset='validation',
        shuffle=False
    )
    
    return train_generator, val_generator

In [66]:
def build_model(config):
    """
    Enhanced model architecture with better regularization and learning strategies
    """
    # Use EfficientNetB0 with improved preprocessing
    base_model = EfficientNetB0(
        include_top=False,
        weights='imagenet',
        input_shape=(*config['image_size'], 3)
    )
    
    # Implement gradual unfreezing
    base_model.trainable = True
    for layer in base_model.layers[:-20]:  # Freeze more layers initially
        layer.trainable = False
    
    model = models.Sequential([
        # Input preprocessing and augmentation
        layers.RandomRotation(0.2),
        layers.RandomTranslation(0.1, 0.1),
        layers.RandomZoom(0.1),
        
        # Base model
        base_model,
        
        # Enhanced feature extraction
        layers.GlobalAveragePooling2D(),
        layers.BatchNormalization(),
        
        # First dense block with residual connection
        layers.Dense(512, use_bias=False),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.4),
        
        # Second dense block
        layers.Dense(256, use_bias=False),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.3),
        
        # Output layer
        layers.Dense(config['num_classes'], activation='softmax', dtype='float32')
    ])
    
    return model

In [72]:
def calculate_class_weights(train_generator):
    """
    Calculate balanced class weights based on the dataset statistics
    """
    class_counts = train_generator.classes.shape[0]
    class_weights = {}
    
    # Using inverse class frequency with smoothing
    for class_idx in range(train_generator.num_classes):
        count = np.sum(train_generator.classes == class_idx)
        weight = (1 / count) * (class_counts / train_generator.num_classes)
        # Apply smoothing to prevent extreme weights 
        weight = np.sqrt(weight)
        class_weights[class_idx] = weight
    
    return class_weights

## khaso yt7yd 

In [73]:
def train_model(model, train_generator, val_generator, config):
    """
    Enhanced training function with better learning rate schedule and callbacks
    """
    # Cosine decay learning rate schedule
    initial_learning_rate = config['initial_learning_rate']
    decay_steps = train_generator.samples // config['batch_size'] * config['epochs']
    lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate, decay_steps, alpha=0.1
    )
    
    # Optimizer with gradient clipping
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=lr_schedule,
        clipnorm=1.0
    )
    optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
    
    # Compile with label smoothing
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
        metrics=[
            tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
            tf.keras.metrics.TopKCategoricalAccuracy(k=2, name='top_2_accuracy', dtype='float32')
        ]
    )
    
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            'best_model.keras',
            monitor='val_accuracy',
            save_best_only=True,
            mode='max',
            verbose=1
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            mode='min',
            verbose=1
        ),
        # tf.keras.callbacks.ReduceLROnPlateau(
        #     monitor='val_loss',
        #     factor=0.7,
        #     patience=5,
        #     min_lr=1e-6,
        #     verbose=1
        # )
    ]
    
    return model.fit(
        train_generator,
        epochs=config['epochs'],
        validation_data=val_generator,
        callbacks=callbacks,
        workers=4,
        use_multiprocessing=True
    )

In [69]:
def predict_document(model, image_path, config):
    """
    Make predictions on new images
    """
    # Load and preprocess image
    img = tf.keras.preprocessing.image.load_img(
        image_path,
        target_size=config['image_size']
    )
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array /= 255.
    
    # Make prediction
    prediction = model.predict(img_array)
    return prediction

In [70]:
def create_tf_dataset(generator, config):
    return tf.data.Dataset.from_generator(
        lambda: generator,
        output_types=(tf.float32, tf.float32)
    ).prefetch(tf.data.AUTOTUNE)

In [71]:

def main():
    try:
        print("Creating data generators...")
        train_generator, val_generator = create_optimized_generators(CONFIG)
        
        # Set mixed precision policy
        policy = tf.keras.mixed_precision.Policy('mixed_float16')
        tf.keras.mixed_precision.set_global_policy(policy)
        
        # Build model
        print("Building and compiling model...")
        model = build_model(CONFIG)
        
        # Explicitly compile the model with float32 metrics
        optimizer = tf.keras.optimizers.Adam(learning_rate=CONFIG['initial_learning_rate'])
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
        
        model.compile(
            optimizer=optimizer,
            loss='categorical_crossentropy',
            metrics=[
                tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
                tf.keras.metrics.TopKCategoricalAccuracy(k=2, name='top_2_accuracy', dtype='float32')
            ]
        )
        
        print("Model compiled successfully")
        print("Starting training...")
        history = train_model(model, train_generator, val_generator, CONFIG)
        
        return model, history
        
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None, None
# Clean up - remove any duplicate calls
if __name__ == "__main__":
    # Enable mixed precision training
    # tf.keras.mixed_precision.set_global_policy('mixed_float16')
    
    # Run main function
    model, history = main()

Creating data generators...
Found 2788 images belonging to 10 classes.
Found 694 images belonging to 10 classes.
Building and compiling model...
Model compiled successfully
Starting training...


  self._warn_if_super_not_called()


Epoch 1/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.1703 - loss: 3.2516 - top_2_accuracy: 0.3014
Epoch 1: val_accuracy improved from -inf to 0.17867, saving model to best_model.keras
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 4s/step - accuracy: 0.1709 - loss: 3.2466 - top_2_accuracy: 0.3024 - val_accuracy: 0.1787 - val_loss: 2.4174 - val_top_2_accuracy: 0.3501 - learning_rate: 0.0010
Epoch 2/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.2375 - loss: 2.6494 - top_2_accuracy: 0.3980
Epoch 2: val_accuracy did not improve from 0.17867
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 4s/step - accuracy: 0.2377 - loss: 2.6490 - top_2_accuracy: 0.3981 - val_accuracy: 0.1787 - val_loss: 2.4148 - val_top_2_accuracy: 0.3026 - learning_rate: 0.0010
Epoch 3/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.2388 - loss: 2.5942 - top_

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

OPTIMIZED_CONFIG = {
    'data_path': '/kaggle/input/scanned-images-dataset-for-ocr-and-vlm-finetuning/dataset',
    'image_size': (128, 128),
    'batch_size': 32,
    'epochs': 50,
    'initial_learning_rate': 0.0001,  # Reduced learning rate
    'num_classes': 10,
    'validation_split': 0.2,
    'weight_decay': 0.0005,  # Reduced weight decay
    'dropout_rate': 0.3,    # Reduced dropout
    'early_stopping_patience': 10  # Increased patience
}

def create_data_generators(config):
    """Create training and validation data generators"""
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=config['validation_split'],
        rotation_range=15,        # Reduced augmentation
        width_shift_range=0.1,    # Reduced augmentation
        height_shift_range=0.1,   # Reduced augmentation
        zoom_range=0.1,          # Reduced augmentation
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    train_generator = train_datagen.flow_from_directory(
        config['data_path'],
        target_size=config['image_size'],
        batch_size=config['batch_size'],
        class_mode='categorical',
        subset='training',
        shuffle=True
    )
    
    validation_generator = train_datagen.flow_from_directory(
        config['data_path'],
        target_size=config['image_size'],
        batch_size=config['batch_size'],
        class_mode='categorical',
        subset='validation',
        shuffle=False
    )
    
    # Make generators repeat indefinitely
    return train_generator, validation_generator

def create_model(config):
    model = models.Sequential([
        layers.Input(shape=(*config['image_size'], 3)),
        
        # Simplified architecture with gradual scaling
        layers.Conv2D(32, (3, 3), padding='same', activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(config['weight_decay'])),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        layers.Conv2D(64, (3, 3), padding='same', activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(config['weight_decay'])),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        layers.Conv2D(128, (3, 3), padding='same', activation='relu',
                     kernel_regularizer=tf.keras.regularizers.l2(config['weight_decay'])),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu',
                    kernel_regularizer=tf.keras.regularizers.l2(config['weight_decay'])),
        layers.BatchNormalization(),
        layers.Dropout(config['dropout_rate']),
        layers.Dense(config['num_classes'], activation='softmax')
    ])
    
    return model

def main(config):
    train_generator, validation_generator = create_data_generators(config)
    
    # Calculate steps correctly
    steps_per_epoch = train_generator.samples // config['batch_size']
    validation_steps = validation_generator.samples // config['batch_size']
    
    model = create_model(config)
    
    optimizer = tf.keras.optimizers.Adam(
        config['initial_learning_rate'],
        clipnorm=1.0  # Added gradient clipping
    )
    
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=config['early_stopping_patience'],
            restore_best_weights=True,
            min_delta=0.001  # Added minimum change threshold
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,     # More gradual reduction
            patience=3,
            min_lr=1e-6
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath='best_model.keras',
            monitor='val_accuracy',
            save_best_only=True,
            mode='max'
        )
    ]
    
    history = model.fit(
        train_generator,
        epochs=config['epochs'],
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        callbacks=callbacks,
    )
    
    return model, history

if __name__ == "__main__":
    model, history = main(OPTIMIZED_CONFIG)