In [1]:
from google.colab import drive
drive.mount('/content/drive')
print('Drive mounted')

Mounted at /content/drive
Drive mounted


In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time
from collections import Counter
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_recall_fscore_support

## Training Environment and Computational Considerations

Due to the substantial computational requirements associated with training deep neural networks, **Google Colaboratory** was selected as the primary training environment. This decision was motivated by the impracticality of training large-scale, pre-trained architectures locally within reasonable time constraints.

### Model Architectures

The following architectures were trained and evaluated:

* **ResNet152V2**
* **EfficientNetV2B0**
* **InceptionResNetV2**
* **MobileNetV3Large**
* **Custom-CNN**

### Hardware Acceleration

All experiments were conducted using **NVIDIA A100 GPU accelerators**, which significantly reduced training time and enabled the execution of large-scale experiments that would otherwise be infeasible on local hardware.

---

## Data Loading Bottleneck

During the initial training phase, a substantial **I/O bottleneck** was observed when loading image data directly from **Google Drive**. This bottleneck severely impacted training throughput by underutilizing GPU resources, as the data loading process could not keep pace with model execution.

---

## TFRecord-Based Data Pipeline Optimization

To address this limitation, the dataset was converted into **TFRecord format**, a binary storage format optimized for TensorFlow workflows. This approach offers several key advantages:

* **Efficient sequential reading** of data
* **Reduced disk I/O overhead** through contiguous data storage
* **Serialized data representation**, enabling faster parsing
* **Asynchronous data loading and prefetching**, overlapping I/O with computation

By enabling the data pipeline to operate concurrently with model training, this optimization significantly **maximized GPU utilization** and **reduced idle time during batch loading**, leading to a more efficient and stable training process.



# See TFRecord_Pipeline_HAM10000.ipynb for the implementation

In [3]:
TFRECORD_DIR = '/content/drive/MyDrive/HAM10000/processed_tfrecords/tfrecords'
METADATA_DIR = '/content/drive/MyDrive/HAM10000/processed_tfrecords/metadata'
OUTPUT_DIR = '/content/drive/MyDrive/HAM10000/pre_trained_models'
os.makedirs(OUTPUT_DIR, exist_ok=True)

IMG_SIZE = 224
NUM_CLASSES = 7
BATCH_SIZE = 32
SEED = 42

MODEL_CONFIGS = {
    'ResNet152V2': {
        'base_trainable': True,
        'dense1_units': 1024,
        'dense2_units': 512,
        'use_alpha_dropout': False,
        'learning_rate': 1e-4,
        'epochs': 30,
        'paper_accuracy': 89.95,
        'paper_trainable_params': 60817543
    },
    'EfficientNetV2B0': {
        'base_trainable': True,
        'dense1_units': 1024,
        'dense2_units': 256,
        'use_alpha_dropout': False,
        'learning_rate': 1e-3,
        'epochs': 30,
        'paper_accuracy': 86.74,
        'paper_trainable_params': 7437207
    },
    'InceptionResNetV2': {
        'base_trainable': True,
        'dense1_units': 1024,
        'dense2_units': 256,
        'use_alpha_dropout': False,
        'learning_rate': 1e-4,
        'epochs': 30,
        'paper_accuracy': 91.98,
        'paper_trainable_params': 1840647
    },
    'MobileNetV3Large': {
        'base_trainable': True,
        'dense1_units': 1024,
        'dense2_units': 512,
        'use_alpha_dropout': True,
        'learning_rate': 1e-3,
        'epochs': 30,
        'paper_accuracy': 88.78,
        'paper_trainable_params': 1515527
    }
}

IDX_TO_LABEL = {0: 'akiec', 1: 'bcc', 2: 'bkl', 3: 'df', 4: 'mel', 5: 'nv', 6: 'vasc'}

tf.random.set_seed(SEED)
np.random.seed(SEED)

In [4]:
def get_tfrecord_files(tfrecord_dir, split):
    pattern = os.path.join(tfrecord_dir, f'{split}-*.tfrecord')
    return sorted(tf.io.gfile.glob(pattern))

def parse_tfrecord(example_proto):
    feature_desc = {
        'image_id': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
        'image_bytes': tf.io.FixedLenFeature([], tf.string),
        'dx': tf.io.FixedLenFeature([], tf.string),
    }
    return tf.io.parse_single_example(example_proto, feature_desc)

train_files = get_tfrecord_files(TFRECORD_DIR, 'train')
test_files = get_tfrecord_files(TFRECORD_DIR, 'test')
print(f'Train files: {len(train_files)}, Test files: {len(test_files)}')

Train files: 36, Test files: 4


In [5]:
def count_distribution(files):
    dataset = tf.data.TFRecordDataset(files).map(parse_tfrecord)
    counts = Counter()
    for ex in dataset:
        counts[ex['label'].numpy()] += 1
    return dict(counts)

print('Counting samples...')
train_dist = count_distribution(train_files)
TRAIN_SAMPLES = sum(train_dist.values())
TEST_SAMPLES = sum(count_distribution(test_files).values())
print(f'Train: {TRAIN_SAMPLES}, Test: {TEST_SAMPLES}')

CLASS_WEIGHTS = {k: TRAIN_SAMPLES / (NUM_CLASSES * v) for k, v in train_dist.items()}
print('Class weights:')
for idx in sorted(CLASS_WEIGHTS.keys()):
    print(f'  {IDX_TO_LABEL[idx]}: {CLASS_WEIGHTS[idx]:.3f}')

Counting samples...
Train: 9013, Test: 1002
Class weights:
  akiec: 4.379
  bcc: 2.781
  bkl: 1.302
  df: 12.501
  mel: 1.285
  nv: 0.213
  vasc: 10.059


In [6]:
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(factor=90/360, fill_mode="reflect"),
    tf.keras.layers.RandomZoom(height_factor=(-0.1, 0.1),
                               width_factor=(-0.1, 0.1),
                               fill_mode="reflect"),
], name="paper_augmentation")

def augment(image, label):
    # image este în [0,1] la tine => perfect pentru layers
    image = data_augmentation(image, training=True)
    return image, label

In [7]:
def get_preprocess_fn(model_name):
    if model_name == 'ResNet152V2':
        return tf.keras.applications.resnet_v2.preprocess_input
    elif model_name == 'EfficientNetV2B0':
        return lambda x: (x / 127.5) - 1.0
    elif model_name == 'InceptionResNetV2':
        return tf.keras.applications.inception_resnet_v2.preprocess_input
    elif model_name == 'MobileNetV3Large':
        return lambda x: (x / 127.5) - 1.0
    return lambda x: x

print('Preprocessing functions defined')

Preprocessing functions defined


In [8]:
def create_train_ds(files, batch_size, preprocess_fn):
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=tf.data.AUTOTUNE)
    ds = ds.map(parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE)

    def decode(ex):
        img = tf.io.decode_jpeg(ex['image_bytes'], channels=3)
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
        img = tf.cast(img, tf.float32) / 255.0
        return img, ex['label']

    ds = ds.map(decode, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.repeat()
    ds = ds.shuffle(2000,seed = SEED, reshuffle_each_iteration=True)
    ds = ds.map(augment, num_parallel_calls=tf.data.AUTOTUNE)

    def apply_preprocess(img, label):
        return preprocess_fn(img * 255.0), label

    ds = ds.map(apply_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

def create_test_ds(files, batch_size, preprocess_fn):
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=tf.data.AUTOTUNE)
    ds = ds.map(parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE)

    def decode(ex):
        img = tf.io.decode_jpeg(ex['image_bytes'], channels=3)
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
        img = tf.cast(img, tf.float32) / 255.0
        return img, ex['label']

    ds = ds.map(decode, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.cache()

    def apply_preprocess(img, label):
        return preprocess_fn(img * 255.0), label

    ds = ds.map(apply_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size, drop_remainder=False)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

print('Dataset functions defined')

Dataset functions defined


In [9]:
def build_model(model_name, config):

    input_shape = (IMG_SIZE, IMG_SIZE, 3)
    # Get base model
    if model_name == 'ResNet152V2':
        base = tf.keras.applications.ResNet152V2( include_top=False, weights='imagenet', input_shape=input_shape )
    elif model_name == 'EfficientNetV2B0':
        base = tf.keras.applications.EfficientNetV2B0( include_top=False, weights='imagenet', input_shape=input_shape )
    elif model_name == 'InceptionResNetV2':
        base = tf.keras.applications.InceptionResNetV2( include_top=False, weights='imagenet', input_shape=input_shape )
    elif model_name == 'MobileNetV3Large':
        base = tf.keras.applications.MobileNetV3Large( include_top=False, weights='imagenet', input_shape=input_shape )

    base.trainable = config['base_trainable']

    # Build classification head - EXACT from paper
    inputs = tf.keras.Input(shape=input_shape)

    # For frozen models, use training=False
    if config['base_trainable']:
        x = base(inputs, training=True)
    else:
        x = base(inputs, training=False)

    # Global Average Pooling
    x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # Dense 1
    if config['use_alpha_dropout']:
        x = tf.keras.layers.Dense(config['dense1_units'], activation='selu')(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.AlphaDropout(0.1)(x)
    else:
        x = tf.keras.layers.Dense(config['dense1_units'], activation='relu')(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Dropout(0.3)(x)

    # Dense 2
    if config['use_alpha_dropout']:
        x = tf.keras.layers.Dense(config['dense2_units'], activation='selu')(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.AlphaDropout(0.1)(x)
    else:
        x = tf.keras.layers.Dense(config['dense2_units'], activation='relu')(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Dropout(0.3)(x)

    # Output
    outputs = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs, name=model_name)

    # Print summary
    total_params = model.count_params()
    trainable_params = sum(tf.keras.backend.count_params(w) for w in model.trainable_weights)

    status = 'UNFROZEN' if config['base_trainable'] else 'FROZEN'
    print(f'\n{model_name} ({status}):')
    print(f'  Architecture: GAP -> Dense({config["dense1_units"]}) -> Dense({config["dense2_units"]}) -> Dense(7)')
    print(f'  Total params: {total_params:,}')
    print(f'  Trainable params: {trainable_params:,}')
    print(f'  Paper trainable: {config["paper_trainable_params"]:,}')
    print(f'  Match: {"YES" if abs(trainable_params - config["paper_trainable_params"]) < 100000 else "NO"}')

    return model

print('Model builder defined')

Model builder defined


In [10]:
def get_callbacks(model_name):
    return [
        tf.keras.callbacks.ModelCheckpoint(
            os.path.join(OUTPUT_DIR, f'{model_name}_best.keras'),
            monitor='val_accuracy', mode='max', save_best_only=True, verbose=1
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy', mode='max', patience=10,
            restore_best_weights=True, verbose=1
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1
        )
    ]

print('Callbacks defined')

Callbacks defined


In [11]:
def evaluate_model(model, test_ds, model_name):
    print(f'Evaluating {model_name}...')

    y_true, y_pred, y_proba = [], [], []
    for imgs, labels in test_ds:
        preds = model.predict(imgs, verbose=0)
        y_proba.extend(preds)
        y_pred.extend(np.argmax(preds, axis=1))
        y_true.extend(labels.numpy())

    y_true, y_pred, y_proba = np.array(y_true), np.array(y_pred), np.array(y_proba)

    acc = np.mean(y_true == y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted', zero_division=0)
    try:
        auc = roc_auc_score(y_true, y_proba, multi_class='ovr', average='weighted')
    except:
        auc = 0.0

    print(f'Accuracy: {acc*100:.2f}%')
    print(f'Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}, AUC: {auc:.4f}')

    return {
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1,
        'auc': auc,
        'cm': confusion_matrix(y_true, y_pred)
    }

def plot_results(history, cm, model_name, config):
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    # Accuracy
    axes[0].plot(history.history['accuracy'], label='Train')
    axes[0].plot(history.history['val_accuracy'], label='Val')
    axes[0].axhline(y=config['paper_accuracy']/100, color='r', linestyle='--', label=f'Paper ({config["paper_accuracy"]}%)')
    axes[0].set_title(f'{model_name} - Accuracy')
    axes[0].legend()
    axes[0].grid(True)

    # Loss
    axes[1].plot(history.history['loss'], label='Train')
    axes[1].plot(history.history['val_loss'], label='Val')
    axes[1].set_title(f'{model_name} - Loss')
    axes[1].legend()
    axes[1].grid(True)

    # Confusion Matrix
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[2],
                xticklabels=[IDX_TO_LABEL[i] for i in range(NUM_CLASSES)],
                yticklabels=[IDX_TO_LABEL[i] for i in range(NUM_CLASSES)])
    axes[2].set_title(f'{model_name} - Confusion Matrix')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, f'{model_name}_results.png'), dpi=150)
    plt.show()

print('Evaluation functions defined')

Evaluation functions defined


In [12]:
def train_model(model_name):
    config = MODEL_CONFIGS[model_name]

    print(f'\n{"#"*70}')
    print(f'# TRAINING: {model_name}')
    print(f'# Base: {"UNFROZEN" if config["base_trainable"] else "FROZEN"}')
    print(f'# Architecture: Dense({config["dense1_units"]}) -> Dense({config["dense2_units"]})')
    print(f'# Learning Rate: {config["learning_rate"]}')
    print(f'# Paper Accuracy: {config["paper_accuracy"]}%')
    print(f'{"#"*70}')

    start = time.time()

    # Create datasets
    preprocess_fn = get_preprocess_fn(model_name)
    train_ds = create_train_ds(train_files, BATCH_SIZE, preprocess_fn)
    test_ds = create_test_ds(test_files, BATCH_SIZE, preprocess_fn)

    steps = TRAIN_SAMPLES // BATCH_SIZE
    print(f'Steps per epoch: {steps}')

    # Build model
    model = build_model(model_name, config)

    # Compile
    print(f'\nCompiling with LR={config["learning_rate"]}')
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=config['learning_rate']),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Train
    history = model.fit(
        train_ds,
        epochs=config['epochs'],
        steps_per_epoch=steps,
        validation_data=test_ds,
        class_weight=CLASS_WEIGHTS,
        callbacks=get_callbacks(model_name),
        verbose=1
    )

    # Evaluate
    results = evaluate_model(model, test_ds, model_name)

    # Plot
    plot_results(history, results['cm'], model_name, config)

    # Save
    model.save(os.path.join(OUTPUT_DIR, f'{model_name}_final.keras'))

    elapsed = (time.time() - start) / 60
    print(f'\nTime: {elapsed:.1f} min')
    print(f'Our accuracy: {results["accuracy"]*100:.2f}% vs Paper: {config["paper_accuracy"]}%')

    results['time'] = elapsed
    results['history'] = history.history
    results['paper_accuracy'] = config['paper_accuracy']

    tf.keras.backend.clear_session()
    return results

print('Training function defined')

Training function defined


In [13]:
# Test each model's preprocessing
for model_name in ['ResNet152V2', 'EfficientNetV2B0', 'InceptionResNetV2', 'MobileNetV3Large']:
    preprocess_fn = get_preprocess_fn(model_name)
    test_ds = create_test_ds(test_files, 4, preprocess_fn)

    for images, labels in test_ds.take(1):
        min_val = tf.reduce_min(images).numpy()
        max_val = tf.reduce_max(images).numpy()
        print(f"{model_name:20s}: range = [{min_val:.2f}, {max_val:.2f}]")

ResNet152V2         : range = [-1.00, 0.96]
EfficientNetV2B0    : range = [-1.00, 0.96]
InceptionResNetV2   : range = [-1.00, 0.96]
MobileNetV3Large    : range = [-1.00, 0.96]


In [None]:
resnet_results = train_model('ResNet152V2')

In [None]:
efficient_results = train_model('EfficientNetV2B0')

In [None]:
tf.keras.backend.clear_session()
inception_results = train_model('InceptionResNetV2')

In [None]:
tf.keras.backend.clear_session()
mobile_results = train_model('MobileNetV3Large')

In [None]:
# ============================================
# DEBUG CELL - Rulează asta pentru diagnostic
# ============================================

print("=" * 60)
print("DIAGNOSTIC COMPLET")
print("=" * 60)

# 1. Verifică funcția curentă
import inspect
print("\n1. FUNCȚIA CREATE_TRAIN_DS (primele 20 linii):")
source = inspect.getsource(create_train_ds)
for i, line in enumerate(source.split('\n')[:20]):
    print(f"  {i+1}: {line}")

# 2. Verifică dacă .repeat() e în funcție
print(f"\n2. '.repeat()' în funcție: {'DA ✅' if '.repeat()' in source else 'NU ❌'}")

# 3. Test practic - verifică dacă dataset-ul se repetă
print("\n3. TEST PRACTIC - Câte batch-uri pot lua?")
test_preprocess = get_preprocess_fn('ResNet152V2')
test_ds = create_train_ds(train_files, BATCH_SIZE, test_preprocess)

batch_count = 0
for batch in test_ds.take(400):  # 400 batches × 32 = 12,800 > 9,013 samples
    batch_count += 1
    if batch_count % 100 == 0:
        print(f"  ... {batch_count} batches")

print(f"\n  Total batches obținute: {batch_count}")
expected_without_repeat = TRAIN_SAMPLES // BATCH_SIZE  # ~281
print(f"  Fără .repeat() ar fi: ~{expected_without_repeat}")
print(f"  Cu .repeat() ar fi: 400")

if batch_count >= 400:
    print("  ✅ Dataset-ul SE REPETĂ corect!")
else:
    print("  ❌ Dataset-ul NU se repetă! Problema e aici!")

# 4. Verifică dimensiunile unui batch
print("\n4. VERIFICARE BATCH:")
for images, labels in test_ds.take(1):
    print(f"  Image shape: {images.shape}")
    print(f"  Labels shape: {labels.shape}")
    print(f"  Image range: [{tf.reduce_min(images).numpy():.3f}, {tf.reduce_max(images).numpy():.3f}]")
    print(f"  Unique labels in batch: {np.unique(labels.numpy())}")

# 5. Verifică class weights
print("\n5. CLASS WEIGHTS:")
for idx in sorted(CLASS_WEIGHTS.keys()):
    print(f"  {IDX_TO_LABEL[idx]}: {CLASS_WEIGHTS[idx]:.3f}")

print("\n" + "=" * 60)
print("DIAGNOSTIC COMPLET")
print("=" * 60)

In [None]:
train_ds = create_train_ds(train_files, BATCH_SIZE, preprocess_fn)
print("Cardinality:", train_ds.cardinality().numpy())
