In [None]:
# Smoke-test cell: prints a greeting (presumably to confirm the kernel is alive).
print("Hello")

In [None]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Dense, Input, GlobalAveragePooling2D, BatchNormalization, Dropout, Layer
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt

# Avoid OOM errors by enabling memory growth on every visible GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Path to your data: one sub-folder per identity/class
BASE_PATH = r'/kaggle/input/111111111111100000000000/alive'
# os.listdir returns entries in arbitrary order. Training consumes SUB_FOLDERS in
# fixed-size slices and reloads the previous model between slices, so the order
# is sorted to keep the slices identical from one session to the next.
SUB_FOLDERS = sorted(os.listdir(BASE_PATH))

# Image preprocessing
def preprocess_image(img_path):
    """Load the image at ``img_path`` resized to 299x299 and divide its pixels by 255.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The array produced by ``img_to_array`` after division by 255.0.
    """
    resized = load_img(img_path, target_size=(299, 299))
    return img_to_array(resized) / 255.0

# Create triplets
def create_triplets(sub_folders, base_path):
    """Build (anchor, positive, negative) image-path triplets.

    Every image of an eligible folder is used once as an anchor. The positive
    is a different image from the same folder and the negative is a random
    image from a different folder.

    A folder is eligible as an anchor source only if it holds at least two
    images and at least one other non-empty folder exists. Folders that do
    not qualify are skipped instead of looping forever while searching for a
    partner that cannot exist. Entries of ``sub_folders`` that are not
    directories, and empty directories, are ignored.

    Args:
        sub_folders: Folder names located directly under ``base_path``.
        base_path: Directory that contains the folders.

    Returns:
        A string array of shape ``(n_triplets, 3)`` holding the anchor,
        positive and negative paths; shape ``(0, 3)`` when no triplet can
        be formed.
    """
    # List each folder once instead of re-reading the negative folder from
    # disk for every single anchor image.
    images_by_folder = {}
    for folder in sub_folders:
        folder_path = os.path.join(base_path, folder)
        if not os.path.isdir(folder_path):
            continue
        images = os.listdir(folder_path)
        if images:
            images_by_folder[folder] = images

    triplets = []
    for anchor_folder in sub_folders:
        anchor_images = images_by_folder.get(anchor_folder)
        if anchor_images is None or len(anchor_images) < 2:
            # No second image available to act as the positive.
            continue
        negative_folders = [name for name in images_by_folder if name != anchor_folder]
        if not negative_folders:
            # No other folder available to draw a negative from.
            continue

        for anchor_idx, anchor_img in enumerate(anchor_images):
            anchor_img_path = os.path.join(base_path, anchor_folder, anchor_img)

            # Choose a random negative image from a different sub-folder
            negative_folder = random.choice(negative_folders)
            negative_img = random.choice(images_by_folder[negative_folder])
            negative_img_path = os.path.join(base_path, negative_folder, negative_img)

            # Choose a random positive image from the same folder: draw an
            # index from the list with the anchor's slot removed, then shift
            # it past the anchor so the anchor itself can never be picked.
            positive_idx = random.randrange(len(anchor_images) - 1)
            if positive_idx >= anchor_idx:
                positive_idx += 1
            positive_img_path = os.path.join(base_path, anchor_folder, anchor_images[positive_idx])

            triplets.append([anchor_img_path, positive_img_path, negative_img_path])

    print(f"Created {len(triplets)} triplets")
    # reshape keeps the (n, 3) layout even when the list is empty
    return np.array(triplets, dtype=str).reshape(-1, 3)

# Load images on the fly
def load_image(img_path):
    """Return the preprocessed image for ``img_path`` (delegates to ``preprocess_image``)."""
    image = preprocess_image(img_path)
    return image

# Python generator that feeds the tf.data pipeline
def triplet_generator(triplets):
    """Yield one ``(inputs, label)`` pair per triplet, loading the images lazily.

    Args:
        triplets: Iterable of rows whose first three items are the anchor,
            positive and negative image paths, in that order.

    Yields:
        A dict with keys ``'anchor'``, ``'positive'`` and ``'negative'``
        holding the loaded images, paired with the dummy label ``[0.0]``.
    """
    roles = ('anchor', 'positive', 'negative')
    for row in triplets:
        images = {role: load_image(row[position]) for position, role in enumerate(roles)}
        yield (images, [0.0])  # Dummy label

# Number of triplets per batch; the training loop also divides the split sizes
# by this value to obtain the step counts passed to fit().
batch_size = 8

# Embedding Model with InceptionV3
def make_embedding():
    """Build the embedding network: a frozen InceptionV3 backbone plus a dense head.

    Returns:
        A ``Sequential`` model taking 299x299x3 inputs whose last ``Dense``
        layer has 128 units.
    """
    backbone = InceptionV3(include_top=False, weights='imagenet', input_shape=(299, 299, 3))
    backbone.trainable = False  # Only the head added below is trained

    stack = [
        backbone,
        GlobalAveragePooling2D(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.4),
        Dense(128, activation='tanh', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.3),
    ]
    return Sequential(stack)

# Custom layer that joins the three embeddings into one tensor for the loss
class TripletConcatenateLayer(Layer):
    """Concatenate anchor, positive and negative embeddings along axis 1."""

    def call(self, inputs):
        """Return the three tensors in ``inputs`` concatenated on axis 1.

        ``inputs`` must unpack into exactly three tensors, ordered
        anchor, positive, negative.
        """
        anchor_emb, positive_emb, negative_emb = inputs
        stacked = tf.concat(values=[anchor_emb, positive_emb, negative_emb], axis=1)
        return stacked

# Triplet Loss function
def triplet_loss(margin=0.35):
    """Create a triplet loss closure for the concatenated Siamese output.

    Args:
        margin: Constant added to (positive distance - negative distance)
            before clamping at zero.

    Returns:
        A function ``loss(y_true, y_pred)``; ``y_true`` is ignored.
    """
    def loss(y_true, y_pred):
        # y_pred is split on axis 1 into three equal parts: anchor | positive | negative
        anchor_emb, positive_emb, negative_emb = tf.split(y_pred, num_or_size_splits=3, axis=1)
        d_pos = tf.reduce_sum(tf.square(anchor_emb - positive_emb), axis=1)
        d_neg = tf.reduce_sum(tf.square(anchor_emb - negative_emb), axis=1)
        hinge = tf.maximum(d_pos - d_neg + margin, 0.0)
        return tf.reduce_mean(hinge)
    return loss

# Number of epochs passed to fit() for each batch of folders.
EPOCHS = 5

# Function to ensure directory exists
def ensure_dir_exists(directory):
    """Create ``directory`` (and any missing parents) if it does not exist yet.

    ``exist_ok=True`` makes the call safe to repeat and removes the window
    between checking for the directory and creating it.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    os.makedirs(directory, exist_ok=True)

# Training run configuration
# steps_per_run is the number of folders handled in one training run
total_folders = len(SUB_FOLDERS)
steps_per_run = 100

# Output locations: checkpoints and saved models are written below model_dir
model_dir = r'/kaggle/working/'
embedding_model_dir, siamese_model_dir, keras_model_dir = (
    os.path.join(model_dir, sub_dir) for sub_dir in ('embedding', 'siamese', 'keras')
)
for output_dir in (model_dir, embedding_model_dir, siamese_model_dir, keras_model_dir):
    ensure_dir_exists(output_dir)

# Load or create models
def load_or_create_model(batch_index):
    """Return the ``(embedding, siamese_model)`` pair for one training run.

    Args:
        batch_index: Zero-based index of the folder batch about to be
            trained. ``0`` builds and compiles a fresh model; any other
            value reloads ``siamese_model_{batch_index}.keras`` from
            ``siamese_model_dir``.

    Returns:
        A tuple ``(embedding, siamese_model)`` where ``embedding`` is the
        shared ``Sequential`` sub-model used for all three inputs.

    Raises:
        ValueError: If a reloaded model contains no ``Sequential`` sub-model.
    """
    if batch_index == 0:
        embedding = make_embedding()
        input_anchor = Input(shape=(299, 299, 3), name='anchor')
        input_positive = Input(shape=(299, 299, 3), name='positive')
        input_negative = Input(shape=(299, 299, 3), name='negative')

        # The same embedding instance is applied to all three inputs, so the
        # branches share weights.
        embedding_anchor = embedding(input_anchor)
        embedding_positive = embedding(input_positive)
        embedding_negative = embedding(input_negative)

        concatenated_embeddings = TripletConcatenateLayer()([embedding_anchor, embedding_positive, embedding_negative])

        siamese_model = Model(inputs=[input_anchor, input_positive, input_negative], outputs=concatenated_embeddings)
        siamese_model.compile(optimizer=SGD(learning_rate=0.0005, momentum=0.9), loss=triplet_loss())
    else:
        # The training loop stores the Siamese model in siamese_model_dir with
        # a .keras extension, so the same file name has to be used here (the
        # previous .h5 name never matched a saved file).
        siamese_model = load_model(
            os.path.join(siamese_model_dir, f'siamese_model_{batch_index}.keras'),
            custom_objects={'loss': triplet_loss(), 'TripletConcatenateLayer': TripletConcatenateLayer}
        )
        # Find the embedding sub-model by type rather than by a fixed layer index.
        embedding = next(
            (layer for layer in siamese_model.layers if isinstance(layer, Sequential)),
            None,
        )
        if embedding is None:
            raise ValueError(
                f"No Sequential embedding sub-model found in siamese_model_{batch_index}.keras"
            )

    return embedding, siamese_model

# Accumulates the per-run training history; each key starts with its own empty list
combined_history = {metric: [] for metric in ('loss', 'val_loss', 'lr')}

In [None]:
def _make_triplet_dataset(triplet_rows, shuffle):
    """Wrap ``triplet_generator`` in a batched, repeating tf.data pipeline.

    Args:
        triplet_rows: Rows of (anchor, positive, negative) image paths.
        shuffle: Whether to shuffle the loaded samples before batching.

    Returns:
        A dataset that yields batches of ``batch_size`` samples indefinitely.
    """
    dataset = tf.data.Dataset.from_generator(
        lambda: triplet_generator(triplet_rows),
        output_signature=(
            {
                'anchor': tf.TensorSpec(shape=(299, 299, 3), dtype=tf.float32),
                'positive': tf.TensorSpec(shape=(299, 299, 3), dtype=tf.float32),
                'negative': tf.TensorSpec(shape=(299, 299, 3), dtype=tf.float32),
            },
            tf.TensorSpec(shape=(1,), dtype=tf.float32)  # Dummy label
        )
    )
    if shuffle:
        dataset = dataset.shuffle(buffer_size=512)
    return dataset.batch(batch_size).repeat().prefetch(tf.data.experimental.AUTOTUNE)


# Train on SUB_FOLDERS in slices of steps_per_run folders, carrying the model
# over from one slice to the next through the files saved at the end of each run.
for i in range(0, total_folders, steps_per_run):
    run_number = i // steps_per_run + 1
    current_folders = SUB_FOLDERS[i:i + steps_per_run]
    triplets = create_triplets(current_folders, BASE_PATH)

    total_size = len(triplets)
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    if train_size == 0:
        # Too few triplets to form both a training and a validation split.
        print(f"Skipping batch {run_number}: only {total_size} triplets available")
        continue

    # Split the triplet paths themselves, before any dataset is built.
    # Splitting with take()/skip() on an already shuffled, batched and repeated
    # dataset counts batches rather than samples, lets both splits draw from
    # the same pool, and forces skip() to decode images only to discard them.
    # Rows are shuffled first because create_triplets emits them folder by folder.
    np.random.shuffle(triplets)
    train_triplets = triplets[:train_size]
    val_triplets = triplets[train_size:]

    # At least one step each, so a split smaller than batch_size still runs.
    steps_per_epoch = max(1, train_size // batch_size)
    validation_steps = max(1, test_size // batch_size)

    train_dataset = _make_triplet_dataset(train_triplets, shuffle=True)
    test_dataset = _make_triplet_dataset(val_triplets, shuffle=False)

    embedding, siamese_model = load_or_create_model(i // steps_per_run)

    # Callbacks
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)
    # NOTE: this callback is attached to siamese_model, so the file it writes
    # holds the full Siamese network despite the "embedding" file name.
    embedding_checkpoint = ModelCheckpoint(
        os.path.join(embedding_model_dir, f'{run_number}_embedding_model.keras'),
        monitor='val_loss',
        save_best_only=True,
        verbose=1,
        save_weights_only=False
    )
    print("on my way")
    history = siamese_model.fit(train_dataset, epochs=EPOCHS, steps_per_epoch=steps_per_epoch,
                                validation_data=test_dataset, validation_steps=validation_steps,
                                callbacks=[lr_scheduler, early_stopping, embedding_checkpoint])

    print(f"Training completed for batch {run_number}")

    model_path = os.path.join(keras_model_dir, f'siamese_model_{run_number}.keras')
    if os.path.exists(model_path):
        os.remove(model_path)

    siamese_model.save(model_path)
    print(f"Model {run_number} Saved")

    # Second copy in siamese_model_dir; this is the file the next run reloads.
    model_path_h5 = os.path.join(siamese_model_dir, f'siamese_model_{run_number}.keras')
    siamese_model.save(model_path_h5)
    print(f"Full Siamese model {run_number} saved as .keras file")

    embedding_model_path = os.path.join(embedding_model_dir, f'embedding_model_{run_number}.keras')
    embedding.save(embedding_model_path)
    print(f"Embedding model {run_number} saved as .keras file")

    # Combine history for plotting later; .get() covers metric keys that were
    # not pre-declared in combined_history.
    for key in history.history.keys():
        combined_history[key] = combined_history.get(key, []) + history.history[key]

## Save and plot the combined training history

In [None]:
# Save combined history. np.save stores the dict as a pickled object array, so
# read it back with np.load(path, allow_pickle=True).item().
np.save(os.path.join(model_dir, 'combined_history.npy'), combined_history)

# Plot combined history. Keras records one history entry per epoch, so the
# x-axis counts epochs accumulated over all folder batches, not batches.
plt.figure(figsize=(12, 6))
plt.plot(combined_history['loss'], label='Training Loss')
plt.plot(combined_history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss Over Batches')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.ylim(0, 800)  # Set y-axis limits for better visualization
plt.legend()
plt.show()

# View the embedding of an image