In [None]:
# 08_extract_image_embeddings.ipynb
# --------------------------------------------------
import os
# Switch to the project directory; the paths in the CONFIG section below are
# relative and are resolved against this working directory.
# NOTE(review): this is a machine-specific absolute Windows path — it must be
# edited before the notebook is run on another machine.
os.chdir(r"C:\Users\Negar\Desktop\paper_results\Myself\cr_coad_project")

import tensorflow as tf
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm

# --------------------------------------------------
# CONFIG
# Every path here is relative, so it is resolved against the current
# working directory.
MODEL_DIR = Path("results/images/baseline_image_model_clean_tf")  # SavedModel directory
INDEX = Path("data/processed/images/rep_index.csv")  # CSV index that is read for image paths and ids
OUTPUT_DIR = Path("data/processed/images")  # directory that receives the output CSV
IMG_SIZE = (224, 224)  # target size handed to tf.image.resize
BATCH_SIZE = 16  # number of images per inference call
# --------------------------------------------------

print("TensorFlow:", tf.__version__)
print("✅ Loading model from:", MODEL_DIR)

# Restore the exported SavedModel (tf.saved_model.load is given a plain string path).
loaded = tf.saved_model.load(str(MODEL_DIR))
print("✅ Model loaded successfully as a TensorFlow SavedModel.")

# Fetch the default serving signature and the name of its first keyword
# input (element [1] of structured_input_signature holds the keyword inputs).
infer = loaded.signatures["serving_default"]
input_name = list(infer.structured_input_signature[1])[0]
print(f"✅ Found input key: {input_name}")

# --------------------------------------------------
# IMAGE PIPELINE
def decode_img(path):
    """Load one image file and return it as a scaled float32 tensor.

    The file at ``path`` is read, decoded with ``tf.image.decode_png`` into
    three channels, resized to ``IMG_SIZE`` and divided by 255.

    Args:
        path: Scalar string tensor (or string) naming the image file.

    Returns:
        A float32 tensor of shape ``IMG_SIZE + (3,)``.

    NOTE(review): the signature's input key reported by this notebook is
    ``efficientnetb0_input``. Keras EfficientNet models normally rescale
    internally and expect 0-255 inputs — confirm that this 1/255 scaling
    matches the preprocessing used when the model was trained.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, IMG_SIZE)
    return tf.cast(resized, tf.float32) / 255.0

def make_dataset(paths):
    """Build a batched, prefetched ``tf.data`` pipeline of decoded images.

    Args:
        paths: Sequence of image file paths (strings).

    Returns:
        A ``tf.data.Dataset`` yielding batches of up to ``BATCH_SIZE`` images
        produced by ``decode_img``; the final batch may be smaller because
        remainders are not dropped.

    Callers that pair outputs with ids by position rely on the elements
    coming out in the same order as ``paths`` (tf.data's parallel ``map`` is
    deterministic by default).
    """
    path_ds = tf.data.Dataset.from_tensor_slices(paths)
    image_ds = path_ds.map(decode_img, num_parallel_calls=tf.data.AUTOTUNE)
    batched_ds = image_ds.batch(BATCH_SIZE)
    return batched_ds.prefetch(tf.data.AUTOTUNE)

# --------------------------------------------------
# Read the image index: one row per image, with the file path in
# "rep_slice" and its identifier in "series_id".
df = pd.read_csv(INDEX)
print("Total slices:", len(df))

# Keep ids and paths as parallel lists so outputs can be matched to ids by position.
ids = df["series_id"].tolist()
paths = df["rep_slice"].astype(str).tolist()
ds = make_dataset(paths)

# --------------------------------------------------
# Run the model's serving signature on every batch and collect its first
# output tensor.
# NOTE: the signature returns the model's *final* output, not an intermediate
# layer. In the recorded run the result had shape (23, 1), i.e. a single
# value per image (presumably the sigmoid output of the last Dense layer), so
# what is stored as "embeddings" is the model's output score. Obtaining
# activations from before that layer would require a separate
# feature-extractor model, which this cell does not build.

embeddings = []
for batch in tqdm(ds, desc="Extracting embeddings"):
    # Pass the batch under the signature's input key found at load time;
    # `batch` is already a tensor, so no extra tf.constant wrapping is needed.
    out = infer(**{input_name: batch})
    # The signature returns a dict of outputs; take the first one.
    feats = next(iter(out.values())).numpy()
    embeddings.append(feats)

# Fail with a clear message instead of np.vstack's generic error when the
# index contained no rows and therefore no batch was produced.
if not embeddings:
    raise ValueError("No batches were produced; the image index appears to be empty.")

embeddings = np.vstack(embeddings)
print("✅ Embeddings shape:", embeddings.shape)

# --------------------------------------------------
# SAVE EMBEDDINGS
# Write one CSV row per image: "series_id" first, then one column per
# output value (columns are named by their integer position).
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
embed_path = OUTPUT_DIR / "image_embeddings.csv"

embed_df = pd.DataFrame(embeddings)
embed_df.insert(loc=0, column="series_id", value=ids)
embed_df.to_csv(embed_path, index=False)
print(f"✅ Saved embeddings to: {embed_path}")


TensorFlow: 2.12.0
✅ Loading model from: results\images\baseline_image_model_clean_tf
✅ Model loaded successfully as a TensorFlow SavedModel.
✅ Found input key: efficientnetb0_input
Total slices: 23


Extracting embeddings: 100%|██████████| 2/2 [00:07<00:00,  3.51s/it]

✅ Embeddings shape: (23, 1)
✅ Saved embeddings to: data\processed\images\image_embeddings.csv



