In [3]:
import pandas as pd
import numpy as np
import os
import re
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from PIL import Image
import matplotlib.pyplot as plt
import zipfile

# dataset
print("Available in /kaggle/input/:")
for item in os.listdir('/kaggle/input/skin-patches-dataset3/'):
    print(item)

# Find CSV
def find_csv_path(partial_name='data2_FSD_patches.csv'):
    partial_lower = partial_name.lower()
    for root, dirs, files in os.walk('/kaggle/input/skin-patches-dataset3/'):
        for file in files:
            if partial_lower in file.lower() and file.endswith('.csv'):
                full_path = os.path.join(root, file)
                print(f"\nFound CSV: {full_path}")
                return full_path
    raise FileNotFoundError("CSV not found")

csv_path = find_csv_path()
df = pd.read_csv(csv_path)

print(f"\nLoaded {df.shape[0]} rows")
print("Columns:", df.columns.tolist())


df['image_patch'] = df['image_patch'].str.replace('\\', '/')
dataset_folder = os.path.basename(os.path.dirname(csv_path))
base_input = '/kaggle/input/skin-patches-dataset3/' + dataset_folder
sample_path = df['image_patch'].iloc[0]
if not sample_path.startswith('/kaggle'):
    df['image_patch'] = base_input + '/' + df['image_patch'].str.lstrip('/')

# Labels
if 'label' in df.columns and df['label'].dtype == 'object':
    df['label'] = df['label'].map({'no_skin': 0, 'non_skin': 0, 'skin': 1, 'Skin': 1, 'NoSkin': 0}).fillna(0).astype(int)

y = df['label'].values

# Image Loader 
def load_images(paths):
    images = []
    missing = 0
    for path in paths:
        if os.path.exists(path):
            img = load_img(path, target_size=(16,16))
            img_raw = img_to_array(img)
            images.append(img_raw / 255.0)
        else:
            missing += 1
            images.append(np.zeros((16,16,3)))
    print(f"Loaded {len(images)} patches, {missing} missing")
    return np.array(images)

X = load_images(df['image_patch'].values)

# Model 
model = Sequential([
    Input(shape=(16, 16, 3)),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same', name='last_conv'),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation=None, name='logit')  # NO SIGMOID - raw logit output
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),  # from_logits=True
    metrics=['accuracy']
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

test_acc = model.evaluate(X_test, y_test)[1]
print(f"\nTest Accuracy: {test_acc:.4f}")

# Deep Learning Feature Computation
logits = model.predict(X).flatten()
pred_probs = tf.sigmoid(logits).numpy()
predicted_labels = (pred_probs > 0.5).astype(int)

# Feature 1: ann_logit (raw model output before sigmoid)
ann_logit = logits

# Feature 2: ann_conf (absolute value of logit = model confidence)
ann_conf = np.abs(logits)

# Feature 3: ann_energy (L2 norm of conv activation energy)
# Create a temporary model to extract conv features
conv_output_layer = model.get_layer('last_conv')
conv_model = Model(inputs=model.layers[0].input, outputs=conv_output_layer.output)
conv_maps = conv_model.predict(X)           # (N, H, W, 128)
gap = conv_maps.mean(axis=(1, 2))           # Global Average Pooling -> (N, 128)
ann_energy = np.linalg.norm(gap, axis=1)    # L2 norm -> (N,)

pred_df = pd.read_csv(csv_path)  
pred_df['label_ann'] = predicted_labels
pred_df['ann_logit'] = ann_logit
pred_df['ann_conf'] = ann_conf
pred_df['ann_energy'] = ann_energy

pred_csv_path = '/kaggle/working/predictions_with_ann.csv'
pred_df.to_csv(pred_csv_path, index=False)
print(f"\nPredictions CSV saved with deep learning features to {pred_csv_path}")

# Reconstruction Loop
recon_dir = '/kaggle/working/reconstructions'
os.makedirs(recon_dir, exist_ok=True)

recon_df = pd.read_csv(pred_csv_path)
recon_df['load_path'] = recon_df['image_patch'].str.replace('\\', '/')
if not recon_df['load_path'].iloc[0].startswith('/kaggle'):
    recon_df['load_path'] = base_input + '/' + recon_df['load_path'].str.lstrip('/')

print("\nCreating overlaid segmentation visualizations...")

for image_id in recon_df['image_id'].unique():
    subset = recon_df[recon_df['image_id'] == image_id].reset_index(drop=True)
    n_patches = len(subset)
    if n_patches == 0: continue
    
    cols = int(np.ceil(np.sqrt(n_patches)))
    rows = int(np.ceil(n_patches / cols))
    patch_size = 16
    canvas = np.zeros((rows * patch_size, cols * patch_size, 3), dtype=np.float32)
    green_color = np.array([0, 255, 0], dtype=np.float32)
    alpha = 0.4
    
    current_pos = 0
    for r in range(rows):
        for c in range(cols):
            if current_pos >= n_patches: break
            row_data = subset.iloc[current_pos]
            patch_path = row_data['load_path']
            label_ann = row_data['label_ann']
            
            if os.path.exists(patch_path):
                patch_img = img_to_array(load_img(patch_path))
            else:
                patch_img = np.zeros((16, 16, 3))
            
            if patch_img.max() <= 1.0: patch_img *= 255
            patch_img = patch_img.astype(np.float32)
            y_start, x_start = r * patch_size, c * patch_size
            canvas[y_start:y_start+patch_size, x_start:x_start+patch_size] = patch_img
            
            if label_ann == 1:
                region = canvas[y_start:y_start+patch_size, x_start:x_start+patch_size]
                blended = (alpha * green_color) + ((1 - alpha) * region)
                canvas[y_start:y_start+patch_size, x_start:x_start+patch_size] = blended
            current_pos += 1

    canvas = np.clip(canvas, 0, 255).astype(np.uint8)
    fig = plt.figure(figsize=(cols * 1.5, rows * 1.5))
    plt.imshow(canvas)
    plt.axis('off')
    plt.tight_layout()
    output_path = os.path.join(recon_dir, f'{image_id}_segmentation_overlay.png')
    plt.savefig(output_path, dpi=200, bbox_inches='tight', pad_inches=0.1)
    plt.close()

zip_path = '/kaggle/working/reconstructions.zip'
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(recon_dir):
        for file in files:
            zipf.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), recon_dir))

print(f"\nReconstructions zipped to {zip_path}")

Available in /kaggle/input/:
data2_FSD

Found CSV: /kaggle/input/skin-patches-dataset3/data2_FSD/data2_FSD_patches.csv

Loaded 28160 rows
Columns: ['image_id', 'image_patch', 'mask_patch', 'label']
Loaded 28160 patches, 0 missing
Epoch 1/20
[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 16ms/step - accuracy: 0.7731 - loss: 0.4588 - val_accuracy: 0.8260 - val_loss: 0.4508
Epoch 2/20
[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - accuracy: 0.8371 - loss: 0.3568 - val_accuracy: 0.8784 - val_loss: 0.3037
Epoch 3/20
[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - accuracy: 0.8830 - loss: 0.2734 - val_accuracy: 0.8642 - val_loss: 0.2866
Epoch 4/20
[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 15ms/step - accuracy: 0.8718 - loss: 0.2845 - val_accuracy: 0.8804 - val_loss: 0.2635
Epoch 5/20
[1m564/564[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - accuracy: 0.8864 - loss: 0.2