In [4]:
!pip install fairlearn
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from fairlearn.metrics import MetricFrame
import cv2
import matplotlib.pyplot as plt
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Load Data
train_df = pd.read_csv('/content/drive/My Drive/BTTKaggleCompetition/bttai-ajl-2025/train.csv')
test_df = pd.read_csv('/content/drive/My Drive/BTTKaggleCompetition/bttai-ajl-2025/test.csv')

# Preprocess file paths
train_df['file_path'] = '/content/drive/My Drive/BTTKaggleCompetition/bttai-ajl-2025/train/train/' + train_df['label'] + '/' + train_df['md5hash'] + '.jpg'
test_df['file_path'] = '/content/drive/My Drive/BTTKaggleCompetition/bttai-ajl-2025/test/test/' + test_df['md5hash'] + '.jpg'

# Create validation split
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42, stratify=train_df['label'])

# Create label encoder
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
train_data['encoded_label'] = label_encoder.fit_transform(train_data['label'])
val_data['encoded_label'] = label_encoder.transform(val_data['label'])

# Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0
Mounted at /content/drive


In [None]:
# Create the ResNet50 Model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # Changed from categorical_crossentropy
    metrics=['accuracy']
)

# Compute Class Weights
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_data['encoded_label']),
    y=train_data['encoded_label']
)
class_weights_dict = dict(enumerate(class_weights))

# Create generators
batch_size = 32
train_generator = datagen.flow_from_dataframe(
    train_data,
    x_col='file_path',
    y_col='encoded_label',  # Using encoded labels
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='raw'  # Changed to raw for sparse categorical crossentropy
)

val_generator = datagen.flow_from_dataframe(
    val_data,
    x_col='file_path',
    y_col='encoded_label',  # Using encoded labels
    target_size=(224, 224),
    batch_size=batch_size,
    shuffle=False,
    class_mode='raw'  # Changed to raw for sparse categorical crossentropy
)

# Training callbacks
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-6
)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Found 1013 validated image filenames.




Found 253 validated image filenames.




In [None]:
# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs= 32,
    class_weight=class_weights_dict,
    callbacks=[reduce_lr, early_stopping]
)


  self._warn_if_super_not_called()


Epoch 1/32
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 10s/step - accuracy: 0.1747 - loss: 4.3509 - val_accuracy: 0.4704 - val_loss: 1.6521 - learning_rate: 0.0010
Epoch 2/32
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 8s/step - accuracy: 0.4241 - loss: 1.9835 - val_accuracy: 0.5494 - val_loss: 1.4837 - learning_rate: 0.0010
Epoch 3/32
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 8s/step - accuracy: 0.4993 - loss: 1.6606 - val_accuracy: 0.5692 - val_loss: 1.4143 - learning_rate: 0.0010
Epoch 4/32
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 9s/step - accuracy: 0.5735 - loss: 1.4181 - val_accuracy: 0.5613 - val_loss: 1.3464 - learning_rate: 0.0010
Epoch 5/32
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m293s[0m 9s/step - accuracy: 0.5969 - loss: 1.3195 - val_accuracy: 0.5494 - val_loss: 1.3608 - learning_rate: 0.0010
Epoch 6/32
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[

In [None]:
# Ensure labels match validation data
val_labels = val_data['encoded_label'].values

# Predict using the full validation set
steps = int(np.ceil(len(val_data) / val_generator.batch_size))  # Ensure all data is processed
val_preds = model.predict(val_generator, steps=steps, verbose=1)
val_preds = np.argmax(val_preds, axis=1)

# Ensure val_labels length matches val_preds
val_labels = val_labels[:len(val_preds)]

# Compute weighted F1-score
weighted_f1 = f1_score(val_labels, val_preds, average='weighted')
print(f'Weighted F1-Score: {weighted_f1:.4f}')


# Fairness Evaluation (if skin_tone column exists)
if 'skin_tone' in val_data.columns:
    skin_tone_labels = val_data['skin_tone'].values
    metric_frame = MetricFrame(
        metrics=f1_score,
        y_true=val_labels,
        y_pred=val_preds,
        sensitive_features=skin_tone_labels
    )
    print("\nF1-Score by Skin Tone:")
    print(metric_frame.by_group)

# Grad-CAM visualization function
def get_grad_cam(model, img_array, layer_name='conv5_block3_out'):
    grad_model = tf.keras.models.Model(
        [model.inputs],
        [model.get_layer(layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        loss = predictions[:, np.argmax(predictions[0])]

    grads = tape.gradient(loss, conv_outputs)[0]
    pooled_grads = np.mean(grads, axis=(0, 1, 2))
    conv_outputs = conv_outputs[0]

    for i in range(pooled_grads.shape[-1]):
        conv_outputs[:, :, i] *= pooled_grads[i]

    heatmap = np.mean(conv_outputs, axis=-1)
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)
    return heatmap


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 3s/step 
Weighted F1-Score: 0.0456




In [5]:
# Ensure datagen is defined before using it for test data
test_generator = datagen.flow_from_dataframe(
    test_df,
    x_col='file_path',
    target_size=(224, 224),
    batch_size=batch_size,
    shuffle=False,
    class_mode=None  # No labels for the test set
)

# Generate predictions
steps = int(np.ceil(len(test_df) / test_generator.batch_size))  # Ensure all samples are processed
test_preds = model.predict(test_generator, steps=steps, verbose=1)
test_labels = np.argmax(test_preds, axis=1)
test_df['label'] = label_encoder.inverse_transform(test_labels)

# Save Submission File
test_df[['md5hash', 'label']].to_csv('submission.csv', index=False)
print("submission.csv saved successfully!")


NameError: name 'batch_size' is not defined