In [None]:
from google.colab import drive
import os
import zipfile
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# --- Environment and dataset setup ------------------------------------------
# Mount Google Drive (Colab) unless it is already mounted; the dataset archive
# and the checkpoint folder both live under /content/drive.
if not os.path.ismount('/content/drive'):
    drive.mount('/content/drive')
else:
    print("Google Drive is already mounted.")

zip_path = '/content/drive/MyDrive/Attendance_Checkpoints/Copy of Attendance_Checkpoints.zip'
extract_dir = '/content/Attendance_Dataset'
checkpoint_dir = '/content/drive/MyDrive/Attendance_Checkpoints'
os.makedirs(extract_dir, exist_ok=True)
os.makedirs(checkpoint_dir, exist_ok=True)

# Unpack the dataset archive into the local Colab disk.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)
print("Dataset extracted successfully.")

base_dir = extract_dir
train_dir = os.path.join(base_dir, 'train')
augmented_train_dir = os.path.join(base_dir, 'Augmented_Train')
val_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# Name the directories that are actually missing so a bad archive layout can
# be diagnosed from the message alone.
missing_dirs = [d for d in (train_dir, val_dir, test_dir) if not os.path.isdir(d)]
if missing_dirs:
    raise ValueError(
        "One or more dataset directories are missing after extraction. "
        "Please check the zip content. Missing: " + ", ".join(missing_dirs)
    )

os.makedirs(augmented_train_dir, exist_ok=True)

# --- Hyper-parameters ---------------------------------------------------------
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 50
# Predictions whose top probability is below this are reported as 'unknown'.
CONFIDENCE_THRESHOLD = 0.7
# Number of augmented copies written to disk per original training image.
AUGMENTATIONS_PER_IMAGE = 5
# Class names; the list order defines the class indices used by the generators.
team = ['omar', 'ayat', 'mohammed', 'rana', 'unknown']
# Derived from `team` so the output layer can never disagree with the label set.
NUM_CLASSES = len(team)

def custom_augmentation(image):
    """Add Gaussian noise and, half of the time, blank out one random square patch.

    Meant to be used as the ``preprocessing_function`` of an
    ``ImageDataGenerator``. Keras invokes that hook *before* it applies
    ``rescale``, so the pixels may arrive either in [0, 1] or in [0, 255].
    The working range is therefore detected from the data: any value above
    1.0 means the image is treated as 0-255. Noise strength and clipping
    bounds are scaled to that range.

    Args:
        image: Image array/tensor whose last three axes are
            (height, width, channels).

    Returns:
        A float32 tensor of the same shape with noise added and, with
        probability 0.5, a square of side 20-59 px (clipped at the image
        border) set to zero.
    """
    image = tf.cast(image, tf.float32)
    # Heuristic range detection; a 0-255 image whose brightest pixel is <= 1
    # would be misread, which only happens for an essentially black frame.
    pixel_max = tf.where(tf.reduce_max(image) > 1.0, 255.0, 1.0)

    noise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=0.05 * pixel_max)
    image = tf.clip_by_value(image + noise, 0.0, pixel_max)

    if tf.random.uniform(()) > 0.5:
        cutout_size = tf.random.uniform((), 20, 60, dtype=tf.int32)
        dims = tf.shape(image)
        h, w = dims[-3], dims[-2]
        y = tf.random.uniform((), 0, h, dtype=tf.int32)
        x = tf.random.uniform((), 0, w, dtype=tf.int32)
        half = cutout_size // 2
        y1 = tf.clip_by_value(y - half, 0, h)
        y2 = tf.clip_by_value(y + half, 0, h)
        x1 = tf.clip_by_value(x - half, 0, w)
        x2 = tf.clip_by_value(x + half, 0, w)

        # Cutout erases the patch and keeps everything else. (Cropping to the
        # patch and padding back would do the opposite: keep only the patch.)
        rows = tf.range(h)[:, tf.newaxis]
        cols = tf.range(w)[tf.newaxis, :]
        inside_patch = tf.logical_and(
            tf.logical_and(rows >= y1, rows < y2),
            tf.logical_and(cols >= x1, cols < x2),
        )
        keep_mask = tf.cast(tf.logical_not(inside_patch), image.dtype)
        image = image * keep_mask[:, :, tf.newaxis]

    return image

# Heavy augmentation used only to write extra training images to disk.
# Geometric/photometric transforms come from Keras; noise and cutout come
# from `custom_augmentation`.
# NOTE(review): Keras applies `preprocessing_function` before `rescale` —
# confirm the hook receives pixels in the range it expects.
_augment_options = {
    'rescale': 1./255,
    'rotation_range': 45,
    'width_shift_range': 0.4,
    'height_shift_range': 0.4,
    'shear_range': 0.4,
    'zoom_range': [0.6, 1.4],
    'horizontal_flip': True,
    'vertical_flip': True,
    'brightness_range': [0.6, 1.4],
    'fill_mode': 'nearest',
    'preprocessing_function': custom_augmentation,
}
augment_datagen = ImageDataGenerator(**_augment_options)

def generate_augmented_images(input_dir, output_dir, classes, augmentations_per_image):
    """Write augmented copies of every image under ``input_dir`` to ``output_dir``.

    For each class sub-folder, every image file is loaded at ``IMG_SIZE``,
    pushed through ``augment_datagen`` ``augmentations_per_image`` times, and
    each result is saved as ``aug_<i>_<original name>`` in the matching
    sub-folder of ``output_dir``. Files that fail to load or process are
    reported and skipped (best effort).

    Args:
        input_dir: Directory containing one sub-folder per class.
        output_dir: Destination root; class sub-folders are created as needed.
        classes: Iterable of class (sub-folder) names to process.
        augmentations_per_image: Number of augmented images per source image.

    Raises:
        FileNotFoundError: If a class sub-folder is missing from ``input_dir``.
    """
    for class_name in classes:
        class_input_dir = os.path.join(input_dir, class_name)
        class_output_dir = os.path.join(output_dir, class_name)
        os.makedirs(class_output_dir, exist_ok=True)

        for img_name in sorted(os.listdir(class_input_dir)):
            img_path = os.path.join(class_input_dir, img_name)
            if not os.path.isfile(img_path):
                # Nested folders are not images; skip them quietly.
                continue
            try:
                img = load_img(img_path, target_size=IMG_SIZE)
                # Keep pixels in 0-255 here: `augment_datagen` applies
                # rescale=1/255 itself, and its brightness shift round-trips
                # through an 8-bit PIL image, which ruins [0, 1] float input.
                img_batch = np.expand_dims(img_to_array(img), axis=0)

                # One (endless) iterator per source image is enough.
                aug_iter = augment_datagen.flow(img_batch, batch_size=1)
                for i in range(augmentations_per_image):
                    aug_img = next(aug_iter)[0]
                    # Generator output is rescaled to [0, 1]; map back to 8-bit.
                    aug_img = np.clip(aug_img * 255.0, 0, 255).astype(np.uint8)
                    aug_img_path = os.path.join(class_output_dir, f"aug_{i}_{img_name}")
                    # scale=False: the data is already 0-255, so stop save_img
                    # from min-max stretching it and changing the contrast.
                    tf.keras.preprocessing.image.save_img(aug_img_path, aug_img, scale=False)
                    print(f"Saved augmented image: {aug_img_path}")
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the run.
                print(f"Error processing {img_path}: {e}")

# Build the on-disk augmented training set once; a non-empty output folder is
# taken to mean it was already generated.
_augmented_set_ready = (
    os.path.exists(augmented_train_dir)
    and len(os.listdir(augmented_train_dir)) != 0
)
if _augmented_set_ready:
    print("Augmented images already exist.")
else:
    print("Generating augmented images...")
    generate_augmented_images(train_dir, augmented_train_dir, team, AUGMENTATIONS_PER_IMAGE)

# On-the-fly augmentation for training batches (applied on top of the images
# already augmented on disk).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.4,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest'
)

# Validation and test images are only rescaled, never augmented.
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three directory iterators. Passing `classes=team`
# pins the class-index order to the order of the `team` list.
_flow_kwargs = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    classes=team,
)

# Training reads from the augmented folder and is shuffled each epoch.
train_generator = train_datagen.flow_from_directory(
    augmented_train_dir, shuffle=True, **_flow_kwargs
)

# Evaluation iterators keep a fixed order so labels and predictions line up.
val_generator = val_test_datagen.flow_from_directory(
    val_dir, shuffle=False, **_flow_kwargs
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir, shuffle=False, **_flow_kwargs
)

# Balanced class weights computed from the labels of the training iterator.
real_labels = train_generator.classes
present_classes = np.unique(real_labels)
class_weights = compute_class_weight(class_weight='balanced', classes=present_classes, y=real_labels)
# Key each weight by its real class index. Enumerating the weights would shift
# them onto the wrong classes whenever a class has no training samples.
# Classes without samples keep a neutral weight of 1.0 so the dict always
# covers indices 0..len(team)-1.
class_weight_dict = {index: 1.0 for index in range(len(team))}
class_weight_dict.update(
    (int(label), float(weight)) for label, weight in zip(present_classes, class_weights)
)
print("Class weights:", class_weight_dict)

# --- Model: frozen MobileNetV2 backbone + small classification head ---------
# The input shape is derived from IMG_SIZE so the network always matches what
# the data generators produce.
# NOTE(review): the generators feed pixels in [0, 1]; the ImageNet weights of
# MobileNetV2 were trained with inputs scaled to [-1, 1]
# (mobilenet_v2.preprocess_input) — confirm the mismatch is intended.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))
# Phase 1 trains only the new head; the backbone stays frozen.
base_model.trainable = False
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu', kernel_regularizer=l2(0.01))(x)
x = Dropout(0.6)(x)
predictions = Dense(NUM_CLASSES, activation='softmax')(x)

# Built on base_model.input, so the backbone layers appear flattened in
# model.layers rather than nested as a single sub-model.
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer=Adam(learning_rate=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])
print("Model Summary:")
model.summary()

# --- Callbacks ----------------------------------------------------------------
# Saves the full model after every epoch (save_best_only=False), with the epoch
# number in the file name.
checkpoint = ModelCheckpoint(
    os.path.join(checkpoint_dir, 'attendance_checkpoint_epoch_{epoch:02d}.h5'),
    save_weights_only=False,
    save_best_only=False,
    monitor='val_accuracy',
    verbose=1
)

# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

# Multiply the learning rate by 0.2 after 2 stagnant epochs, down to 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_lr=1e-6,
    verbose=1
)

# len(iterator) is the number of batches including the final partial one.
# Floor division (samples // BATCH_SIZE) would yield 0 steps for a set smaller
# than one batch, and would always skip the last samples of the unshuffled
# validation set.
steps_per_epoch = len(train_generator)
validation_steps = len(val_generator)

# --- Phase 1: train the classification head on the frozen backbone -----------
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    class_weight=class_weight_dict,
    callbacks=[checkpoint, early_stopping, reduce_lr]
)

# --- Phase 2: fine-tune ---------------------------------------------------------
# Unfreeze everything, then re-freeze the first 120 layers so only the later
# layers and the head are updated.
model.trainable = True
for layer in model.layers[:120]:
    layer.trainable = False

# Recompile so the trainable changes take effect; a much lower learning rate
# is used for fine-tuning.
model.compile(optimizer=Adam(learning_rate=5e-5), loss='categorical_crossentropy', metrics=['accuracy'])

# Continue epoch numbering where phase 1 stopped (keeps checkpoint file names
# unique). `epochs` is the index of the LAST epoch, not a count, so it must be
# offset by the start; with epochs=EPOCHS, fine-tuning would run zero epochs
# whenever phase 1 used all of its EPOCHS.
fine_tune_start = len(history.epoch)
history_fine = model.fit(
    train_generator,
    epochs=fine_tune_start + EPOCHS,
    initial_epoch=fine_tune_start,
    validation_data=val_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    class_weight=class_weight_dict,
    callbacks=[checkpoint, early_stopping, reduce_lr]
)

# --- Evaluation on the held-out test set -------------------------------------
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")

# Low-confidence predictions are reported as the 'unknown' class; look its
# index up instead of hard-coding it so reordering `team` cannot break this.
unknown_index = team.index('unknown')

# Restart the iterator so the loop below sees every test batch exactly once.
test_generator.reset()
y_true = []
y_pred = []
for _ in range(len(test_generator)):
    images, labels = next(test_generator)
    batch_probabilities = model.predict(images, verbose=0)
    batch_predictions = np.argmax(batch_probabilities, axis=1)
    low_confidence = np.max(batch_probabilities, axis=1) < CONFIDENCE_THRESHOLD
    batch_predictions[low_confidence] = unknown_index
    y_pred.extend(batch_predictions.tolist())
    # Labels are one-hot (class_mode='categorical'); convert to class indices.
    y_true.extend(np.argmax(labels, axis=1).tolist())

class_names = team
print("Classification Report:")
# Pass `labels` explicitly: without it the report raises when some class is
# absent from both y_true and y_pred, because target_names no longer matches.
print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
))

# --- Persist the final model ----------------------------------------------------
final_model_path = os.path.join(checkpoint_dir, 'attendance_mobilenetv2_final.h5')
model.save(final_model_path)

# --- Training curves (phase 1 followed by fine-tuning) ------------------------
plt.figure(figsize=(12, 4))
for position, (metric, title) in enumerate((('accuracy', 'Accuracy'), ('loss', 'Loss')), start=1):
    plt.subplot(1, 2, position)
    # .get(..., []) keeps plotting alive if a phase recorded no epochs, in
    # which case its history dict is empty.
    train_curve = history.history.get(metric, []) + history_fine.history.get(metric, [])
    val_curve = history.history.get(f'val_{metric}', []) + history_fine.history.get(f'val_{metric}', [])
    plt.plot(train_curve, label=f'Training {title}')
    plt.plot(val_curve, label=f'Validation {title}')
    plt.title(f'Model {title}')
    plt.xlabel('Epoch')
    plt.ylabel(title)
    plt.legend()

plt.tight_layout()
plt.savefig(os.path.join(checkpoint_dir, 'training_history.png'))