In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate the full path of every file below the Kaggle input directory,
# then print one path per line.
input_files = (
    os.path.join(root_dir, file_name)
    for root_dir, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install deepface

In [None]:
import os
import re
import cv2
import pandas as pd
import matplotlib.pyplot as plt

images_path = "/kaggle/input/fgnet-dataset/FGNET/images"

# List the directory once and sort it: os.listdir returns entries in arbitrary
# order, so without sorting the "example" image could differ between runs.
image_names = sorted(os.listdir(images_path))
print("Total Images:", len(image_names))

if not image_names:
    raise FileNotFoundError(f"No files found in {images_path}")

example_path = os.path.join(images_path, image_names[0])
img = cv2.imread(example_path)
# cv2.imread reports failure by returning None instead of raising, which would
# otherwise surface as an opaque error inside cvtColor.
if img is None:
    raise ValueError(f"OpenCV could not read image: {example_path}")

# OpenCV loads images as BGR; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title("Example FG-NET Image")
plt.show()

In [None]:
# Build one metadata row per image: [file name, person id, age].
# The patterns below take the person id from a leading three-digit prefix and
# the age from the digits following an "A"/"a".
# NOTE(review): assumes names shaped like "001A02.JPG" — confirm against the dataset.
data = []
skipped = []

# Sort the listing so the row order (and therefore the seeded train/validation
# split built from this frame) does not depend on filesystem ordering.
for img_name in sorted(os.listdir(images_path)):
    if not img_name.lower().endswith(".jpg"):
        continue

    person_match = re.match(r"(\d{3})", img_name)
    if person_match is None:
        # Previously this case crashed with AttributeError on `.group`; record
        # the file and move on instead.
        skipped.append(img_name)
        continue
    person_id = person_match.group(1)

    age_match = re.search(r"A(\d+)", img_name, re.IGNORECASE)
    age = int(age_match.group(1)) if age_match else None
    data.append([img_name, person_id, age])

if skipped:
    print(f"Skipped {len(skipped)} .jpg file(s) without a 3-digit person id prefix: {skipped[:5]}")

df_age = pd.DataFrame(data, columns=["image", "person_id", "age"])
df_age["path"] = df_age["image"].apply(lambda x: os.path.join(images_path, x))

# Number of images for the 20 most frequent person ids.
print(df_age["person_id"].value_counts().head(20))


In [None]:
from sklearn.model_selection import train_test_split

# Bucket ages into coarse ranges so the split can be stratified by age group.
age_bin_edges = [-1, 5, 10, 15, 20, 30, 40, 50, 60, 70]
df_age["age_bins"] = pd.cut(df_age["age"], bins=age_bin_edges, labels=False)

# Rows with a missing age, or an age outside the bin edges, have no bin and are dropped.
df_age = df_age.dropna(subset=["age_bins"])

# 80/20 split, stratified on the age bucket, with a fixed seed.
# NOTE(review): the split is per image, not per person_id, so the same person
# can appear in both train and validation — confirm this is intended.
train_df, val_df = train_test_split(
    df_age,
    stratify=df_age["age_bins"],
    test_size=0.2,
    random_state=42,
)

print("Train:", len(train_df), "Validation:", len(val_df))


In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam

# ImageNet-pretrained ResNet50 backbone without its classification head.
base_model = ResNet50(include_top=False, weights="imagenet", input_shape=(224, 224, 3))

# Freeze every backbone layer except the final 50, which remain trainable.
frozen_layers = base_model.layers[:-50]
for backbone_layer in frozen_layers:
    backbone_layer.trainable = False

# Regression head: global average pooling -> 256-unit ReLU -> dropout -> one linear unit.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(256, activation="relu")(pooled)
regularized = Dropout(0.3)(hidden)
output = Dense(1, activation="linear")(regularized)

age_model = Model(inputs=base_model.input, outputs=output)

# Optimise mean squared error; report mean absolute error alongside it.
age_optimizer = Adam(learning_rate=1e-4)
age_model.compile(optimizer=age_optimizer, loss="mse", metrics=["mae"])
age_model.summary()

In [None]:
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 16
generator_seed = 42  # fixes shuffling order and random augmentation draws

# Training images get ResNet50 preprocessing plus light geometric augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)

# Validation images are only preprocessed, never augmented.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Arguments shared by both generators; class_mode="raw" yields the numeric
# "age" column as the regression target.
flow_kwargs = dict(
    x_col="path", y_col="age",
    target_size=img_size, class_mode="raw", batch_size=batch_size,
)

train_gen = train_datagen.flow_from_dataframe(
    train_df, seed=generator_seed, **flow_kwargs
)

# shuffle=False keeps validation batches in dataframe order, so predictions can
# be aligned with val_df rows and evaluation order is stable between epochs.
val_gen = val_datagen.flow_from_dataframe(
    val_df, shuffle=False, **flow_kwargs
)

history = age_model.fit(train_gen, validation_data=val_gen, epochs=30)


In [None]:
# Plot per-epoch training and validation MAE on a single set of axes.
fig, ax = plt.subplots()
for metric_key, curve_label in (("mae", "Train MAE"), ("val_mae", "Validation MAE")):
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_xlabel("Epoch")
ax.set_ylabel("MAE")
ax.legend()
ax.set_title("Age Prediction - Mean Absolute Error")
plt.show()


In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt
import cv2

def predict_age(img_path, show=True, model=None, target_size=(224, 224)):
    """Predict the age for a single image file.

    Parameters
    ----------
    img_path : str
        Path of the image to score.
    show : bool, default True
        When True, display the original image with the prediction as its title.
    model : optional
        Model whose ``predict`` is called on the preprocessed batch. Defaults to
        the notebook-level ``age_model``.
    target_size : tuple of int, default (224, 224)
        Size the image is resized to before being fed to the model.

    Returns
    -------
    float
        Predicted age rounded to one decimal place, as a plain Python float.

    Raises
    ------
    ValueError
        If ``show`` is True and OpenCV cannot read ``img_path`` for display.
    """
    if model is None:
        model = age_model

    # Load, convert to an array, add a batch axis, and apply ResNet50 preprocessing.
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = preprocess_input(np.expand_dims(img_array, axis=0))

    # verbose=0 suppresses the progress bar Keras prints on every predict call.
    raw_pred = model.predict(img_array, verbose=0)[0][0]
    # Convert the NumPy scalar to a built-in float so callers get a plain number.
    pred_age = round(float(raw_pred), 1)

    if show:
        img_bgr = cv2.imread(img_path)
        # cv2.imread returns None on failure rather than raising.
        if img_bgr is None:
            raise ValueError(f"OpenCV could not read image: {img_path}")
        img_cv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        plt.imshow(img_cv)
        plt.title(f"Predicted Age: {pred_age}")
        plt.axis("off")
        plt.show()

    return pred_age

# Score the first validation row and compare the prediction with its label.
first_val_row = val_df.iloc[0]
img_test = first_val_row["path"]
real_age = first_val_row["age"]
pred = predict_age(img_test)
print(f"Predicted Age: {pred}, Real Age: {real_age}")


In [None]:
import random

from itertools import combinations

# Dedicated, seeded RNG so the sampled negative pairs are identical on every
# run and the global `random` state is left untouched.
pair_rng = random.Random(42)

# Positive pairs: every unordered pair of images that share a person_id.
positive_pairs = []
grouped = df_age.groupby("person_id")
for pid, group in grouped:
    # combinations() yields nothing for a single-image group, so no length check is needed.
    positive_pairs.extend(combinations(group["path"].tolist(), 2))

all_paths = df_age["path"].tolist()
# One-off path -> person_id lookup; replaces a full DataFrame scan per candidate pair.
person_by_path = dict(zip(df_age["path"], df_age["person_id"]))

# Without at least two identities no negative pair exists and the rejection
# loop below would never terminate.
if positive_pairs and df_age["person_id"].nunique() < 2:
    raise ValueError("Need at least two distinct person_ids to sample negative pairs")

# Negative pairs: as many random pairs of images from different people as
# there are positive pairs (rejection sampling).
negative_pairs = []
while len(negative_pairs) < len(positive_pairs):
    img1, img2 = pair_rng.sample(all_paths, 2)
    if person_by_path[img1] != person_by_path[img2]:
        negative_pairs.append((img1, img2))

print(f"Positive Pairs: {len(positive_pairs)}, Negative Pairs: {len(negative_pairs)}")


In [None]:
from deepface import DeepFace
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score

def get_distance(img1, img2, model="Facenet512"):
    """Return the DeepFace verification distance between two images.

    Calls ``DeepFace.verify`` with ``enforce_detection=False`` and the given
    ``model`` name, and returns the ``"distance"`` entry of its result.
    """
    verification = DeepFace.verify(
        img1,
        img2,
        model_name=model,
        enforce_detection=False,
    )
    return verification["distance"]

# Evaluation budget per class and the distance cut-off for declaring a match.
n_eval_pairs = 500
# NOTE(review): 0.4 is a hand-picked cut-off — confirm it suits the Facenet512
# distances returned by get_distance.
threshold = 0.4

# positive_pairs is built person by person, so a plain [:500] slice would only
# cover the first few identities. Draw a seeded random sample instead so the
# evaluated positives span the whole dataset and are the same on every run.
eval_rng = random.Random(42)
eval_positive_pairs = eval_rng.sample(
    positive_pairs, min(n_eval_pairs, len(positive_pairs))
)
# negative_pairs were drawn at random already, so a prefix is an unbiased subset.
eval_negative_pairs = negative_pairs[:n_eval_pairs]

distances, labels = [], []

for img1, img2 in tqdm(eval_positive_pairs, desc="Positive Pairs"):
    distances.append(get_distance(img1, img2))
    labels.append(1)

for img1, img2 in tqdm(eval_negative_pairs, desc="Negative Pairs"):
    distances.append(get_distance(img1, img2))
    labels.append(0)

# A pair is predicted "same person" when its distance is at or below the threshold.
preds = [1 if d <= threshold else 0 for d in distances]

acc = accuracy_score(labels, preds)
# Smaller distance means more similar, so negate distances to get a score that
# increases with the positive class, as roc_auc_score expects.
roc_auc = roc_auc_score(labels, [-d for d in distances])

print(f"Accuracy: {acc:.3f}")
print(f"ROC-AUC: {roc_auc:.3f}")

In [None]:
# Negated distances serve as similarity scores for the ROC computation.
similarity_scores = [-d for d in distances]
fpr, tpr, _ = roc_curve(labels, similarity_scores)

fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, label=f"ROC-AUC = {roc_auc:.3f}")
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("Face Matching ROC Curve")
ax.legend()
plt.show()
