# In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import tensorflow as tf
from keras_preprocessing.image import ImageDataGenerator
from keras.api.applications import ResNet50
from keras.api.models import Model
from keras.api.layers import Input, GlobalAveragePooling2D, Dense, Dropout
from keras.api.optimizers import Adam
from keras.api.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# ---- Training hyperparameters -------------------------------------------
IMG_SIZE = (224, 224)      # spatial size every image is resized to
BATCH_SIZE = 32
LEARNING_RATE = 1e-4       # initial Adam learning rate
EPOCHS = 50                # upper bound; early stopping may end training sooner

# ---- Dataset locations (machine-specific absolute paths) ----------------
image_folder = r"C:\Users\DEVANSH\Downloads\BoneAge_Dataset\Training_dataset_BoneAge"
df = pd.read_csv(r"C:\Users\DEVANSH\Downloads\BoneAge_Dataset\BoneAge_train.csv")

# Each row's image lives at <image_folder>/<ID>.png; store the full path so the
# generators can load files without a separate `directory` argument.
df['image_path'] = df['ID'].astype(str).map(
    lambda image_id: os.path.join(image_folder, image_id + '.png')
)


def is_valid_image(path):
    """Report whether the file at ``path`` can be opened and verified by PIL.

    Args:
        path: Filesystem path of the candidate image.

    Returns:
        ``True`` if ``Image.open`` succeeds and ``verify()`` raises nothing,
        ``False`` if anything at all goes wrong (missing file, truncated or
        corrupt data, unsupported format, ...).
    """
    try:
        with Image.open(path) as candidate:
            candidate.verify()
    except Exception:
        # Deliberately broad: this is a best-effort filter, and any failure
        # simply means "do not train on this file".
        return False
    else:
        return True

# Drop every row whose image file is missing or fails PIL verification so the
# generators only ever see loadable files.
readable_mask = df['image_path'].apply(is_valid_image)
df = df[readable_mask]


def clahe_preprocessing(img):
    """Boost image contrast before the generator rescales it.

    Despite the name this is not CLAHE: the image is collapsed to a single
    luminance channel, its contrast is doubled around the mean with
    ``tf.image.adjust_contrast``, and the result is replicated back to three
    channels.

    Args:
        img: One rank-3 image array with three channels, as handed over by
            ``ImageDataGenerator`` (before ``rescale`` is applied).

    Returns:
        A NumPy array of the same shape, which is what
        ``ImageDataGenerator(preprocessing_function=...)`` is documented to
        expect from its hook.
    """
    gray = tf.image.rgb_to_grayscale(img)
    boosted = tf.image.adjust_contrast(gray, 2.0)
    return tf.image.grayscale_to_rgb(boosted).numpy()


# Both generators must use the same fraction so that the 'training' and
# 'validation' subsets are complementary slices of `df`.
VALIDATION_SPLIT = 0.2

# Training pipeline: contrast boost + rescale + light random augmentation.
# `preprocessing_function` is a constructor argument; passing it to
# `flow_from_dataframe` is silently ignored.
datagen = ImageDataGenerator(
    validation_split=VALIDATION_SPLIT,
    rescale=1./255,
    rotation_range=10,
    zoom_range=0.1,
    horizontal_flip=True,
    preprocessing_function=clahe_preprocessing,
)

# Validation pipeline: identical deterministic preprocessing, but no random
# augmentation, so validation metrics are reproducible and comparable
# between epochs.
val_datagen = ImageDataGenerator(
    validation_split=VALIDATION_SPLIT,
    rescale=1./255,
    preprocessing_function=clahe_preprocessing,
)
# NOTE(review): ResNet50 ImageNet weights were trained with
# `keras.applications.resnet50.preprocess_input`, not 1/255 scaling —
# confirm the current scaling is intentional.

train_gen = datagen.flow_from_dataframe(
    df,
    x_col='image_path',
    y_col='Target',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    subset='training',
    class_mode='raw',
    shuffle=True,
)

val_gen = val_datagen.flow_from_dataframe(
    df,
    x_col='image_path',
    y_col='Target',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    subset='validation',
    class_mode='raw',
    # Keep file order: predictions from `model.predict(val_gen)` are later
    # compared position-by-position with `val_gen.labels`.
    shuffle=False,
)


def build_model():
    """Assemble the bone-age regressor: a ResNet50 backbone plus a dense head.

    The backbone is loaded with ImageNet weights, without its classification
    top, and is left trainable (nothing is frozen here). Its feature map is
    average-pooled and passed through Dense(512) -> Dropout(0.5) -> Dense(128)
    before a single linear output unit.

    Returns:
        An uncompiled ``Model`` taking images of shape ``(*IMG_SIZE, 3)`` and
        producing one regression value per image.
    """
    image_tensor = Input(name='image_input', shape=tuple(IMG_SIZE) + (3,))

    backbone = ResNet50(
        include_top=False,
        weights='imagenet',
        input_tensor=image_tensor,
    )

    # Regression head on top of the pooled backbone features.
    features = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(512, activation='relu')(features)
    hidden = Dropout(0.5)(hidden)
    hidden = Dense(128, activation='relu')(hidden)
    prediction = Dense(1, activation='linear')(hidden)

    return Model(inputs=image_tensor, outputs=prediction)

# Build the network and configure it for regression: MSE is optimised, MAE is
# reported alongside it as the human-readable metric.
model = build_model()
model.compile(
    loss='mse',
    metrics=['mae'],
    optimizer=Adam(learning_rate=LEARNING_RATE),
)

# Training control, all driven by validation loss:
#   * stop after 10 epochs without improvement and roll back to the best weights
#   * halve the learning rate after 5 stagnant epochs
#   * keep the best model seen so far on disk
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
    ),
    ModelCheckpoint(
        'best_model.h5',
        save_best_only=True,
    ),
]

# `history` keeps the per-epoch loss/metric curves returned by fit().
history = model.fit(
    train_gen,
    epochs=EPOCHS,
    validation_data=val_gen,
    callbacks=callbacks,
)

# ---- Evaluation on the validation subset --------------------------------
# `model.predict` returns rows in the order the iterator yields them, whereas
# `val_gen.labels` and `val_gen.filenames` are always in dataframe order.
# They only line up when the iterator walks the files sequentially, so force
# that here regardless of how the generator was created.
if getattr(val_gen, 'shuffle', False):
    val_gen.shuffle = False
    val_gen.on_epoch_end()  # rebuilds the index array, now without shuffling
val_gen.reset()

val_preds = model.predict(val_gen).flatten()
val_true = np.asarray(val_gen.labels)

if len(val_preds) != len(val_true):
    raise RuntimeError(
        f"Got {len(val_preds)} predictions for {len(val_true)} validation labels"
    )

mae = mean_absolute_error(val_true, val_preds)
mse = mean_squared_error(val_true, val_preds)
r2 = r2_score(val_true, val_preds)

print(f"MAE: {mae:.2f} months")
print(f"MSE: {mse:.2f}")
print(f"R²: {r2:.4f}")

# Residuals against the ground truth: points above the dashed line are
# over-estimates, points below are under-estimates.
errors = val_preds - val_true
plt.figure(figsize=(12, 6))
plt.scatter(val_true, errors, alpha=0.6)
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('True Bone Age (months)')
plt.ylabel('Prediction Error (months)')
plt.title('Prediction Error Distribution')
plt.grid(True)
plt.show()

# ---- Export --------------------------------------------------------------
# The predictions cover only the validation rows, so the IDs must come from
# the same rows (not from the whole dataframe, whose length differs). Each
# file is named "<ID>.png", so the ID is the file name without its extension.
val_ids = [
    os.path.splitext(os.path.basename(path))[0]
    for path in val_gen.filenames
]

submission_df = pd.DataFrame({
    'ID': val_ids,
    'Predicted_Bone_Age': val_preds
})

submission_df.to_csv('submission.csv', index=False)
