In [3]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.resnet50 import preprocess_input
from PIL import Image, UnidentifiedImageError

# Hàm kiểm tra file ảnh hợp lệ
def is_valid_image(filepath):
    try:
        with Image.open(filepath) as img:
            img.verify()
        return True
    except (UnidentifiedImageError, IOError):
        return False

# Hàm load toàn bộ dataset
def load_full_dataset(path):
    """
    Load toàn bộ X-ray dataset
    """
    dataset = []
    for folder in os.listdir(path):  # train/test
        folder_path = os.path.join(path, folder)
        if os.path.isdir(folder_path):
            for body_part in os.listdir(folder_path):  # Elbow, Hand, Shoulder
                part_path = os.path.join(folder_path, body_part)
                if os.path.isdir(part_path):
                    for patient_id in os.listdir(part_path):  # patient folders
                        patient_path = os.path.join(part_path, patient_id)
                        for lab in os.listdir(patient_path):  # positive/negative
                            if lab.split('_')[-1] == 'positive':
                                label = 'fractured'
                            elif lab.split('_')[-1] == 'negative':
                                label = 'normal'
                            lab_path = os.path.join(patient_path, lab)
                            for img in os.listdir(lab_path):
                                img_path = os.path.join(lab_path, img)
                                # Chỉ thêm ảnh hợp lệ
                                if is_valid_image(img_path):
                                    dataset.append({
                                        'body_part': body_part,
                                        'patient_id': patient_id,
                                        'label': label,
                                        'image_path': img_path
                                    })
    return dataset

# Hàm huấn luyện mô hình
def train_model():
    image_dir = '/kaggle/input/mura-dataset/MURA-v1.1'
    data = load_full_dataset(image_dir)
    labels = []
    filepaths = []

    # Chuẩn bị dữ liệu
    for row in data:
        labels.append(row['label'])
        filepaths.append(row['image_path'])

    filepaths = pd.Series(filepaths, name='Filepath').astype(str)
    labels = pd.Series(labels, name='Label')

    images = pd.concat([filepaths, labels], axis=1)
    train_df, test_df = train_test_split(images, train_size=0.9, shuffle=True, random_state=1)

    # Tạo DataGenerator
    train_generator = ImageDataGenerator(
        horizontal_flip=True,
        preprocessing_function=preprocess_input,
        validation_split=0.2
    )

    test_generator = ImageDataGenerator(
        preprocessing_function=preprocess_input
    )

    train_images = train_generator.flow_from_dataframe(
        dataframe=train_df,
        x_col='Filepath',
        y_col='Label',
        target_size=(224, 224),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=64,
        shuffle=True,
        seed=42,
        subset='training'
    )

    val_images = train_generator.flow_from_dataframe(
        dataframe=train_df,
        x_col='Filepath',
        y_col='Label',
        target_size=(224, 224),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=64,
        shuffle=True,
        seed=42,
        subset='validation'
    )

    test_images = test_generator.flow_from_dataframe(
        dataframe=test_df,
        x_col='Filepath',
        y_col='Label',
        target_size=(224, 224),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=32,
        shuffle=False
    )

    model = load_model("/kaggle/input/resnet_fulldata/tensorflow2/default/1/ResNet50_full_frac.h5")
    
    # Biên dịch lại mô hình với các metrics cần thiết
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='binary_crossentropy',
        metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
    )

    # Dự đoán trên tập test
    y_pred_prob = model.predict(test_images)
    y_pred = np.argmax(y_pred_prob, axis=1)

    # Lấy y_true từ test_images
    y_true = test_images.classes

    # Tính toán Precision và Recall
    precision = precision_score(y_true, y_pred, pos_label=1)
    recall = recall_score(y_true, y_pred, pos_label=1)
    print(f"Test Precision: {precision * 100}%")
    print(f"Test Recall: {recall * 100}%")

# Chạy mô hình
if __name__ == "__main__":
    train_model()


Found 28804 validated image filenames belonging to 2 classes.
Found 7200 validated image filenames belonging to 2 classes.
Found 4001 validated image filenames belonging to 2 classes.


  self._warn_if_super_not_called()
I0000 00:00:1734767368.887554      92 service.cc:145] XLA service 0x7cf8d8001e50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734767368.887598      92 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1734767368.887601      92 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m  1/126[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:07[0m 6s/step

I0000 00:00:1734767373.667609      92 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 174ms/step
Test Precision: 78.07909604519774%
Test Recall: 86.6638795986622%
