In [None]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

class DatasetLoader:
    """Builds batched ``tf.data`` pipelines from a CSV index of image files.

    The CSV must contain a ``file_name`` column; labelled splits also carry a
    ``label`` column that is one-hot encoded by :meth:`prepare_dataset`.
    """

    def __init__(self, dataset_path, img_size=(224, 224), batch_size=64):
        """Store the pipeline settings.

        Args:
            dataset_path: Root directory holding the CSV files and image folders.
            img_size: ``(height, width)`` every image is resized to.
            batch_size: Number of samples per emitted batch.
        """
        self.dataset_path = dataset_path
        self.img_size = img_size
        self.batch_size = batch_size
        self.autotune = tf.data.AUTOTUNE

    def load_data(self, csv_file, image_folder):
        """Read ``csv_file`` and rewrite ``file_name`` to point into ``image_folder``.

        Only the base name of each original ``file_name`` entry is kept; it is
        re-rooted at ``<dataset_path>/<image_folder>/``.

        Args:
            csv_file: CSV file name, relative to ``dataset_path``.
            image_folder: Image directory name, relative to ``dataset_path``.

        Returns:
            The loaded ``pandas.DataFrame`` with the rewritten ``file_name`` column.
        """
        df = pd.read_csv(os.path.join(self.dataset_path, csv_file))
        image_root = os.path.join(self.dataset_path, image_folder)
        df["file_name"] = df["file_name"].apply(
            lambda name: os.path.join(image_root, os.path.basename(name))
        )
        return df

    def preprocess_image(self, image_path, label=None):
        """Load one image from disk and resize it to ``self.img_size``.

        Args:
            image_path: Path of the image file (string or scalar string tensor).
            label: Optional label passed through unchanged.

        Returns:
            ``(image, label)`` when a label is given, otherwise just ``image``.
            ``image`` is float32 with pixel values left in the ``[0, 255]`` range.
        """
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image, channels=3)
        # tf.image.resize (bilinear) already returns float32 and does NOT rescale,
        # so pixels stay in [0, 255]. The former
        # `tf.image.convert_image_dtype(image, tf.float32)` call that followed was
        # an identity op on float32 input and has been dropped.
        # NOTE(review): the EfficientNetB0 backbone used in this notebook is
        # documented to expect [0, 255] inputs - do not add a /255 here without
        # checking the model's own preprocessing.
        image = tf.image.resize(image, self.img_size)
        return (image, label) if label is not None else image

    def _encode_labels(self, labels, classes=None):
        """One-hot encode ``labels`` into an integer matrix.

        Args:
            labels: 1-D label values (e.g. a DataFrame column).
            classes: Optional explicit class list fixing the number and order of
                one-hot columns. When ``None`` the columns are derived from the
                values present in ``labels`` (original behaviour).

        Returns:
            ``numpy.ndarray`` of shape ``(len(labels), n_classes)`` with 0/1 ints.

        Raises:
            ValueError: If ``classes`` is given and ``labels`` holds a value that
                is missing from it (including NaN).
        """
        if classes is not None:
            encoded = pd.Categorical(labels, categories=list(classes))
            # Code -1 marks a value that is not one of the declared categories;
            # get_dummies would silently turn it into an all-zero row.
            if (encoded.codes < 0).any():
                raise ValueError("labels contain values that are not listed in `classes`")
            labels = pd.Series(encoded)
        return pd.get_dummies(labels).astype(int).values

    def prepare_dataset(self, df, is_training=True, classes=None):
        """Turn a DataFrame into a batched, prefetched ``tf.data.Dataset``.

        Args:
            df: Frame with a ``file_name`` column and, optionally, a ``label``
                column. Without ``label`` the dataset yields images only.
            is_training: When true, sample order is reshuffled on every pass.
            classes: Optional class list forwarded to the one-hot encoder so
                that different splits share the same column order.

        Returns:
            A dataset of ``(images, one_hot_labels)`` batches, or ``images``
            batches when ``df`` has no ``label`` column.
        """
        paths = df["file_name"].values
        if "label" in df.columns:
            labels = self._encode_labels(df["label"], classes)
            dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
        else:
            dataset = tf.data.Dataset.from_tensor_slices(paths)

        if is_training:
            # Shuffle the lightweight (path, label) records BEFORE decoding: the
            # buffer then holds strings instead of up to 1000 decoded float
            # images (about 0.6 GB at 224x224), and a buffer covering the whole
            # split gives a full shuffle rather than a windowed one.
            dataset = dataset.shuffle(buffer_size=max(len(df), 1))

        dataset = dataset.map(self.preprocess_image, num_parallel_calls=self.autotune)
        return dataset.batch(self.batch_size).prefetch(self.autotune)


In [2]:
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0

class EfficientNetModel:
    """Image classifier: frozen EfficientNetB0 backbone plus a small dense head."""

    def __init__(self, input_shape=(224, 224, 3), num_classes=2):
        """Record the model dimensions and build the (uncompiled) network.

        Args:
            input_shape: ``(height, width, channels)`` of the input images.
            num_classes: Width of the final softmax layer.
        """
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.model = self.build_model()

    def build_model(self):
        """Assemble and return the Sequential model.

        The backbone is created with ImageNet weights, without its top
        classifier, and is marked non-trainable.
        """
        backbone = EfficientNetB0(
            include_top=False,
            weights="imagenet",
            input_shape=self.input_shape,
        )
        backbone.trainable = False

        # Classification head stacked on top of the backbone features.
        head = [
            layers.GlobalAveragePooling2D(),
            layers.Dense(256, activation="relu"),
            layers.Dropout(0.3),
            layers.Dense(self.num_classes, activation="softmax"),
        ]
        return models.Sequential([backbone, *head])

    def compile_model(self, learning_rate=0.001):
        """Compile ``self.model`` with Adam and categorical cross-entropy.

        Args:
            learning_rate: Step size handed to the Adam optimizer.
        """
        from tensorflow.keras.optimizers import Adam
        from tensorflow.keras.metrics import Precision, Recall, AUC

        optimizer = Adam(learning_rate=learning_rate)
        tracked_metrics = [
            "accuracy",
            Precision(name="precision"),
            Recall(name="recall"),
            AUC(name="auc"),
        ]
        self.model.compile(
            optimizer=optimizer,
            loss="categorical_crossentropy",
            metrics=tracked_metrics,
        )


In [3]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

class ModelTrainer:
    """Runs ``fit``/``evaluate`` on a compiled model with early stopping and checkpointing."""

    def __init__(self, model, train_dataset, val_dataset, model_path="model/best_model.keras"):
        """Store the model, the datasets and set up the training callbacks.

        Args:
            model: Compiled Keras model to train.
            train_dataset: Dataset passed to ``fit`` as training data.
            val_dataset: Dataset used for validation during and after training.
            model_path: File the best checkpoint is written to.
        """
        self.model = model
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.model_path = model_path
        # NOTE(review): the two callbacks watch different quantities. Early
        # stopping restores the weights with the lowest val_loss into the
        # in-memory model, while the file at `model_path` holds the epoch with
        # the highest val_accuracy - the two may not be the same epoch.
        self.callbacks = [
            EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True),
            ModelCheckpoint(self.model_path, save_best_only=True, monitor="val_accuracy", mode="max")
        ]

    def train(self, epochs=20):
        """Fit the model on the training dataset.

        Args:
            epochs: Upper bound on the number of epochs (early stopping may end sooner).

        Returns:
            Whatever ``model.fit`` returns (the Keras training history).
        """
        history = self.model.fit(
            self.train_dataset,
            validation_data=self.val_dataset,
            epochs=epochs,
            callbacks=self.callbacks
        )
        return history

    def evaluate(self):
        """Evaluate the in-memory model on the validation dataset.

        Prints the metrics and also returns them, so callers can log or compare
        the values instead of parsing stdout (previously nothing was returned).

        Returns:
            The value returned by ``model.evaluate`` for the validation dataset.
        """
        results = self.model.evaluate(self.val_dataset)
        print(f"Validation Results: {results}")
        return results


In [4]:
from sklearn.metrics import accuracy_score, roc_curve
import matplotlib.pyplot as plt

class ModelPredictor:
    """Loads a saved Keras model and scores its predictions."""

    def __init__(self, model_path):
        """Load the model stored at ``model_path``."""
        self.model = tf.keras.models.load_model(model_path)

    def predict_proba(self, dataset):
        """Return the raw output of ``self.model.predict`` for ``dataset``.

        For the softmax classifier built in this notebook these are per-class
        scores, one row per sample; use them (not the argmax) for ROC curves.
        """
        return self.model.predict(dataset)

    def predict(self, dataset):
        """Return the predicted class index (argmax over axis 1) for each sample."""
        return np.argmax(self.predict_proba(dataset), axis=1)

    def evaluate(self, true_labels, predictions):
        """Print and return the accuracy of ``predictions``.

        Args:
            true_labels: One-hot label matrix; reduced with argmax over axis 1.
            predictions: Predicted class indices, as returned by :meth:`predict`.

        Returns:
            The accuracy computed by ``sklearn.metrics.accuracy_score``.
        """
        accuracy = accuracy_score(np.argmax(true_labels, axis=1), predictions)
        print(f"Accuracy: {accuracy:.4f}")
        return accuracy

    @staticmethod
    def _positive_class_scores(predictions):
        """Reduce ``predictions`` to the 1-D score vector ``roc_curve`` expects.

        * 1-D input is returned as-is (backward compatible with earlier callers).
        * ``(n, 2)`` input yields column 1, the positive-class score.
        * ``(n, 1)`` input is flattened.

        Raises:
            ValueError: For 2-D input with more than two columns.
        """
        scores = np.asarray(predictions)
        if scores.ndim != 2:
            return scores
        n_columns = scores.shape[1]
        if n_columns == 2:
            return scores[:, 1]
        if n_columns == 1:
            return scores[:, 0]
        raise ValueError(
            f"ROC curve needs binary scores, got a matrix with {n_columns} columns"
        )

    def plot_roc_curve(self, true_labels, predictions):
        """Plot the ROC curve for a binary classifier.

        Args:
            true_labels: One-hot label matrix; reduced with argmax over axis 1.
            predictions: Positive-class scores (1-D) or the full score matrix
                from :meth:`predict_proba`. Passing hard class indices still
                works but collapses the curve to a single operating point, so
                continuous scores should be preferred.
        """
        scores = self._positive_class_scores(predictions)
        fpr, tpr, _ = roc_curve(np.argmax(true_labels, axis=1), scores)
        plt.plot(fpr, tpr, label="ROC Curve", color="blue")
        # Diagonal reference line.
        plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
        plt.xlabel("False Positive Rate (FPR)")
        plt.ylabel("True Positive Rate (TPR)")
        plt.title("ROC Curve")
        plt.legend()
        plt.show()


In [None]:
if __name__ == "__main__":
    # Define paths
    dataset_path = "data"
    train_csv = "train.csv"
    test_csv = "test.csv"

    # Load dataset
    data_loader = DatasetLoader(dataset_path, img_size=(224, 224), batch_size=8)
    train_df = data_loader.load_data(train_csv, "train_data")
    test_df = data_loader.load_data(test_csv, "test_data_v2")

    # Split data (stratified so both splits contain every class)
    train_df, val_df = train_test_split(train_df, test_size=0.2, stratify=train_df['label'], random_state=42)

    # Prepare datasets; the validation set is not shuffled, so its sample order
    # matches the row order of val_df used for the labels below.
    train_dataset = data_loader.prepare_dataset(train_df, is_training=True)
    val_dataset = data_loader.prepare_dataset(val_df, is_training=False)

    # Create model
    model_builder = EfficientNetModel(num_classes=train_df['label'].nunique())
    model = model_builder.model
    model_builder.compile_model()

    # Train model
    trainer = ModelTrainer(model, train_dataset, val_dataset)
    trainer.train(epochs=20)

    # Load trained model and evaluate
    predictor = ModelPredictor("model/best_model.keras")
    # Keep the per-class scores: accuracy needs the argmax, but the ROC curve
    # needs continuous scores - feeding it hard class ids yields a degenerate
    # curve with a single operating point.
    probabilities = predictor.model.predict(val_dataset)
    predictions = np.argmax(probabilities, axis=1)
    val_labels = pd.get_dummies(val_df["label"]).values  # computed once, reused below

    predictor.evaluate(val_labels, predictions)
    if probabilities.shape[1] == 2:
        # Column 1 is the score of the positive (second) class.
        predictor.plot_roc_curve(val_labels, probabilities[:, 1])
    else:
        # sklearn's roc_curve only supports binary targets.
        print(f"Skipping ROC curve: expected 2 classes, got {probabilities.shape[1]}")
