In [None]:



import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# =============================
# PATHS (IMPORTANT)
# =============================
CSV_DIR = "../dataset/processed"
IMAGE_ROOT ="../dataset/raw/vehicle_damage_dataset"

CSV_PATH = os.path.join(CSV_DIR, "labels_no_blur.csv")

# =============================
# LOAD CSV
# =============================
df = pd.read_csv(CSV_PATH)
print("‚úÖ CSV loaded:", df.shape)

# =============================
# LABEL ENCODING
# =============================
# real -> 0, fake -> 1
df["label"] = df["class"].map({"real": 0, "fake": 1})

# =============================
# IMAGE PREPROCESSING
# =============================
IMG_SIZE = (224, 224)

X = []
y = []
missing = 0

for _, row in df.iterrows():
    img_path = os.path.join(IMAGE_ROOT, row["image_path"])

    img = cv2.imread(img_path)
    if img is None:
        missing += 1
        continue

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, IMG_SIZE)
    img = img.astype("float32") / 255.0

    X.append(img)
    y.append(row["label"])

X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.int32)

print("‚úÖ Images loaded:", X.shape)
print("‚ö†Ô∏è Missing images skipped:", missing)

# =============================
# SAFETY CHECK (CRITICAL)
# =============================
if len(X) == 0:
    raise ValueError("‚ùå No images loaded. Check IMAGE_ROOT or image_path in CSV!")

# =============================
# TRAIN / VAL / TEST SPLIT
# =============================
TRAIN_SIZE = 0.70
VAL_SIZE   = 0.15
TEST_SIZE  = 0.15
RANDOM_STATE = 42

# ---- Train vs Temp ----
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=(1 - TRAIN_SIZE),
    random_state=RANDOM_STATE,
    stratify=y
)

# ---- Validation vs Test ----
val_ratio = VAL_SIZE / (VAL_SIZE + TEST_SIZE)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=(1 - val_ratio),
    random_state=RANDOM_STATE,
    stratify=y_temp
)

# =============================
# FINAL SHAPE OUTPUT
# =============================
print("\n‚úÖ DATASET SPLIT SUCCESSFUL")
print("Train       :", X_train.shape, y_train.shape)
print("Validation  :", X_val.shape, y_val.shape)
print("Test        :", X_test.shape, y_test.shape)

# =============================
# PERCENTAGE CALCULATION
# =============================
total_samples = len(X)

train_pct = (len(X_train) / total_samples) * 100
val_pct   = (len(X_val)   / total_samples) * 100
test_pct  = (len(X_test)  / total_samples) * 100


print("\nüìä DATASET DISTRIBUTION (PERCENTAGE)")
print("Train       :", round(train_pct, 2), "%")
print("Validation  :", round(val_pct, 2), "%")
print("Test        :", round(test_pct, 2), "%")

# =============================
# OPTIONAL: CLASS DISTRIBUTION
# =============================
print("\nüìä CLASS DISTRIBUTION (0=REAL, 1=FAKE)")
print("Train :", np.bincount(y_train))
print("Val   :", np.bincount(y_val))
print("Test  :", np.bincount(y_test))


‚úÖ CSV loaded: (7183, 3)
‚úÖ Images loaded: (7183, 224, 224, 3)
‚ö†Ô∏è Missing images skipped: 0

‚úÖ DATASET SPLIT SUCCESSFUL
Train       : (5028, 224, 224, 3) (5028,)
Validation  : (1077, 224, 224, 3) (1077,)
Test        : (1078, 224, 224, 3) (1078,)

üìä DATASET DISTRIBUTION (PERCENTAGE)
Train       : 70.0 %
Validation  : 14.99 %
Test        : 15.01 %

üìä CLASS DISTRIBUTION (0=REAL, 1=FAKE)
Train : [2098 2930]
Val   : [449 628]
Test  : [450 628]


In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Dense, Flatten,
    Dropout, BatchNormalization, GlobalAveragePooling2D
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50, EfficientNetB0
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd





In [23]:
from sklearn.metrics import (
    confusion_matrix,
    ConfusionMatrixDisplay,
    roc_curve,
    auc
)
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score



In [5]:
EPOCHS = 10
BATCH_SIZE = 32

callbacks = [
    EarlyStopping(patience=3, restore_best_weights=True),
    ReduceLROnPlateau(patience=2, factor=0.3)
]


In [15]:
def build_resnet50(input_shape):
    base_model = ResNet50(
        weights="imagenet",
        include_top=False,
        input_shape=input_shape
    )

    base_model.trainable = False  

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)

    model.compile(
        optimizer=Adam(1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model



resnet_model = build_resnet50(X_train.shape[1:])
resnet_model.summary()

resnet_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_3[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 112, 112, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                        

<keras.src.callbacks.History at 0x1fc9952ab10>

In [16]:
# Predict probabilities
y_val_prob = resnet_model.predict(X_val)

# Convert to binary predictions
y_val_pred = (y_val_prob > 0.5).astype(int)




In [17]:
acc = accuracy_score(y_val, y_val_pred)
prec = precision_score(y_val, y_val_pred)
rec = recall_score(y_val, y_val_pred)

print("üìä ResNet50 Validation Metrics")
print(f"Accuracy  : {acc:.4f}")
print(f"Precision : {prec:.4f}")
print(f"Recall    : {rec:.4f}")


üìä ResNet50 Validation Metrics
Accuracy  : 0.8812
Precision : 0.8561
Recall    : 0.9570


In [6]:
def build_custom_cnn(input_shape):
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D(2,2),

        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2,2),

        Conv2D(128, (3,3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2,2),

        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])

    model.compile(
        optimizer=Adam(1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model


cnn_model = build_custom_cnn(X_train.shape[1:])
cnn_model.summary()

cnn_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 batch_normalization (Batch  (None, 222, 222, 32)      128       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 batch_normalization_1 (Bat  (None, 109, 109, 64)      256       
 chNormalization)                                                
                                                      

<keras.src.callbacks.History at 0x1fc45b13450>

In [8]:
# Predict probabilities (Custom CNN)
y_val_prob = cnn_model.predict(X_val)

# Convert to binary predictions
y_val_pred = (y_val_prob > 0.5).astype(int)




In [10]:
acc = accuracy_score(y_val, y_val_pred)
prec = precision_score(y_val, y_val_pred)
rec = recall_score(y_val, y_val_pred)

print("üìä CNN VALIDATION METRICS")
print("Accuracy  :", round(acc, 4))
print("Precision :", round(prec, 4))
print("Recall    :", round(rec, 4))


üìä CNN VALIDATION METRICS
Accuracy  : 0.9972
Precision : 0.9984
Recall    : 0.9968


In [12]:
def build_efficientnet(input_shape):
    base_model = EfficientNetB0(
        weights="imagenet",
        include_top=False,
        input_shape=input_shape
    )

    base_model.trainable = False

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)

    model.compile(
        optimizer=Adam(1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model


eff_model = build_efficientnet(X_train.shape[1:])
eff_model.summary()

eff_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 rescaling_2 (Rescaling)     (None, 224, 224, 3)          0         ['input_2[0][0]']             
                                                                                                  
 normalization_1 (Normaliza  (None, 224, 224, 3)          7         ['rescaling_2[0][0]']         
 tion)                                                                                            
                                                                                                  
 rescaling_3 (Rescaling)     (None, 224, 224, 3)          0         ['normalization_1[0][0]'

<keras.src.callbacks.History at 0x1fc5b782210>

In [13]:
# Predict probabilities using EfficientNet
y_val_prob = eff_model.predict(X_val)

# Convert probabilities to binary predictions
y_val_pred = (y_val_prob > 0.5).astype(int)




In [14]:
acc = accuracy_score(y_val, y_val_pred)
prec = precision_score(y_val, y_val_pred)
rec = recall_score(y_val, y_val_pred)

print("üìä EfficientNet Validation Metrics")
print("Accuracy  :", round(acc, 4))
print("Precision :", round(prec, 4))
print("Recall    :", round(rec, 4))

üìä EfficientNet Validation Metrics
Accuracy  : 0.5831
Precision : 0.5831
Recall    : 1.0


In [21]:
def evaluate_model(model, X, y, model_name):
    # Probabilities
    y_prob = model.predict(X).ravel()
    
    # Binary predictions
    y_pred = (y_prob > 0.5).astype(int)

    # Metrics
    acc = accuracy_score(y, y_pred)
    prec = precision_score(y, y_pred)
    rec = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)
    roc = roc_auc_score(y, y_prob)

    # Confusion Matrix
    cm = confusion_matrix(y, y_pred)

    print(f"\nüìä {model_name} Confusion Matrix")
    print(cm)

    return {
        "Model": model_name,
        "Accuracy": acc,
        "Precision": prec,
        "Recall": rec,
        "F1-score": f1,
        
    }


In [24]:
results = []

results.append(evaluate_model(cnn_model, X_val, y_val, "Custom CNN"))
results.append(evaluate_model(resnet_model, X_val, y_val, "ResNet50"))
results.append(evaluate_model(eff_model, X_val, y_val, "EfficientNetB0"))



üìä Custom CNN Confusion Matrix
[[448   1]
 [  2 626]]

üìä ResNet50 Confusion Matrix
[[348 101]
 [ 27 601]]

üìä EfficientNetB0 Confusion Matrix
[[  0 449]
 [  0 628]]


In [25]:
results_df = pd.DataFrame(results)
print("\nüèÜ VALIDATION METRICS COMPARISON")
print(results_df)



üèÜ VALIDATION METRICS COMPARISON
            Model  Accuracy  Precision    Recall  F1-score
0      Custom CNN  0.997214   0.998405  0.996815  0.997610
1        ResNet50  0.881151   0.856125  0.957006  0.903759
2  EfficientNetB0  0.583101   0.583101  1.000000  0.736657
