In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import random
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [None]:
# GPU Configuration
def configure_gpu():
    """Configure TensorFlow to use GPU resources efficiently"""
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        try:
            # Memory growth prevents TensorFlow from allocating all GPU memory at once
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)

            print(f"Found {len(gpus)} GPU(s). GPU acceleration enabled.")

            # Optionally, you can limit GPU memory allocation
            # tf.config.experimental.set_virtual_device_configuration(
            #     gpus[0],
            #     [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)]
            # )

        except RuntimeError as e:
            print(f"GPU configuration error: {e}")
    else:
        print("No GPU found. Running on CPU.")

    return len(gpus) > 0

In [None]:
def load_data(data_dir):
    """
    Load the preprocessed data
    """
    # Load arrays
    X_train = np.load(os.path.join(data_dir, "X_train_augmented.npy"))
    y_train = np.load(os.path.join(data_dir, "y_train_augmented.npy"))
    X_valid = np.load(os.path.join(data_dir, "X_valid.npy"))
    y_valid = np.load(os.path.join(data_dir, "y_valid.npy"))
    X_test = np.load(os.path.join(data_dir, "X_test.npy"))
    y_test = np.load(os.path.join(data_dir, "y_test.npy"))

    # Load class names
    #data_dir_classes='/content/drive/MyDrive/Colab Notebooks/processed_data'
    with open(os.path.join(data_dir, 'class_names.pkl'), 'rb') as f:
        class_names = pickle.load(f)
        print(class_names)

    return X_train, y_train, X_valid, y_valid, X_test, y_test, class_names

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import Layer
from tensorflow.keras.layers import (Conv2D, BatchNormalization, MaxPooling2D,
                                    Dropout, Dense, Flatten, Lambda,
                                    Activation, GlobalAveragePooling2D,
                                    Multiply, Add, Reshape)
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.optimizers import AdamW
import tensorflow as tf

class F1Score(tf.keras.metrics.Metric):
    """Custom F1 score metric for sparse categorical data"""
    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.argmax(y_pred, axis=-1)
        self.precision.update_state(y_true, y_pred)
        self.recall.update_state(y_true, y_pred)

    def result(self):
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + tf.keras.backend.epsilon()))

    def reset_state(self):
        self.precision.reset_state()
        self.recall.reset_state()


class SelfAttention(Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        _, h, w, c = input_shape
        self.query_conv = Conv2D(c//8, (1,1))
        self.key_conv = Conv2D(c//8, (1,1))
        self.value_conv = Conv2D(c, (1,1))
        super().build(input_shape)

    def call(self, x):
        query = self.query_conv(x)
        key = self.key_conv(x)
        value = self.value_conv(x)

        # Reshape for attention calculation
        query_flat = tf.reshape(query, [-1, tf.shape(query)[1]*tf.shape(query)[2], tf.shape(query)[3]])
        key_flat = tf.reshape(key, [-1, tf.shape(key)[1]*tf.shape(key)[2], tf.shape(key)[3]])
        value_flat = tf.reshape(value, [-1, tf.shape(value)[1]*tf.shape(value)[2], tf.shape(value)[3]])

        # Attention scores
        attention = tf.matmul(query_flat, key_flat, transpose_b=True)
        attention = tf.nn.softmax(attention)

        # Weighted sum
        out = tf.matmul(attention, value_flat)

        # Reshape back to original
        out = tf.reshape(out, [-1, tf.shape(x)[1], tf.shape(x)[2], tf.shape(value)[3]])
        return out + x  # Residual connection

    def compute_output_shape(self, input_shape):
        return input_shape

class SEBlock(Layer):
    def __init__(self, ratio=16, **kwargs):
        super().__init__(**kwargs)
        self.ratio = ratio

    def build(self, input_shape):
        _, _, _, c = input_shape
        self.se_dense_reduce = Dense(c//self.ratio, activation='relu')
        self.se_dense_expand = Dense(c, activation='sigmoid')
        super().build(input_shape)

    def call(self, x):
        se = GlobalAveragePooling2D()(x)
        se = self.se_dense_reduce(se)
        se = self.se_dense_expand(se)
        return Multiply()([x, Reshape((1,1,-1))(se)])

    def compute_output_shape(self, input_shape):
        return input_shape

def create_enhanced_model(input_shape, num_classes):
    """
    Final enhanced CNN model with proper custom layers
    """
    model = Sequential()

    # Input normalization
    model.add(Lambda(lambda x: tf.image.per_image_standardization(x),
              input_shape=input_shape))

    # --- First Convolutional Block ---
    model.add(Conv2D(32, (3,3), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation(lambda x: x * tf.sigmoid(x)))  # Swish

    # --- Second Convolutional Block ---
    model.add(Conv2D(64, (3,3), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation(lambda x: x * tf.sigmoid(x)))

    # Add self-attention
    model.add(SelfAttention())
    model.add(MaxPooling2D((2,2)))
    model.add(Dropout(0.3))

    # --- Third Convolutional Block ---
    model.add(Conv2D(128, (1,5), padding='same'))
    model.add(Conv2D(128, (5,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation(lambda x: x * tf.sigmoid(x)))

    # Add SE block
    model.add(SEBlock(ratio=16))
    model.add(MaxPooling2D((2,2)))
    model.add(Dropout(0.4))

    # --- Feature Processing ---
    model.add(Conv2D(256, (1,1), activation=lambda x: x * tf.sigmoid(x)))
    model.add(GlobalAveragePooling2D())

    # --- Classification Head ---
    model.add(Flatten())
    model.add(Dense(256,
                   kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4),
                   activity_regularizer=l1_l2(l1=1e-6, l2=1e-5)))
    model.add(Activation(lambda x: x * tf.math.tanh(tf.math.softplus(x))))  # Mish
    model.add(Dropout(0.5))

    # Output with label smoothing
    model.add(Dense(num_classes))
    model.add(Lambda(lambda x: tf.clip_by_value(tf.nn.softmax(x), 1e-7, 1.-1e-7)))

    # Optimizer
    optimizer = AdamW(learning_rate=3e-4, weight_decay=1e-5)

    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', F1Score()]
    )

    return model

In [None]:
def plot_training_history(history):
    """
    Plot training & validation accuracy and loss
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Accuracy
    ax1.plot(history.history['accuracy'])
    ax1.plot(history.history['val_accuracy'])
    ax1.set_title('Model Accuracy')
    ax1.set_ylabel('Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.legend(['Train', 'Validation'], loc='lower right')

    # Loss
    ax2.plot(history.history['loss'])
    ax2.plot(history.history['val_loss'])
    ax2.set_title('Model Loss')
    ax2.set_ylabel('Loss')
    ax2.set_xlabel('Epoch')
    ax2.legend(['Train', 'Validation'], loc='upper right')

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.close()

In [None]:
def plot_confusion_matrix(y_true, y_pred, class_names):
    """
    Plot confusion matrix
    """
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.close()

In [None]:
# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Directory containing preprocessed data
data_dir = "/content/drive/MyDrive/Colab Notebooks/dental-classification/augmented_data"

# Configure hardware acceleration
use_gpu = configure_gpu()

# Load data
print("Loading preprocessed data...")
X_train, y_train, X_valid, y_valid, X_test, y_test, class_names = load_data(data_dir)
# Get original sizes
n_train = len(X_train)
n_test = len(X_test)
n_valid = len(X_valid)

# Combine all data
X_all = np.concatenate([X_train, X_test, X_valid], axis=0)
y_all = np.concatenate([y_train, y_test, y_valid], axis=0)

# Shuffle data
assert len(X_all) == len(y_all)
perm = np.random.permutation(len(X_all))
X_all = X_all[perm]
y_all = y_all[perm]

# Split back using original sizes
X_train= X_all[:n_train]
y_train= y_all[:n_train]
X_test= X_all[n_train:n_train + n_test]
y_test= y_all[n_train:n_train + n_test]
X_valid= X_all[n_train + n_test:]
y_valid= y_all[n_train + n_test:]





Found 1 GPU(s). GPU acceleration enabled.
Loading preprocessed data...
['Cavity', 'Fillings', 'Impacted Tooth', 'Implant', 'Normal']


In [None]:
# Save the shuffled datasets
np.save("/content/drive/MyDrive/Colab Notebooks/dental-classification/augmented_data/X_train_augmented.npy", X_train)
np.save("/content/drive/MyDrive/Colab Notebooks/dental-classification/augmented_data/y_train_augmented.npy", y_train)
np.save("/content/drive/MyDrive/Colab Notebooks/dental-classification/augmented_data/X_test.npy", X_test)
np.save("/content/drive/MyDrive/Colab Notebooks/dental-classification/augmented_data/y_test.npy", y_test)
np.save("/content/drive/MyDrive/Colab Notebooks/dental-classification/augmented_data/X_valid.npy", X_valid)
np.save("/content/drive/MyDrive/Colab Notebooks/dental-classification/augmented_data/y_valid.npy", y_valid)

In [9]:
# Print dataset information
print(f"Training set: {X_train.shape[0]} images")
print(f"Validation set: {X_valid.shape[0]} images")
print(f"Test set: {X_test.shape[0]} images")
print(f"Number of classes: {len(class_names)}")


# Input shape and number of classes
input_shape = X_train.shape[1:]  # (64, 64, 1)
num_classes = len(class_names)

# Create model
model = create_enhanced_model(input_shape, num_classes) # Recreate your model architecture

# Display model summary
model.summary()

# Callbacks
callbacks = [
    #EarlyStopping(patience=10, restore_best_weights=True),
    ModelCheckpoint('best_dental_model.keras', save_best_only=True),
    ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-6)
]

# Batch size optimization for GPU
batch_size = 64 if use_gpu else 32

from sklearn.utils.class_weight import compute_class_weight
# 1. Compute balanced class weights
class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}

#force model to build
history = model.fit(
    X_train,y_train,
    epochs=1,
    batch_size=batch_size,
    class_weight=class_weight_dict,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
    verbose=1
)
# Then load weights
model.load_weights('/content/drive/MyDrive/Colab Notebooks/dental-classification/best_dental_model.keras')

# Train the model with appropriate batch size
print("\nTraining model...")
history = model.fit(
    X_train,y_train,
    epochs=5,
    batch_size=batch_size,
    class_weight=class_weight_dict,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
    verbose=1
)


# Plot training history
plot_training_history(history)



# Save model
model.save('/content/drive/MyDrive/Colab Notebooks/dental-classification/dental_xray_classifier.keras')
print("\nModel saved as 'dental_xray_classifier.keras'")

Training set: 57116 images
Validation set: 2812 images
Test set: 1649 images
Number of classes: 5


  super().__init__(**kwargs)


[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m436s[0m 445ms/step - accuracy: 0.7221 - f1_score: 0.9754 - loss: 0.7463 - val_accuracy: 0.8606 - val_f1_score: 0.9874 - val_loss: 0.4376 - learning_rate: 3.0000e-04



  fn_config = serialization_lib.serialize_keras_object(activation)

  fn_config = serialization_lib.serialize_keras_object(activation)

  fn_config = serialization_lib.serialize_keras_object(activation)

  fn_config = serialization_lib.serialize_keras_object(activation)



Training model...
Epoch 1/5
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 424ms/step - accuracy: 0.9502 - f1_score: 0.9936 - loss: 0.1398 - val_accuracy: 0.9477 - val_f1_score: 0.9918 - val_loss: 0.1530 - learning_rate: 1.5000e-04
Epoch 2/5
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 423ms/step - accuracy: 0.9516 - f1_score: 0.9937 - loss: 0.1375 - val_accuracy: 0.9474 - val_f1_score: 0.9916 - val_loss: 0.1529 - learning_rate: 1.5000e-04
Epoch 3/5
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 423ms/step - accuracy: 0.9515 - f1_score: 0.9937 - loss: 0.1341 - val_accuracy: 0.9438 - val_f1_score: 0.9907 - val_loss: 0.1653 - learning_rate: 1.5000e-04
Epoch 4/5
[1m893/893[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m378s[0m 419ms/step - accuracy: 0.9516 - f1_score: 0.9937 - loss: 0.1327 - val_accuracy: 0.9488 - val_f1_score: 0.9920 - val_loss: 0.1471 - learning_rate: 1.5000e-04
Epoch 5/5
[1m893/893[0m [32m━━━━━━━━━━

In [11]:
# Evaluate on test set
print("\nEvaluating on test set...")
test_metrics = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_metrics[1]:.4f}")

# Make predictions
y_pred = np.argmax(model.predict(X_test), axis=1)

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=class_names))

# Plot confusion matrix
plot_confusion_matrix(y_test, y_pred, class_names)


Evaluating on test set...
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 77ms/step - accuracy: 0.9618 - f1_score: 0.9951 - loss: 0.1299
Test accuracy: 0.9594
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 93ms/step

Classification Report:
                precision    recall  f1-score   support

        Cavity       0.97      0.97      0.97       271
      Fillings       0.97      0.88      0.92       301
Impacted Tooth       0.99      0.98      0.99       263
       Implant       0.97      0.99      0.98       278
        Normal       0.93      0.97      0.95       536

      accuracy                           0.96      1649
     macro avg       0.97      0.96      0.96      1649
  weighted avg       0.96      0.96      0.96      1649

