In [1]:
import os
# Quieten TensorFlow's native logging; set here so it is in place before
# the `import tensorflow` in the next cell.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Filesystem layout: dataset root, the train/test image folders beneath it,
# and the output folders for the saved model and the training history.
# NOTE(review): "Trianing" is presumably the on-disk folder spelling — confirm
# before renaming anything.
data_folder = "/home/dhawi/Documents/dataset"
training = f"{data_folder}/teeth_dataset/Trianing"
testing = f"{data_folder}/teeth_dataset/test"
model_folder = "/home/dhawi/Documents/model"
history_folder = "/home/dhawi/Documents/History"

In [4]:
# Directory aliases consumed by the dataset-loading cells
train_dir, test_dir = training, testing

In [5]:
# Function to get image file paths and corresponding labels
def get_image_paths_and_labels(data_dir):
    """Collect image file paths and integer labels from a class-per-folder tree.

    Every sub-directory of ``data_dir`` is treated as one class. Class ids are
    the positions of the sub-directory names in sorted order, so
    ``class_names[label]`` is always the folder a file came from.

    Args:
        data_dir: Directory whose immediate sub-directories hold the images
            of one class each.

    Returns:
        A tuple ``(image_paths, labels, class_names)`` where ``image_paths``
        is a list of file paths, ``labels`` the matching list of integer
        class ids, and ``class_names`` the sorted list of class folder names.
    """
    # Only directories are classes. Filtering *before* enumerating keeps the
    # label ids contiguous (0..n-1) and aligned with class_names even when
    # stray files (e.g. ".DS_Store") sit next to the class folders.
    class_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    image_paths = []
    labels = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(data_dir, class_name)
        # Sorted so the result does not depend on filesystem listing order,
        # which keeps any seeded split made from it reproducible.
        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)
            # Nested directories are ignored; only regular files are kept
            if os.path.isfile(img_path):
                image_paths.append(img_path)
                labels.append(label)

    return image_paths, labels, class_names

In [6]:
# Get paths and labels for the training data
train_image_paths, train_labels, class_names = get_image_paths_and_labels(train_dir)

# Hold out 20% of the training images for validation; stratified so both
# subsets keep the class proportions, seeded so the split is repeatable.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    train_image_paths, train_labels, test_size=0.2, stratify=train_labels, random_state=123)

# Compute class weights to handle class imbalance.
# Each weight is keyed by the class id it was computed for (not by its
# position in the array), so the mapping stays correct even if the ids
# present in train_labels are not exactly 0..n-1.
present_classes = np.unique(train_labels)
balanced_weights = compute_class_weight('balanced', classes=present_classes, y=train_labels)
class_weights = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, balanced_weights)
}

In [7]:
# Function to load and preprocess images
def load_image(path, label):
    """Read one image file and resize it to 128x128 with 3 channels.

    Args:
        path: Path of the image file to read.
        label: Label associated with the image; passed through unchanged.

    Returns:
        A tuple ``(image, label)`` with the decoded, resized image tensor.
    """
    # NOTE(review): no ResNet50-specific input preprocessing is applied here;
    # pixel values are passed on as produced by the resize — confirm intended.
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [128, 128])
    return resized, label

In [8]:
# Create TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
# Shuffle the lightweight (path, label) pairs *before* decoding and batching:
# every epoch then gets freshly composed batches, and the shuffle buffer holds
# strings rather than decoded images. (Shuffling after .batch() only reorders
# whole batches whose members never change.)
train_dataset = (
    train_dataset
    .shuffle(buffer_size=len(train_paths))
    .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data keeps its order: no shuffling is needed for evaluation
val_dataset = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
val_dataset = val_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE).batch(32).prefetch(tf.data.AUTOTUNE)

# Get paths and labels for the test data
test_image_paths, test_labels, _ = get_image_paths_and_labels(test_dir)

# Create the test dataset (order preserved so predictions line up with test_labels)
test_dataset = tf.data.Dataset.from_tensor_slices((test_image_paths, test_labels))
test_dataset = test_dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE).batch(32).prefetch(tf.data.AUTOTUNE)

In [9]:
# ImageNet-pretrained ResNet50 backbone, without its original classification head
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(128, 128, 3))

# Backbone weights stay fixed; only the layers added below are trainable
base_model.trainable = False

# New head: global average pooling -> 1024-unit ReLU layer -> 2-way softmax
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden_features = Dense(1024, activation='relu')(pooled_features)
predictions = Dense(2, activation='softmax')(hidden_features)  # 2 classes: caries and non-caries

In [10]:
# Wire the backbone input to the new softmax head
model = Model(inputs=base_model.input, outputs=predictions)

# Adam optimizer with sparse categorical cross-entropy, tracking accuracy
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Callbacks: keep the checkpoint with the lowest validation loss, and stop
# after 10 epochs without improvement while restoring the best weights.
resnet_model = f"{model_folder}/resnet_model.keras"
checkpoint = ModelCheckpoint(
    resnet_model,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [12]:
# Fit for up to 50 epochs with class weighting; the callbacks handle
# checkpointing and early stopping.
training_callbacks = [checkpoint, early_stopping]
history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=val_dataset,
    class_weight=class_weights,
    callbacks=training_callbacks,
)


In [13]:
# Score the trained model on the held-out test set (loss first, then accuracy)
test_metrics = model.evaluate(test_dataset)
test_loss, test_accuracy = test_metrics
print(f'Test accuracy: {test_accuracy:.2f}')


In [14]:
import matplotlib.pyplot as plt
# Plotting the training history
def plot_training_history(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute maps the keys
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' to per-epoch
            value sequences (as returned by ``model.fit``).
    """
    # (train key, validation key, display name) for each panel, left to right
    panels = (
        ('accuracy', 'val_accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Loss'),
    )

    plt.figure(figsize=(12, 4))
    for position, (train_key, val_key, metric_name) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=f'Train {metric_name}')
        plt.plot(history.history[val_key], label=f'Val {metric_name}')
        plt.title(f'Model {metric_name}')
        plt.xlabel('Epoch')
        plt.ylabel(metric_name)
        plt.legend()

    plt.show()
    
plot_training_history(history)

In [15]:
# Class probabilities for every test image, then the most likely class per image
y_pred_probs = model.predict(test_dataset)
y_pred = y_pred_probs.argmax(axis=1)

In [16]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
# Calculate confusion matrix.
# Class ids are passed explicitly so the matrix always has one row/column per
# entry in class_names, even if a class never occurs in the test labels or
# predictions.
class_ids = np.arange(len(class_names))
cm = confusion_matrix(test_labels, y_pred, labels=class_ids)

# Row-normalize (each row sums to 1 = recall view). Rows without any true
# samples are left at 0 instead of producing NaN from a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals > 0,
)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Classification report (same explicit class ids keep target_names aligned)
print(classification_report(test_labels, y_pred, labels=class_ids, target_names=class_names))

In [17]:
import json

model_name = "resnet50"
# Per-epoch metrics and losses recorded during training. Values are converted
# to plain Python floats because NumPy scalar types are not JSON serializable.
history_dict = {
    metric: [float(value) for value in values]
    for metric, values in history.history.items()
}
# Save it as a JSON file; create the target folder on first use and make sure
# the file handle is closed (and flushed) once the dump is done.
os.makedirs(history_folder, exist_ok=True)
history_file = history_folder + "/" + model_name + "_history.json"
with open(history_file, 'w') as history_fp:
    json.dump(history_dict, history_fp)

In [18]:
import cv2
from tqdm import tqdm

def load_images_from_folder(folder, subfolder):
    """Load every readable image in ``folder/subfolder`` as RGB and grayscale.

    Each image is resized to 128x128. Entries that OpenCV cannot decode
    (``cv2.imread`` returns ``None``) are skipped.

    Args:
        folder: Parent directory.
        subfolder: Name of the directory inside ``folder`` to read from.

    Returns:
        A tuple ``(images, gray)`` of NumPy arrays: the resized RGB images and
        their grayscale versions, in sorted file-name order. Both arrays are
        empty when no file could be read.
    """
    images = []
    gray = []
    foldername = os.path.join(folder, subfolder)
    # Sorted so the returned order does not depend on the filesystem
    for filename in tqdm(sorted(os.listdir(foldername))):
        img = cv2.imread(os.path.join(foldername, filename))
        # Check *before* any processing: resize/cvtColor fail on None
        if img is None:
            continue
        img = cv2.resize(img, (128, 128))
        # convert the image to RGB (images are read in BGR in OpenCV)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images.append(img)
        gray.append(cv2.cvtColor(img, cv2.COLOR_RGB2GRAY))
    return np.array(images), np.array(gray)

In [19]:
# Load RGB + grayscale arrays for each class from the train and test folders.
# NOTE(review): the non-caries folder is named "without_caries" under training
# but "no-caries" under testing — presumably matching the on-disk names; confirm.
caries_train, c_gray = load_images_from_folder(folder=training, subfolder="caries")
caries_test, ct_gray = load_images_from_folder(folder=testing, subfolder="caries")
noncaries_train, nc_gray = load_images_from_folder(folder=training, subfolder="without_caries")
noncaries_test, nct_gray = load_images_from_folder(folder=testing, subfolder="no-caries")

In [20]:
# Stack the test images along the first axis: caries first, then non-caries
test_image = np.concatenate([caries_test, noncaries_test])

In [21]:
# Predicted class id for each stacked test image
model.predict(test_image).argmax(axis=1)

In [22]:
# Raw model outputs for the non-caries test images (displays the full array)
model.predict(noncaries_test)

In [23]:
# Raw model outputs for the caries test images (displays the full array)
model.predict(caries_test)

In [24]:
# Predicted class ids obtained from test_dataset, shown for inspection
y_pred

In [25]:
# Ground-truth class ids of the test files, shown for comparison with y_pred
test_labels

In [26]:
from keras.models import load_model
# Reload the checkpoint written during training and predict with it on the
# stacked test images.
model_folder = "/home/dhawi/Documents/model"
MODEL_PATH = os.path.join(model_folder, "resnet_model.keras")
testmodel = load_model(MODEL_PATH)
testmodel.make_predict_function()

reloaded_probs = testmodel.predict(test_image)
reloaded_probs.argmax(axis=1)