#Libraries

In [None]:
import tensorflow as tf
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, LeakyReLU, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from PIL import Image
import random
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import f1_score
from sklearn.utils import shuffle
from collections import Counter


In [None]:
data_directory = '/content/drive/MyDrive/Dental Project'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#Data Pre-Processing and Augmentation

In [None]:
# Read the annotation table of each split and tag every row with the split
# it came from, so the rows stay traceable once the tables are combined.
train_csv = os.path.join(data_directory,'train/_annotations.csv')
test_csv = os.path.join(data_directory,'test/_annotations.csv')
valid_csv = os.path.join(data_directory,'valid/_annotations.csv')

df1 = pd.read_csv(train_csv).assign(FileClass='train')
df2 = pd.read_csv(test_csv).assign(FileClass='test')
df3 = pd.read_csv(valid_csv).assign(FileClass='valid')

In [None]:
# Stack the three split tables row-wise. ignore_index=True renumbers the rows
# 0..n-1; without it each split keeps its own 0-based index, so the combined
# frame has duplicate index labels and label-based lookups become ambiguous.
df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)

In [None]:
df.head(10)

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,FileClass
0,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Implant,175,116,206,153,train
1,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Fillings,170,109,189,133,train
2,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Implant,221,124,257,178,train
3,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Implant,302,126,329,175,train
4,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Implant,335,114,360,154,train
5,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Fillings,358,57,391,106,train
6,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Fillings,330,81,358,106,train
7,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Fillings,311,58,330,109,train
8,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Implant,142,96,174,144,train
9,0674_jpg.rf.e02a155a0c135687b9301ff9a20d220a.jpg,512,256,Fillings,257,127,282,154,train


In [None]:
df.shape

(9283, 9)

In [None]:
df['class'].value_counts()

Unnamed: 0_level_0,count
class,Unnamed: 1_level_1
Fillings,6097
Implant,2047
Cavity,641
Impacted Tooth,498


In [None]:
df['filename'].nunique()

1269

In [None]:
null_count = df['class'].isnull().sum()
null_count

0

In [None]:
df['class'].unique() # list the distinct class names present in the annotations (value_counts() is what shows the imbalance)

array(['Implant', 'Fillings', 'Impacted Tooth', 'Cavity'], dtype=object)

In [None]:
def _choose_label(class_values, priority_class):
    """Pick a single image-level label from the classes annotated on one image."""
    if priority_class is not None:
        wanted = str(priority_class).casefold()
        for value in class_values:
            # Case-insensitive match, but return the spelling used in the
            # annotations so no extra label variant is introduced.
            if str(value).casefold() == wanted:
                return str(value)
    return str(random.choice(class_values))


def load_data(data_directory, image_size=(224, 224), priority_class='Cavity'):
    """Load every annotated image below ``data_directory`` with one label each.

    ``data_directory`` is expected to contain one sub-folder per split, each
    holding the image files next to an ``_annotations.csv`` with at least the
    columns ``filename`` and ``class``. Entries that are not such folders
    (stray files, folders without annotations) are ignored.

    Each image is resized to ``image_size`` and scaled to the range [0, 1].
    An image may carry several annotations; it is reduced to one label:
    ``priority_class`` if any annotation has that class (compared
    case-insensitively), otherwise a random one of its annotated classes.
    Files that are not listed in the annotations, or that OpenCV cannot
    decode, are skipped so images and labels always stay aligned.

    Args:
        data_directory: Root folder containing the split sub-folders.
        image_size: Target ``(width, height)`` passed to ``cv2.resize``.
        priority_class: Class that wins whenever it is present on an image;
            ``None`` disables the priority rule.

    Returns:
        A tuple ``(imgs, images, labels, classes)`` where ``images`` is the
        list of processed images, ``imgs`` is the same data as one numpy
        array, ``labels`` is the list of label strings aligned with
        ``images``, and ``classes`` lists the sub-folder names that were
        loaded.
    """
    images = []
    labels = []
    classes = []
    # Sorted listings make the sample order independent of the file system.
    for class_name in sorted(os.listdir(data_directory)):
        class_path = os.path.join(data_directory, class_name)
        annotations_path = os.path.join(class_path, '_annotations.csv')
        if not os.path.isfile(annotations_path):
            continue
        classes.append(class_name)

        annotations = pd.read_csv(annotations_path).dropna(subset=['class'])
        # Group once per folder instead of filtering the table for every file.
        classes_by_file = {
            name: group.tolist()
            for name, group in annotations.groupby('filename')['class']
        }

        for filename in sorted(os.listdir(class_path)):
            annotated_classes = classes_by_file.get(filename)
            if not annotated_classes:
                # Not an annotated image (e.g. the CSV itself): no label exists.
                continue
            img = cv2.imread(os.path.join(class_path, filename))
            if img is None:
                continue
            img = cv2.resize(img, image_size)
            images.append(img / 255.0)
            labels.append(_choose_label(annotated_classes, priority_class))

    imgs = np.array(images)

    return imgs, images, labels, classes

In [None]:
def augmentation(images,labels):
    """Create a horizontally flipped and a 90-degree rotated copy of every image.

    The rotation is done around the pixel ``(w // 2, h // 2)`` on a canvas of
    the original ``(w, h)`` size, filling uncovered pixels by replicating the
    border. Vertical flip and zoom variants are not implemented.

    Args:
        images: Iterable of image arrays accepted by OpenCV.
        labels: Iterable of labels, one per image.

    Returns:
        ``(new_images, new_labels)``: all flipped images followed by all
        rotated images, and the labels repeated twice in the same order.
    """
    mirrored = []
    turned = []
    for picture in images:
        # flipCode 1 mirrors around the vertical axis (horizontal flip)
        mirrored.append(cv2.flip(picture, 1))

        height, width = picture.shape[:2]
        pivot = (width // 2, height // 2)
        quarter_turn = cv2.getRotationMatrix2D(pivot, 90, 1.0)
        turned.append(
            cv2.warpAffine(
                picture,
                quarter_turn,
                (width, height),
                flags=cv2.INTER_LINEAR,
                borderMode=cv2.BORDER_REPLICATE,
            )
        )

    # Both augmented variants keep the label of their source image.
    label_copy = list(labels)
    return mirrored + turned, label_copy + label_copy


In [None]:
imgs, images, labels,classes =load_data(data_directory)

In [None]:
def get_specific_sample(imgs,labels,target_label):
    """Down-sample one class to a third of its size to reduce class imbalance.

    All samples whose label differs from ``target_label`` are kept. Of the
    samples labelled ``target_label`` a random third (rounded down) is kept.

    Args:
        imgs: Sequence of images, aligned with ``labels``.
        labels: Sequence of labels, one per image.
        target_label: The over-represented label to down-sample.

    Returns:
        ``(filtered_images, filtered_labels)`` as two aligned lists: first the
        untouched samples in their original order, then the randomly kept
        ``target_label`` samples.
    """
    filtered_images = []
    filtered_labels = []
    target_images = []
    target_labels = []
    # Use the imgs argument (not a notebook-level global) so the function
    # works on whatever data the caller passes in.
    for img, lbl in zip(imgs, labels):
        if lbl == target_label:
            target_images.append(img)
            target_labels.append(lbl)
        else:
            filtered_images.append(img)
            filtered_labels.append(lbl)

    sample_size = len(target_images) // 3
    sampled_indices = random.sample(range(len(target_images)), sample_size)
    filtered_images.extend(target_images[i] for i in sampled_indices)
    filtered_labels.extend(target_labels[i] for i in sampled_indices)
    return filtered_images, filtered_labels

In [None]:
len(labels)

1269

In [None]:
Counter(labels) # we have to take only 1/3 of the fillings to have a more balanced data

Counter({'Fillings': 770, 'Cavity': 95, 'Implant': 224, 'Impacted Tooth': 180})

In [None]:
imgs, labels = get_specific_sample(imgs,labels,'Fillings')

In [None]:
set(labels)

{'Cavity', 'Fillings', 'Impacted Tooth', 'Implant'}

In [None]:
Counter(labels)

Counter({'Cavity': 95, 'Implant': 224, 'Impacted Tooth': 180, 'Fillings': 256})

In [None]:
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

In [None]:
len(labels)

755

In [None]:
# Hold out 20% for testing. The classes are imbalanced, so stratify on the
# labels to keep every class represented in the same proportion in both parts.
X_train, X_test ,y_train, y_test = train_test_split(imgs, labels, test_size=0.2, random_state=42, shuffle=True, stratify=labels)

In [None]:
def get_sample(images,labels,sample_size=500):
  """Draw a random sample of aligned (image, label) pairs without replacement.

  Args:
    images: Sequence of images, aligned with ``labels``.
    labels: Sequence of labels, one per image.
    sample_size: Number of pairs to draw. If fewer pairs are available, all of
      them are returned in random order.

  Returns:
    ``(sampled_images, sampled_labels)`` as two aligned tuples; two empty
    tuples when there is nothing to sample.
  """
  image_label_pairs = list(zip(images,labels))
  # Clamp the request so small inputs do not make random.sample raise.
  count = min(sample_size, len(image_label_pairs))
  if count <= 0:
    return (), ()
  sample = random.sample(image_label_pairs, count)
  sampled_images , sampled_labels = zip(*sample)
  return sampled_images , sampled_labels

In [None]:
# Sample the augmentation sources from the training split only. `images` still
# holds the full, unfiltered image list, which is no longer aligned with the
# down-sampled, encoded `labels`, and it also contains the held-out test images.
sampled_images , sampled_labels = get_sample(X_train, y_train)

In [None]:
augmented_images, augmented_labels = augmentation(sampled_images,sampled_labels)

In [None]:
augmented_images = np.array(augmented_images)
augmented_labels = np.array(augmented_labels)

In [None]:
X_train = np.concatenate((X_train, augmented_images),axis=0)

In [None]:
X_train.shape

(1617, 224, 224, 3)

In [None]:
y_train = np.concatenate((y_train, augmented_labels),axis=0)

In [None]:
y_train.shape

(1617,)

In [None]:
# Mix original and augmented samples; the seed is pinned like the splits
# so the run is reproducible.
X_train, y_train = shuffle(X_train, y_train, random_state=42)

In [None]:
# Validation data: carve 25% out of the (already augmented) training set.
# NOTE(review): the augmented copies were concatenated into X_train before this
# split, so a flipped/rotated variant of a training image can end up in the
# validation set — consider splitting before augmenting.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42, shuffle=True)