In [None]:
import tensorflow as tf

import pathlib
import matplotlib.pyplot as plt
import os
from PIL import Image
import numpy as np
import sys

sys.path.insert(0, '/home/cesi/datascience/scripts/')
sys.path.insert(0, '/home/cesi/datascience/models/')
import plotImages
import resnet50

In [None]:
# Height and width (in pixels) that images are resized to when loaded.
image_h = 150
image_w = 150
# Number of images per dataset batch.
batch_size = 32
# Minimum accepted image size; only referenced by the size check that is
# currently commented out inside check_images.
min_h, min_w = 25, 25

In [None]:
# Root folder of the image dataset (one sub-folder per class is expected by
# the directory-based loader used further down).
# NOTE(review): absolute, machine-specific path — consider making it configurable.
data_dir = pathlib.Path('/home/cesi/datascience/data/livrable1_binary')

In [None]:
def check_images(dir_path):
    """Report files under ``dir_path`` that cannot be decoded as images.

    Every immediate sub-directory of ``dir_path`` is scanned (not recursively).
    Files ending in .jpg/.jpeg/.png are opened with PIL, decoded with
    TensorFlow and verified with PIL; any exception is printed. Files with
    another extension are reported as having a bad type.

    Nothing is removed from disk: the ``os.remove`` calls are left commented
    out, so the "Deleting" wording in the output is only a report.

    Args:
        dir_path: directory whose sub-directories contain the image files.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # tqdm is not imported by this notebook, so calling it unconditionally
    # raised NameError. Use it only when the notebook namespace provides it,
    # otherwise iterate without a progress bar.
    progress = globals().get('tqdm', lambda iterable: iterable)

    print('CHECK IMAGES:')
    for sub_dir_name in os.listdir(dir_path):
        sub_dir_path = os.path.join(dir_path, sub_dir_name)
        if not os.path.isdir(sub_dir_path):
            continue
        print("-", sub_dir_name.upper())

        for file_name in progress(os.listdir(sub_dir_path)):
            file_path = os.path.join(sub_dir_path, file_name)
            if not os.path.isfile(file_path):
                continue

            # Check if the file is an image (add more formats here if needed)
            if not file_name.lower().endswith(image_extensions):
                #os.remove(file_path)
                print(f'\nSkipped {file_path} due to bad type {file_name.lower()}')
                continue

            try:
                with Image.open(file_path) as img:
                    # if img.width < min_w or img.height < min_h:
                    #   raise Exception(f'Too small img of shape {img.width},{img.height}')
                    # The former cv2.imread call was dropped: cv2 was never
                    # imported (its NameError flagged every image as broken)
                    # and imread reports failure by returning None, not by
                    # raising, so its discarded result validated nothing.
                    img_bytes = tf.io.read_file(file_path)
                    tf.io.decode_image(img_bytes)
                    img.verify()
            except Exception as e:
                #os.remove(file_path)
                print(f'\nDeleting {file_path} due to an error: {str(e)}')
#check_images(data_dir)

In [None]:
# Load the image folder and split it 80/20; subset='both' makes the loader
# return the two parts of the split together, unpacked here as
# (train_set, test_set). The fixed seed keeps the split reproducible.
dataset_options = dict(
    validation_split=0.2,
    subset='both',
    image_size=(image_h, image_w),
    seed=42,
    batch_size=batch_size,
)
train_set, test_set = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, **dataset_options
)

# Class names as reported by the loaded dataset, and how many there are.
class_names = train_set.class_names
num_classes = len(class_names)

In [None]:
# Small CNN baseline, assembled from four stages so each part is easy to spot.
keras_layers = tf.keras.layers
model_input_shape = (image_h, image_w, 3)

# Stage 1: random augmentation (horizontal flip, small rotation and zoom).
augmentation_stage = [
    keras_layers.RandomFlip('horizontal', input_shape=model_input_shape),
    keras_layers.RandomRotation(0.1),
    keras_layers.RandomZoom(0.1),
]

# Stage 2: scale pixel values by 1/255.
# NOTE(review): input_shape is also given on the first layer above; this
# second one looks redundant — confirm before removing.
rescaling_stage = [
    keras_layers.Rescaling(1./255, input_shape=model_input_shape),
]

# Stage 3: three convolution + max-pooling blocks (16, 32, 64 filters); the
# first two also apply a light dropout before pooling.
convolution_stage = [
    keras_layers.Conv2D(16, 3, activation='relu'),
    keras_layers.Dropout(0.1),
    keras_layers.MaxPooling2D(),

    keras_layers.Conv2D(32, 3, activation='relu'),
    keras_layers.Dropout(0.1),
    keras_layers.MaxPooling2D(),

    keras_layers.Conv2D(64, 3, activation='relu'),
    keras_layers.MaxPooling2D(),
]

# Stage 4: dense classifier head. The last layer has no activation, so the
# model outputs raw logits, one per class.
# NOTE(review): the model is later compiled with BinaryCrossentropy; a
# len(class_names)-wide output may not be what that loss expects — confirm.
classifier_stage = [
    keras_layers.Flatten(),
    keras_layers.Dense(128, activation='relu'),
    keras_layers.Dropout(0.4),

    keras_layers.Dense(256, activation='relu'),
    keras_layers.Dropout(0.4),

    keras_layers.Dense(len(class_names)),
]

classic_model = tf.keras.Sequential(
    augmentation_stage + rescaling_stage + convolution_stage + classifier_stage
)

In [None]:
# Registry of the candidate architectures, keyed by a short name.
# NOTE(review): `model_resnet_50` is not defined in the cells shown here —
# only the `resnet50` module is imported. Presumably it should come from that
# module; confirm, otherwise this cell raises NameError.
models = {
  "classic": classic_model,
  "resnet50": model_resnet_50
}

# Pick the architecture to train.
model_name = 'resnet50'
model = models[model_name]

# Only the classic CNN is treated as emitting raw logits; every other model is
# treated as already producing probabilities.
from_logits = (model_name == 'classic')

# compile the model for a binary classification problem
binary_loss = tf.keras.losses.BinaryCrossentropy(from_logits=from_logits)
model.compile(optimizer='adam', loss=binary_loss, metrics=['accuracy'])

In [None]:
from datetime import datetime

# Start-of-run timestamp. It names the output folder and is reused in the
# README title, so it is computed once to keep both in agreement.
folder = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
path = "/home/cesi/datascience/project/weights/"+folder
os.makedirs(path, exist_ok=True)

# model.summary() prints its table and returns None, so interpolating the call
# directly wrote "None" into the README. Collect the lines through print_fn
# instead; extra keyword arguments some Keras versions pass are ignored.
summary_lines = []
model.summary(print_fn=lambda line, **_: summary_lines.append(line))
model_summary = "\n".join(summary_lines)

# README describing this training run
content = f"""# Training n°{folder}
number of classes: {num_classes}
model: {model_name}
image size: {image_h}x{image_w}
batch size: {batch_size}
backbone: {model.layers}
## Summary
{model_summary}

## Compile config
{model.get_compile_config()}
"""

file_path = os.path.join(path, "README.txt")

# Write the README next to the checkpoints; explicit UTF-8 because the title
# contains a non-ASCII character.
with open(file_path, "w", encoding="utf-8") as text_file:
    text_file.write(content)


epochs=15

# One weights-only checkpoint per epoch; {epoch:04d} is filled in by Keras.
checkpoint_path = path+"/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

weights_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch')

history = model.fit(train_set, epochs=epochs, validation_data=test_set, callbacks=[weights_callback])

In [None]:
# model.fit returns a History object whose per-epoch metrics live in its
# `.history` dict; subscripting the object itself raises TypeError. A plain
# mapping with a 'history' key is still accepted.
metrics = history.history if hasattr(history, 'history') else history['history']

acc = metrics['accuracy']
val_acc = metrics['val_accuracy']

loss = metrics['loss']
val_loss = metrics['val_loss']

# Use the number of recorded epochs so the x axis always matches the curves.
epochs_range = range(len(acc))

plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

# Save before showing: after plt.show() the current figure is no longer the
# one just drawn, so saving afterwards produced an empty image.
plt.savefig(path+"/metrics.png")
plt.show()

In [None]:
# Restore weights from a checkpoint file of an earlier run before evaluating.
# NOTE(review): the path is hard-coded to one specific past run folder, not to
# the `path` folder created by the training cell — confirm this is intended.
model_to_load_path = '/home/cesi/datascience/project/weights/2023_10_02_07_55_21/cp-0004.ckpt'
model.load_weights(model_to_load_path)

In [None]:
# Run the model over the held-out set. When the model emits raw logits they
# are converted to probabilities with a softmax; otherwise the raw output is
# kept as is.
# NOTE(review): the binary plotting helpers read predictions[i][0] as the
# class-1 probability — confirm that matches a softmax output.
raw_predictions = model.predict(test_set)
predictions = tf.nn.softmax(raw_predictions) if from_logits else raw_predictions

In [None]:
# Loss and accuracy of the model on the held-out set.
# Note: this rebinds `loss` and `acc`, which the training-curves cell used for
# the per-epoch lists; re-running that cell afterwards restores them.
loss, acc = model.evaluate(test_set)

In [None]:
# Flatten the batched test dataset into two parallel lists: one image per
# entry and its label converted to a plain int.
full_tests_images = []
full_tests_labels = []
for batch_images, batch_labels in test_set:
    full_tests_images.extend(im for im in batch_images)
    full_tests_labels.extend(int(res) for res in batch_labels)

In [None]:
def plot_image(prediction_array, true_label, img, class_names):
  """Draw one image captioned with its predicted class (multi-class output).

  The predicted class is the arg-max of ``prediction_array``. The caption reads
  "<predicted> <score>% (<true>)" and is blue when the prediction matches
  ``true_label``, red otherwise.

  Args:
    prediction_array: per-class scores for this image.
    true_label: index of the true class in ``class_names``.
    img: image data; it is divided by 255 before display.
    class_names: sequence mapping class index to display name.
  """
  # Hide grid and ticks: only the picture and its caption should show.
  plt.grid(False)
  plt.xticks([])
  plt.yticks([])
  plt.imshow(img/255.0, cmap=plt.cm.binary)

  predicted_label = np.argmax(prediction_array)
  color = 'blue' if predicted_label == true_label else 'red'

  caption = "{} {:2.0f}% ({})".format(class_names[predicted_label],
                                      100*np.max(prediction_array),
                                      class_names[true_label])
  plt.xlabel(caption, color=color)

def plot_image_binary(prediction_array, true_label, img, class_names):
  """Draw one image captioned with its predicted class (single-score output).

  ``prediction_array[0]`` is read as the probability of class 1: above 0.5 the
  prediction is class 1, otherwise class 0. The caption reads
  "<predicted> <confidence>% (<true>)" and is blue when the prediction matches
  ``true_label``, red otherwise.

  Args:
    prediction_array: sequence whose first element is the class-1 probability.
    true_label: index (0 or 1) of the true class in ``class_names``.
    img: image data; it is divided by 255 before display.
    class_names: sequence mapping class index to display name.
  """
  img = img/255.0
  plt.grid(False)
  plt.xticks([])
  plt.yticks([])

  plt.imshow(img, cmap=plt.cm.binary)

  positive_probability = float(prediction_array[0])
  predicted_label = 1 if positive_probability > 0.5 else 0
  # Show the confidence in the class actually predicted. The raw score is the
  # class-1 probability, so a confident class-0 prediction used to be
  # displayed with a misleadingly low percentage.
  if predicted_label == 1:
    confidence = positive_probability
  else:
    confidence = 1.0 - positive_probability

  color = 'blue' if predicted_label == true_label else 'red'

  plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                100*confidence,
                                class_names[true_label]),
                                color=color)
  
def plot_value_array(predictions_array, true_label):
  """Bar chart of the per-class scores of one sample (multi-class output).

  One grey bar is drawn per entry of ``predictions_array``. The bar of the
  arg-max class is coloured red and the bar of ``true_label`` blue; when both
  coincide the bar ends up blue because that colour is applied last.

  Args:
    predictions_array: per-class scores for this sample.
    true_label: index of the true class.
  """
  # Derive the bar count from the data instead of the notebook-level
  # num_classes so the helper works for any score vector.
  class_positions = range(len(predictions_array))

  plt.grid(False)
  # xticks needs the tick positions; passing the class count as a bare
  # integer is not a valid list of ticks.
  plt.xticks(class_positions)
  plt.yticks([])
  thisplot = plt.bar(class_positions, predictions_array, color="#777777")
  plt.ylim([0, 1])
  predicted_label = np.argmax(predictions_array)

  thisplot[predicted_label].set_color('red')
  thisplot[true_label].set_color('blue')


def plot_value_array_binary(predictions_array, true_label):
  """Two-bar chart of the class probabilities of one sample (single score).

  ``predictions_array[0]`` is read as the probability ``p`` of class 1; the
  bars show ``1 - p`` for class 0 and ``p`` for class 1. The predicted class
  (class 1 when ``p > 0.5``) is coloured red and ``true_label`` blue; when both
  coincide the bar ends up blue because that colour is applied last.

  Args:
    predictions_array: sequence whose first element is the class-1 probability.
    true_label: index (0 or 1) of the true class.
  """
  # Decide the predicted class from the raw score *before* building the
  # two-bar list: index 0 of that list is the class-0 probability, and
  # thresholding it inverted the highlighted bar.
  positive_probability = float(predictions_array[0])
  predicted_label = 1 if positive_probability > 0.5 else 0
  class_probabilities = [1 - positive_probability, positive_probability]

  plt.grid(False)
  plt.xticks([0,1])
  plt.yticks([])
  # Always exactly two bars, independent of the notebook-level num_classes.
  thisplot = plt.bar(range(2), class_probabilities, color="#777777")
  plt.ylim([0, 1])

  thisplot[predicted_label].set_color('red')
  thisplot[true_label].set_color('blue')


def plot_results(binary, predictions, full_tests_labels, full_tests_images, class_names, index_shift, num_rows, num_cols, num_images=64):
  """Plot a grid of test images, each next to its prediction bar chart.

  Every sample takes two neighbouring cells of a ``num_rows`` x
  ``2*num_cols`` grid: the captioned image, then the score bars.

  Args:
    binary: True to use the single-score (binary) plot helpers, False to use
      the multi-class ones.
    predictions: model outputs, one entry per sample.
    full_tests_labels: true class index of each sample.
    full_tests_images: image of each sample.
    class_names: sequence mapping class index to display name.
    index_shift: index of the first sample to display.
    num_rows: number of grid rows.
    num_cols: number of samples per grid row.
    num_images: maximum number of samples to draw (default 64, the value that
      used to be hard-coded).
  """
  # Select the helper pair once; the multi-class branch previously called the
  # binary helpers by mistake.
  if binary:
    draw_image, draw_scores = plot_image_binary, plot_value_array_binary
  else:
    draw_image, draw_scores = plot_image, plot_value_array

  # Never draw more samples than the grid can hold or the data provides.
  available = min(len(predictions), len(full_tests_labels), len(full_tests_images)) - index_shift
  count = max(0, min(num_images, num_rows * num_cols, available))

  plt.figure(figsize=(2*2*num_cols, 2*num_rows))
  for slot in range(count):
    # `slot` positions the sample in the grid; `sample` picks the data. Mixing
    # the two pushed subplots out of the grid whenever index_shift was not 0.
    sample = slot + index_shift
    plt.subplot(num_rows, 2*num_cols, 2*slot+1)
    draw_image(predictions[sample], full_tests_labels[sample], full_tests_images[sample], class_names)

    plt.subplot(num_rows, 2*num_cols, 2*slot+2)
    draw_scores(predictions[sample], full_tests_labels[sample])

  plt.tight_layout()
  plt.show()


# Show the test predictions, starting at sample 0, with a 45 x 5 grid
# requested; the binary helpers are selected when there are exactly two classes.
plot_results(num_classes == 2, predictions, full_tests_labels, full_tests_images, class_names, 0, 45, 5)

In [None]:
def _predicted_class_index(prediction):
    """Return the predicted class index for one model output.

    A single-value output is read as the class-1 probability and thresholded
    at 0.5 (the same rule as the binary plot helpers); a longer score vector
    uses arg-max. Taking arg-max of a one-element vector is always 0, which
    made every sample count as class 0.
    """
    scores = np.ravel(prediction)
    if scores.size == 1:
        return int(scores[0] > 0.5)
    return int(np.argmax(scores))


# Confusion-matrix counts, with 'Non-photo' as the positive class.
positive_class = 'Non-photo'
tp = 0
fp = 0
fn = 0
tn = 0

for i in range(len(predictions)):
    predicted_label = class_names[_predicted_class_index(predictions[i])]
    true_label = class_names[full_tests_labels[i]]

    actual_positive = true_label == positive_class
    predicted_positive = predicted_label == positive_class

    if actual_positive and predicted_positive:
        tp += 1
    elif actual_positive:
        fn += 1
    elif predicted_positive:
        fp += 1
    else:
        tn += 1

In [None]:
# Raw counts, in the order: true positives, false positives, false negatives,
# true negatives.
print(tp, fp, fn, tn)

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


# Classification metrics derived from the confusion counts. Each ratio falls
# back to 0.0 instead of raising ZeroDivisionError when its denominator is
# empty (e.g. the positive class is never predicted).
precision = _safe_ratio(tp, tp + fp)
recall = _safe_ratio(tp, tp + fn)
f1 = _safe_ratio(2 * (precision * recall), precision + recall)
accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
print(precision, recall, f1, accuracy)

In [None]:
# Print the 2x2 confusion matrix: first row is (tp, fp), second is (fn, tn).
print("Confusion Matrix : ")
for left_count, right_count in ((tp, fp), (fn, tn)):
    print(f"[{left_count}] [{right_count}]")