# Preliminary operations

In [2]:
#@title Drive mount

# Mount Google Drive under /content/gdrive: every dataset path used later in
# this notebook lives below /content/gdrive/MyDrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
#@title Main libraries
import pandas as pd
import numpy as np

import os
import random

from sklearn.utils import  compute_class_weight

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix


import tensorflow as tf
from tensorflow import keras
tfk = tf.keras
tfkl = tf.keras.layers

# Display
from IPython.display import Image, display
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import cv2

!pip install visualkeras
import visualkeras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting visualkeras
  Downloading visualkeras-0.0.2-py3-none-any.whl (12 kB)
Collecting aggdraw>=1.3.11
  Downloading aggdraw-1.3.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (992 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m992.2/992.2 KB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: aggdraw, visualkeras
Successfully installed aggdraw-1.3.15 visualkeras-0.0.2


In [4]:
#@title Reproducibility
# Use one fixed seed for every random number source used in the notebook
# (Python hashing env variable, `random`, NumPy and TensorFlow).
seed = 90
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(f"The seed is: {seed}")

The seed is: 90


# Data access

In [5]:
#@title Functions needed to access data
def get1image(name='img_name'):
  '''
    Load one image of `test_dir` by file name, plot it and return it.

    The image is read through a plain ImageDataGenerator (no preprocessing
    function) using the size and colour mode stored in `data_hyperparameters`.

    Args:
      name: entry of `generator.filenames`, i.e. the path of the image relative
            to `test_dir` (sub-folder included). The default is only a placeholder.

    Returns:
      ndarray holding a batch of exactly 1 element, e.g. (1, 224, 224, 3) with the
      current settings. Values are in the range 0-255 (no rescaling is applied).

    Raises:
      ValueError: if `name` is not among the files found in `test_dir`.
  '''
  # The batch size is forced to 1: the function promises a one-image batch, and
  # with any larger batch size position `idx` would address a batch, not an image.
  generator = ImageDataGenerator().flow_from_directory(directory = test_dir,
                                                                target_size = (data_hyperparameters['resized_shape_height'], data_hyperparameters['resized_shape_width']),
                                                                color_mode = data_hyperparameters['color_mode'],
                                                                class_mode = None,
                                                                batch_size = 1,
                                                                shuffle = False,
                                                                seed = seed)
  idx = generator.filenames.index(name)

  # Keras iterators support random access, so the requested image is read
  # directly instead of decoding (and discarding) all the images before it.
  image = generator[idx]

  # Rescale to 0-1 only for display; the returned batch keeps the 0-255 range.
  image_2_show = image[0]/255
  plt.figure(figsize=(6, 4))
  plt.imshow(np.double(image_2_show),cmap='gray')
  return image

def get_next_batch(generator, image_index=0):
  '''
    Take the next batch from `generator`, print information about one of its
    samples and plot the first channel of that sample.

    Args:
      generator: Keras directory iterator yielding (images, targets) batches.
      image_index: position, inside the batch, of the sample to inspect.

    Returns:
      (batch, image_2_show, target): the full batch, the first channel of the
      selected image and the target of the selected image.

    NOTE(review): the class name is looked up in a module-level `labels` that is
    not defined in this part of the notebook - it must exist before calling.
    NOTE(review): the printed file name is read positionally from
    `generator.filenames`, so it presumably is only right with shuffle=False.
  '''
  batch = next(generator)

  image = batch[0]
  target = batch[1]

  print("(Input) image shape:", image.shape)
  print("Target shape:",target.shape)

  # Keep only the requested sample of the batch
  image = image[image_index]
  target = target[image_index]
  target_idx = np.argmax(target)

  # After next(), batch_index already refers to the following batch and Keras
  # resets it to 0 once the last batch of an epoch has been produced. The modulo
  # maps it back to the batch just returned (without it the last batch gives a
  # negative offset, an empty slice and an IndexError).
  last_batch = (generator.batch_index - 1) % len(generator)
  idx = last_batch * generator.batch_size
  print(generator.filenames[idx : idx + generator.batch_size][image_index])

  print()
  print("Categorical label:", target)
  print("Label:", target_idx)
  print("Class name:", labels[target_idx])
  image_2_show = image[:,:,0]
  plt.figure(figsize=(6, 4))
  plt.imshow(np.double(image_2_show),cmap='gray')

  return batch, image_2_show, target
'''
def get_next_image(generator):
  batch = next(generator)
  image = batch
  idx = generator.batch_index-1
  name = generator.filenames[idx]
  label = generator.labels[idx]
  #image_2_show = batch[0]/255
  #fig = plt.figure(figsize=(6, 4))
  #plt.imshow(np.double(image_2_show),cmap='gray')
  return image, name, label, idx
'''

def get_next_image(generator):
  '''
    Advance `generator` by one step and return a single image.

    The generator is expected to yield a sequence whose first element is a
    4-dimensional batch of images; the first image of that batch is returned
    (3-dimensional array).

    NOTE(review): the original (Italian) comment mentioned pixels in the range
    0-1, but the division by the maximum is disabled, so values are returned
    exactly as produced by the generator.
  '''
  images = next(generator)[0]
  first_image = images[0, :, :, :]
  return first_image

def visualize_images(batch):
  '''
    Plot all the images of a batch in rows of 8.

    Args:
      batch: what the generator yields, i.e. [images, labels]; only the images
             (first element) are used. Images are divided by 255 before being
             shown, so they are assumed to be in the 0-255 range.

    Every image is shown: the last row is partially filled (unused axes are
    hidden) when the batch size is not a multiple of 8.
  '''
  images = batch[0]
  n_images = images.shape[0]
  per_row = 8

  for start in range(0, n_images, per_row):
    row_images = images[start:start + per_row]
    _, axs = plt.subplots(1, per_row, figsize=(20,20))
    for ax, img in zip(axs, row_images):
      ax.imshow(img/255)
    # Hide the axes left empty by an incomplete last row
    for ax in axs[len(row_images):]:
      ax.axis('off')

In [6]:
#@title Invert images when needed
def get_bg_angle(bkgr, angle):
  '''
    Classify the pixels of one image corner as dark or bright.

    For every pixel of `angle` (iterated row by row) a 0 is appended to `bkgr`
    when the value is below 20% of 255 and a 1 when it is above 80% of 255;
    values in between are ignored.

    Args:
      bkgr: list collecting the votes; it is modified in place.
      angle: 2-D collection of pixel values (rows of pixels).

    Returns:
      The same `bkgr` list, extended with the new votes.
  '''
  dark_limit = 255*0.2
  bright_limit = 255*0.8
  for row in angle:
    bkgr.extend(
      (0 if pix < dark_limit else 1)
      for pix in row
      if pix < dark_limit or pix > bright_limit
    )
  return bkgr

def is_inverted(im):
  '''
    Guess whether an image has a bright background (i.e. it is "inverted").

    The four 5x5 corners of the first channel are inspected: values below 20%
    of 255 count as dark, values above 80% of 255 count as bright, the others
    are ignored. The image is considered inverted when at least half of the
    counted corner pixels are bright.

    Args:
      im: image array with channels last, e.g. (224, 224, 3); pixels are
          expected in the range 0-255. Any height/width is accepted.

    Returns:
      1 if the image looks inverted, 0 otherwise. When no corner pixel is
      clearly dark or bright there is no evidence of inversion and 0 is returned.
  '''
  # Open-ended negative slices take the last 5 rows/columns whatever the image size
  corners = (im[:5, :5, 0], im[:5, -5:, 0], im[-5:, :5, 0], im[-5:, -5:, 0])
  values = np.concatenate([np.asarray(corner).ravel() for corner in corners])

  dark = np.count_nonzero(values < 255*0.2)
  bright = np.count_nonzero(values > 255*0.8)
  decided = dark + bright

  # No clearly dark/bright pixel: do not claim the image is inverted
  if decided == 0:
    return 0

  if bright < 0.5*decided:
    inv = 0
  else:
    inv = 1
  return inv

def invert(im):
  '''
    Return the negative of `im` (255 - im) when is_inverted() flags it,
    otherwise return `im` itself unchanged.
  '''
  return 255-im if is_inverted(im) else im

In [7]:
#@title Data parameters

# Settings shared by every image generator of the notebook
data_hyperparameters = dict(
    batch_size=1,
    resized_shape_height=224,
    resized_shape_width=224,
    color_mode='rgb',
)

# 'grayscale' images have a single channel, any other colour mode is treated as 3 channels
if data_hyperparameters['color_mode'] == 'grayscale':
  n_channels = 1
else:
  n_channels = 3

# (height, width, channels)
input_shape = (
    data_hyperparameters['resized_shape_height'],
    data_hyperparameters['resized_shape_width'],
    n_channels,
)

In [9]:
# Folder holding the images to process (one sub-folder per class)
test_dir = '/content/gdrive/MyDrive/HIDDEN_DATASET'

# Ordered (shuffle = False) iterator over test_dir. Every image goes through
# invert(); class_mode = 'input' makes each batch a pair whose target is the
# image batch itself.
test_set = ImageDataGenerator(preprocessing_function=invert).flow_from_directory(
    directory=test_dir,
    target_size=(
        data_hyperparameters['resized_shape_height'],
        data_hyperparameters['resized_shape_width'],
    ),
    color_mode=data_hyperparameters['color_mode'],
    class_mode='input',
    batch_size=data_hyperparameters['batch_size'],
    shuffle=False,
    seed=seed,
)

Found 5144 images belonging to 1 classes.


In [11]:
import imageio

# Generator with batch_size = 1 and shuffle = False: each call to
# get_next_image() consumes exactly one file, in the order of test_set.filenames.
test_set = ImageDataGenerator(preprocessing_function=invert).flow_from_directory(directory = test_dir,
                                                                target_size = (data_hyperparameters['resized_shape_height'], data_hyperparameters['resized_shape_width']),
                                                                color_mode = data_hyperparameters['color_mode'],
                                                                class_mode = 'input',
                                                                batch_size = 1,
                                                                shuffle = False,
                                                                seed = seed)

# Output folders: PULITE = clean images, SPORCHE = noisy images.
# NOTE(review): both folders are created inside HIDDEN_DATASET, which is also the
# folder scanned above; on a re-run the generator will presumably list the files
# written here as well - consider writing outside the source folder.
clean_ds = '/content/gdrive/MyDrive/HIDDEN_DATASET/PULITE/'
noise_ds = '/content/gdrive/MyDrive/HIDDEN_DATASET/SPORCHE/'

# File names are relative to test_dir and start with the "all/" sub-folder, so
# that sub-folder must exist in both outputs. makedirs(exist_ok=True) also
# creates the parent folder and does not fail when the cell is run again.
os.makedirs(clean_ds+"all/", exist_ok=True)
os.makedirs(noise_ds+"all/", exist_ok=True)

for name in test_set.filenames:

  imm = get_next_image(test_set)

  # An image is considered noisy when more than 500 of its values are saturated
  # (> 254) and at least 30 saturated values fall in the central 64x64 window
  # (rows/columns 80-143); every other image is considered clean.
  is_noisy = (np.count_nonzero(imm>254) > 500
              and np.count_nonzero(imm[80:144, 80:144]>254) >= 30)
  new_name = (noise_ds if is_noisy else clean_ds) + name

  # NOTE(review): imm is presumably a float array; imageio converts it while
  # writing and may warn once per file. Confirm whether an explicit uint8
  # conversion is wanted before saving.
  imageio.imwrite(new_name, imm)



Found 5144 images belonging to 1 classes.


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m


In [None]:
import imageio

# Generator with batch_size = 1 and shuffle = False: each call to
# get_next_image() consumes exactly one file, in the order of test_set.filenames.
# NOTE(review): the saved output of this cell reports a different number of
# images than the previous cells, so test_dir was presumably pointed to another
# folder before running it - confirm which folder is expected here.
test_set = ImageDataGenerator(preprocessing_function=invert).flow_from_directory(directory = test_dir,
                                                                target_size = (data_hyperparameters['resized_shape_height'], data_hyperparameters['resized_shape_width']),
                                                                color_mode = data_hyperparameters['color_mode'],
                                                                class_mode = 'input',
                                                                batch_size = 1,
                                                                shuffle = False,
                                                                seed = seed)

ds = '/content/gdrive/MyDrive/FINAL_NONOISESPLIT/TEST/'
# File names start with the "all/" sub-folder. makedirs(exist_ok=True) creates
# FINAL_NONOISESPLIT/, TEST/ and all/ in one call and does not fail on re-runs.
os.makedirs(ds+"all/", exist_ok=True)

# Save every image, after the invert() preprocessing, under the same relative name
for name in test_set.filenames:

  imm = get_next_image(test_set)
  new_name = ds+name

  imageio.imwrite(new_name, imm)



Found 3093 images belonging to 1 classes.


