In [None]:
# Directory holding the .npy tensors and labels read by load_training() and
# load_testing().
train_path = '/content/gdrive/MyDrive/deep_learning_project/numpy data'
# Root directory of the saved GAN generators; gan_oversample() appends
# "<task>/<label>/generator.h5" to this prefix, so the trailing slash matters.
initial_generator_path = '/content/gdrive/MyDrive/deep_learning_project/models/gan/'

In [None]:
import numpy as np
import os 
import matplotlib.pyplot as plt
import shutil
from PIL import Image 
import PIL
import cv2 as cv
from sklearn.metrics import classification_report,confusion_matrix,precision_score,recall_score,roc_curve,roc_curve,fbeta_score,auc
from imblearn.over_sampling import SMOTE
import random
import tensorflow as tf
from tensorflow.keras import models
from mlxtend.plotting import plot_confusion_matrix

def enhance_contrast(images,contrast=1.5):
  """Adjust the contrast of every image with tf.image.adjust_contrast.

  A trailing channel axis is appended before the call and removed again
  afterwards, so the result has the same number of axes as `images`.

  Args:
    images: batch of images; presumably 2-D single-channel images stacked
      on the first axis -- TODO confirm against callers.
    contrast: contrast factor forwarded to tf.image.adjust_contrast.

  Returns:
    numpy array with the adjusted images.
  """
  with_channel = np.expand_dims(images, -1)
  # Each image is adjusted on its own, exactly one call per sample.
  adjusted = [tf.image.adjust_contrast(sample, contrast) for sample in with_channel]
  return np.array(np.squeeze(adjusted, axis=-1))

def enhance_image(imgs):
  """Apply CLAHE (clipLimit=2.0, 8x8 tile grid) to every image in `imgs`.

  Args:
    imgs: iterable of images accepted by OpenCV's CLAHE `apply`.

  Returns:
    A list with one equalized image per input image, in input order.
  """
  equalizer = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
  return [equalizer.apply(picture) for picture in imgs]

def filter_dataset(dataset):
  """Binarize every image of `dataset` with Otsu's threshold.

  Each image is rescaled from the full range of its integer dtype to 8 bit,
  thresholded with cv.threshold (THRESH_BINARY + THRESH_OTSU) and the
  resulting 0/255 mask is rescaled to float64 values 0 / 65536.

  Args:
    dataset: iterable of integer-typed images (np.iinfo rejects float dtypes).

  Returns:
    A list of float64 masks, one per input image.
  """
  masks = []
  for picture in dataset:
    dtype_max = np.iinfo(picture.dtype).max
    # Normalize to 0-1, scale to 0-255, then truncate to 8 bit for OpenCV.
    picture_8bit = (255 * (picture.astype(np.float64) / dtype_max)).astype(np.uint8)
    _, binary = cv.threshold(picture_8bit, 0, 255, cv.THRESH_BINARY+cv.THRESH_OTSU  )
    # NOTE(review): 65536 is one above the uint16 maximum (65535); harmless
    # while the mask stays float64 -- confirm before casting it to uint16.
    masks.append(binary.astype(np.float64) / 255 * 65536)
  return masks

def gan_oversample(images,labels,traduction):
  """Balance a two-class dataset with images produced by a saved GAN generator.

  The class with fewer samples (label 0 versus everything else) is topped up
  with generated images until both classes have the same size. The generator
  is loaded from
  `initial_generator_path + <task folder> + <label> + '/generator.h5'`.

  Args:
    images: array of images, stacked on the first axis; generated images are
      150x150, so `images` is presumably (N, 150, 150) -- TODO confirm.
    labels: 1-D array-like of labels aligned with `images`.
    traduction: 0 selects the "benign_malign/" generators, any other value
      the "mass_calcification/" ones.

  Returns:
    (images, labels) with the generated samples appended. When the dataset
    is already balanced, copies of the inputs are returned and no model is
    loaded.
  """
  if traduction == 0:
    traduction_path = "benign_malign/"
  else:
    traduction_path = "mass_calcification/"

  label_array = np.asarray(labels)
  count_0 = int(np.count_nonzero(label_array == 0))
  count_1 = len(label_array) - count_0
  difference = count_0 - count_1

  # Nothing to add: avoid loading a model and concatenating an empty 1-D
  # list with the N-D image tensor (which raises ValueError).
  if difference == 0:
    print("Dataset already balanced, nothing to oversample")
    return np.array(images), np.array(labels)

  if difference > 0:
    print("Oversampling on label 1")
    oversampling_label = 1
  else:
    print("Oversampling on label 0")
    oversampling_label = 0
  total_path = (initial_generator_path + traduction_path + str(oversampling_label) + '/generator.h5')

  generator = models.load_model(total_path)

  missing = abs(difference)
  adding_images = []
  while len(adding_images) < missing:
    # One batch = 100 latent vectors of length 100 -> 100 images of 150x150.
    noise = np.random.normal(loc=0, scale=1, size=[100, 100])
    generated_images = generator.predict(noise) * 65536
    generated_images = generated_images.reshape(100,150,150)
    # Clip before the cast: values outside 0..65535 (e.g. an output of
    # exactly 1.0 scaled to 65536) would otherwise wrap around in uint16.
    generated_images = np.clip(generated_images, 0, 65535).astype(np.uint16)
    adding_images.extend(generated_images[:missing - len(adding_images)])

  # Keep the caller's label dtype instead of silently promoting to float64.
  adding_labels = np.full(len(adding_images), oversampling_label, dtype=label_array.dtype)

  concatenated_images = np.concatenate((images,adding_images),axis=0)
  concatenated_labels = np.concatenate((labels,adding_labels),axis=0)

  return concatenated_images,concatenated_labels

def in_depth_performance(testing_labels,prediction):
  """Plot and print a set of binary-classification metrics.

  Shows the confusion matrix, prints precision, recall, F2, F0.5 and the
  classification report, then prints the AUC and plots the ROC curve.

  Args:
    testing_labels: ground-truth labels.
    prediction: predictions aligned with `testing_labels`. The tick labels
      are hard-coded to '0' and '1', so two classes are assumed.
  """
  plot_confusion_matrix(conf_mat=confusion_matrix(testing_labels,prediction), figsize=(6, 6), cmap=plt.cm.Blues)
  plt.xlabel('Predicted label', fontsize=18)
  plt.ylabel('True label', fontsize=18)
  plt.title('Confusion Matrix', fontsize=18)
  classNames = ['0','1']
  tick_marks = np.arange(len(classNames))
  plt.xticks(tick_marks, classNames)
  plt.yticks(tick_marks, classNames)
  plt.show()

  print("precision score: ", precision_score(testing_labels,prediction))
  print("recall score: ", recall_score(testing_labels,prediction))
  # `beta` is keyword-only in current scikit-learn releases; passing it
  # positionally raises TypeError there.
  print("f2: ", fbeta_score(testing_labels, prediction, beta=2))
  print("f0.5: ", fbeta_score(testing_labels, prediction, beta=0.5))
  print("classification report: \n", classification_report(testing_labels,prediction))

  # NOTE(review): if `prediction` holds hard 0/1 labels rather than scores,
  # the ROC curve has a single operating point -- confirm with callers.
  fpr, tpr, _ = roc_curve(testing_labels,  prediction)
  print("AUC: ",auc(fpr,tpr))
  plt.plot(fpr,tpr)
  plt.ylabel('True Positive Rate')
  plt.xlabel('False Positive Rate')
  plt.title('Roc curve')
  plt.show()

def labels_distribution(labels,title):
  """Print `title` and show a bar chart of the label counts for classes 0-4.

  Args:
    labels: array-like of labels; only the values 0, 1, 2, 3 and 4 are
      counted.
    title: text printed above the chart (it is not used as the plot title).
  """
  print("------------------------------------------------")
  print(title)
  # Convert first: comparing a plain Python list with `== 0` yields a single
  # False, which would make every count silently 0.
  label_array = np.asarray(labels)
  x = (0,1,2,3,4)
  y = tuple(np.count_nonzero(label_array == cls) for cls in x)
  plt.bar(x,y,align='center') # A bar chart
  plt.xlabel('Class')
  plt.ylabel('Occurance')
  for position, height in zip(x, y):
    plt.vlines(position,0,height) # vertical line from 0 up to the bar height
  plt.show()

def label_traduction(labels,type):
  """Map the original labels onto a two-class labelling.

  Args:
    labels: numpy array of labels (the arithmetic below needs array
      semantics).
    type: 0 returns `labels % 2`; any other value returns `labels / 3`
      truncated to an integer.

  Returns:
    numpy integer array with the translated labels.
  """
  if type == 0:
    return np.array(labels % 2, dtype='int')
  return np.array(labels / 3, dtype='int')

def load_testing():
  """Load the public test set from `train_path`.

  Reads 'public_test_tensor.npy' and 'public_test_labels.npy' and keeps only
  the entries at odd positions (index 1, 3, 5, ...).

  Returns:
    (images, labels) after the odd-index selection.
  """
  tensor_file = os.path.join(train_path,'public_test_tensor.npy')
  label_file = os.path.join(train_path,'public_test_labels.npy')
  return np.load(tensor_file)[1::2], np.load(label_file)[1::2]

def load_training():
  """Load the training set from `train_path`.

  Reads 'train_tensor.npy' and 'train_labels.npy' and keeps only the entries
  at odd positions (index 1, 3, 5, ...).

  Returns:
    (images, labels) after the odd-index selection.
  """
  tensor_file = os.path.join(train_path,'train_tensor.npy')
  label_file = os.path.join(train_path,'train_labels.npy')
  return np.load(tensor_file)[1::2], np.load(label_file)[1::2]

def oversample(images,labels):
  """Balance a two-class dataset by duplicating random minority samples.

  Samples are split into label 0 and "everything else"; the smaller group is
  topped up with samples drawn from it at random (with replacement) until
  both groups have the same size.

  Args:
    images: array-like of samples, stacked on the first axis.
    labels: 1-D array-like of labels aligned with `images`.

  Returns:
    (images, labels) with the duplicated samples appended at the end. When
    the dataset is already balanced, copies of the inputs are returned.

  Raises:
    ValueError: from np.random.choice when the group to oversample is empty.
  """
  label_array = np.asarray(labels)
  indexes_0 = np.flatnonzero(label_array == 0)
  indexes_1 = np.flatnonzero(label_array != 0)
  difference = len(indexes_0) - len(indexes_1)

  # Nothing to add: concatenating an empty 1-D list with an N-D image tensor
  # would raise ValueError, so return early.
  if difference == 0:
    print("Dataset already balanced, nothing to oversample")
    return np.array(images), np.array(labels)

  if difference > 0:
    print("Oversampling on label 1")
    minority_indexes = indexes_1
  else:
    print("Oversampling on label 0")
    minority_indexes = indexes_0

  random_indexes = np.random.choice(minority_indexes,abs(difference),replace=True)
  adding_images = np.asarray(images)[random_indexes]
  adding_labels = label_array[random_indexes]

  concatenated_images = np.concatenate((images,adding_images),axis=0)
  concatenated_labels = np.concatenate((labels,adding_labels),axis=0)

  return concatenated_images,concatenated_labels


def save_dataset(path, dataset_type, images, labels):
  """Write the CLAHE-enhanced images as PNG files, one sub-folder per label.

  The folder `path + dataset_type` is removed if it exists and recreated,
  then every image is enhanced with enhance_image() and saved as
  `<path><dataset_type>/<label>/<index>.png`.

  Args:
    path: destination prefix; it is concatenated as-is, so it must end with
      a path separator.
    dataset_type: name of the dataset folder created under `path`.
    images: sequence of image arrays accepted by enhance_image().
    labels: sequence of labels aligned with `images`.
  """
  print("------------------------------------------------")
  print("Creating folders...")
  root = path + dataset_type

  try:
    shutil.rmtree(root)
  except OSError as e:
    # Best effort: the folder may simply not exist yet.
    print("Warn: %s - %s." % (e.filename, e.strerror))

  os.mkdir(root)

  # Name the folders after the actual label values so they always match the
  # str(label) used when saving (labels need not be 0..k-1 integers).
  for label in np.unique(labels):
    os.mkdir(root + "/" + str(label))

  print("Saving images...")
  # Enhance the raw arrays first: enhance_image() takes a collection of
  # arrays and returns a list, so it cannot be applied to a PIL image.
  enhanced_images = enhance_image(images)
  for i, (enhanced, label) in enumerate(zip(enhanced_images, labels)):
    Image.fromarray(enhanced).save(root + "/" + str(label) + "/" + str(i) + ".png")
  print("DONE!")
  print("------------------------------------------------")

def shuffle_data(images,labels):
  """Shuffle images and labels with one shared random permutation.

  The permutation is drawn with `random.shuffle`, so `random.seed` makes the
  result reproducible. The inputs are left untouched.

  Args:
    images: array-like of samples, stacked on the first axis.
    labels: array-like of labels aligned with `images`.

  Returns:
    (images, labels) as new numpy arrays in the shuffled order; each image
    stays paired with its label.
  """
  order = list(range(len(images)))
  random.shuffle(order)

  # Index with the permutation. Copying element i onto position i of an
  # alias of the same array (the previous approach) changes nothing.
  return np.asarray(images)[order], np.asarray(labels)[order]

def smote_oversample(images,labels):
  """Balance the dataset with SMOTE.

  SMOTE works on 2-D (samples, features) data, so every image is flattened
  to one row before resampling and the result is reshaped back to the
  original per-image shape.

  Args:
    images: array of images, stacked on the first axis.
    labels: array-like of labels aligned with `images`.

  Returns:
    (images, labels) as returned by SMOTE().fit_resample, with the images
    restored to shape (n_resampled, *images.shape[1:]).
  """
  images = np.asarray(images)
  sample_shape = images.shape[1:]
  # One row per image; the previous reshape(-1, n_samples) mixed pixels of
  # different images and its result was never used.
  flat_images = images.reshape(images.shape[0], -1)
  oversampled_images,oversampled_labels = SMOTE().fit_resample(flat_images,labels)
  return oversampled_images.reshape((-1,) + sample_shape),oversampled_labels

def visualize_image(data,title):
  """Show `data` as a grayscale image with `title` above it and no axes.

  Args:
    data: image data accepted by plt.imshow.
    title: text used as the plot title.
  """
  plt.imshow(data, cmap='gray')
  plt.axis("off")
  plt.title(title)
  plt.show()

