In [None]:
!git clone https://github.com/EwaNikodemMasterThesis/AnomalyDetection.git

Cloning into 'UCSD'...
remote: Enumerating objects: 53469, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 53469 (delta 0), reused 4 (delta 0), pack-reused 53465[K
Receiving objects: 100% (53469/53469), 1.90 GiB | 27.77 MiB/s, done.
Resolving deltas: 100% (2686/2686), done.
Checking out files: 100% (54648/54648), done.


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install tensorflow-addons

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.15.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 5.4 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.15.0


In [None]:
# Ground truth: maps each 1-based test video number to the range of 0-based
# frame indices that plotROC labels 0 (the frames whose low regularity score
# it counts as a true positive).
TestVideoFile = {
    1: range(60, 180),
    2: range(94, 180),
    3: range(0, 146),
    4: range(30, 180),
    5: range(0, 129),
    6: range(0, 159),
    7: range(45, 180),
    8: range(0, 180),
    9: range(0, 120),
    10: range(0, 150),
    11: range(0, 180),
    12: range(87, 180),
}

In [None]:
import os
from os import listdir
from os.path import isfile, join, isdir


from PIL import Image
import numpy as np
import shelve
import keras
import tensorflow as tf 
import tensorflow_addons as tfa
from keras.layers import Conv2DTranspose, ConvLSTM2D, BatchNormalization, TimeDistributed, Conv2D, LayerNormalization, MaxPooling2D, UpSampling2D
from tensorflow_addons.layers import MaxUnpooling2D
from keras.models import Sequential, load_model
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics


import shutil
import pathlib

#os.environ["CUDA_VISIBLE_DEVICES"]="-1"

In [None]:
# The paths need to be changed to match where the dataset and Drive are mounted.
class Config:
    """Notebook-wide settings: dataset locations, artefact paths and training knobs."""

    # --- dataset locations -------------------------------------------------
    # Training videos (one sub-directory per video).
    DATASET_PATH = "/content/UCSD/UCSD_Anomaly_Dataset.v1p2/UCSDped2/Train"
    # Flat directory that receives a copy of every training frame.
    DATASET_PATH_ALLIMAGES = "/content/UCSD/UCSD_Anomaly_Dataset.v1p2/UCSDped2/All_images"
    # Test videos (one sub-directory per video).
    TEST_PATH = "/content/UCSD/UCSD_Anomaly_Dataset.v1p2/UCSDped2/Test"

    # --- clip currently being evaluated (reassigned by the evaluation loop) --
    SINGLE_TEST_PATH = "/content/UCSD/UCSD_Anomaly_Dataset.v1p2/UCSDped2/Test/Test003"
    # Key into TestVideoFile for the clip above.
    SINGLE_TEST_VIDEO_FILE = 3

    # --- training ------------------------------------------------------------
    BATCH_SIZE = 32
    EPOCHS = 50

    # --- persisted artefacts -------------------------------------------------
    MODEL_PATH = "/content/drive/MyDrive/UCSD/ped2_model_v1.hdf5"
    MODEL_PATH_GEN = "/content/drive/MyDrive/UCSD/ped2_model_gen_v1.hdf5"
    TRAINING_SET_PATH = "/content/drive/MyDrive/UCSD/ped2_trainingset_v1.npy"

    # NOTE(review): not referenced by any code visible in this notebook section.
    THRESHOLD = 0.95

In [None]:
# Copy every training frame into one flat directory so it can later be listed
# and fed to the data generator. Each copy is renamed to
# "<video dir>_<file name>" (lower-cased) so frames from different videos do not
# collide.

pathlib.Path(Config.DATASET_PATH_ALLIMAGES).mkdir(parents=True, exist_ok=True)

for subdir, dirs, files in os.walk(Config.DATASET_PATH):
  for f in files:
    if pathlib.Path(f).suffix == ".tif":
      # os.walk already yields `subdir` prefixed with the walk root. Joining it
      # onto DATASET_PATH again only worked because the path is absolute; with a
      # relative DATASET_PATH the root would be duplicated in the source path.
      flat_name = os.path.basename(subdir).lower() + "_" + f.lower()
      shutil.copy(join(subdir, f), join(Config.DATASET_PATH_ALLIMAGES, flat_name))

In [None]:
def get_clips_by_stride(stride, frames_list, sequence_size, frame_shape=(224, 224)):
    """Cut a list of frames into fixed-length clips sampled every `stride` frames.

    For each start offset in ``range(stride)`` the frames
    ``start, start + stride, start + 2 * stride, ...`` are grouped into
    consecutive clips of ``sequence_size`` frames. Frames left over at the end
    of an offset that do not fill a whole clip are discarded.

    Args:
        stride: step between consecutive frames of a clip (>= 1).
        frames_list: sequence of 2-D frames, each assignable to an array of
            shape ``frame_shape``.
        sequence_size: number of frames per clip.
        frame_shape: ``(height, width)`` of every frame; defaults to the
            224x224 size used throughout this notebook.

    Returns:
        A list of float arrays of shape ``(sequence_size, height, width, 1)``.
    """
    clips = []
    sz = len(frames_list)
    height, width = frame_shape
    clip = np.zeros(shape=(sequence_size, height, width, 1))
    for start in range(0, stride):
        # Restart the clip for every offset. Previously the counter carried
        # over, so the tail of one offset was spliced together with the head of
        # the next one, producing temporally discontinuous clips.
        cnt = 0
        for i in range(start, sz, stride):
            clip[cnt, :, :, 0] = frames_list[i]
            cnt = cnt + 1
            if cnt == sequence_size:
                # The buffer is reused, so store a copy of the completed clip.
                clips.append(np.copy(clip))
                cnt = 0
    return clips


def get_training_set(reload_training_set=True):
    """Build (or load from cache) the array of training clips.

    Args:
        reload_training_set: when False, skip the rebuild and return the array
            stored at ``Config.TRAINING_SET_PATH``. When True (default), rebuild
            the clips from the images under ``Config.DATASET_PATH`` and
            overwrite that cache file.

    Returns:
        A numpy array of clips. Both the cached and the rebuilt path now return
        an ndarray (the rebuilt path used to return a Python list while the
        cached path returned an ndarray).
    """
    if not reload_training_set:
        return np.load(Config.TRAINING_SET_PATH)

    clips = []

    for f in sorted(listdir(Config.DATASET_PATH)):
        video_dir = join(Config.DATASET_PATH, f)
        if not isdir(video_dir):
            continue

        all_frames = []
        for c in sorted(listdir(video_dir)):
            if not c.endswith("tif"):
                continue
            # Close the file handle deterministically once the pixels are read.
            with Image.open(join(video_dir, c)) as raw:
                img = raw.convert('L').resize((224, 224))

            img = np.array(img, dtype=np.float32)

            # Per-frame standardisation. A constant frame has std == 0, which
            # would turn the whole frame into NaN; map it to zeros instead.
            std = img.std()
            if std > 0:
                img = (img - img.mean()) / std
            else:
                img = np.zeros_like(img)

            img = np.clip(img, 0, 1)
            all_frames.append(img)

        # Clips are built per video, with frame strides 1 and 2.
        for stride in range(1, 3):
            clips.extend(get_clips_by_stride(stride=stride, frames_list=all_frames, sequence_size=10))

    training_set = np.array(clips)
    np.save(Config.TRAINING_SET_PATH, training_set)
    return training_set
    
    

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that turns batches of frame file names into training clips.

    Each item takes ``batch_size`` consecutive names from ``frame_list``, loads
    the images from ``Config.DATASET_PATH_ALLIMAGES``, normalises them and cuts
    them into 10-frame clips (strides 1 and 2). The target equals the input, as
    used for reconstruction training.
    """

    def __init__(self, frame_list, batch_size):
        """Store the ordered list of frame file names and the frames-per-batch count."""
        super().__init__()
        self.frame_list = frame_list
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches per epoch (the last batch may be shorter)."""
        # `np.int` was a deprecated alias that has been removed from NumPy; use
        # the builtin int so this keeps working on current versions.
        return int(np.ceil(len(self.frame_list) / float(self.batch_size)))

    def __getitem__(self, index):
        """Return the ``(X, Y)`` pair built from the ``index``-th slice of frame names."""
        frame_list_temp = self.frame_list[index * self.batch_size : (index + 1) * self.batch_size]

        # Set of X_train and y_train
        X, Y = self.__data_generation(frame_list_temp)

        return X, Y

    def __data_generation(self, frame_list_temp):
        """Load, normalise and clip the given frames; return ``(clips, clips.copy())``.

        NOTE: clips are only emitted once 10 frames are available, so a slice
        with fewer than 10 names yields no clips.
        """
        frames = []
        for f in frame_list_temp:
            # Close the file handle deterministically once the pixels are read.
            with Image.open(join(Config.DATASET_PATH_ALLIMAGES, f)) as raw:
                img = raw.convert('L').resize((224, 224))

            img = np.array(img, dtype=np.float32)

            # Per-frame standardisation. A constant frame has std == 0, which
            # would turn the whole frame into NaN; map it to zeros instead.
            std = img.std()
            if std > 0:
                img = (img - img.mean()) / std
            else:
                img = np.zeros_like(img)

            img = np.clip(img, 0, 1)
            frames.append(img)

        augmentation = []
        for stride in range(1, 3):
            augmentation.extend(get_clips_by_stride(stride=stride, frames_list=frames, sequence_size=10))

        training_set = np.array(augmentation)
        training_set = training_set.reshape(-1, 10, 224, 224, 1)

        return training_set, training_set.copy()
        
             


def get_model_gen(reload_model=True):
    """Train the convolutional autoencoder with the data generator, or load it.

    Args:
        reload_model: when False, load and return the model saved at
            ``Config.MODEL_PATH_GEN``. When True (default), build the network,
            train it on every file in ``Config.DATASET_PATH_ALLIMAGES`` and
            save it to ``Config.MODEL_PATH_GEN``.

    Returns:
        The loaded or freshly trained Keras model. Input and output are
        sequences of ten 224x224 single-channel frames.
    """
    if not reload_model:
        return load_model(Config.MODEL_PATH_GEN, custom_objects={'LayerNormalization': LayerNormalization})

    # Every file in the flat image directory, in sorted order, is a training frame.
    frames = sorted(listdir(Config.DATASET_PATH_ALLIMAGES))

    training_generator = DataGenerator(frames, Config.BATCH_SIZE)

    model = Sequential()

    #### Encoder ####
    # Spatial size per frame (Keras conv arithmetic): 224 -> 54 -> 27 -> 27 -> 13 -> 13
    model.add(TimeDistributed(Conv2D(512, (11, 11), strides=4, padding="valid"), batch_input_shape=(None, 10, 224, 224, 1)))
    model.add(LayerNormalization())
    model.add(TimeDistributed(MaxPooling2D((2, 2), padding="valid")))

    model.add(TimeDistributed(Conv2D(256, (5, 5), strides=1, padding="same")))
    model.add(LayerNormalization())
    model.add(TimeDistributed(MaxPooling2D((2, 2), padding="valid")))

    model.add(TimeDistributed(Conv2D(128, (3, 3), padding="same")))

    #### Decoder ####
    # Spatial size per frame: 13 -> 26 -> 28 -> 56 -> 56 -> 224
    model.add(TimeDistributed(Conv2DTranspose(128, (3, 3), padding="same")))
    model.add(LayerNormalization())
    model.add(TimeDistributed(UpSampling2D((2, 2))))

    model.add(TimeDistributed(Conv2DTranspose(256, (3, 3), padding="valid")))
    model.add(LayerNormalization())
    model.add(TimeDistributed(UpSampling2D((2, 2))))

    model.add(TimeDistributed(Conv2DTranspose(512, (5, 5), padding="same")))
    model.add(LayerNormalization())
    model.add(TimeDistributed(Conv2DTranspose(1, (11, 11), strides=4, padding="same")))

    # summary() prints by itself and returns None; wrapping it in print() only
    # added a stray "None" line.
    model.summary()

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3))

    # Model.fit accepts a Sequence directly; fit_generator is deprecated.
    model.fit(
        training_generator,
        epochs=Config.EPOCHS,
        verbose=1,
        max_queue_size=Config.BATCH_SIZE,
    )

    model.save(Config.MODEL_PATH_GEN)
    return model
    
# With the default reload_model=True this trains the model from scratch and
# saves it to Config.MODEL_PATH_GEN.
get_model_gen()

In [None]:
def get_single_test():
    """Load every frame of the clip at ``Config.SINGLE_TEST_PATH``.

    Frames are the ``.tif`` files directly inside that directory, in sorted
    file-name order. Each one is converted to greyscale, resized to 224x224,
    standardised per frame and clipped to [0, 1].

    Returns:
        A float array of shape ``(n_frames, 224, 224, 1)``.
    """
    # Use one listing for both sizing and loading. Previously the size came from
    # a recursive os.walk with a different filter than the non-recursive loading
    # loop, so the two could disagree and leave all-zero frames at the end (or
    # overflow the array).
    frame_files = [
        f for f in sorted(listdir(Config.SINGLE_TEST_PATH))
        if pathlib.Path(f).suffix == ".tif"
    ]

    test = np.zeros(shape=(len(frame_files), 224, 224, 1))

    for cnt, f in enumerate(frame_files):
        # Close the file handle deterministically once the pixels are read.
        with Image.open(join(Config.SINGLE_TEST_PATH, f)) as raw:
            img = raw.convert('L').resize((224, 224))
        img = np.array(img, dtype=np.float32)

        # A constant frame has std == 0, which would turn the whole frame into
        # NaN; map it to zeros instead.
        std = img.std()
        if std > 0:
            img = (img - img.mean()) / std
        else:
            img = np.zeros_like(img)

        img = np.clip(img, 0, 1)
        test[cnt, :, :, 0] = img
    return test


In [None]:
def evaluate(reload_model=False):
    """Score the clip at ``Config.SINGLE_TEST_PATH`` and plot its regularity curve.

    Args:
        reload_model: forwarded to ``get_model_gen``; False (default) loads the
            saved model, True retrains it.

    Returns:
        ``(sr, sequences)``: the regularity score of every 10-frame sliding
        window, and the array of those windows fed to the model.
    """
    model = get_model_gen(reload_model)
    print("got model")

    test = get_single_test()
    print(test.shape)

    # One window per possible start frame: frames [start, start + 10).
    window = 10
    sz = test.shape[0] - window + 1
    sequences = np.zeros((sz, window, 224, 224, 1))
    for start in range(sz):
        sequences[start] = test[start:start + window]

    print("got data")

    # reconstruction error; regularity score
    reconstructed_sequences = model.predict(sequences, batch_size=Config.BATCH_SIZE)
    sequences_reconstruction_cost = np.array(
        [np.linalg.norm(sequences[k] - reconstructed_sequences[k]) for k in range(sz)]
    )
    lowest_cost = np.min(sequences_reconstruction_cost)
    highest_cost = np.max(sequences_reconstruction_cost)
    # Abnormality is the cost shifted by its minimum and divided by its maximum;
    # regularity is its complement.
    sa = (sequences_reconstruction_cost - lowest_cost) / highest_cost
    sr = 1.0 - sa

    # plot the regularity scores
    plt.plot(sr)
    plt.ylabel('regularity score Sr(t)')
    plt.xlabel('frame t')
    plt.show()

    return sr, sequences

In [None]:
# Score the clip configured in Config.SINGLE_TEST_PATH; reload_model=False makes
# evaluate() load the saved model instead of retraining it.
pr, before_reconstuction = evaluate(reload_model=False)

In [None]:
def plotROC(pr):
    """Compute ROC metrics for one clip's regularity scores and plot them.

    Labels are 1 for normal and 0 for the frames listed in
    ``TestVideoFile[Config.SINGLE_TEST_VIDEO_FILE]``. Each score covers a
    10-frame window and is compared with the label of the frame 5 positions
    after the window start.

    Args:
        pr: regularity scores, one per sliding window (high = normal).

    Returns:
        ``(auc, eer)`` for the clip.
    """
    y_pred = np.asarray(pr)

    # Every score covers `sequence_size` frames, so the clip length follows from
    # the number of scores; no need to count files on disk again.
    sequence_size = 10
    sz = len(y_pred) + sequence_size - 1

    y_test = [1] * sz
    for i in TestVideoFile[Config.SINGLE_TEST_VIDEO_FILE]:
        # Ignore annotated indices beyond the clip instead of raising IndexError.
        if i < sz:
            y_test[i] = 0

    # Align the labels with the windows: drop 5 leading and 4 trailing frames.
    y_test = y_test[5:sz - 4]

    # ROC needs both classes present. For clips with no normal frame one label
    # is forced to 1; this fabricated label also enters the counts below.
    if 1 not in y_test:
        y_test[0] = 1

    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred)
    fnr = 1 - tpr
    auc = metrics.roc_auc_score(y_test, y_pred)

    # Equal error rate: the operating point where FNR and FPR are closest.
    eer_index = np.nanargmin(np.absolute(fnr - fpr))
    eer_threshold = thresholds[eer_index]
    eer = fpr[eer_index]

    # Threshold maximising TPR - FPR.
    optimal = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal]

    # Confusion counts with "anomalous" as the positive class. They use the same
    # shifted labels as the ROC (the old loop used unshifted frame indices).
    # Scores equal to the threshold count as normal, as roc_curve does with
    # `score >= threshold`; they were previously counted in no cell at all.
    labels = np.asarray(y_test)
    is_anomalous = labels == 0
    predicted_normal = y_pred >= optimal_threshold
    truePositive = int(np.sum(~predicted_normal & is_anomalous))
    falsePositive = int(np.sum(~predicted_normal & ~is_anomalous))
    falseNegative = int(np.sum(predicted_normal & is_anomalous))
    trueNegative = int(np.sum(predicted_normal & ~is_anomalous))

    print("AUC: ", auc)
    print("EER: ", eer)
    print("EER THRESHOLD: ", eer_threshold)
    print("Optimal threshold value is:", optimal_threshold)

    print("TP:", truePositive, " FP:", falsePositive, " FN:", falseNegative, "TN: ", trueNegative)

    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % auc)
    plt.legend(loc = 'lower right')
    plt.plot([0, 1], [0, 1],'r--')
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

    plt.plot(y_test)
    plt.title('Ground Truth')
    plt.ylabel('GT')
    plt.xlabel('Frame')
    plt.show()

    return auc, eer

# Report ROC metrics for the scores `pr` of the single clip evaluated above.
plotROC(pr)


In [None]:
# Evaluate every test clip and average the per-clip AUC and EER.

# Test clip directories in sorted order, skipping any directory with 'gt' in its name.
clips = [
    join(Config.TEST_PATH, f)
    for f in sorted(listdir(Config.TEST_PATH))
    if isdir(join(Config.TEST_PATH, f)) and 'gt' not in f
]

scores = []

for i, clip_path in enumerate(clips):
    # Point the shared Config at the current clip; its ground-truth key is the
    # clip's 1-based position in the sorted listing.
    Config.SINGLE_TEST_PATH = clip_path
    Config.SINGLE_TEST_VIDEO_FILE = i + 1

    print("PATH: ", Config.SINGLE_TEST_PATH)
    print("GT: ", Config.SINGLE_TEST_VIDEO_FILE)

    pr, before_reconstuction = evaluate()
    scores.append(plotROC(pr))

# Column 0 is AUC, column 1 is EER (the tuple order returned by plotROC).
mean = np.mean(scores, axis=0)
print("AUC: ", mean[0])
print("EER: ", mean[1])