In [35]:

from __future__ import print_function

import argparse
from argparse import RawTextHelpFormatter
import numpy as np
import os
import sys
import json
import pandas as pd
import re
import shutil

# plotting and other
import matplotlib.pyplot as plt
from matplotlib import gridspec, colors
import seaborn as sns
import cv2


In [36]:
# Function for JSON import
def json_importer(json_file):
    '''
    Load a JSON file into a DataFrame and annotate every image with its well and row.

    The parsed JSON is handed directly to ``pd.DataFrame``; the resulting frame
    must provide an ``image`` column holding file paths. Two columns are added:

    * ``well`` - the ``Pt<digits>_r<dd>c<dd>`` token found in the image path
    * ``row``  - the ``r<dd>`` part of that very same token

    :param json_file: path to JSON
    :return: Pandas DataFrame with the additional ``well`` and ``row`` columns
    :raises ValueError: if an image path does not contain a well token
    '''
    with open(json_file, 'r') as json_in:
        df = pd.DataFrame(json.load(json_in))

    # The row is captured as a group of the well token instead of being searched
    # for separately: an independent r'r\d\d' search over the whole path can hit
    # an unrelated directory name and yield a row that disagrees with the well.
    well_re = re.compile(r'Pt\d+_(r\d\d)c\d\d')

    wells = []
    rows = []
    for image_path in df.image:
        match = well_re.search(image_path)
        if match is None:
            # Name the offending path instead of failing on ``None[0]``
            raise ValueError(
                "No well token (Pt<n>_r<dd>c<dd>) found in image path: {!r}".format(image_path))
        wells.append(match.group(0))
        rows.append(match.group(1))

    df['well'] = wells
    df['row'] = rows
    return df

In [37]:
# --- Paths -------------------------------------------------------------------
# JSON file that is loaded into the per-image DataFrame
json_file = "/home/aogorodnikov/aae_filter_Pt04/Pt04.json"
# Root folder under which the <plate>/<row> output folders are created
output = "/home/aogorodnikov/anomaly_links/"

# --- Thresholds --------------------------------------------------------------
# An image counts as anomalous when its ae_loss OR its adv_loss is >= the cutoff
ae_loss, adv_loss = 0.0054373, 0.7413648

# --- Options -----------------------------------------------------------------
# When True, a random sample of normal images is kept alongside the anomalies
example = True

In [38]:
# Seed NumPy's global RNG so that the random draws further down are repeatable
np.random.seed(33)

# One row per image, loaded from the JSON file
df = json_importer(json_file)

# Anomalies: images whose ae_loss or adv_loss reaches its cutoff
df_anomaly = df[df.ae_loss.ge(ae_loss) | df.adv_loss.ge(adv_loss)]

In [40]:
# Sanity check: (number of rows, number of columns) of df
df.shape

(5667997, 5)

In [6]:
# Report how many images passed the anomaly filter
print("Number of images post-threshold:{}".format(len(df_anomaly.index)))

Number of images post-threshold:1530099


In [9]:
# if normal data needed:
if example:
    # Pool of normal images: every index label that is not among the anomalies
    normal_idx = df.index.difference(df_anomaly.index)

    # Draw the sample directly from that pool. The previous approach drew 200
    # rows, dropped the anomalous ones and then demanded exactly 100, which
    # raises ValueError whenever the frame has fewer than 200 rows or fewer
    # than 100 of the 200 draws happen to be normal.
    n_normal = min(100, len(normal_idx))
    rand_row = np.random.choice(normal_idx, size=n_normal, replace=False)

    # keep (up to) 100 random normal images
    df = df.loc[rand_row, :]
else:
    # The full frame is not needed when no normal examples are requested
    del df


In [13]:
# CREATE OUTPUT DIRECTORIES
# Layout: <output>/<plate>/<row>, one folder per row label among the anomalies
subdirs = df_anomaly.row.unique()
# Plate name = JSON file name up to its first dot
pt = os.path.basename(json_file).split('.')[0]
# Create the plate folder explicitly so it exists even when there are no
# anomalous rows (the example export writes into it)
os.makedirs(os.path.join(output, pt), exist_ok=True)
for i in subdirs:
    outPath = os.path.join(output, pt, i)
    # exist_ok replaces the separate exists() check: no check-then-create race
    # and the cell can be re-run safely
    os.makedirs(outPath, exist_ok=True)


In [29]:

# CREATE SYMLINK OF ANOMALY IMAGES
# Full source path of every anomalous image, and its file name (last path component)
source_path_ls = df_anomaly.image.tolist()
filenames_ls = list(map(os.path.basename, source_path_ls))
# The link creation itself is currently disabled; the two lines are kept for reference:
#dest_path_ls = [os.path.join(output, pt, df_anomaly.row.to_list()[x], filenames_ls[x]) for x in range(len(filenames_ls))]
#[os.symlink(source_path_ls[x], dest_path_ls[x]) for x in range(len(dest_path_ls))]



In [31]:
# Row label of every anomalous image as a plain Python list, built once up front
# NOTE(review): this variable deserves a descriptive name (e.g. anomaly_rows);
# rename it together with its use in the timing cell.
shit = df_anomaly["row"].tolist()

In [34]:
%%time
# Timing probe: build the destination path <output>/<pt>/<row>/<file name> for
# the first 1000 anomalous images only, using the pre-built row list.
[os.path.join(output, pt, shit[x], filenames_ls[x]) for x in range(1000)]

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 2.3 ms


['/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0021.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0023.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0026.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0029.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0031.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0035.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0038.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0040.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0042.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0048.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0049.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0050.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0051.tif',
 '/home/aogorodnikov/anomaly_links/Pt04/r01/Pt04_r01c01_f01_0053.tif',
 '/hom

In [None]:

# OPTIONAL export of example images
## One 5 x 5 grid (PDF) per category: anomalous images and the sampled normal images
# Uses the notebook-level `example` / `output` settings; `argsP` is not defined
# anywhere in this notebook and raised NameError.
if example:
    columns = 5
    rows = 5

    # savefig does not create missing folders, so make sure the target exists
    example_dir = os.path.join(output, pt)
    os.makedirs(example_dir, exist_ok=True)

    for out_type in ['anomaly', 'normal']:
        if out_type == "anomaly":
            source_data = df_anomaly.image
        else:
            source_data = df.image

        # Never request more images than are available: sampling without
        # replacement raises ValueError otherwise
        n_show = min(columns * rows, len(source_data))
        rand_select = np.random.choice(source_data, n_show, replace=False)

        fig = plt.figure(figsize=(8, 8))
        for k, img_path in enumerate(rand_select, start=1):
            # flag -1: load the image unchanged
            img = cv2.imread(img_path, -1)
            if img is None:
                # cv2.imread reports an unreadable or missing file by returning None
                raise IOError("Could not read image: {}".format(img_path))

            ax = fig.add_subplot(rows, columns, k)
            ax.imshow(img)
            ax.axis('off')

        fig.savefig(fname=os.path.join(example_dir, '{}_example.pdf'.format(out_type)))