In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import re
from commons import *
from datetime import datetime

In [None]:
# Input / output locations for the resize step; the call itself is left commented out below.
# NOTE(review): presumably the resize was run once by hand and then disabled — confirm.
output_path = "/home/zenfur/magister/metu_siamese_inputs"
raw_dataset_path = "/qarr/studia/magister/datasets/METU/query_reversed/"
# prepare_resized_dataset()

In [None]:
# Directory prefix used to rebuild the source paths of the hand-picked samples.
METU_RAW_PATH = "/qarr/studia/magister/datasets/METU/930k_logo_v3/"
# Directory that the input pipeline globs for *.jpg files.
METU_DATASET_PATH = "/home/zenfur/magister/resized_930k_logo/"
# Evaluation images: directory that is globbed for inputs ...
EVAL_ORIGIN_PATH = "/qarr/studia/magister/datasets/METU/query_reversed/"
# ... and directory that receives the prepared / renamed files.
EVAL_DATASET_PATH = "/home/zenfur/magister/metu_eval_256sq/"
# Switch guarding the interactive sample-picking and dataset-preparation cells.
TESTING = False


In [None]:
# Collect the path of every JPEG directly under the dataset directory and report the count.
imagesList = tf.io.matching_files(f"{METU_DATASET_PATH}*.jpg")
print(f"Found {len(imagesList)} images in dataset")

@tf.function
def tf_read_image(path):
    """Read the file at `path` and decode it as a 3-channel JPEG image.

    Decoding uses the 'INTEGER_ACCURATE' DCT method.
    """
    encoded = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(encoded, channels=3, dct_method='INTEGER_ACCURATE')
    return decoded


@tf.function
def tf_convert_and_normalize_img(img):
    """Cast `img` to float32 and divide every value by 256.

    No offset is subtracted; the values are only scaled.
    """
    scale = tf.constant(256.0, dtype=tf.float32)
    as_float = tf.cast(img, tf.float32)
    return tf.math.divide(as_float, scale)



# Dataset of image paths; both pipelines below are derived from it.
pathsDB = tf.data.Dataset.from_tensor_slices(imagesList)
lenMetu = len(pathsDB)
SHARDS = 1
SHARD_IDX = 0
BATCH_SIZE = 1
SUPER_BATCH = 32

# Number of elements in shard SHARD_IDX: the first (lenMetu % SHARDS) shards hold one extra element.
# Used as the shuffle buffer size so the shuffle spans the whole shard.
shardLen = lenMetu//SHARDS + (lenMetu % SHARDS > SHARD_IDX)
SHUFFLE_SEED = 1231231

# Image pipeline. It is sharded exactly like the path side of `metuPairs`; without the shard
# step the two zipped branches would iterate different elements whenever SHARDS > 1.
metuDB = ( pathsDB.shard(SHARDS, SHARD_IDX)
                .shuffle(shardLen, seed=SHUFFLE_SEED)
                .map(tf_read_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                # Normalise in groups of BATCH_SIZE*SUPER_BATCH images, then regroup into BATCH_SIZE.
                .batch(BATCH_SIZE*SUPER_BATCH)
                .map(tf_convert_and_normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                .prefetch(tf.data.experimental.AUTOTUNE)
                .unbatch()
                .batch(BATCH_SIZE)
         )

# (path, image batch) pairs. The path branch is a second, separately built shuffle over the same
# shard with the same buffer size and seed.
# NOTE(review): the pairing presumably holds only while both shuffles emit the same order;
# iterating `metuDB` on its own may advance its shuffle state and desynchronise it — confirm.
# NOTE(review): one path is zipped with one image batch, so names line up with images only
# for BATCH_SIZE == 1.
metuPairs = tf.data.Dataset.zip((pathsDB.shard(SHARDS, SHARD_IDX).shuffle(shardLen, seed=SHUFFLE_SEED), metuDB))

# Exploring the dataset manually in search of decent examples

In [None]:
# Iterator over the (path, image batch) pairs as NumPy values; advanced by the browsing cell.
dbIt = metuPairs.as_numpy_iterator()
# Paths of the examples picked by hand.
saved = []

In [None]:
if TESTING:
    # Pull the next 16 pairs from the iterator and show them in a 4x4 grid, each tile titled
    # with its position in `names`.
    fig, subs = plt.subplots(4, 4, figsize=(10, 10))
    names = []
    for idx, ax in enumerate(subs.flat):
        row, col = divmod(idx, 4)
        name, img = next(dbIt)
        names.append(name)
        print(row, col, idx, name)
        # `img` is a batch; display its first image.
        ax.imshow(img[0])
        ax.set_title(str(idx))

In [None]:
# Keep one example from the grid above: the index into `names` is edited by hand per pick.
if TESTING:
    picked_path = names[2].decode('utf-8')
    saved.append(picked_path)
    print(len(saved))

# Writing or loading the saved examples list, assuming N=64

In [None]:
# Writing is disabled (the original guard was a hard-coded False); set the flag to True to
# save a freshly picked list. Mode "x" raises FileExistsError rather than overwriting.
WRITE_CHOSEN_SAMPLES = False
if WRITE_CHOSEN_SAMPLES:
    with open('chosen_samples.txt', "x") as f:
        f.write('\n'.join(saved))
else:
    # Load the stored paths, one per line. The line terminator is removed here so the entries
    # look the same as those collected interactively; blank lines are skipped.
    with open('chosen_samples.txt', "r") as f:
        saved = [line.rstrip('\n') for line in f if line.strip()]

### Printing the chosen logos in a grid

In [None]:
if TESTING:
    # Show the first 64 saved logos in an 8x8 grid with axis ticks removed.
    fig, subs = plt.subplots(8, 8, figsize=(8*2.5, 8*2.5))
    for idx, ax in enumerate(subs.flat):
        encoded = tf.io.read_file(saved[idx].strip('\n'))
        img = tf.image.decode_jpeg(encoded)
        ax.imshow(img)
        ax.set_yticks([])
        ax.set_xticks([])
    fig.tight_layout()

## Preparing origin filenames to resize and rename

In [None]:
if TESTING:
    # NOTE(review): `os` and `prepare_resized_dataset` are not imported explicitly in the visible
    # cells; presumably both come from `from commons import *` — confirm.

    # File names of the chosen samples (directory part and newline removed), re-rooted under
    # the raw METU directory.
    chosen_names = [re.sub(r".*/", "", path).strip('\n') for path in saved]
    target_paths = [METU_RAW_PATH + file_name for file_name in chosen_names]

    # Every entry matched in the evaluation source directory, decoded to str.
    target_paths_2 = [raw.decode('utf-8') for raw in tf.io.matching_files(EVAL_ORIGIN_PATH+"*").numpy()]

    # Prepare dummy images
    # NOTE(review): the meaning of the first positional argument (False here, None below) is
    # defined by the helper, which is not visible in this notebook — verify.
    prepare_resized_dataset(False, EVAL_DATASET_PATH, inputFilesList=target_paths, xScale=256, yScale=256, margin=2)
    # Rename dummy images as class 0, numbered from 1 in list order
    for i, img_path in enumerate(target_paths):
        resized_path = EVAL_DATASET_PATH + os.path.basename(img_path)
        os.rename(resized_path, EVAL_DATASET_PATH + f"0-{i+1}.jpg")

    # Process the evaluation query images into the same output directory.
    prepare_resized_dataset(None, EVAL_DATASET_PATH, inputFilesList=target_paths_2, xScale=256, yScale=256, margin=2)

In [None]:
# `img` is only assigned inside the TESTING cells; without the guard this cell raises
# NameError on a fresh Restart & Run All with TESTING = False.
img.shape if TESTING else None

In [None]:
# Random number of counter-clockwise quarter turns, drawn from {0, 1, 2, 3}.
randomrot = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32)
# `img` is only assigned inside the TESTING cells, so the preview is skipped otherwise.
if TESTING:
    rotated = tf.image.rot90(img, k=randomrot)
    # `img` is a 4-D batch when it comes from the dataset iterator and a 3-D image when it
    # comes from the logo grid; only the batch needs its first element picked out.
    plt.imshow(rotated[0] if len(rotated.shape) == 4 else rotated)
