# **ASIRRA TensorFlow**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Deyht/AI_astro_ED_AAIF/blob/main/practical_works/CNN/classification/ASIRRA_TensorFlow.ipynb)

---


### **ASIRRA**

ASIRRA (Animal Species Image Recognition for Restricting Access) is a dataset that was originally used for CAPTCHA and HIP (Human Interactive Proof) challenges.

The original dataset comprises 25000 images of variable resolution (averaging around 350x500), equally distributed over the two classes "Cat" and "Dog". For this exercise, we provide two reduced versions in which the images are padded and resized to RGB images of either 128x128 or 256x256 pixels, stored as two binary files. This reduction is necessary so that the dataset can fit into the limited amount of Colab RAM. You can download one or both sets to test the impact of the input resolution on your network. In these files, the first 12500 images are of Cats, and the next 12500 are of Dogs. The last 1024 images of each class are held out from training to form our reference test dataset (total size 2048).

#### Downloading and visualizing the data

We start by downloading and visualizing the raw data. You can get one or both of the resized versions.

In [None]:
%%shell

# Download and unpack the dataset archives into /content
cd /content

# 128x128 version of the dataset
wget https://share.obspm.fr/s/6TBsCpAASeETH3S/download/asirra_bin_128.tar.gz
tar -xvzf asirra_bin_128.tar.gz

# 256x256 version of the dataset: uncomment the two lines below to fetch it as well
#wget https://share.obspm.fr/s/52nxyfn7PjzawSe/download/asirra_bin_256.tar.gz
#tar -xvzf asirra_bin_256.tar.gz

In [None]:
%cd /content/

import os
import matplotlib.pyplot as plt
import numpy as np

#Resolution of the binary file to preview
image_size = 128

#Display grid: v_width columns by v_height rows
v_width, v_height = 8, 5
nb_images = v_width*v_height

#Number of uint8 values (bytes) stored per RGB image
f_im_s = image_size*image_size*3

data_file = "asirra_bin_%d.dat"%(image_size)
nb_per_class = nb_images//2
subset_shape = (nb_per_class, image_size, image_size, 3)

#Cats are read from the start of the file
subset_cats = np.fromfile(data_file, dtype="uint8",
  count=f_im_s*nb_per_class).reshape(subset_shape)

#Dogs are read after skipping 12500 images (offset is given in bytes)
subset_dogs = np.fromfile(data_file, dtype="uint8",
  count=f_im_s*nb_per_class, offset=12500*f_im_s).reshape(subset_shape)

fig, ax = plt.subplots(v_height, v_width, figsize=(v_width*1.5,v_height*1.5), dpi=200, constrained_layout=True)

#Walk the grid row by row, alternating cats (even slots) and dogs (odd slots)
for slot, panel in enumerate(ax.flat):
  source = subset_cats if slot % 2 == 0 else subset_dogs
  panel.imshow(source[slot//2])
  panel.axis('off')

plt.show()

#### Training a network

Edit the following cells to train a network architecture on the ASIRRA dataset.

#### Training a network



In [None]:
%cd /content/

import numpy as np
from threading import Thread
import gc, os, sys, glob

from tensorflow import keras
from tensorflow.keras import layers
from sklearn import metrics
import tensorflow as tf



In [None]:
#Number of images per class stored in the binary file, and number of classes
class_count = 12500
nb_class = 2

#Number of images per class kept aside (taken from the end of each class) for testing
nb_keep_val = 1024

#image_size_raw selects which binary file is loaded (asirra_bin_<size>.dat)
image_size_raw = 128
image_size = 128
#working image size can be lowered to increase computation speed

batch_size = 16
AUTOTUNE = tf.data.AUTOTUNE

#The file name follows image_size_raw so that file and reshape always agree
raw_data_array = np.reshape(np.fromfile("asirra_bin_%d.dat"%(image_size_raw), dtype="uint8"), (class_count*2,image_size_raw,image_size_raw,3))

#Explicit (non-negative) boundaries: negative slices such as [-nb_keep_val:]
#select the wrong range when nb_keep_val is 0
train_end_cats = class_count - nb_keep_val
train_end_dogs = 2*class_count - nb_keep_val

train_examples = np.concatenate((raw_data_array[:train_end_cats], raw_data_array[class_count:train_end_dogs]), axis=0)
test_examples = np.concatenate((raw_data_array[train_end_cats:class_count], raw_data_array[train_end_dogs:]), axis=0)

#Release the full array before building the datasets to limit peak RAM usage
del raw_data_array
gc.collect()

#Number of training images per class (train_examples holds the first class, then the second)
nb_train_per_class = class_count - nb_keep_val

#One-hot targets: column 0 for the first class, column 1 for the second
train_labels = np.zeros((train_examples.shape[0], nb_class))
test_labels = np.zeros((test_examples.shape[0], nb_class))

train_labels[:nb_train_per_class, 0] = 1.0
train_labels[nb_train_per_class:, 1] = 1.0

test_labels[:nb_keep_val, 0] = 1.0
test_labels[nb_keep_val:, 1] = 1.0

#Alternate classes for better shuffle starting point:
#even positions receive the first class, odd positions the second
buf_train_examples = np.copy(train_examples)
buf_train_labels = np.copy(train_labels)

buf_train_examples[0::2] = train_examples[:nb_train_per_class]
buf_train_labels[0::2] = train_labels[:nb_train_per_class]

buf_train_examples[1::2] = train_examples[nb_train_per_class:]
buf_train_labels[1::2] = train_labels[nb_train_per_class:]

train_examples, train_labels = buf_train_examples, buf_train_labels

#Wrap the (image, label) pairs into tf.data datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_examples, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_examples, test_labels))

gc.collect()

#Resize every image to the working resolution and map pixel values from [0,255] to [0,1]
resize_and_rescale = tf.keras.Sequential([
  layers.Resizing(image_size, image_size),
  layers.Rescaling(1./255)
])

#Random augmentations, meant to be applied to training images only.
#The input shape is declared with an explicit Input object: passing
#input_shape= to the first layer is deprecated in recent Keras versions.
data_augmentation = tf.keras.Sequential([
  layers.Input(shape=(image_size, image_size, 3)),
  layers.RandomFlip('horizontal'),
  layers.RandomRotation(factor=(-0.1, 0.1), fill_mode='constant'),
  layers.RandomZoom(height_factor=(-0.2,0.2), width_factor=(-0.2,0.2), fill_mode='constant'),
  layers.RandomContrast(0.2),
  #value_range matches the [0,1] images produced by resize_and_rescale
  layers.RandomBrightness(0.2, value_range=(0.0, 1.0))
])


def prepare(ds, shuffle=False, augment=False):
  """Turn a dataset of (image, label) pairs into a batched, prefetched pipeline.

  Steps, in order: resize/rescale every image, optionally shuffle with a
  buffer of 1000 elements, group into batches of `batch_size`, optionally
  apply the random augmentations to each batch, then prefetch.

  Args:
    ds: dataset yielding (image, label) pairs.
    shuffle: when True, shuffle the elements before batching.
    augment: when True, run `data_augmentation` (in training mode) on each batch.

  Returns:
    The transformed dataset.
  """
  def _rescale(images, labels):
    return resize_and_rescale(images), labels

  def _augment(images, labels):
    #training=True forces the random layers to be active
    return data_augmentation(images, training=True), labels

  pipeline = ds.map(_rescale, num_parallel_calls=AUTOTUNE)

  if shuffle:
    pipeline = pipeline.shuffle(1000)

  pipeline = pipeline.batch(batch_size)

  if augment:
    pipeline = pipeline.map(_augment, num_parallel_calls=AUTOTUNE)

  return pipeline.prefetch(buffer_size=AUTOTUNE)

gc.collect()

#Training pipeline: shuffled and augmented
train_dataset = prepare(train_dataset, shuffle=True, augment=True)
#Test pipeline: no shuffling and no augmentation, so the sample order is kept
test_dataset = prepare(test_dataset, shuffle=False, augment=False)

gc.collect()


In [None]:

#Total number of training iterations (one epoch each) to reach
total_iter = 10

#Set load_iter > 0 to restart from the model saved at the end of that iteration
load_iter = 0

if(load_iter > 0):
	#Reload the checkpoint written by model.save() in the loop below
	model = keras.models.load_model('%04d.keras'%(load_iter))
else:
	model = keras.Sequential()

	#Add your architecture here






#NOTE: this compile call is also applied to a reloaded model
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])


for run_iter in range(load_iter,total_iter):

	#train_dataset is already shuffled and batched by the tf.data pipeline,
	#so batch_size and shuffle must not be passed to fit()
	model.fit(train_dataset, epochs=1, validation_data=test_dataset)

	#Checkpoint named after the number of completed iterations
	model.save('%04d.keras'%(run_iter+1))

	pred = model.predict(test_dataset)

	#Rows: true class, columns: predicted class (test_dataset is not shuffled,
	#so predictions are in the same order as test_labels)
	matrix = metrics.confusion_matrix(test_labels.argmax(axis=1), pred.argmax(axis=1))
	print (matrix)

#print(model.summary())

