# Example FRI vs FRII

In [3]:
import h5py

import keras 
from keras.utils import to_categorical
import numpy as np
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf

from format_data import add_random
from format_data import append_random
from format_data import augment
from format_data import generate_labels_fri
from format_data import generate_labels_frii
from format_data import get_aniyan
from format_data import get_fr
from format_data import get_random
from format_data import initial_resizing
from format_data import resize_array
from models import HOGNet
from models import SklearnModel
from query import download_fr_components
from query import download_random

import sklearn

In [2]:
# Seed passed to the train/test split and to both models below
seed = 0

# Directory prefixes (each already ends with a path separator)
data_path = 'data/'
save_path = 'saved/'

# HDF5 files read and written by the cells below
fr_data_path = f'{data_path}data.h5'
random_path = f'{data_path}random1000.h5'
everything_path = f'{data_path}everything.h5'

# Formatting Raw Data

Here, if the files do not already exist, we convert the output of fri-frii-download.ipynb, as well as an h5py file of random sources, into the required format and dump them into new h5py files.

In [None]:
# Each entry pairs a target file with the call that creates it. The order
# matters: the combined file is built from the two files listed before it.
build_steps = (
    (fr_data_path, lambda: download_fr_components(fr_data_path)),
    (random_path, lambda: download_random(random_path, n=1000)),
    (everything_path,
     lambda: add_random(fr_data_path, random_path, everything_path)),
)

# Only build the files that are not already on disk.
for target, build in build_steps:
    if not Path(target).is_file():
        build()

# Getting indices of training and testing data 

We use the get_fr function to get locations of the training and testing data from our FR data file.

Classes are split by split_ratio, with split_ratio x class going to the test set and (1 - split_ratio) x class going to the train set. The training and testing indices are returned.

In [None]:
# Index arrays for the train and test samples; a split_ratio of one third
# sends a third of each class to the test set.
train_i, test_i = get_fr(
    everything_path,
    split_ratio=1 / 3,
    seed=seed,
)

# Constructing the data generator 
Using the augment function, we construct our keras.preprocessing.image.ImageDataGenerator object, which applies random augmentations to our data.

In [None]:
# Augmentation generator shared by both models below.
datagen = augment(
    rotation_range=180,
    zoom_range=0.2,
    shift_range=0.0,
    flip=True,
)

# Instantiating Sklearn Classifier
We use the SklearnModel class to construct our random forest classifier. Any other Sklearn classifier can be used.

In [None]:
# The classifier class itself (not an instance) is handed to the wrapper.
# NOTE(review): nb_augment presumably sets how many augmented samples are
# drawn from datagen -- confirm against SklearnModel.
rft = SklearnModel(
    RandomForestClassifier,
    datagen=datagen,
    nb_augment=100,
    seed=seed,
)

# Training Classifier
We train our classifier with the fit method

In [None]:
# Read the image stack and the labels from disk exactly once, then release
# the file handle; everything below works on in-memory NumPy arrays.
with h5py.File(everything_path, 'r') as data:
    images = np.asarray(data['images'])
    labels = np.asarray(data['labels'])

# Select the train/test samples with the indices returned by get_fr.
train_x = images[train_i]
test_x = images[test_i]

# Formatting images: resize with dim=256, append a trailing axis at
# position 3 (the plotting cell reads it back as train_x[i, :, :, 0]),
# then normalise the pixel values.
# NOTE(review): normalize_pixels is not imported anywhere in this notebook,
# so this cell raises NameError unless it is defined elsewhere -- presumably
# it lives in format_data next to resize_array; confirm and import it.
train_x = resize_array(train_x, dim=256)
train_x = np.expand_dims(train_x, axis=3)
train_x = normalize_pixels(train_x)
test_x = resize_array(test_x, dim=256)
test_x = np.expand_dims(test_x, axis=3)
test_x = normalize_pixels(test_x)

# Formatting labels: boolean targets, False where the stored label is 1
# and True for every other label value.
train_y = labels[train_i] != 1
test_y = labels[test_i] != 1

In [None]:
import matplotlib.pyplot as plt

# Display every training image (its single trailing channel) followed by
# its boolean label. Pass cmap=... to imshow to change the colour map.
for idx, image in enumerate(train_x):
    plt.imshow(image[:, :, 0])
    plt.show()
    print(train_y[idx])

In [None]:
# Train the random forest wrapper on the formatted training images and
# their boolean labels.
rft.fit(train_x, train_y)

In [None]:
# Write the fitted classifier to 'saved/rft.pk' (save_path + file name).
rft.save(path=save_path + 'rft.pk')

# Saving Classifier to Pickle File
Using the save method

In [None]:
# Reload the classifier from the same file the save call wrote to.
# NOTE(review): the return value is discarded here, while a later cell
# rebinds `rft = rft.load()` -- confirm whether load works in place or
# returns the loaded model.
rft.load(path=save_path + "rft.pk")

In [None]:
# Display the classifier's predict_proba output for the test images
# (presumably per-class probabilities -- confirm against SklearnModel).
rft.predict_proba(test_x)

In [None]:
# Display the classifier's score on the held-out test images and labels
# (presumably accuracy -- confirm against SklearnModel.score).
rft.score(test_x, test_y)

In [None]:
# Display the classifier's predict output for the test images.
rft.predict(test_x)

# Classifying Unseen Samples
First, we load our model with the load method (if the save method has already been used in the same script, the path is inferred). We test our classifier with the predict method.

In [None]:
# Reload the classifier without a path; the notebook text states the path
# is inferred from the earlier save call.
# NOTE(review): this rebinds rft to whatever load() returns, whereas the
# earlier `rft.load(path=...)` cell ignores the return value -- if load()
# returns None the next line fails; confirm against SklearnModel.load.
rft = rft.load()

# NOTE(review): predictions is assumed to have shape (n_samples, 2), which
# is what the two-class one-hot comparison below requires -- confirm.
predictions = rft.predict_proba(test_x)

# One-hot encode the boolean labels into two classes.
# NOTE: this rebinds test_y, so running the cell a second time would
# encode the already one-hot labels again.
test_y = to_categorical(test_y, 2)

# A sample is correct when its most probable class is the labelled class.
# Comparing the one-hot labels with the float probabilities via `==` would
# only match where a probability is exactly 0.0 or 1.0, so compare the
# class indices instead; correct.mean() is then the accuracy.
correct = np.argmax(predictions, axis=1) == np.argmax(test_y, axis=1)

# Instantiating HOGNet Model
We use the HOGNet class to construct our custom keras model.

In [None]:
# Custom keras model wrapper; the training-length settings here
# (one step per epoch, one epoch at most) keep the example run short.
hognet = HOGNet(
    datagen=datagen,
    batch_size=50,
    steps_per_epoch=1,
    max_epoch=1,
    patience=5,
    seed=seed,
)

# Training Classifier
We train our classifier with the fit method. The model stops training when the loss stops decreasing for a set number of epochs (controlled by the patience argument) or when it reaches the maximum number of epochs. The amount of data augmented by datagen is dictated by batch_size, and the number of batches generated per epoch is controlled by steps_per_epoch.

In [None]:
# Train HOGNet on the formatted training images and their boolean labels.
hognet.fit(train_x, train_y)

In [None]:
# Display HOGNet's score on the test set.
# NOTE(review): if the random-forest evaluation cell has been run, test_y
# is one-hot encoded at this point, whereas fit received boolean labels --
# confirm which label format HOGNet.score expects.
hognet.score(test_x, test_y)

In [None]:
# Display HOGNet's predict_proba output for the test images
# (presumably per-class probabilities -- confirm against HOGNet).
hognet.predict_proba(test_x)

In [None]:
# Display HOGNet's predict output for the test images.
hognet.predict(test_x)

# Saving Model Weights to H5py file
Using the save method

In [None]:
# Save the model weights next to the data files. The path is built from
# the configured data_path instead of a hard-coded directory, and still
# resolves to 'data/hognet.h5' with the default settings.
# NOTE(review): the random forest is saved under save_path ('saved/');
# confirm whether the HOGNet weights should live there as well.
hognet.save(path=data_path + 'hognet.h5')

# Classifying Unseen Samples
First, we load our weights into the model with the load method (if the save method has already been used in the same script, the path is inferred). We test our classifier with the predict method.

In [None]:
# Restore the saved weights; no path is given, so per the notebook text it
# is inferred from the earlier save call.
hognet.load()

# Print the raw predict_proba output for the test images. An accuracy
# computed by comparing these values with test_y via `==` was previously
# sketched here and is intentionally left out.
predictions = hognet.predict_proba(test_x)
print(predictions)