# Load libraries

In [1]:
!git clone https://github.com/koshian2/OctConv-TFKeras
!mv OctConv-TFKeras/*.py ./

Cloning into 'OctConv-TFKeras'...
remote: Enumerating objects: 46, done.[K
remote: Counting objects: 100% (46/46), done.[K
remote: Compressing objects: 100% (45/45), done.[K
remote: Total 46 (delta 20), reused 3 (delta 0), pack-reused 0[K
Unpacking objects: 100% (46/46), done.


# Train OctConv Wide ResNet
* alpha = 0 -> normal wide res-net
* alpha > 0 -> OctConv wide res-net

It takes about 2 hour for training.

In [7]:
import tensorflow as tf
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, History
from tensorflow.contrib.tpu.python.tpu import keras_support
from models import *

from keras.datasets import cifar10
from keras.utils import to_categorical
import pickle, os, time

def lr_scheduler(epoch):
    x = 0.1
    if epoch >= 100: x /= 5.0
    if epoch >= 150: x /= 5.0
    if epoch >= 200: x /= 5.0
    return x

def train(alpha):
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    train_gen = ImageDataGenerator(rescale=1.0/255, horizontal_flip=True, 
                                    width_shift_range=4.0/32.0, height_shift_range=4.0/32.0)
    test_gen = ImageDataGenerator(rescale=1.0/255)
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    
    tf.logging.set_verbosity(tf.logging.FATAL)

    if alpha <= 0:
        model = create_normal_wide_resnet()
    else:
        model = create_octconv_wide_resnet(alpha)
    model.compile(SGD(0.1, momentum=0.9), "categorical_crossentropy", ["acc"])
    model.summary()

    # include the epoch in the file name. (uses `str.format`)
    checkpoint_path = "training/cp-{epoch:04d}.h5"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    cp_callback = tf.keras.callbacks.ModelCheckpoint(
       checkpoint_path, verbose=1, save_weights_only=True,
       # Save weights, every 5-epochs.
       period=50)
    
    # convert to tpu model
    tpu_grpc_url = "grpc://"+os.environ["COLAB_TPU_ADDR"]
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
    strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)

    batch_size = 128
    scheduler = LearningRateScheduler(lr_scheduler)
    hist = History()
   
    start_time = time.time()
    model.fit_generator(train_gen.flow(X_train, y_train, batch_size, shuffle=True),
                        steps_per_epoch=X_train.shape[0]//batch_size,
                        validation_data=test_gen.flow(X_test, y_test, batch_size, shuffle=False),
                        validation_steps=X_test.shape[0]//batch_size,
                        callbacks=[scheduler, hist, cp_callback], max_queue_size=5, epochs=200)
    elapsed = time.time() - start_time
    
    model.save_weights(checkpoint_path, overwrite=True)    
    
    print(elapsed)

    history = hist.history
    history["elapsed"] = elapsed

    with open(f"octconv_alpha_{alpha}.pkl", "wb") as fp:
        pickle.dump(history, fp)

if __name__ == "__main__":
    train(0.25)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
average_pooling2d_30 (AveragePo (None, 16, 16, 3)    0           input_6[0][0]                    
__________________________________________________________________________________________________
oct_conv2d_105 (OctConv2D)      [(None, 32, 32, 120) 8640        input_6[0][0]                    
                                                                 average_pooling2d_30[0][0]       
__________________________________________________________________________________________________
batch_normalization_v1_215 (Bat (None, 32, 32, 120)  480         oct_conv2d_105[0][0]             
__________

# Check Test Accuracy

In [8]:
with open("octconv_alpha_0.25.pkl", "rb") as fp:
    data = pickle.load(fp)
    print(f"Max test accuracy = {max(data['val_acc']):.04}")

Max test accuracy = 0.9467


In [10]:
!ls training\

cp-0050.h5  cp-0100.h5	cp-0150.h5  cp-0200.h5	cp-{epoch:04d}.h5


In [0]:
import matplotlib.pyplot as plt
from models import *
import numpy as np
import time
from tqdm import tqdm

(X_train, y_train), (X_test, y_test) = cifar10.load_data()
y_test = y_test[:1000]
X_test = X_test[:1000].reshape(-1, 32 * 32) / 255.0

# Restore the weights
model = create_octconv_wide_resnet(0.25)
model.load_weights('training/cp-0200.h5')

def eval_all():
    #loss, acc = model.evaluate(X_test, y_test)
    #print("Restored model, accuracy: {:5.2f}%".format(100*acc))
    #print("Restored model, accuracy: {:5.2f}%".format(100*acc))
    #X = np.random.rand(50000, 32, 32, 3).astype(np.float32)
    #for alpha in [0, 0.25, 0.5, 0.75]:
    #    if alpha == 0:
    #        model = create_normal_wide_resnet()
    #    else:
    #        model = create_octconv_wide_resnet(alpha)
    #results = []
    #for i in tqdm(range(20)):
    #st = time.time()
    Y_pred = model.predict(X_test, batch_size=128)
    plt.scatter(X_test, Y_test)
    plt.plot(X_test, Y_pred)
    plt.show()
    print('end')
    #results.append(time.time()-st)
    #results = np.array(results)
    #print("alpha = ", alpha)
    #print(f"Mean = {np.mean(results):.04}, Median = {np.median(results):.04}"+
    #          f", SD = {np.std(results):.04}")

if __name__ == "__main__":
    eval_all()