# Imports

In [1]:
from architectures.lenet5 import LeNet5
from architectures.alexnet import AlexNet
from architectures.vgg16 import VGG16
from architectures.resnet50 import ResNet50
from Datasets import load_cifar10, load_cifar100, load_mnist, load_fashion_mnist
from sklearn.model_selection import KFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow.keras as keras
import pickle

# Load Datasets

In [2]:
# Registry of benchmark datasets, keyed by the name used in log lines and
# output file names. Every loader runs eagerly here; the training cell unpacks
# each value as (X_train, Y_train, X_test, Y_test).
datasets = dict(
    cifar10=load_cifar10(),
    cifar100=load_cifar100(),
    mnist=load_mnist(),
    fashion_mnist=load_fashion_mnist(),
)
            

# Load Architectures

In [3]:
# Registry of model wrapper classes (not instances), keyed by the name used in
# log lines and output file names. The training cell instantiates each class
# once per hyperparameter combination.
architectures = dict(
    LeNet5=LeNet5,
    AlexNet=AlexNet,
    VGG16=VGG16,
    ResNet50=ResNet50,
)

# Set Hyperparameters

In [4]:
# Candidate learning-rate grid: x**2 / 10000 for x in 0, 3, 6, 9, 12, with the
# degenerate x == 0 entry replaced by 0.0001.
learning_rates = [0.0001 if x == 0 else (x ** 2) / 10000 for x in range(0, 15, 3)]
print(f"Learning rates: {learning_rates}")

# NOTE(review): the grid printed above is discarded here and a single learning
# rate is used instead. This looks like a debugging override; confirm whether
# it should stay before running the full search.
learning_rates = [0.0005]

# Search space and fixed training settings consumed by the training cell.
# Optimizers are stored as classes and instantiated per run with a learning rate.
hyperparameters = dict(
    optimizers=[keras.optimizers.SGD, keras.optimizers.Adam],
    learning_rates=learning_rates,
    epochs=20,
    batch_size=16,
)

Learning rates: [0.0001, 0.0009, 0.0036, 0.0081, 0.0144]


# Train Models

In [5]:
# Train every architecture on every dataset.
#
# For each (dataset, architecture) pair, every (optimizer, learning rate)
# combination is trained on its own K-fold split of the training data. The
# model with the highest final validation accuracy is evaluated on the test
# set, saved, and its training history is written to JSON.
for dataset_name, dataset in datasets.items():
    X_train, Y_train, X_test, Y_test = dataset

    # Axis 1 of the labels is used as the class count (presumably one-hot
    # labels) and axis 3 of the images as the channel count.
    classes = Y_train.shape[1]
    filters = X_train.shape[3]

    # NOTE(review): "For testing" subsample. Only 1024 training samples are
    # kept, and the split is unseeded so it differs between runs. Remove or
    # seed it before a real experiment.
    X_train, _, Y_train, _ = train_test_split(X_train, Y_train, train_size=1024, test_size=1)

    # Enumerate the full cartesian product explicitly. Advancing two indices
    # in lock-step (i + 1, j + 1) only visits every pair when the two list
    # lengths are coprime; otherwise some pairs are skipped and others repeat.
    combinations = [(optimizer_class, rate)
                    for optimizer_class in hyperparameters["optimizers"]
                    for rate in hyperparameters["learning_rates"]]

    batch_size = hyperparameters["batch_size"]

    # Iterate through all architectures
    for architecture_name, architecture in architectures.items():
        print(architecture_name, dataset_name)

        # [model, history, validation accuracy] of the best run so far
        best_model = [None, None, float("-inf")]

        # One fold per combination. KFold rejects n_splits < 2, so a single
        # combination still gets a 2-fold split and zip() below uses only the
        # first fold.
        kf = KFold(n_splits=max(2, len(combinations)))

        # The augmentation settings do not depend on the fold, so build the
        # generator once. datagen.fit() is intentionally not called: it only
        # computes statistics for featurewise centering/normalization or ZCA
        # whitening, none of which are enabled, and it copies the whole
        # training array.
        datagen = ImageDataGenerator(width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     zoom_range=0.2,
                                     fill_mode='nearest',
                                     horizontal_flip=True)

        # Iterate through all combinations of learning rates and optimizers
        for (optimizer, learning_rate), (train_index, test_index) in zip(combinations, kf.split(X_train)):
            # Build and compile model
            arch = architecture(classes=classes,
                                optimizer=optimizer(learning_rate=learning_rate),
                                filters=filters)
            model = arch.model

            # Resize images to the input size expected by the architecture
            X_tr = architecture.resize_images(X_train[train_index])
            Y_tr = Y_train[train_index]
            X_vl = architecture.resize_images(X_train[test_index])
            Y_vl = Y_train[test_index]

            # Keras expects an integer step count; round up so the final
            # partial batch is still consumed.
            steps_per_epoch = -(-len(X_tr) // batch_size)

            # Fit model on augmented batches, validate on the untouched fold
            history = model.fit(datagen.flow(X_tr, Y_tr, batch_size=batch_size),
                                validation_data=(X_vl, Y_vl),
                                epochs=hyperparameters["epochs"],
                                steps_per_epoch=steps_per_epoch)

            val_accuracy = history.history['val_accuracy'][-1]

            # Keep the first run unconditionally so a model always exists,
            # even if its validation accuracy is 0.0 or NaN.
            if best_model[0] is None or val_accuracy > best_model[2]:
                best_model = [model, history, val_accuracy]

        if best_model[0] is None:
            # Only reachable when there are no hyperparameter combinations.
            print("No model trained for " + architecture_name + " on " + dataset_name)
            continue

        # Evaluate best model
        _, test_accuracy = best_model[0].evaluate(architecture.resize_images(X_test), Y_test)

        # Save best model
        name = str("{:.2f}".format(test_accuracy) + "Acc_" +
                   architecture_name + "_" + dataset_name)

        best_model[0].save(name)

        hist_df = pd.DataFrame(best_model[1].history)

        # save to json:
        hist_json_file = name + '_history' + '.json'
        with open(hist_json_file, mode='w') as f:
            hist_df.to_json(f)


VGG16 cifar10


2021-09-16 20:13:11.037667: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-16 20:13:15.627314: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-16 20:13:15.649118: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2199995000 Hz




2021-09-16 20:18:36.054796: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1505280000 exceeds 10% of free system memory.




KeyboardInterrupt: 

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=64d23a20-b5bc-41cc-930c-3bd7ede12b0c' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>