In [1]:
import sys
import os

from matplotlib import pyplot as plt
# from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import cifar10
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.utils import to_categorical
import numpy as np
import cv2
import math
import sys
import logging
from io import StringIO 
import re
from arg_parse import *
from sklearn.model_selection import StratifiedShuffleSplit
import seaborn as sns
import contextlib
import datetime

# Disable the 'matplotlib.font_manager' logger so its records never reach the
# handlers; the root logger is switched to DEBUG further down in this notebook.
logging.getLogger('matplotlib.font_manager').disabled = True

# NOTE(review): seaborn is already imported above; this repeated import is redundant.
import seaborn as sns

# Use seaborn's 'whitegrid' style for the figures produced by this notebook.
sns.set_style('whitegrid')

In [2]:
# Make the "Sklearn-neat" directory next to the current working directory
# importable (the imports below rely on it). This has to run before the notebook
# changes its working directory, because the path is derived from os.getcwd().
# The membership check keeps sys.path free of duplicates when the cell is re-run.
neat_path = os.path.join(os.getcwd(), "Sklearn-neat")
if neat_path not in sys.path:
    sys.path.append(neat_path)

import neat
from neat import math_util
from neat.puissance import Puissance 

from neuro_evolution import NEATClassifier

In [3]:
# Timestamp of this run ("day.month_hour.minute"); it is used to name the output folder.
# NOTE(review): the name `time` shadows the standard-library module of the same name.
now = datetime.datetime.now()
time = format(now, "%d.%m_%H.%M")

In [4]:
# Resolve the run's output folder to an absolute path right away. The next cell
# chdir()s into this folder, and a relative "../outputs/..." would then point to a
# different (nested) location if that cell were executed a second time.
output_folder = os.path.abspath("../outputs/output_"+time)

In [5]:
# Sub-folder (relative to the output folder) that receives the saved figures.
fig_loc = "figures/"

# Create the run's output folder if needed and make it the working directory, so
# that 'output.txt' and the figures are written inside it.
os.makedirs(output_folder, exist_ok=True)
os.chdir(output_folder)

# plot_results() saves to fig_loc + <file name>; the folder has to exist first,
# otherwise savefig fails in a freshly created output directory.
os.makedirs(fig_loc, exist_ok=True)

print("Current working directory: {}".format(os.getcwd()))

Current working directory: /home/sram/radboud-naco-project/outputs/output_09.06_21.31


In [6]:
# Remember the real streams so they can be restored once training output has been captured.
original_stdout, original_stderr = sys.stdout, sys.stderr

# Log file (in the current working directory) that receives a copy of the training output.
logfile = open('output.txt', 'w')

In [None]:
# Override run parameters on `args` for this notebook session.
# NOTE(review): `args` is not defined in this notebook; presumably it is provided
# by `from arg_parse import *` — confirm.
args.population_size = 10  # passed to NEATClassifier as pop_size
args.fitness_limit = 0.75  # passed to NEATClassifier as fitness_threshold
args.generations = 8  # passed to NEATClassifier as number_of_generations; also used for the plot x-ticks

In [None]:
class Tee(object):
    """Minimal write-only stream that duplicates everything to several streams.

    Used to send stdout/stderr both to the notebook and to a log file.
    """

    def __init__(self, *files):
        # Streams that receive every write; each must provide write() and flush().
        self.files = files

    def write(self, obj):
        """Write `obj` to every underlying stream and return its length.

        Text streams are expected to report the number of characters written;
        returning it keeps callers that check the result of write() working.
        """
        for f in self.files:
            f.write(obj)
            f.flush() # If you want the output to be visible immediately
        return len(obj)

    def flush(self):
        """Flush every underlying stream."""
        for f in self.files:
            f.flush()

In [None]:
# Route all log records (DEBUG and above) of the root logger to stdout.
root = logging.getLogger()
root.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

handler = logging.StreamHandler(sys.stdout)
handler.set_name("notebook-stdout")
handler.setLevel(logging.DEBUG)
handler.setFormatter(formatter)

# Handlers accumulate on the root logger: without this guard every re-run of the
# cell would add another one and each record would be printed once per handler.
if not any(existing.get_name() == "notebook-stdout" for existing in root.handlers):
    root.addHandler(handler)

In [None]:
# class Capturing(list):
#     def __enter__(self):
#         self._stdout = sys.stdout
#         sys.stdout = self._stringio = open('output.txt','wt')
#         return self
#     def __exit__(self, *args):
#         self.extend(self._stringio.getvalue().splitlines())
#         print(self)

#         del self._stringio    # free up some memory
#         sys.stdout = self._stdout

In [None]:
# Load CIFAR-10 and merge the predefined train/test parts into one data set; the
# notebook draws its own stratified split further down.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Concatenate along the sample axis. np.append without an axis would flatten both
# arrays first and require reshaping back with hard-coded dimensions.
X = np.concatenate((x_train, x_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

assert X.shape == (60000, 32, 32, 3)
assert y.shape == (60000, 1)

In [None]:
# Preprocess the data: convert the pixel values to float32 and scale them by 1/255.
# The dtype guard makes the cell idempotent: once X is floating point it has
# already been scaled, and re-running the cell must not divide it a second time.
if np.issubdtype(X.dtype, np.integer):
    X = X.astype('float32')
    X /= 255

In [None]:
# Stratified train/test split with the sizes configured on `args`.
sss = StratifiedShuffleSplit(n_splits=5, 
                             train_size=args.train_size, 
                             test_size=args.test_size,
                             random_state=0)

# Only the last of the five generated splits is used. Collect the (small) index
# arrays first and slice the image array once, instead of copying the train/test
# subsets five times and throwing four of them away.
split_indices = list(sss.split(X, y))
train_index, test_index = split_indices[-1]

X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]

print(X_train.shape)
print(y_train.shape)

print("---")

print(X_test.shape)
print(y_test.shape)

In [None]:
def find_metric_in_output(output, string):
    """Extract every decimal number that directly follows a label in a text.

    Parameters
    ----------
    output : object
        Captured text (converted with ``str``) to search.
    string : str
        Literal label preceding the value, e.g. ``"Best fitness:"``. The value
        must follow the label after exactly one space.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per occurrence, in order of appearance;
        empty when the label does not occur.
    """
    # The label is matched literally: escaping prevents characters such as '.'
    # or '(' in a label from being interpreted as regex syntax. An optional
    # leading minus sign keeps negative values from being silently skipped.
    pattern = r"\b" + re.escape(string) + r" (-?[0-9]+\.[0-9]+)\b"
    matches = re.findall(pattern, str(output))

    return np.array(matches).astype('float')

In [None]:
def find_puissance_in_output(output):
    """Return the contents of every 'Unique puissance values: {...}' entry.

    The text between the braces is matched non-greedily. Because of the
    word-boundary anchors, the contents must begin and end with a word
    character. `output` is converted with ``str`` before searching.
    """
    puissance_pattern = re.compile(r"\b(?<=Unique puissance values: {).*?(?=})\b")
    return puissance_pattern.findall(str(output))

In [None]:
# Flatten every image into a single feature vector (one row per sample).
X_train_fl = X_train.reshape((X_train.shape[0], -1))
X_test_fl = X_test.reshape((X_test.shape[0], -1))

# Arguments shared by both classifier variants.
neat_kwargs = dict(number_of_generations=args.generations,
                   fitness_threshold=args.fitness_limit,
                   pop_size=args.population_size)

try:
    # Preferred variant: NEAT with a puissance configuration.
    puissance_config = Puissance()

    clf = NEATClassifier(puissance_config=puissance_config, **neat_kwargs)

    logging.info("Running NEAT puissance")

except Exception as exc:
    # Deliberate best-effort fallback to plain NEAT when the puissance variant
    # cannot be built. Catch Exception rather than everything, so that
    # KeyboardInterrupt/SystemExit still propagate, and record the reason
    # instead of hiding it.
    logging.warning("Could not set up NEAT puissance (%r); falling back to NEAT", exc)

    clf = NEATClassifier(**neat_kwargs)

    logging.info("Running NEAT")

In [None]:
# for i in range(3):
#     logging.info("*** Running generation "+str(i)+" ***")

# Duplicate everything printed during training into the log file; the metrics
# are parsed from that file afterwards.
sys.stdout = Tee(sys.stdout, logfile)
sys.stderr = sys.stdout

try:
    neat_genome = clf.fit(X_train_fl, y_train.ravel())
finally:
    # Always restore the real streams and close the log, even when fit() raises;
    # otherwise the kernel keeps writing to the Tee and the file handle leaks.
    sys.stdout = original_stdout
    sys.stderr = original_stderr
    logfile.close()
# sys.stdout.close()
        
#     curr_fitness = find_metric_in_output(output, "Fitness:")
#     curr_popavgfit = find_metric_in_output(output, "Population's average fitness:")
#     curr_stdev = find_metric_in_output(output, "stdev:")
        
#     print("Fitness: {}".format(curr_fitness))
#     print("Population's average fitness: {}".format(curr_popavgfit))
#     print("Standard deviation: {}".format(curr_stdev))

#     print("---")
#     if curr_fitness > best_fitness:
#         best_fitness = curr_fitness
        
#     print("Best fitness: {}".format(best_fitness))
    
    
# #     print(output)
#     print(output)
    
#     results[i] = {
#         "fitness":curr_fitness,
#         "pop_avg_fitness":curr_popavgfit,
#         "stdev":curr_stdev   
#     }

#     print("---\n")

In [None]:
# Read the captured training log back in; the context manager closes the file
# handle instead of leaving it to the garbage collector.
with open("output.txt", "r") as log_in:
    output = log_in.read()

In [None]:
# Per-generation times parsed from the log, and their running total.
gen_time = find_metric_in_output(output, "Generation time:")

# np.cumsum yields the running total in one pass (and an empty float array for
# empty input), replacing the quadratic append-in-a-loop construction.
cum_gen_time = np.cumsum(gen_time)

print(gen_time)
print(cum_gen_time)

In [None]:
# Per-generation metric series parsed from the captured training log, keyed by metric name.
results = {
    'best_fitness': find_metric_in_output(output, "Best fitness:"),
    'avg_adj_fitness': find_metric_in_output(output, "Average adjusted fitness:"),
    'pop_avg_fitness': find_metric_in_output(output, "Population's average fitness:"),
    'gen_time': gen_time,
    'cum_gen_time': cum_gen_time,
    'stdev': find_metric_in_output(output, "stdev:"),
}
# metrics['puissance'] = puissance = find_puissance_in_output(output)

# Sanity check: every parsed series must contain the same number of entries.
series_lengths = {
    len(results[key])
    for key in ('best_fitness', 'avg_adj_fitness', 'pop_avg_fitness', 'gen_time', 'stdev')
}
assert len(series_lengths) == 1

In [None]:
# Print the parsed metric arrays for a quick visual check.
print(results)

In [None]:
# Predict the held-out samples with the fitted classifier and print the
# classification report for them.
y_pred = neat_genome.predict(X_test_fl)

print(classification_report(y_true=y_test.ravel(), y_pred=y_pred.ravel()))

In [None]:
# puissance_config = Puissance()

# clf = NEATClassifier(number_of_generations=3,
#                      fitness_threshold=0.5,
#                      pop_size=5,
#                      puissance_config = puissance_config)

In [None]:
# for i in range(args.generations):
#     logging.info("*** Running generation "+str(i)+" ***")
#     with Capturing() as output:
#         neat_genome = clf.fit(X_train_fl, y_train.ravel())
        
#     curr_fitness = find_metric_in_output(output, "Fitness:")
#     curr_popavgfit = find_metric_in_output(output, "Population's average fitness:")
#     curr_stdev = find_metric_in_output(output, "stdev:")
        
#     print("Fitness: {}".format(curr_fitness))
#     print("Population's average fitness: {}".format(curr_popavgfit))
#     print("Standard deviation: {}".format(curr_stdev))

#     print("---")
#     if curr_fitness > best_fitness:
#         best_fitness = curr_fitness
        
#     print("Best fitness: {}".format(best_fitness))
    
#     print(find_metric_in_output(output, "Generation time:"))
    
#     results[i] = {
#         "fitness":curr_fitness,
#         "pop_avg_fitness":curr_popavgfit,
#         "stdev":curr_stdev   
#     }

#     print("---\n")

In [None]:
# Scratch check: args.generations evenly spaced values from 1 to args.generations.
np.linspace(start=1, stop=args.generations, num=args.generations)

In [None]:
def plot_results(plots, xlabel, ylabel, fig_name):
    """Draw one or more line series on a single figure, save it and show it.

    Parameters
    ----------
    plots : dict or sequence
        Series to draw, indexable by 0..len(plots)-1. Each entry is a dict
        with the keys 'x', 'y' and 'label'.
    xlabel, ylabel : str
        Axis labels.
    fig_name : str
        File name of the saved figure; it is appended to the module-level
        `fig_loc` prefix.
    """
    fig, ax = plt.subplots(figsize=(15, 8))

    for i in range(len(plots)):
        series = plots[i]
        ax.plot(series['x'], series['y'], label=series['label'])

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()

    # At most ~100 ticks on the generation axis; the step is clamped to 1 so a
    # zero generation count cannot produce an invalid arange step.
    tick_step = max(1, math.ceil(args.generations/100))
    ax.set_xticks(np.arange(0, args.generations, tick_step))

    # Make sure the target folder exists before saving; savefig does not create it.
    target = fig_loc+fig_name
    target_dir = os.path.dirname(target)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    fig.savefig(target)

    # The original ended with an argument-less plt.plot(), which draws nothing;
    # show the finished figure instead.
    plt.show()

In [None]:
# Fitness curves per generation: (key in `results`, legend label).
fitness_series = [
    ('avg_adj_fitness', "Average adjusted fitness"),
    ('pop_avg_fitness', "Population's average fitness"),
    ('best_fitness', "Best fitness"),
]

# The x positions are the integer generation indices 0..n-1, taken from the
# number of values actually parsed. np.linspace(0, g, g) produced non-integer
# positions and fails with a length mismatch whenever fewer than
# args.generations generations were logged.
plots = {
    idx: {
        "x": np.arange(len(results[key])),
        "y": results[key],
        "label": label,
    }
    for idx, (key, label) in enumerate(fitness_series)
}

plot_results(plots, "Generation", "Fitness", "fitness.png")

In [None]:
# Standard deviation of the fitness per generation. The x positions are the
# integer generation indices 0..n-1 taken from the parsed values, so they always
# match the length of the y series.
plots = {}

plots[0] = {
    "x":np.arange(len(results['stdev'])),
    "y":results['stdev'],
    "label":"Standard deviation"
}

plot_results(plots, "Generation", "Standard deviation", "stdev.png")

In [None]:
# Time spent per generation. The x positions are the integer generation indices
# 0..n-1 taken from the parsed values, so they always match the length of the
# y series.
plots = {}

plots[0] = {
    "x":np.arange(len(results['gen_time'])),
    "y":results['gen_time'],
    "label":"Generation time"
}

plot_results(plots, "Generation", "Generation time", "gen_time.png")

In [None]:
# Running total of the generation times. The x positions are the integer
# generation indices 0..n-1 taken from the parsed values, so they always match
# the length of the y series.
plots = {}

plots[0] = {
    "x":np.arange(len(results['cum_gen_time'])),
    "y":results['cum_gen_time'],
    "label":"Cumulative generation time (seconds)"
}

plot_results(plots, "Generation", "Cumulative generation time (seconds)", "cum_gen_time.png")