# Shuttle Statlog Data Set Classifier - Breeder Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import numpy as np
import copy
%matplotlib inline

In [None]:
import GeneticProgram as gp
from Statlog import load_statlog
from SamplingPolicies import *

### Import Data Set

In [None]:
# Load the Statlog (Shuttle) train/test splits plus the input and class counts.
# NOTE(review): load_statlog() may already return scaled data; MinMax scaling
# is applied below regardless -- confirm the second rescale is intended.
train_X, train_y, test_X, test_y, num_inputs, num_classes = load_statlog()

# Fit the scaler on the training split only, so nothing from the test split
# influences the scaling parameters.
scaler = MinMaxScaler().fit(train_X)

# Apply the same fitted transform to both splits.
train_X, test_X = scaler.transform(train_X), scaler.transform(test_X)

In [None]:
# Collects one median fitness-union curve per experiment (appended by later
# cells); the curves are compared in the plot at the end of the notebook.
fu = []

In [None]:
def breederDisplayCallback(string) -> None:
    """Replace the current cell output with ``string``.

    Passed as ``display_fun`` to the breeder selection routines in this
    notebook.

    Args:
        string: Object handed to ``display``; presumably a progress
            message -- confirm against the GeneticProgram module.
    """
    # wait=True delays clearing until new output is available to replace it.
    clear_output(wait=True)
    display(string)

In [None]:
# Program configuration passed as `template_program` to every breeder run
# in this notebook. num_inputs and num_classes come from load_statlog();
# the remaining values are constants chosen for these experiments.
template_program = gp.Program(
     max_initial_instructions = 128,
     num_registers            = 16,
     num_inputs               = num_inputs,
     mutation_rate            = 0.1,
     max_num_instructions     = 512,
     num_classes              = num_classes
)

In [None]:
# Five repeated runs of gp.errorMinBreederSelection with UniformSamplingPolicy.
# Results are appended one at a time, so an interrupted cell keeps the runs
# that already finished.
original_gp_uniform_sampling = []

for _run in range(5):
    outcome = gp.errorMinBreederSelection(
        p_size=100,
        p_gap=70,
        tau=200,
        template_program=template_program,
        sampling_policy_class=UniformSamplingPolicy,
        max_num_generations=200,
        X=train_X,
        y=train_y,
        display_fun=breederDisplayCallback,
    )
    # Store a deep copy so the kept result shares no state with `outcome`.
    original_gp_uniform_sampling.append(copy.deepcopy(outcome))

In [None]:
# Median training accuracy over the runs; element 0 of each run result is
# the object whose accuracy() is evaluated.
training_accuracies = np.array(
    [run[0].accuracy(train_X, train_y) for run in original_gp_uniform_sampling]
)
print(np.median(training_accuracies))

In [None]:
# Median test accuracy over the runs; element 0 of each run result is the
# object whose accuracy() is evaluated.
test_accuracies = np.array(
    [run[0].accuracy(test_X, test_y) for run in original_gp_uniform_sampling]
)
print(np.median(test_accuracies))

In [None]:
# Median training detection rate over the runs, taken from element 0 of
# each run result.
train_det_rates = np.array(
    [run[0].detectionRate(train_X, train_y) for run in original_gp_uniform_sampling]
)
print(np.median(train_det_rates))

In [None]:
# Median test detection rate over the runs, taken from element 0 of each
# run result.
test_det_rates = np.array(
    [run[0].detectionRate(test_X, test_y) for run in original_gp_uniform_sampling]
)
print(np.median(test_det_rates))

In [None]:
# Five repeated runs of gp.errorMinBreederSelection with
# ClassUniformSamplingPolicy. Results are appended one at a time, so an
# interrupted cell keeps the runs that already finished.
original_gp_class_sampling = []

for _run in range(5):
    outcome = gp.errorMinBreederSelection(
        p_size=100,
        p_gap=70,
        tau=200,
        template_program=template_program,
        sampling_policy_class=ClassUniformSamplingPolicy,
        max_num_generations=200,
        X=train_X,
        y=train_y,
        display_fun=breederDisplayCallback,
    )
    # Store a deep copy so the kept result shares no state with `outcome`.
    original_gp_class_sampling.append(copy.deepcopy(outcome))

In [None]:
# Median training accuracy over the runs; element 0 of each run result is
# the object whose accuracy() is evaluated.
training_accuracies = np.array(
    [run[0].accuracy(train_X, train_y) for run in original_gp_class_sampling]
)
print(np.median(training_accuracies))

In [None]:
# Median test accuracy over the runs; element 0 of each run result is the
# object whose accuracy() is evaluated.
test_accuracies = np.array(
    [run[0].accuracy(test_X, test_y) for run in original_gp_class_sampling]
)
print(np.median(test_accuracies))

In [None]:
# Median training detection rate over the runs, taken from element 0 of
# each run result.
train_det_rates = np.array(
    [run[0].detectionRate(train_X, train_y) for run in original_gp_class_sampling]
)
print(np.median(train_det_rates))

In [None]:
# Median test detection rate over the runs, taken from element 0 of each
# run result.
test_det_rates = np.array(
    [run[0].detectionRate(test_X, test_y) for run in original_gp_class_sampling]
)
print(np.median(test_det_rates))

In [None]:
# Five repeated runs of gp.breederSelection with UniformSamplingPolicy.
# Results are appended one at a time, so an interrupted cell keeps the runs
# that already finished.
no_sharing_uniform_sampling = []

for _run in range(5):
    outcome = gp.breederSelection(
        p_size=100,
        p_gap=70,
        tau=200,
        template_program=template_program,
        sampling_policy_class=UniformSamplingPolicy,
        max_num_generations=200,
        X=train_X,
        y=train_y,
        display_fun=breederDisplayCallback,
    )
    # Store a deep copy so the kept result shares no state with `outcome`.
    no_sharing_uniform_sampling.append(copy.deepcopy(outcome))

In [None]:
# Stack the 'fitness_union' series found in element 2 of each run result,
# then plot the median across runs (axis 0).
fitness_unions = np.array(
    [run[2]['fitness_union'] for run in no_sharing_uniform_sampling]
)
fig = plt.figure(figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(np.median(fitness_unions, axis=0))

In [None]:
# Record this experiment's median fitness-union curve (median across runs)
# for the comparison plot at the end of the notebook.
# NOTE(review): re-running this cell appends a duplicate curve; the final
# plot's legend lists four curves in experiment order.
fu.append(np.median(fitness_unions, axis=0))

In [None]:
# Median training accuracy over the runs; element 0 of each run result is
# the object whose accuracy() is evaluated.
training_accuracies = np.array(
    [run[0].accuracy(train_X, train_y) for run in no_sharing_uniform_sampling]
)
print(np.median(training_accuracies))

In [None]:
# Median test accuracy over the runs; element 0 of each run result is the
# object whose accuracy() is evaluated.
test_accuracies = np.array(
    [run[0].accuracy(test_X, test_y) for run in no_sharing_uniform_sampling]
)
print(np.median(test_accuracies))

In [None]:
# Median training detection rate over the runs, taken from element 0 of
# each run result.
train_det_rates = np.array(
    [run[0].detectionRate(train_X, train_y) for run in no_sharing_uniform_sampling]
)
print(np.median(train_det_rates))

In [None]:
# Median test detection rate over the runs, taken from element 0 of each
# run result.
test_det_rates = np.array(
    [run[0].detectionRate(test_X, test_y) for run in no_sharing_uniform_sampling]
)
print(np.median(test_det_rates))

In [None]:
# Five repeated runs of gp.breederSelection with ClassUniformSamplingPolicy.
# Results are appended one at a time, so an interrupted cell keeps the runs
# that already finished.
no_sharing_class_sampling = []

for _run in range(5):
    outcome = gp.breederSelection(
        p_size=100,
        p_gap=70,
        tau=200,
        template_program=template_program,
        sampling_policy_class=ClassUniformSamplingPolicy,
        max_num_generations=200,
        X=train_X,
        y=train_y,
        display_fun=breederDisplayCallback,
    )
    # Store a deep copy so the kept result shares no state with `outcome`.
    no_sharing_class_sampling.append(copy.deepcopy(outcome))

In [None]:
# Stack the 'fitness_union' series found in element 2 of each run result,
# then plot the median across runs (axis 0).
fitness_unions = np.array(
    [run[2]['fitness_union'] for run in no_sharing_class_sampling]
)
fig = plt.figure(figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(np.median(fitness_unions, axis=0))

In [None]:
# Record this experiment's median fitness-union curve (median across runs)
# for the comparison plot at the end of the notebook.
# NOTE(review): re-running this cell appends a duplicate curve; the final
# plot's legend lists four curves in experiment order.
fu.append(np.median(fitness_unions, axis=0))

In [None]:
# Median training accuracy over the runs; element 0 of each run result is
# the object whose accuracy() is evaluated.
training_accuracies = np.array(
    [run[0].accuracy(train_X, train_y) for run in no_sharing_class_sampling]
)
print(np.median(training_accuracies))

In [None]:
# Median test accuracy over the runs; element 0 of each run result is the
# object whose accuracy() is evaluated.
test_accuracies = np.array(
    [run[0].accuracy(test_X, test_y) for run in no_sharing_class_sampling]
)
print(np.median(test_accuracies))

In [None]:
# Median training detection rate over the runs, taken from element 0 of
# each run result.
train_det_rates = np.array(
    [run[0].detectionRate(train_X, train_y) for run in no_sharing_class_sampling]
)
print(np.median(train_det_rates))

In [None]:
# Median test detection rate over the runs, taken from element 0 of each
# run result.
test_det_rates = np.array(
    [run[0].detectionRate(test_X, test_y) for run in no_sharing_class_sampling]
)
print(np.median(test_det_rates))

In [None]:
# Five repeated runs of gp.fitnessSharingBreederSelection with
# UniformSamplingPolicy. Results are appended one at a time, so an
# interrupted cell keeps the runs that already finished.
fitness_sharing_uniform_sampling = []

for _run in range(5):
    outcome = gp.fitnessSharingBreederSelection(
        p_size=100,
        p_gap=70,
        tau=200,
        template_program=template_program,
        sampling_policy_class=UniformSamplingPolicy,
        max_num_generations=200,
        X=train_X,
        y=train_y,
        display_fun=breederDisplayCallback,
    )
    # Store a deep copy so the kept result shares no state with `outcome`.
    fitness_sharing_uniform_sampling.append(copy.deepcopy(outcome))

In [None]:
# Stack the 'fitness_union' series found in element 2 of each run result,
# then plot the median across runs (axis 0).
fitness_unions = np.array(
    [run[2]['fitness_union'] for run in fitness_sharing_uniform_sampling]
)
fig = plt.figure(figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(np.median(fitness_unions, axis=0))

In [None]:
# Record this experiment's median fitness-union curve (median across runs)
# for the comparison plot at the end of the notebook.
# NOTE(review): re-running this cell appends a duplicate curve; the final
# plot's legend lists four curves in experiment order.
fu.append(np.median(fitness_unions, axis=0))

In [None]:
# Median training accuracy over the runs; element 0 of each run result is
# the object whose accuracy() is evaluated.
training_accuracies = np.array(
    [run[0].accuracy(train_X, train_y) for run in fitness_sharing_uniform_sampling]
)
print(np.median(training_accuracies))

In [None]:
# Median test accuracy over the runs; element 0 of each run result is the
# object whose accuracy() is evaluated.
test_accuracies = np.array(
    [run[0].accuracy(test_X, test_y) for run in fitness_sharing_uniform_sampling]
)
print(np.median(test_accuracies))

In [None]:
# Median training detection rate over the runs, taken from element 0 of
# each run result.
train_det_rates = np.array(
    [run[0].detectionRate(train_X, train_y) for run in fitness_sharing_uniform_sampling]
)
print(np.median(train_det_rates))

In [None]:
# Median test detection rate over the runs, taken from element 0 of each
# run result.
test_det_rates = np.array(
    [run[0].detectionRate(test_X, test_y) for run in fitness_sharing_uniform_sampling]
)
print(np.median(test_det_rates))

In [None]:
# Five repeated runs of gp.fitnessSharingBreederSelection with
# ClassUniformSamplingPolicy. Results are appended one at a time, so an
# interrupted cell keeps the runs that already finished.
fitness_sharing_class_sampling = []

for _run in range(5):
    outcome = gp.fitnessSharingBreederSelection(
        p_size=100,
        p_gap=70,
        tau=200,
        template_program=template_program,
        sampling_policy_class=ClassUniformSamplingPolicy,
        max_num_generations=200,
        X=train_X,
        y=train_y,
        display_fun=breederDisplayCallback,
    )
    # Store a deep copy so the kept result shares no state with `outcome`.
    fitness_sharing_class_sampling.append(copy.deepcopy(outcome))

In [None]:
# Stack the 'fitness_union' series found in element 2 of each run result,
# then plot the median across runs (axis 0).
fitness_unions = np.array(
    [run[2]['fitness_union'] for run in fitness_sharing_class_sampling]
)
fig = plt.figure(figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(np.median(fitness_unions, axis=0))

In [None]:
# Record this experiment's median fitness-union curve (median across runs)
# for the comparison plot at the end of the notebook.
# NOTE(review): re-running this cell appends a duplicate curve; the final
# plot's legend lists four curves in experiment order.
fu.append(np.median(fitness_unions, axis=0))

In [None]:
# Median training accuracy over the runs; element 0 of each run result is
# the object whose accuracy() is evaluated.
training_accuracies = np.array(
    [run[0].accuracy(train_X, train_y) for run in fitness_sharing_class_sampling]
)
print(np.median(training_accuracies))

In [None]:
# Median test accuracy over the runs; element 0 of each run result is the
# object whose accuracy() is evaluated.
test_accuracies = np.array(
    [run[0].accuracy(test_X, test_y) for run in fitness_sharing_class_sampling]
)
print(np.median(test_accuracies))

In [None]:
# Median training detection rate over the runs, taken from element 0 of
# each run result.
train_det_rates = np.array(
    [run[0].detectionRate(train_X, train_y) for run in fitness_sharing_class_sampling]
)
print(np.median(train_det_rates))

In [None]:
# Median test detection rate over the runs, taken from element 0 of each
# run result.
test_det_rates = np.array(
    [run[0].detectionRate(test_X, test_y) for run in fitness_sharing_class_sampling]
)
print(np.median(test_det_rates))

In [None]:
# Snapshot the collected median fitness-union curves so that later cells can
# rework them without losing the originally computed values.
orig_fu = copy.deepcopy(fu)

In [None]:
# Compare the median fitness-union curves of the four breederSelection /
# fitnessSharingBreederSelection experiments on a single figure. Each curve
# is standardised on its own (zero mean, unit variance) so the shapes can be
# compared on one axis regardless of their absolute scale.
#
# The curves are read from the orig_fu snapshot and `fu` is left bound to its
# list: rebinding it to an ndarray would make the earlier `fu.append(...)`
# cells fail with AttributeError when re-run.

# Number of leading points to drop from every curve before standardising.
index_skip = 0

# Order must match the order in which the curves were appended to `fu`.
curve_labels = [
    'No Fitness Sharing, Uniform Sampling',
    'No Fitness Sharing, Class Sampling',
    'Fitness Sharing, Uniform Sampling',
    'Fitness Sharing, Class Sampling'
]

# A dedicated scaler keeps the feature `scaler` fitted on train_X intact.
curve_scaler = StandardScaler()

fig = plt.figure(figsize=(14, 12), dpi=80, facecolor='w', edgecolor='k')
for curve in orig_fu:
    # The scaler expects a 2-D (n_samples, n_features) array; each curve is
    # treated as a single feature column and refitted independently.
    column = np.asarray(curve)[index_skip:].reshape(-1, 1)
    plt.plot(curve_scaler.fit_transform(column))

plt.legend(curve_labels)
plt.show()