# Hypothyroid Data Set Classifier - Breeder Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
import GeneticProgram as gp
from Thyroid import load_thyroid
from SamplingPolicies import *

### Import Hypothyroid Data Set

In [None]:
# The thyroid features come back already scaled, so no scaler is applied here.
train_X, train_y, test_X, test_y = load_thyroid()

# Feature count is read from the first training row; the class count is the
# largest training label plus one (assumes integer labels starting at 0 --
# TODO confirm against load_thyroid()).
num_inputs, num_classes = len(train_X[0]), np.max(train_y) + 1
print(num_inputs, num_classes, sep="\n")

In [None]:
def breederDisplayCallback(string):
    """Replace the current cell output with ``string``.

    Passed as ``display_fun`` to ``gp.breederSelection`` in this notebook.
    """
    # wait=True: per the IPython docs, the old output is only cleared once
    # new output is available to replace it.
    clear_output(wait=True)
    display(string)

In [None]:
# Program configuration handed to every gp.breederSelection call below as
# `template_program`; input and class counts come from the loaded data set.
template_program = gp.Program(
     max_initial_instructions = 64,
     num_registers            = 8,
     num_inputs               = num_inputs,
     mutation_rate            = 0.1,
     max_num_instructions     = 1024,
     num_classes              = num_classes
)

In [None]:
# Short breeder run (at most 5 generations) on the full training set.
# NOTE(review): this call uses p_size / p_gap / tau / sampling_policy_class and
# its result is indexed as p[0] below, whereas the later experiment cells call
# gp.breederSelection with population_size / gap_percent and unpack (p, h).
# The two usages look like different versions of the API -- confirm which one
# the current GeneticProgram module implements.
p = gp.breederSelection(p_size                = 150,
                        p_gap                 = 100,
                        tau                   = len(train_X),  # one value per training row; meaning of tau not visible here -- TODO confirm
                        template_program      = template_program,
                        sampling_policy_class = UniformSamplingPolicy,
                        halting_fitness       = 1000000,  # presumably unreachable, so the generation cap ends the run -- verify
                        max_num_generations   = 5,
                        X                     = train_X,
                        y                     = train_y,
                        display_fun           = breederDisplayCallback)

In [None]:
# Accuracy of the first element of p on the training split
# (presumably p[0] is the top-ranked program -- verify against gp.breederSelection).
p[0].accuracy(train_X, train_y)

In [None]:
# Accuracy of the same first element of p on the held-out test split.
p[0].accuracy(test_X, test_y)

### Gap Percent of 30%

In [None]:
# Run the breeder three times with a 30% gap. Every run's returned program and
# history are kept, and the index of the run with the highest training
# accuracy is remembered in best_performer_30.
programs_30       = []
history_30        = []
best_accuracy_30  = 0.0
best_performer_30 = None
for i in range(3):
    p, h = gp.breederSelection(population_size     = 200,
                               template_program    = template_program,
                               halting_fitness     = 200.0,
                               max_num_generations = 200,
                               gap_percent         = 0.3,
                               X                   = train_X,
                               y                   = train_y,
                               display_fun         = breederDisplayCallback)

    # Score the returned program on both splits and store the results
    # alongside the run's history so they can be looked up per run later.
    train_acc = p.accuracy(train_X, train_y)
    test_acc  = p.accuracy(test_X, test_y)

    h['train_acc'] = train_acc
    h['test_acc']  = test_acc

    programs_30.append(p)
    history_30.append(h)

    # Identity test against None (not ==) so the first run is always
    # recorded, even when its training accuracy is 0.0.
    if best_performer_30 is None or train_acc > best_accuracy_30:
        best_accuracy_30  = train_acc
        best_performer_30 = i

In [None]:
# Training accuracy of the 30%-gap run that scored highest on the training set.
history_30[best_performer_30]['train_acc']

In [None]:
# Test accuracy of that same run (the run is chosen by training accuracy, not test accuracy).
history_30[best_performer_30]['test_acc']

In [None]:
# Overlay the per-generation error curve of every 30%-gap run on one figure.
# Iterating the histories directly keeps the plot in step with however many
# runs were recorded.
for run_history in history_30:
    plt.plot(run_history['error'])
plt.xlabel("Generations")
plt.ylabel("Error (Unitless)")
# The title names the data set this notebook actually loads (hypothyroid).
plt.title("Hypothyroid Breeder Gap Percent = 30%")
plt.show()

### Gap Percent of 70%

In [None]:
# Run the breeder three times with a 70% gap. Every run's returned program and
# history are kept, and the index of the run with the highest training
# accuracy is remembered in best_performer_70.
programs_70       = []
history_70        = []
best_accuracy_70  = 0.0
best_performer_70 = None
for i in range(3):
    p, h = gp.breederSelection(population_size     = 200,
                               template_program    = template_program,
                               halting_fitness     = 200.0,
                               max_num_generations = 200,
                               gap_percent         = 0.7,
                               X                   = train_X,
                               y                   = train_y,
                               display_fun         = breederDisplayCallback)

    # Score the returned program on both splits and store the results
    # alongside the run's history so they can be looked up per run later.
    train_acc = p.accuracy(train_X, train_y)
    test_acc  = p.accuracy(test_X, test_y)

    h['train_acc'] = train_acc
    h['test_acc']  = test_acc

    programs_70.append(p)
    history_70.append(h)

    # Identity test against None (not ==) so the first run is always
    # recorded, even when its training accuracy is 0.0.
    if best_performer_70 is None or train_acc > best_accuracy_70:
        best_accuracy_70  = train_acc
        best_performer_70 = i

In [None]:
# Training accuracy of the 70%-gap run that scored highest on the training set.
history_70[best_performer_70]['train_acc']

In [None]:
# Test accuracy of that same run (the run is chosen by training accuracy, not test accuracy).
history_70[best_performer_70]['test_acc']

In [None]:
# Overlay the per-generation error curve of every 70%-gap run on one figure.
# Iterating the histories directly keeps the plot in step with however many
# runs were recorded.
for run_history in history_70:
    plt.plot(run_history['error'])
plt.xlabel("Generations")
plt.ylabel("Error (Unitless)")
# The title names the data set this notebook actually loads (hypothyroid).
plt.title("Hypothyroid Breeder Gap Percent = 70%")
plt.show()

### Compare Gap Percent on Convergence

In [None]:
# Compare convergence of the two gap settings using, for each setting, the
# run that achieved the highest training accuracy.
error_30 = history_30[best_performer_30]['error']
error_70 = history_70[best_performer_70]['error']

# Labels are attached to the curves themselves so the legend cannot drift out
# of order if a curve is added or the plot calls are rearranged.
plt.plot(error_30, label='Gap = 30%')
plt.plot(error_70, label='Gap = 70%')
plt.xlabel("Generations")
plt.ylabel("Error (Unitless)")
plt.legend()
# The title names the data set this notebook actually loads (hypothyroid).
plt.title("Hypothyroid Breeder Convergence Behaviour")
plt.show()