# Project 1

## 1: Demo specific functions

In [1]:
# Import from separate .py files
from ga import *
from p1 import *

In [2]:
# GA configuration used by the step-by-step demos in this section

params_1 = dict(
    indiv_len=10,
    pop_size=8,            # must be an even number
    num_parents=8,         # may not exceed pop_size
    p_m=0.1,
    p_c=0.6,
    max_sine_exp=7,        # 2^7 -> [0,128]
    max_gen=10,
    sine_constraint=False,
)

In [3]:
# DEMO init_pop()
# Build a GA from params_1 with sine_fitness as its fitness function, then
# create a population with init_pop() and print it. In the recorded output
# this is a list of 8 bit strings of length 10, matching pop_size and
# indiv_len in params_1.

algorithm_1 = GA(params_1, fitness=sine_fitness)
pop = algorithm_1.init_pop()
print(pop)

['1100100100', '1001011011', '1000100110', '0101111001', '1000101001', '0100110110', '1001000101', '1110000101']


In [4]:
# DEMO select_parents()
# Feed the population created in the init_pop() demo to select_parents() and
# print the result. The recorded output contains the same individual more
# than once, so one individual can be chosen as parent several times.

parents = algorithm_1.select_parents(pop)
print('\nParents selcted to make offsprings:\n', parents)


Parents selcted to make offsprings:
 ['0100110110', '1100100100', '1001000101', '1000100110', '0101111001', '0101111001', '1000100110', '1000101001']


In [5]:
# DEMO make_offsprings()

# Eight hand-made parents alternating all-ones / all-zeros, so that every
# changed bit is easy to spot in the printed results.
dummy_parents = ['11111', '00000'] * 4

# Step 1: crossover() on its own
crossover_offsprings = algorithm_1.crossover(dummy_parents)
print('Crossover:\n', crossover_offsprings)

# Step 2: mutate() on its own
mutation_offsprings = algorithm_1.mutate(dummy_parents)
print('\nMutation:\n', mutation_offsprings)

# Step 3: make_offsprings(), which produces the next generation
offsprings = algorithm_1.make_offsprings(dummy_parents)
print('\n\nA new generation:\n', offsprings)

Crossover:
 ['11000', '00111', '10000', '01111', '10000', '01111', '11000', '00111']

Mutation:
 ['11111', '00001', '11111', '00000', '11101', '00000', '11101', '00000']


A new generation:
 ['11100', '00111', '10000', '01110', '10001', '00111', '11000', '01111']


## 2: Demo GA - Sine [0, 128]

In [6]:
# DEMO run(), the complete GA with sine fitness function

# Configuration for the full sine run: longer individuals, a larger
# population and more generations than params_1.
params_2 = dict(
    indiv_len=15,
    pop_size=20,           # must be an even number
    num_parents=20,        # may not exceed pop_size
    p_m=0.05,
    p_c=0.6,
    max_sine_exp=7,        # 2^7 -> [0,128]
    max_gen=100,
    sine_constraint=False,
)
# Build the GA from params_2 and run it. The value returned by run() is kept
# as eval_log; the following cells iterate over it per generation and index
# each entry with [0], [1] and [2].
algorithm_2 = GA(params_2, fitness=sine_fitness)
eval_log = algorithm_2.run()

Algorithm succsessfully executed


In [7]:
# Generational data: population, real value, fitness value
gen_print = 10  # only every gen_print-th generation is printed

# The loop variable is deliberately NOT called `generation`: the plotting
# cells bind the global name `generation` to a matplotlib Slider, and
# reusing the name here would replace that widget with a plain int whenever
# this cell is (re-)run after a plot cell.
for gen_idx, data in eval_log.items():
    if gen_idx % gen_print != 0:
        continue
    # data[2] is printed as the population, data[0] as its real values and
    # data[1] as its fitness values (shown with two decimals).
    fitness_2dp = ['{:.2f}'.format(item) for item in data[1]]
    print('Generation:', gen_idx, '\n')
    print('Population:', data[2], '\n')
    print('Population real value:', data[0], '\n')
    print('Population sine fitness value:', fitness_2dp, '\n\n')
        

Generation: 0 

Population: ['010111011010111', '001001110101101', '010111001110111', '110110011110101', '000011000100100', '011010001010011', '000100100011100', '111001100110001', '010101010000010', '111011110101101', '011101100100111', '000110000101111', '011100011010101', '010110011001101', '110010010000110', '001101010100010', '000100111111001', '001010011011111', '000111110101101', '000011111001000'] 

Population real value: [ 46.83984375  19.67578125  46.46484375 108.95703125   6.140625
  52.32421875   9.109375   115.19140625  42.5078125  119.67578125
  59.15234375  12.18359375  56.83203125  44.80078125 100.5234375
  26.6328125    9.97265625  20.87109375  15.67578125   7.78125   ] 

Population sine fitness value: ['0.28', '0.74', '0.61', '0.84', '-0.14', '0.88', '0.31', '0.87', '-1.00', '0.29', '0.51', '-0.37', '0.28', '0.73', '-0.01', '1.00', '-0.52', '0.90', '0.03', '1.00'] 


Generation: 10 

Population: ['010110010011100', '010010011000110', '110110111000111', '10011001011011

In [8]:
# Plot the generations

%matplotlib notebook
from ipywidgets import *
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider, Button


# Reference curve: sin(x) sampled over [0, 128]
x_sine = np.linspace(0, 128, 1000)
y_sine = np.sin(x_sine)

# Use the explicit Figure/Axes API so every call targets this figure, not
# whatever pyplot currently considers "current".
fig, axs = plt.subplots(figsize=(5,3))
fig.subplots_adjust(bottom=0.35)  # leave room under the plot for the slider
axs.set_title("Population plot")
axs.set_xlabel("x")
axs.set_ylabel("sin(x)")
axs.set_xlim(-1, 129)
axs.set_ylim(-1.5, 1.5)
line, = axs.plot(x_sine, y_sine)

# Generation shown initially; entry [0] holds the x values (real values) and
# entry [1] the y values (fitness) of that generation.
i = 1
x = eval_log[i][0]
y = eval_log[i][1]

dots = axs.scatter(x, y, marker='o', color='orange')

ax = fig.add_axes([0.25, 0.1, 0.55, 0.05])
generation = Slider(ax, label='Generation', valmin=0, valmax=params_2['max_gen'], valstep=1, valinit=i)

# matplotlib needs a live reference to a widget for it to stay responsive.
# The global name `generation` is rebound by later cells, so an additional
# reference is stored on the figure itself.
fig._generation_slider = generation

def update(val, dots=dots, log=eval_log):
    """Move the scatter points of this figure to the selected generation.

    `val` is the new slider value passed in by matplotlib. `dots` and `log`
    are bound as defaults when the function is defined, so the callback keeps
    working on this figure's scatter and this run's log even after later
    cells rebind the global names `dots`, `generation` or `update`.
    """
    gen = int(val)  # slider values may arrive as floats; log keys are generation numbers
    dots.set_offsets(np.c_[log[gen][0], log[gen][1]])

generation.on_changed(update)


    


<IPython.core.display.Javascript object>

0

## 3: Demo GA - Sine [0, 128] with [5, 10]-constraint

In [9]:
# Same configuration as the unconstrained sine run, except that
# sine_constraint is switched on for this section.
params_3 = dict(
    indiv_len=15,
    pop_size=20,           # must be an even number
    num_parents=20,        # may not exceed pop_size
    p_m=0.05,
    p_c=0.6,
    max_sine_exp=7,        # 2^7 -> [0,128]
    max_gen=100,
    sine_constraint=True,  # the only value that differs from params_2
)

# Build the GA from params_3 (sine_constraint enabled) and run it; the value
# returned by run() is kept as eval_log_3 for the printing and plotting cells
# of this section.
algorithm_3 = GA(params_3, fitness=sine_fitness)
eval_log_3 = algorithm_3.run()


Algorithm succsessfully executed


In [21]:
# Generational data: population, real value, fitness value
gen_print = 10  # only every gen_print-th generation is printed

# The loop variable is deliberately NOT called `generation`: the plotting
# cells bind the global name `generation` to a matplotlib Slider, and
# reusing the name here would replace that widget with a plain int whenever
# this cell is run after a plot cell.
for gen_idx, data in eval_log_3.items():
    if gen_idx % gen_print != 0:
        continue
    # data[2] is printed as the population, data[0] as its real values and
    # data[1] as its fitness values (shown with two decimals).
    fitness_2dp = ['{:.2f}'.format(item) for item in data[1]]
    print('Generation:', gen_idx, '\n')
    print('Population:', data[2], '\n')
    print('Population real value:', data[0], '\n')
    print('Population sine fitness value:', fitness_2dp, '\n\n')

Generation: 0 

Population: ['100111000001011', '000111100011001', '010010100001101', '001000111101111', '100101111110010', '100110110101011', '000101100111110', '011010101000101', '100111001111100', '100111100010010', '111111010110011', '111001001111110', '110001001101110', '001000001110011', '100110010010101', '000101001111100', '100010000001000', '111110000001101', '101011011100111', '110111101100100'] 

Population real value: [ 78.04296875  15.09765625  37.05078125  17.93359375  75.9453125
  77.66796875  11.2421875   53.26953125  78.484375    79.0703125
 126.69921875 114.4921875   98.4296875   16.44921875  76.58203125
  10.484375    68.03125    124.05078125  86.90234375 111.390625  ] 

Population sine fitness value: ['-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25', '-1.25'] 


Generation: 10 

Population: ['100111010111000', '001111100111111', '000111100000100',

In [11]:
# Plot the generations

# Log shown in this figure
DATA = eval_log_3

# Reference curve: sin(x) sampled over [0, 128]
x_sine = np.linspace(0, 128, 1000)
y_sine = np.sin(x_sine)

# Use the explicit Figure/Axes API so every call targets this figure, not
# whatever pyplot currently considers "current".
fig, axs = plt.subplots(figsize=(5,3))
fig.subplots_adjust(bottom=0.35)  # leave room under the plot for the slider
axs.set_title("Population plot")
axs.set_xlabel("x")
axs.set_ylabel("sin(x)")
axs.set_xlim(-1, 129)
axs.set_ylim(-1.5, 1.5)
line, = axs.plot(x_sine, y_sine)

# Generation shown initially; entry [0] holds the x values (real values) and
# entry [1] the y values (fitness) of that generation.
i = 1
x = DATA[i][0]
y = DATA[i][1]

dots = axs.scatter(x, y, marker='o', color='orange')

ax = fig.add_axes([0.25, 0.1, 0.55, 0.05])
generation = Slider(ax, label='Generation', valmin=0, valmax=params_3['max_gen'], valstep=1, valinit=i)

# matplotlib needs a live reference to a widget for it to stay responsive.
# The global name `generation` is rebound by other cells, so an additional
# reference is stored on the figure itself.
fig._generation_slider = generation

def update(val, dots=dots, log=DATA):
    """Move the scatter points of this figure to the selected generation.

    `val` is the new slider value passed in by matplotlib. `dots` and `log`
    are bound as defaults when the function is defined, so the callback keeps
    working on this figure's scatter and this run's log even after other
    cells rebind the global names `dots`, `generation`, `DATA` or `update`.
    """
    gen = int(val)  # slider values may arrive as floats; log keys are generation numbers
    dots.set_offsets(np.c_[log[gen][0], log[gen][1]])

generation.on_changed(update)


<IPython.core.display.Javascript object>

0

## 4: Demo GA - Feature selection

In [16]:
# View data set
# Read the two CSV files of the feature-selection task, using the first
# column of each file as the index, and render both frames.
# NOTE(review): display() of a whole frame relies on pandas' row/column
# truncation; data_df.head() would keep the stored output smaller.

data_df = pd.read_csv('data/data.csv', index_col=[0])
values_df = pd.read_csv('data/values.csv', index_col=[0])
display(data_df, values_df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,101
0,8.0,1.0,0.19,0.33,0.02,0.90,0.12,0.17,0.34,0.47,...,0.12,0.42,0.50,0.51,0.64,0.12,0.26,0.20,0.32,0.20
1,53.0,1.0,0.00,0.16,0.12,0.74,0.45,0.07,0.26,0.59,...,0.21,0.50,0.34,0.60,0.52,0.02,0.12,0.45,0.00,0.67
2,24.0,1.0,0.00,0.42,0.49,0.56,0.17,0.04,0.39,0.47,...,0.14,0.49,0.54,0.67,0.56,0.01,0.21,0.02,0.00,0.43
3,34.0,1.0,0.04,0.77,1.00,0.08,0.12,0.10,0.51,0.50,...,0.19,0.30,0.73,0.64,0.65,0.02,0.39,0.28,0.00,0.12
4,42.0,1.0,0.01,0.55,0.02,0.95,0.09,0.05,0.38,0.38,...,0.11,0.72,0.64,0.61,0.53,0.04,0.09,0.02,0.00,0.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1988,28.0,10.0,0.02,0.41,1.00,0.21,0.02,0.01,0.54,0.57,...,0.01,0.75,0.57,0.74,0.71,0.03,0.17,0.02,0.00,0.19
1989,12.0,10.0,0.01,0.40,0.10,0.87,0.12,0.16,0.43,0.51,...,0.22,0.28,0.34,0.48,0.39,0.01,0.28,0.05,0.00,0.09
1990,6.0,10.0,0.05,0.96,0.46,0.28,0.83,0.32,0.69,0.86,...,0.53,0.25,0.17,0.10,0.00,0.02,0.37,0.20,0.00,0.45
1991,9.0,10.0,0.16,0.37,0.25,0.69,0.04,0.25,0.35,0.50,...,0.25,0.68,0.61,0.79,0.76,0.08,0.32,0.18,0.91,0.23


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,101
0,6.0,10.0,0.2,0.78,0.14,0.46,0.24,0.77,0.5,0.62,...,0.68,0.5,0.34,0.35,0.68,0.11,0.3,0.05,1.0,0.48


In [17]:
# Configuration for the feature-selection run: long individuals, a very
# small population and only a handful of generations.
params_4 = dict(
    indiv_len=102,
    pop_size=4,            # must be an even number
    num_parents=4,         # may not exceed pop_size
    p_m=0.05,
    p_c=0.6,
    max_sine_exp=7,        # 2^7 -> [0,128]
    max_gen=5,
    sine_constraint=False,
)

# Same GA class as in the sine sections, now built from params_4 and scored
# with feature_fitness instead of sine_fitness.
algorithm_4 = GA(params_4, fitness=feature_fitness)

In [18]:
# Test regression class
# Manual check of LinReg outside the GA loop: create a fresh population and
# score every individual directly with LinReg.get_fitness(). The scores are
# collected in `errors`; this cell does not print or plot them.
import LinReg

test_pop = algorithm_4.init_pop()
#print(test_pop)

linreg = LinReg.LinReg()
#feats = values_df.to_numpy().shape[1]
#x = data_df.to_numpy().reshape(feats, data_df.to_numpy().shape[0])
#y = values_df.to_numpy().reshape(feats, 1)
#linreg.train(x, y)
errors = []
for indiv in test_pop:
    # Column selection driven by the individual; presumably the bit string
    # acts as a column mask - TODO confirm against LinReg.get_columns.
    x = linreg.get_columns(data_df, indiv)
    y = linreg.get_columns(values_df, indiv)
    #print(x.shape, y.shape)
    feats = y.shape[1]
    # NOTE(review): if x and y are NumPy arrays, reshape() keeps the flat
    # element order and is therefore NOT a transpose. If the intent was to
    # swap the axes of x, `.T` would be needed - confirm what shapes
    # LinReg.get_fitness expects.
    x = x.reshape(feats, x.shape[0])
    y = y.reshape(feats, 1)
    error = linreg.get_fitness(x, y)
    errors.append(error)
    #print(error)
    

In [19]:
# Run the feature-selection GA configured in algorithm_4 and keep the value
# returned by run() for the printing cell below.
eval_log_4 = algorithm_4.run()

Algorithm succsessfully executed


In [22]:
# Generational data: population, real value, fitness value
# NOTE(review): params_4 sets max_gen to 5, so with gen_print = 10 presumably
# only generation 0 is printed - lower gen_print to see more generations.
gen_print = 10  # only every gen_print-th generation is printed

# The loop variable is deliberately NOT called `generation`: the plotting
# cells bind the global name `generation` to a matplotlib Slider, and
# reusing the name here would replace that widget with a plain int whenever
# this cell is run after a plot cell.
for gen_idx, data in eval_log_4.items():
    if gen_idx % gen_print != 0:
        continue
    # data[2] is printed as the population, data[0] as its RMSE values and
    # data[1] as its fitness values (shown with two decimals).
    fitness_2dp = ['{:.2f}'.format(item) for item in data[1]]
    print('Generation:', gen_idx, '\n')
    print('Population:', data[2], '\n')
    print('Population RMSE:', data[0], '\n')
    print('Population fitness value:', fitness_2dp, '\n\n')

Generation: 0 

Population: ['110001100110110001110101001101001100101011110111101100010000100101000111100000001101101000011111001010', '001011100010101101111101111000011100110110011101011111001110110001011010101000001011111110110001110000', '001001001110010110101110111000000111100100000001101011010111011111110100110101010010100110110010001111', '001111101000101011011110011111110100010111011011001110011111001110110100000110011001100100011001000110'] 

Population real value: [0.4821424081031518, 0.21291334588338004, 0.3027355313333732, 0.2945572633243046] 

Population sine fitness value: ['0.00', '0.27', '0.18', '0.19'] 


