## Imports

In [None]:
import math 
import pandas as pd
import random 

import numpy as np
from copy import deepcopy
from sklearn.model_selection import train_test_split

## Genetic Programming (regression)

In [None]:
class Function():
    """Callable primitive used as an inner node of an expression tree.

    Attributes:
        function: The wrapped Python callable.
        arg_count: Number of arguments the callable expects (its arity).
        name: Short label returned by ``str()``.
    """

    def __init__(self, function, arg_count, name):
        self.arg_count = arg_count
        self.function = function
        self.name = name

    def __call__(self, *params):
        """Apply the wrapped callable to ``params`` and return its result.

        The arguments are forwarded directly. Building a source string and
        passing it to ``eval`` fails for any value whose text form is not a
        valid Python literal (for example ``inf`` or ``nan``) and is
        needlessly slow.
        """
        return self.function(*params)

    @classmethod
    def get_classname(cls):
        """Return the class name; used for type dispatch by the tree nodes."""
        return cls.__name__

    def __str__(self):
        return str(self.name)


In [None]:
class Literal():
    """Named variable whose value is looked up in a context at evaluation time.

    Attributes:
        key: The key used to index the context passed to ``__call__``.
    """

    def __init__(self, key):
        self.key = key

    def __call__(self, context):
        """Return ``context[self.key]``; lookup errors propagate to the caller."""
        return context[self.key]

    @classmethod
    def get_classname(cls):
        """Return the class name; used for type dispatch by the tree nodes."""
        return cls.__name__

    def __str__(self):
        # __str__ must return a str; a non-string key (e.g. an integer
        # column label) would otherwise make str() raise TypeError.
        return str(self.key)

## Functions

In [None]:
# Primitive set for the evolved expressions: four binary arithmetic
# operators followed by two unary trigonometric functions.
addition = Function(function=lambda a, b: a + b, arg_count=2, name='+')
subtraction = Function(function=lambda a, b: a - b, arg_count=2, name='-')
multiplication = Function(function=lambda a, b: a * b, arg_count=2, name='*')
# Division evaluates to 1 instead of raising when the divisor equals zero.
division = Function(
    function=lambda numerator, divisor: numerator / divisor if divisor != 0 else 1,
    arg_count=2,
    name='/',
)
sin = Function(function=math.sin, arg_count=1, name='sin')
cos = Function(function=math.cos, arg_count=1, name='cos')

In [None]:
class Node():
    """Binary expression-tree node.

    The payload is either a numeric constant (``int``/``float``) or an
    object exposing ``get_classname()`` that reports ``'Function'`` or
    ``'Literal'``.

    Attributes:
        left: Left child node, or None.
        right: Right child node, or None.
    """

    def __init__(self, value):
        self.__value = value
        self.left = None
        self.right = None

    def content(self):
        """Return the payload rendered as a string."""
        return str(self.__value)

    def value(self, context):
        """Evaluate the subtree rooted at this node.

        Args:
            context: Mapping-like object handed to Literal payloads to
                resolve their value.

        Returns:
            The constant itself, the Literal resolved against ``context``,
            or the Function applied to the evaluated children. None when a
            Function node lacks the children its arity requires, or when
            the payload kind is not recognised.
        """
        payload = self.__value

        # Numeric constants evaluate to themselves.
        if isinstance(payload, (int, float)):
            return payload

        kind = payload.get_classname()

        if kind == 'Function':
            # Unary functions read only the left child.
            if payload.arg_count == 1 and self.left is not None:
                return payload(self.left.value(context))

            if (payload.arg_count == 2
                    and self.left is not None
                    and self.right is not None):
                return payload(self.left.value(context),
                               self.right.value(context))

        if kind == 'Literal':
            return payload(context)

        return None

    def __str__(self):
        # content is a method and has to be called; str(self.content)
        # would render the bound-method object instead of the payload.
        return self.content()

In [None]:
class Chromozome():
    """Candidate solution: an expression tree plus a cached fitness score.

    Fitness is the summed absolute error over a data set, so a lower score
    is better. The score is cached after the first evaluation and dropped
    whenever the tree is altered by ``mutate`` or produced by ``crossover``.

    Attributes:
        root: Root node of the expression tree.
    """

    def __init__(self, root):
        self.root = root
        # Cached result of fitness(); None means "not evaluated yet".
        self.__fitness = None

    def __lt__(self, other):
        # Compares cached scores: fitness() must have been called on both
        # operands first, otherwise comparing None values raises TypeError.
        return self.__fitness < other.__fitness

    def __gt__(self, other):
        # Same precondition as __lt__.
        return self.__fitness > other.__fitness

    def mutate(self, literals, functions, depth):
        """Replace the left subtree of a random inner node with a new tree.

        Args:
            literals: Literal objects available for the leaves.
            functions: Function objects available for the inner nodes.
            depth: Depth of the freshly generated subtree.

        Returns:
            This chromozome (mutated in place), to allow call chaining.
        """
        # Generate the subtree before picking the node so the random
        # draws happen in the same order as a direct assignment would.
        subtree = self.__generate_random_tree(literals, functions, depth)
        self.choose_random_node().left = subtree
        # The tree changed, so any previously cached score is stale.
        self.__fitness = None
        return self

    def value(self, context):
        """Evaluate the expression tree against ``context``."""
        return self.root.value(context)

    def crossover(self, other):
        """Return two offspring made by swapping left subtrees of copies.

        Neither parent is modified: both are deep-copied, one random inner
        node is picked in each copy and their left subtrees are exchanged.
        """
        offspring1 = deepcopy(self)
        offspring2 = deepcopy(other)

        a = offspring1.choose_random_node()
        b = offspring2.choose_random_node()
        a.left, b.left = b.left, a.left

        # deepcopy also copied the parents' cached scores, which no longer
        # describe the recombined trees.
        offspring1.__fitness = None
        offspring2.__fitness = None
        return offspring1, offspring2

    def choose_random_node(self):
        """Return a random non-root node that has a left child.

        Raises:
            IndexError: If the tree contains no such node.
        """
        return random.choice(self.__get_nodes())

    def __get_nodes(self):
        """List, in breadth-first order, every non-root node with a left child."""
        from collections import deque

        nodes = []
        pending = deque([self.root])

        while pending:
            current = pending.popleft()
            for child in (current.left, current.right):
                if child is not None:
                    pending.append(child)
                    nodes.append(child)

        return [node for node in nodes if node.left is not None]

    def fitness(self, train_data):
        """Return the summed absolute error of this tree over ``train_data``.

        Args:
            train_data: pandas DataFrame whose last column holds the
                expected output. Each row is passed to the tree as its
                evaluation context.

        Returns:
            The total absolute error. The result is cached; later calls
            return the cached score regardless of the data passed in,
            until the tree is changed by ``mutate``.
        """
        if self.__fitness is not None:
            return self.__fitness

        # The target column is the same for every row; look it up once.
        columns = list(train_data)
        expected_label = columns[-1] if columns else None

        error = 0
        for _, row in train_data.iterrows():
            error += abs(row[expected_label] - self.value(row))

        self.__fitness = error
        return error

    def __generate_random_tree(self, literals, functions, depth):
        """Build a random full tree of the given depth.

        Leaves (depth 1) hold a random literal or an integer from 1 to 9;
        every other node holds a random function. Depth 0 yields None.
        """
        if depth == 0:
            return None

        if depth == 1:
            return Node(random.choice(list(literals) + list(range(1, 10))))

        root = Node(random.choice(functions))
        root.left = self.__generate_random_tree(literals, functions, depth - 1)
        root.right = self.__generate_random_tree(literals, functions, depth - 1)

        return root

    def print_tree(self):
        """Print the tree in pre-order, one node per line, indented by depth."""
        self.__print_tree2(self.root)

    def __print_tree2(self, root, indent=0):
        if root is None:
            return

        print(indent * '   ' + root.content())
        self.__print_tree2(root.left, indent + 1)
        self.__print_tree2(root.right, indent + 1)

    

In [None]:
class Population():
    """A generation of chromozomes plus the parameters used to create them.

    Attributes:
        size: Number of individuals generated at construction time (int).
        depth: Depth of the randomly generated initial trees.
        functions: Function objects usable as inner nodes.
        literals: Literal objects usable as leaves.
        individs: The current list of chromozomes; use ``len(individs)``
            for the current population count.
    """

    def __init__(self, size, depth, functions, literals):
        self.depth = depth
        self.functions = functions
        self.literals = literals
        self.size = size
        self.individs = []
        self.__generate_population(depth)

    def best_chromozome(self, train_data):
        """Return the individual with the lowest fitness on ``train_data``.

        Raises:
            ValueError: If the population is empty.
        """
        return min(self.individs, key=lambda x: x.fitness(train_data))

    def selection(self, train_data, cols, mutation_depth=3):
        """Replace the population with the next generation.

        The best 20% (at least one individual) survive unchanged. The rest
        of the generation is filled with mutated crossover offspring of
        parents drawn at random from the generation being built.

        Args:
            train_data: Data set passed to each individual's ``fitness``.
            cols: Unused; kept for interface compatibility.
            mutation_depth: Depth of the subtree inserted by each mutation.
        """
        target_size = len(self.individs)
        ranked = sorted(self.individs, key=lambda x: x.fitness(train_data))

        # Keep at least one survivor so that breeding has a parent to draw
        # from even when the population has fewer than five individuals.
        elite_count = max(1, int(0.2 * target_size))
        new_generation = ranked[:elite_count]

        # breed the new generation until it reaches the previous size
        while len(new_generation) < target_size:
            individ1 = random.choice(new_generation)
            individ2 = random.choice(new_generation)
            offsprings = individ1.crossover(individ2)
            new_generation.append(
                offsprings[0].mutate(self.literals, self.functions, mutation_depth))
            new_generation.append(
                offsprings[1].mutate(self.literals, self.functions, mutation_depth))

        # Offspring arrive in pairs, so the last pair may overshoot by one;
        # trim to keep the population size constant across generations.
        self.individs = new_generation[:target_size]

    def __generate_population(self, depth):
        """Fill ``individs`` with ``size`` random chromozomes."""
        for _ in range(self.size):
            root = self.__generate_random_tree(depth)
            self.individs.append(Chromozome(root))

    def __generate_random_tree(self, depth):
        """Build a random full tree of the given depth.

        Leaves (depth 1) hold a random literal or an integer from 1 to 9;
        every other node holds a random function. Depth 0 yields None.
        """
        if depth == 0:
            return None

        if depth == 1:
            return Node(random.choice(self.literals + list(range(1, 10))))

        root = Node(random.choice(self.functions))
        root.left = self.__generate_random_tree(depth - 1)
        root.right = self.__generate_random_tree(depth - 1)

        return root

    # NOTE: the former ``size()`` method was removed. It was shadowed by the
    # integer ``size`` attribute set in __init__, so it could never be called
    # on an instance, and it returned the individuals list rather than a size.

In [None]:
class Application():
    """Symbolic regression driver: loads data, evolves a population and
    keeps track of the best expression found.

    The last column of the data file is treated as the expected output and
    every other column becomes a Literal available to the expressions.

    Args:
        data_path: CSV file to load; only its first 30 rows are used.
        generations_count: Number of generations evolved by ``train``.
        generation_size: Number of individuals in the population.
        chromozome_depth: Depth of the initial random expression trees.
    """

    def __init__(self, data_path="data.txt", generations_count=5,
                 generation_size=30, chromozome_depth=5):
        self.generations_count = generations_count
        self.generation_size = generation_size
        self.chromozome_depth = chromozome_depth
        self.functions = [
            addition,
            subtraction,
            multiplication,
            division,
            sin,
            cos,
        ]

        # Copy the slice so later in-place edits (normalize_data) operate
        # on an independent frame rather than on a view of the full file.
        self.data = pd.read_csv(data_path)[:30].copy()
        self.columns = list(self.data)
        self.literals = [Literal(x) for x in self.columns[:-1]]
        self.expected = self.columns[-1]
        self.train_data = []
        self.test_data = []
        # normalize_data() is available but is not applied here.
        self.split_data()
        self.population = Population(
            self.generation_size,
            self.chromozome_depth,
            self.functions,
            self.literals,
        )
        self.solution = None

    def train(self):
        """Evolve the population and remember the best chromozome seen.

        For each generation this prints the fitness of every individual,
        runs selection, prints a blank line and then prints the fitness of
        the best solution found so far.
        """
        for _ in range(self.generations_count):
            for x in self.population.individs:
                print(x.fitness(self.train_data))

            self.population.selection(self.train_data, self.columns)

            print()
            best = self.population.best_chromozome(self.train_data)
            if self.solution is None:
                self.solution = best
            else:
                # Compare by evaluated fitness; ties keep the current solution.
                self.solution = min(
                    self.solution, best,
                    key=lambda chromozome: chromozome.fitness(self.train_data))

            print(self.solution.fitness(self.train_data))

    def predict(self, input_values):
        """Evaluate the best expression found so far on ``input_values``.

        Args:
            input_values: Mapping-like object (for example a DataFrame row)
                indexable by the feature column names.

        Returns:
            The value computed by the best chromozome.

        Raises:
            RuntimeError: If no solution exists yet (``train`` not run).
        """
        if self.solution is None:
            raise RuntimeError(
                "predict() called before train(): no solution available")
        return self.solution.value(input_values)

    def normalize_data(self):
        """Min-max scale every column of ``self.data`` into [0, 1] in place.

        A column whose values are all equal is set to 0.5 throughout.
        """
        for c in self.columns:
            column = self.data[c].astype(float)
            minimum = column.min()
            maximum = column.max()
            if maximum == minimum:
                self.data[c] = 0.5
            else:
                self.data[c] = (column - minimum) / (maximum - minimum)

    def split_data(self):
        """Split ``self.data`` into 75% training and 25% test rows (seed 42)."""
        self.train_data, self.test_data = train_test_split(
            self.data, test_size=0.25, random_state=42)
         

In [None]:
# Build the application (reads the data file and creates the initial
# population) and run the training loop, which prints fitness values
# for every generation.
a = Application()
a.train()


In [None]:
# Manual check of crossover, mutation and chromozome ordering on a fresh
# application instance.
a = Application()
b, c = a.population.individs[0], a.population.individs[1]

# Recombine the first two individuals; the parents themselves stay intact.
d, e = b.crossover(c)

# Any of the trees can be inspected with print_tree(), e.g. b.print_tree().
candidates = [b, c, d, e]

# Fitness of both parents followed by both offspring.
for candidate in candidates:
    print(candidate.fitness(a.data))

print()

# Mutate each chromozome in place and print the fitness it then reports.
for candidate in candidates:
    print(candidate.mutate(a.literals, a.functions, 3).fitness(a.data))

# Ordering relies on the chromozomes' comparison operators.
h = max(candidates)
g = min(candidates)

print()
print('max {}'.format(h.fitness(a.data)))
print('min {}'.format(g.fitness(a.data)))

In [None]:
# Print only the record labelled 0: the loop is left right after its
# first pass, and nothing is printed when the data set is empty.
for row in range(len(a.data)):
    print(a.data.loc[row])
    break

In [None]:
# Preview the raw file: rows at positions 1 through 29 (position 0 is
# skipped by this slice).
pd.read_csv("data.txt")[1:30]