## Imports

In [1]:
import math 
import pandas as pd
import random 

import numpy as np
from copy import deepcopy
from sklearn.model_selection import train_test_split

## Genetic Programming (regression)

In [2]:
class Function():
    
    def __init__(self, function, arg_count, name):
        self.arg_count = arg_count
        self.function = function
        self.name = name
               
    def __call__(self, *params): 
        args = ', '.join([str(x) for x in params])
        return eval('self.function({})'.format(args))
    
    @classmethod
    def get_classname(cls):
        return cls.__name__
    
    def __str__(self):
        return self.name


In [3]:
class Literal(): 
    
    def __init__(self, key): 
        self.key = key
        
    def __call__(self, context):
        return context[self.key]
    
    @classmethod
    def get_classname(cls):
        return cls.__name__
    
    def __str__(self): 
        return self.key

## Functions

In [4]:
addition = Function(lambda x, y: x + y, 2, '+')
subtraction = Function(lambda x, y: x - y, 2, '-')
multiplication = Function(lambda x, y: x * y, 2, '*')
division = Function(lambda x, y: 1 if y == 0 else x / y, 2, '/')
sin = Function(math.sin, 1, 'sin')
cos = Function(math.cos, 1, 'cos')

In [5]:
class Node(): 
    
    def __init__(self, value): 
        self.__value = value
        self.left = None
        self.right = None
    
    def content(self):
        return str(self.__value)
    
    def value(self, context):
        
        if (isinstance(self.__value, int)): 
            return self.__value
        
        if (isinstance(self.__value, float)):
            return self.__value
        
        if (self.__value.get_classname() == 'Function'): 
            if (self.__value.arg_count == 1): 
                if (self.left is not None): 
                    return self.__value(self.left.value(context))
                
            if (self.__value.arg_count == 2):
                if (self.left is not None and self.right is not None): 
                    return self.__value(self.left.value(context), self.right.value(context))
                
        if (self.__value.get_classname() == 'Literal'):
            return self.__value(context)
            
        return None
    
    def __str__(self):
        return str(self.content)

In [6]:
class Chromozome():
    
    def __init__(self, root):
        self.root = root
        self.__fitness = None
        
    def __lt__(self, other): 
        return self.__fitness < other.__fitness 
    
    def __gt__(self, other):
        return self.__fitness > other.__fitness
    
    def mutate(self, literals, functions, depth):
        self.choose_random_node().left = self.__generate_random_tree(literals, functions, depth)
        return self
    
    def value(self, context): 
        return self.root.value(context)
    
    def crossover(self, other):
        offspring1 = deepcopy(self)
        offspring2 = deepcopy(other)
        
        a = offspring1.choose_random_node()
        b = offspring2.choose_random_node()
        a.left, b.left = b.left, a.left
        return offspring1, offspring2

    
    def choose_random_node(self): 
        return random.choice(self.__get_nodes())

    def __get_nodes(self): 
        nodes = []
        aux = [self.root] 
        
        while len(aux) != 0: 
            x = aux.pop(0)
            if x.left is not None: 
                aux.append(x.left)
                nodes.append(x.left)
            if x.right is not None:
                aux.append(x.right)
                nodes.append(x.right)
        nodes = [ node for node in nodes if node.left is not None ]
        return nodes
                
    def fitness(self, train_data): 
        if self.__fitness is not None:
            return self.__fitness
        
        error = 0
        for i in range(len(train_data)):
            expected_label = list(train_data)[-1]
            row = train_data.loc[train_data.index[i]] 
            expected = train_data.loc[train_data.index[i]][expected_label]
            error += abs(expected - self.value(row))
        
        self.__fitness = error
        return error
    
    def __generate_random_tree(self, literals, functions, depth): 
        if depth == 0: 
            return None 
        
        if depth == 1: 
            return Node(random.choice(literals + [ x for x in range(1, 10) ]))
            
        root = Node(random.choice(functions))
        root.left =  self.__generate_random_tree(literals, functions, depth - 1)
        root.right = self.__generate_random_tree(literals, functions, depth - 1)
        
        return root
    
    def print_tree(self):
        self.__print_tree2(self.root)
        
    def __print_tree2(self, root, indent=0):
        if root is None: 
            return 

        print(indent * '   ' + root.content())
        self.__print_tree2(root.left, indent + 1 )
        self.__print_tree2(root.right, indent + 1)

    

In [7]:
class Population():
    
    def __init__(self, size, depth, functions, literals):
        self.depth = depth
        self.functions = functions 
        self.literals = literals
        self.size = size
        self.individs = []
        self.__generate_population(depth)
        
    def best_chromozome(self, train_data):
        for x in self.individs: 
            x.fitness(train_data)
        return min(self.individs)
    
    def selection(self, train_data, cols):
        new_generation = [] 
        self.individs = sorted(self.individs, key=lambda x: x.fitness(train_data))
        
        #the top 20% remain
        last_top = int(0.2 * len(self.individs))
        new_generation += [self.individs[i] for i in range(last_top)]
        
        #breed the new generation until population reaches previous generation
        while(len(new_generation) < len(self.individs)):
          
            individ1 = random.choice(new_generation)
            individ2 = random.choice(new_generation)
            offsprings = individ1.crossover(individ2)
            new_generation.append(offsprings[0].mutate(self.literals, self.functions, 3))
            new_generation.append(offsprings[1].mutate(self.literals, self.functions, 3))
        
        
#         move on to the next generation 
        self.individs = new_generation
        
    def __generate_population(self, depth):
        for _ in range(self.size):
            root = self.__generate_random_tree(depth)
            self.individs.append(Chromozome(root))
            
    def __generate_random_tree(self, depth): 
        if depth == 0: 
            return None 
        
        if depth == 1: 
            return Node(random.choice(self.literals + [ x for x in range(1, 10) ]))
            
        root = Node(random.choice(self.functions))
        root.left =  self.__generate_random_tree(depth - 1)
        root.right = self.__generate_random_tree(depth - 1)
        
        return root
        
    def size(self): 
        return self.individs

In [8]:
class Application():
    
    def __init__(self):
        self.generations_count = 5
        self.generation_size = 30
        self.chromozome_depth = 5
        self.functions = [
            addition, 
            subtraction, 
            multiplication, 
            division, 
            sin,
            cos
        ]
        
        self.data = pd.read_csv("data.txt")[:30]
        self.columns =  list(self.data)
        self.literals = [ Literal(x) for x in self.columns[:-1] ] 
        self.expected = self.columns[-1]
        self.train_data = []
        self.test_data = [] 
#         self.normalize_data()
        self.split_data()
        self.population = Population(
            self.generation_size, 
            self.chromozome_depth, 
            self.functions, 
            self.literals
        )
        self.solution = None
    
    def train(self):
        for _ in range(self.generations_count):
            for x in self.population.individs: 
                print(x.fitness(self.train_data))
                
            self.population.selection(self.train_data, self.columns)
            
            print()
            if self.solution is None: 
                self.solution = self.population.best_chromozome(self.train_data)
            else: 
                self.solution.fitness(self.train_data)
                self.solution = min(self.solution, self.population.best_chromozome(self.train_data))
                
            print(self.solution.fitness(self.train_data))
        
        
    def predict(self, input_values):
        self.solution
    
    def normalize_data(self):
        for c in self.columns:
            column = [float(item) for item in self.data[c]]
            minimum = min(column)
            maximum = max(column)
            for index in range(self.data.shape[0]):
                old_value = float(self.data.iloc[index][c])
                if (maximum == minimum):
                    new_value = 0.5
                else:
                    new_value = (old_value - minimum) / (maximum - minimum)
                self.data.ix[index, c] = new_value
                
    def split_data(self):
        self.train_data, self.test_data = train_test_split(self.data, test_size = 0.25, random_state=42)
         

In [9]:
a = Application()
a.train()


26.043191050312586
99.13969741877125
4.253067310194855
54.960540342073465
5195.955455191352
21935641.087732557
14.591539715924977
344.89013549318054
3014.309010724118
22.973525954978196
157.4646935223675
66130.0181762005
22.032937840450195
16.365282935595246
41.52486154020795
25.876947987934702
5.015218443075413
43.06311802572572
14.082455656916382
13.042937025758224
5.069726318004441
12.7472631048076
39.611450123197066
4.609399999714752
16.317855483772593
612.8383137770404
5.795266887363516
17.39059999900562
13.356507077966413
14.612169810028451

4.253067310194855
4.253067310194855
4.609399999714752
5.015218443075413
5.069726318004441
5.795266887363516
12.7472631048076
12.7472631048076
4.253067310194855
4.253067310194855
4.609399999714752
4.253067310194855
4.253067310194855
4.609399999714752
5.015218443075413
4.253067310194855
4.253067310194855
12.7472631048076
4.253067310194855
4.609399999714752
5.015218443075413
4.253067310194855
4.253067310194855
4.253067310194855
4.609399999714752

In [10]:
a = Application()
b = a.population.individs[0]
c = a.population.individs[1]

# b.print_tree()
# print('_' * 100)
# c.print_tree()

d,e = b.crossover(c)

# print('=' * 100)
# d.print_tree()
# print('_' * 100)
# e.print_tree()

print(b.fitness(a.data))
print(c.fitness(a.data))
print(d.fitness(a.data))
print(e.fitness(a.data))

print()

print(b.mutate(a.literals, a.functions, 3).fitness(a.data))
print(c.mutate(a.literals, a.functions, 3).fitness(a.data))
print(d.mutate(a.literals, a.functions, 3).fitness(a.data))
print(e.mutate(a.literals, a.functions, 3).fitness(a.data))

h = max(b, c, d, e)
g = min([b, c, d, e])

print()
print('max {}'.format(h.fitness(a.data)))
print('min {}'.format(g.fitness(a.data)))
# print(a.population.individs[0].print_tree())
# print(a.population.individs[0].value(a.data.loc[0]))
# print(a.population.individs[0].fitness(a.data))

18.66292627465139
2379.1807445366185
18.66292627465139
2184.841843183052

18.66292627465139
2379.1807445366185
18.66292627465139
2184.841843183052

max 2379.1807445366185
min 18.66292627465139


In [11]:
for row in range(0, len(a.data)):
    print(a.data.loc[row])
    break

subject#          1.000000
age              72.000000
sex               0.000000
test_time         5.643100
motor_UPDRS      28.199000
total_UPDRS      34.398000
Jitter(%)         0.006620
Jitter(Abs)       0.000034
Jitter:RAP        0.004010
Jitter:PPQ5       0.003170
Jitter:DDP        0.012040
Shimmer           0.025650
Shimmer(dB)       0.230000
Shimmer:APQ3      0.014380
Shimmer:APQ5      0.013090
Shimmer:APQ11     0.016620
Shimmer:DDA       0.043140
NHR               0.014290
HNR              21.640000
RPDE              0.418880
DFA               0.548420
PPE               0.160060
Name: 0, dtype: float64


In [12]:
pd.read_csv("data.txt")[1:30]

Unnamed: 0,subject#,age,sex,test_time,motor_UPDRS,total_UPDRS,Jitter(%),Jitter(Abs),Jitter:RAP,Jitter:PPQ5,...,Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,Shimmer:APQ11,Shimmer:DDA,NHR,HNR,RPDE,DFA,PPE
1,1,72,0,12.666,28.447,34.894,0.003,1.7e-05,0.00132,0.0015,...,0.179,0.00994,0.01072,0.01689,0.02982,0.011112,27.183,0.43493,0.56477,0.1081
2,1,72,0,19.681,28.695,35.389,0.00481,2.5e-05,0.00205,0.00208,...,0.181,0.00734,0.00844,0.01458,0.02202,0.02022,23.047,0.46222,0.54405,0.21014
3,1,72,0,25.647,28.905,35.81,0.00528,2.7e-05,0.00191,0.00264,...,0.327,0.01106,0.01265,0.01963,0.03317,0.027837,24.445,0.4873,0.57794,0.33277
4,1,72,0,33.642,29.187,36.375,0.00335,2e-05,0.00093,0.0013,...,0.176,0.00679,0.00929,0.01819,0.02036,0.011625,26.126,0.47188,0.56122,0.19361
5,1,72,0,40.652,29.435,36.87,0.00353,2.3e-05,0.00119,0.00159,...,0.214,0.01006,0.01337,0.02263,0.03019,0.009438,22.946,0.53949,0.57243,0.195
6,1,72,0,47.649,29.682,37.363,0.00422,2.4e-05,0.00212,0.00221,...,0.445,0.02376,0.02621,0.03488,0.07128,0.01326,22.506,0.4925,0.54779,0.17563
7,1,72,0,54.64,29.928,37.857,0.00476,2.5e-05,0.00226,0.00259,...,0.212,0.00979,0.01462,0.01911,0.02937,0.027969,22.929,0.47712,0.54234,0.23844
8,1,72,0,61.669,30.177,38.353,0.00432,2.9e-05,0.00156,0.00207,...,0.371,0.01774,0.02134,0.03451,0.05323,0.013381,22.078,0.51563,0.61864,0.20037
9,1,72,0,68.688,30.424,38.849,0.00496,2.7e-05,0.00258,0.00253,...,0.31,0.0203,0.0197,0.02569,0.06089,0.018021,22.606,0.50032,0.58673,0.20117
10,1,72,0,75.653,30.67,39.34,0.00465,2.6e-05,0.00238,0.0026,...,0.188,0.01069,0.01214,0.01844,0.03206,0.017443,25.672,0.49892,0.61068,0.17387
