In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes, load_boston
from sklearn.model_selection import train_test_split

from GeneticFeatures.GeneticFeatureGenerator import *
from GeneticFeatures.Node import *

In [10]:
# Load the diabetes dataset bundled with scikit-learn and hold out 20% of the
# rows for testing; random_state pins the shuffle used for the split.
diabetes_ds = load_diabetes()
X, Y = diabetes_ds.data, diabetes_ds.target

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [11]:
def add(x, y):
    """Return ``x + y``; passed to the generator as the "+" operation."""
    total = x + y
    return total

def sub(x, y):
    """Return ``x - y``; passed to the generator as the "-" operation."""
    difference = x - y
    return difference

def mul(x, y):
    """Return ``x * y``; passed to the generator as the "*" operation."""
    product = x * y
    return product

def div(x, y):
    """Protected division: ``x / (y + 1e-10)`` that never divides by zero.

    The denominator is shifted by a small epsilon so that ``y == 0`` does not
    divide by zero. The shift alone still fails when ``y == -1e-10``, because
    the shifted denominator is then exactly zero; in that case the epsilon
    itself is used as the denominator.

    Parameters
    ----------
    x, y : scalar or numpy.ndarray
        Numerator and denominator, combined with numpy broadcasting.

    Returns
    -------
    ``x / (y + 1e-10)`` wherever that denominator is non-zero, and
    ``x / 1e-10`` where it would have been zero.
    """
    eps = 1e-10
    shifted = y + eps
    # Guard the single case the epsilon shift does not cover (y == -eps).
    safe = np.where(shifted == 0, eps, shifted)
    return x / safe

In [12]:
# Search configuration for the genetic feature generator. The four binary
# operations are listed in the same order as their display symbols in
# `operation_names` (presumably matched by position -- confirm against
# GeneticFeatureGenerator).
generator = GeneticFeatureGenerator(
    [add, sub, mul, div],
    operation_names=["+", "-", "*", "/"],
    popsize=100,
    maxiter=40,
    clone_prob=0.1,
    mutation_rate=0.05,
)

In [13]:
# NOTE(review): the positional values 5, 3 and 10000 are not explained in this
# notebook. The log printed when the generator is iterated shows features 0-4
# and splits 0-2, so 5 and 3 are presumably the feature count and the split
# count -- confirm against MultiFeatureGenerator's signature. The meaning of
# 10000 cannot be determined from here.
multifeature = MultiFeatureGenerator(
    X_train,
    Y_train,
    generator,
    5,
    3,
    10000,
    verbose=False,
)

In [14]:
# Exhaust the generator once and keep every yielded tree; each tree is later
# called on a feature matrix to produce one new feature.
trees = list(multifeature)

Split:  0 Feature:  0
Split:  1 Feature:  1
Split:  2 Feature:  2
Split:  0 Feature:  3
Split:  1 Feature:  4


In [15]:
# Evaluate every evolved tree on each split, then transpose the stacked
# outputs so the per-tree results lie along the last axis.
new_train_features = np.transpose(np.array([tree(X_train) for tree in trees]))
new_test_features = np.transpose(np.array([tree(X_test) for tree in trees]))

# Append the generated features to the right of the original feature matrix.
new_train_features_concated = np.concatenate((X_train, new_train_features), axis=1)
new_test_features_concated = np.concatenate((X_test, new_test_features), axis=1)

In [16]:
from sklearn.ensemble import RandomForestRegressor

# Both forests use the same fixed random_state so that the two scores differ
# because of the feature sets, not because of run-to-run randomness in the
# forest itself. score() is evaluated on the held-out test split.

# test with old features
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, Y_train)
print("Old features score: ", rf.score(X_test, Y_test))

# test with new features
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(new_train_features_concated, Y_train)
print("New features score: ", rf.score(new_test_features_concated, Y_test))

Old features score:  0.4195769562648065
New features score:  0.4772563426014842
