In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

from GeneticFeatures.GeneticFeatureGenerator import *
from GeneticFeatures.Node import *

In [15]:
df = pd.read_csv('winequality-red.csv')
# x is everything except the last column
X = df.iloc[:, :-1].values
# y is the last column
Y = df.iloc[:, -1].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [16]:
def add(x, y):
    return x + y

def sub(x, y):
    return x - y

def mul(x, y):
    return x * y

def div(x, y):
    return x / (y + 1e-10)

In [17]:
generator = GeneticFeatureGenerator(
    [add, sub, mul, div],
    operation_names = ["+", "-", "*", "/"],
    popsize = 100, 
    maxiter = 60, 
    mutation_rate = 0.05
)

In [18]:
multifeature = MultiFeatureGenerator(X_train, Y_train, generator, 8, 5000, verbose=True)

In [19]:
trees = [i for i in multifeature]

Split:  1
Split:  2:  59 Best fitness:  0.30933544303797467 Generation best 0.25890031645569623
Split:  3:  59 Best fitness:  0.2164161392405063 Generation best 0.174525316455696247
Split:  4:  59 Best fitness:  0.28916139240506333 Generation best 0.23872626582278478
Split:  5:  59 Best fitness:  0.299367088607595 Generation best 0.2663765822784810484
Iteration:  35 Best fitness:  0.3114715189873418 Generation best 0.31147151898734189

In [None]:
trees

[<Node.Tree at 0x12d677e7cd0>,
 <Node.Tree at 0x12d67df0850>,
 <Node.Tree at 0x12d67e72040>,
 <Node.Tree at 0x12d68a6a7f0>,
 <Node.Tree at 0x12d6a84e0a0>,
 <Node.Tree at 0x12d6837b4c0>,
 <Node.Tree at 0x12d68022c10>,
 <Node.Tree at 0x12d68816c40>]

In [None]:
new_train_features = np.array([tree(X_train) for tree in trees]).T
new_test_features = np.array([tree(X_test) for tree in trees]).T

new_train_features_concated = np.concatenate([X_train, new_train_features], axis=1)
new_test_features_concated = np.concatenate([X_test, new_test_features], axis=1)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# test with old featues
rf = RandomForestRegressor(n_estimators=100)
rf.fit(X_train, Y_train)
print("Old features score: ", rf.score(X_test, Y_test))

# test with new features
rf = RandomForestRegressor(n_estimators=100)
rf.fit(new_train_features_concated, Y_train)
print("New features score: ", rf.score(new_test_features_concated, Y_test))

Old features score:  0.527777282983906
New features score:  0.5448850102362559
