In [3]:
import sys
import os
sys.path.insert(0, os.path.abspath(".."))

from slim_gsgp.datasets.data_loader import load_pandas_df
import pandas as pd
import numpy as np
from slim_gsgp.main_gp import gp
from slim_gsgp.main_slim import slim
from slim_gsgp.main_gsgp import gsgp
from slim_gsgp.utils.utils import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

import torch
from imblearn.over_sampling import SMOTENC, SMOTE

In [29]:
def print_scores(y_test, predictions):
    print("Roc Score:", roc_auc_score(y_test, predictions))
    print("F1 Score:", f1_score(y_test, predictions))
    print("Accuracy Score:", accuracy_score(y_test, predictions))

In [30]:
df = pd.read_csv('data/data_prepared/blood.csv')

In [31]:
X, y = load_pandas_df(df, X_y=True)

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, p_test=0.2)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, p_test=0.2)

In [33]:
X_train

tensor([[4.0000e+00, 5.0000e+00, 1.2500e+03, 5.8000e+01],
        [1.0000e+00, 9.0000e+00, 2.2500e+03, 5.1000e+01],
        [4.0000e+00, 2.0000e+00, 5.0000e+02, 4.0000e+00],
        ...,
        [2.0000e+00, 6.0000e+00, 1.5000e+03, 1.5000e+01],
        [1.4000e+01, 4.0000e+00, 1.0000e+03, 4.0000e+01],
        [4.0000e+00, 4.0000e+00, 1.0000e+03, 4.6000e+01]])

In [26]:
slim_f1= slim(X_train=X_train, y_train=y_train, seed=1, log_path='log/slim_f1.csv', ms_lower = 0, ms_upper= 0.3, p_inflate = 0.5,
                  X_test=X_val, y_test=y_val, slim_version='SLIM+SIG2', pop_size=200, n_iter=1000, minimization=False, fitness_function='accuracy',
                  )
predictions = slim_f1.predict(X_test)
predictions = torch.round(torch.sigmoid(predictions))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.7612687945365906  |   0.75                   |   0.10419082641601562  |      7           |
|     dataset_1           |       1      |   0.7612687945365906  |   0.75                   |   0.13662266731262207  |      7           |
|     dataset_1           |       2      |   0.7612687945365906  |   0.75                   |   0.10528302192687988  |      7           |
|     dataset_1           |       3      |   0.7629382014274597  |   0.7583333253860474     |   0.12095475196838379  |      20          |
|     dataset_1  

KeyboardInterrupt: 

In [None]:
X_train_over, y_train_over = SMOTE( random_state=42).fit_resample(X_train, y_train)
X_train_over = torch.tensor(X_train_over).float()
y_train_over = torch.tensor(y_train_over).float()

In [None]:
slim_rmse = slim(X_train=X_train, y_train=y_train,
                  X_test=X_val, y_test=y_val, log_path='log/slim_rmse.csv', 
                   slim_version='SLIM+SIG2', pop_size=200, n_iter=1000, minimization=True, fitness_function='sigmoid_rmse',
                  ms_lower=0, ms_upper=0.5, p_inflate=0.5)

predictions = slim_rmse.predict(X_test)
predictions = torch.round(torch.sigmoid(predictions))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.43729355931282043 |   0.4102255702018738     |   0.08507895469665527  |      9           |
|     dataset_1           |       1      |   0.43384045362472534 |   0.4082792103290558     |   0.10199856758117676  |      24          |
|     dataset_1           |       2      |   0.43384045362472534 |   0.4082792103290558     |   0.10250282287597656  |      24          |
|     dataset_1           |       3      |   0.4325994551181793  |   0.40630215406417847    |   0.10356354713439941  |      57          |
|     dataset_1  

In [None]:
gsgp_f1 = gsgp(X_train=X_train, y_train=y_train,
                  X_test=X_val, y_test=y_val, reconstruct=True, log_path='log/gsgp_f1.csv', 
                  pop_size=500, n_iter=500, minimization=False, fitness_function='f1_score', seed = 0
                  )

predictions = gsgp_f1.predict(X_test)
predictions = torch.round(torch.sigmoid(predictions))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.5467127561569214  |   0.4146341383457184     |   0.5616695880889893   |      127         |
|     dataset_1           |       1      |   0.559999942779541   |   0.37974685430526733    |   0.28173089027404785  |      11          |
|     dataset_1           |       2      |   0.559999942779541   |   0.37974685430526733    |   0.2803232669830322   |      11          |
|     dataset_1           |       3      |   0.559999942779541   |   0.37974685430526733    |   0.2505354881286621   |      11          |
|     dataset_1  

In [None]:
gsgp_rmse = gsgp(X_train=X_train_over, y_train=y_train_over,
                  X_test=X_val, y_test=y_val, reconstruct=True, log_path='log/gsgp_rmse.csv',
                  pop_size=500, n_iter=500, minimization=True, fitness_function='sigmoid_rmse', seed = 0,
                  )

predictions = gsgp_rmse.predict(X_test)
predictions = torch.round(torch.sigmoid(predictions))
print_scores(y_test, predictions)

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.478540301322937   |   0.5302056670188904     |   0.9723749160766602   |      5           |
|     dataset_1           |       1      |   0.4644612669944763  |   0.4699842631816864     |   0.625159740447998    |      11          |
|     dataset_1           |       2      |   0.46438002586364746 |   0.49265673756599426    |   0.6738924980163574   |      19          |
|     dataset_1           |       3      |   0.4607837200164795  |   0.4389142394065857     |   0.6179013252258301   |      23          |
|     dataset_1  

In [None]:
gp_f1 = gp(X_train=X_train, y_train=y_train, p_xo=0.2,
                  X_test=X_val, y_test=y_val, pop_size=200, n_iter=100, minimization=False, fitness_function='f1_score', seed = 0,
                  )
predictions = gp_f1.predict(X_test)
predictions = torch.round(torch.sigmoid(predictions))
print_scores(y_test, predictions)

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.44525253772735596 |   0.45308926701545715    |   0.18932867050170898  |      5           |
|     dataset_1           |       1      |   0.44525253772735596 |   0.45308926701545715    |   0.2543361186981201   |      5           |
|     dataset_1           |       2      |   0.44525253772735596 |   0.45308926701545715    |   0.25008082389831543  |      5           |
|     dataset_1           |       3      |   0.44525253772735596 |   0.45308926701545715    |   0.31145787239074707  |      5           |
|     dataset_1  

In [None]:
y_test

tensor([0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.])

In [None]:
gp_rmse = gp(X_train=X_train_over, y_train=y_train_over,
                  X_test=X_val, y_test=y_val, pop_size=200, n_iter=100, minimization=True, fitness_function='sigmoid_rmse', seed = 0,
                  )
predictions = gp_rmse.predict(X_test)
predictions = torch.round(torch.sigmoid(predictions))
print_scores(y_test, predictions)



Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     dataset_1           |       0      |   0.47669273614883423 |   0.5389356017112732     |   0.18155336380004883  |      5           |
|     dataset_1           |       1      |   0.47669273614883423 |   0.5389356017112732     |   0.1466360092163086   |      5           |
|     dataset_1           |       2      |   0.47613951563835144 |   0.5272106528282166     |   0.14223527908325195  |      5           |
|     dataset_1           |       3      |   0.4737030863761902  |   0.5282784700393677     |   0.1464700698852539   |      5           |
|     dataset_1  

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
rf = RandomForestClassifier(max_depth=2)
rf.fit(X_train, y_train)
predictions = rf.predict(X_test)
print_scores(y_test, predictions)

gb = GradientBoostingClassifier(max_depth=2)
gb.fit(X_train, y_train)
predictions = gb.predict(X_test)
print_scores(y_test, predictions)

Roc Score: 0.5
F1 Score: 0.0
Accuracy Score: 0.7931034482758621
Roc Score: 0.5615942028985508
F1 Score: 0.25
Accuracy Score: 0.7931034482758621


In [None]:
final_tree = gp(X_train=X_train, y_train=y_train, log_path='log/test.csv', max_depth=15,
                X_test=X_val, y_test=y_val, log_level=1, seed=2444,
                dataset_name='xyz', pop_size=200, n_iter=100)

Verbose Reporter
-----------------------------------------------------------------------------------------------------------------------------------------
|         Dataset         |  Generation  |     Train Fitness     |       Test Fitness       |        Timing          |      Nodes       |
-----------------------------------------------------------------------------------------------------------------------------------------
|     xyz                 |       0      |   0.3630543351173401  |   0.3774508535861969     |   0.10421562194824219  |      5           |
|     xyz                 |       1      |   0.35974404215812683 |   0.3771507441997528     |   0.10279154777526855  |      15          |
|     xyz                 |       2      |   0.35974404215812683 |   0.3771507441997528     |   0.10576272010803223  |      15          |
|     xyz                 |       3      |   0.35974404215812683 |   0.3771507441997528     |   0.1434328556060791   |      15          |
|     xyz        