# PSyKE's demo for regression tasks

Some imports.

In [8]:
from psyke import Extractor, Clustering
from psyke.tuning.pedro import PEDRO
from psyke.tuning import Objective
from psyke.tuning.crash import CRASH
from sklearn.tree import DecisionTreeRegressor
from psyke.utils.logic import pretty_theory
from psyke.utils.metrics import mae, mse, r2
from sklearn.model_selection import train_test_split
from psyke.utils import Target
from matplotlib import pyplot as plt
import pandas as pd

Import a dataset.

In [9]:
#dataset = pd.read_csv("../test/resources/datasets/df.csv")
#dataset = dataset[["X", "Y", "Z4"]].dropna()
dataset = pd.read_csv("../test/resources/datasets/CCPP.csv", sep=";", decimal=",")
#dataset

Split between train and test set in a reproducible way.

In [10]:
train, test = train_test_split(dataset, test_size=0.5, random_state=10)

We use as predictor a KNN and we train it.

In [11]:
#predictor = KNeighborsRegressor(n_neighbors=3).fit(train.iloc[:, :-1], train.iloc[:, -1])
predictor = DecisionTreeRegressor().fit(train.iloc[:, :-1], train.iloc[:, -1])
#predictor = LinearRegression().fit(train.iloc[:, :-1], train.iloc[:, -1])

predicted = predictor.predict(test.iloc[:, :-1]).flatten()
true = test.iloc[:, -1]

print(f'MAE = {mae(true, predicted):.2f}')
print(f'MSE = {mse(true, predicted):.2f}')
print(f'R2 = {r2(true, predicted):.2f}')

MAE = 3.41
MSE = 24.74
R2 = 0.92


We define a function to print the extractors' evaluation

In [12]:
def evaluate(name, extractor, true, predicted):
    extracted = extractor.predict(test.iloc[:, :-1])
    print(f'{name} performance ({extractor.n_rules} rules):\n'
          f'MAE = {mae(true, extracted):.2f}\nMAE fidelity = {mae(predicted, extracted):.2f}\n'
          f'R2 = {r2(true, extracted):.2f}\nR2 fidelity = {r2(predicted, extracted):.2f}\n')

We create several extractors that use ITER, GridEx and GridREx algorithms to extract prolog rules from the predictor.

In [13]:
creepy = Extractor.creepy(predictor, depth=3, error_threshold=0.02, output=Target.REGRESSION,
                          clustering=Clustering.exact)
theory_from_creepy = creepy.extract(train)
evaluate('CReEPy', creepy, true, predicted)
print('CReEPy extracted rules (ExACT):\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (4 rules):
MAE = 3.37
MAE fidelity = 3.48
R2 = 0.94
R2 fidelity = 0.93

CReEPy extracted rules (ExACT):

'PE'(AP, AT, RH, V, PE) :-
    AT in [6.21, 32.45], V in [35.39, 50.16], AP in [998.07, 1026.40], RH in [35.63, 100.10], PE is 499.89 - 2.16 * AP - 0.27 * AT + 0.01 * RH - 0.11 * V.
'PE'(AP, AT, RH, V, PE) :-
    AT in [6.21, 32.45], V in [34.02, 50.16], AP in [997.90, 1026.41], RH in [35.63, 100.10], PE is 697.90 - 1.74 * AP - 2.04 * AT - 0.17 * RH + 0.61 * V.
'PE'(AP, AT, RH, V, PE) :-
    AT in [6.21, 35.77], V in [25.35, 81.56], AP in [997.84, 1026.45], RH in [25.55, 100.12], PE is 234.73 - 1.42 * AP - 0.29 * AT + 0.26 * RH - 0.12 * V.
'PE'(AP, AT, RH, V, PE) :-
    V in [25.35, 81.56], AP in [992.88, 1033.25], RH in [25.55, 100.15], PE is 628.20 - 2.19 * AP - 0.50 * AT - 0.09 * RH - 0.17 * V.


In [14]:
creepy = Extractor.creepy(predictor, depth=2, error_threshold=0.02, output=Target.REGRESSION,
                          clustering=Clustering.cream)
theory_from_creepy = creepy.extract(train)
evaluate('CReEPy', creepy, true, predicted)
print('CReEPy extracted rules (CREAM):\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (4 rules):
MAE = 3.37
MAE fidelity = 3.49
R2 = 0.94
R2 fidelity = 0.93

CReEPy extracted rules (CREAM):

'PE'(AP, AT, RH, V, PE) :-
    AT in [6.21, 32.45], V in [34.02, 50.16], AP in [997.90, 1026.41], RH in [35.63, 100.10], PE is 502.53 - 2.16 * AP - 0.26 * AT + 0.01 * RH - 0.11 * V.
'PE'(AP, AT, RH, V, PE) :-
    AT in [6.21, 35.77], V in [25.35, 81.56], AP in [997.84, 1026.45], RH in [25.55, 100.12], PE is 234.73 - 1.42 * AP - 0.29 * AT + 0.26 * RH - 0.12 * V.
'PE'(AP, AT, RH, V, PE) :-
    AT in [3.30, 14.60], V in [34.68, 44.47], AP in [1011.31, 1033.25], RH in [58.98, 98.68], PE is 720.26 - 2.20 * AP - 0.47 * AT - 0.18 * RH - 0.22 * V.
'PE'(AP, AT, RH, V, PE) :-
    V in [25.35, 81.56], AP in [992.88, 1033.25], RH in [25.55, 100.15], PE is 579.01 - 2.05 * AP - 0.60 * AT - 0.05 * RH + 0.00 * V.


In [15]:
crash = CRASH(predictor, train, max_depth=3, patience=1, readability_tradeoff=.5,
              algorithm=CRASH.Algorithm.ExACT, output=Target.REGRESSION)
crash.search()
(_, _, depth, threshold) = crash.get_best()[0]

creepy = Extractor.creepy(predictor, depth=depth, error_threshold=threshold, output=Target.REGRESSION,
                          clustering=Clustering.exact)
theory_from_creepy = creepy.extract(train)
evaluate('CReEPy', creepy, true, predicted)
print('CReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

Algorithm.ExACT. Depth: 1. Threshold = 0.00. MAE = 3.64, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.00. MAE = 3.53, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.00. MAE = 3.50, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.00. MAE = 3.52, 2 rules

Algorithm.ExACT. Depth: 2. Threshold = 0.00. MAE = 3.50, 3 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.00. MAE = 3.45, 3 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.00. MAE = 3.42, 3 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.00. MAE = 3.46, 3 rules

Algorithm.ExACT. Depth: 3. Threshold = 0.00. MAE = 3.49, 4 rules
Algorithm.ExACT. Depth: 3. Threshold = 0.00. MAE = 3.50, 4 rules

**********************
Best Algorithm.ExACT
**********************
MAE = 3.42, 3 rules
Threshold = 0.00
Depth = 2

**********************
Best   MAE  
**********************
MAE = 3.42, 3 rules
Threshold = 0.00
Depth = 2

**********************
Best N rules
**********************
MAE = 3.52, 2 rules
Threshold = 0.00
Depth = 1

CReEPy performance

In [16]:
crash = CRASH(predictor, train, max_depth=3, patience=1, readability_tradeoff=.75, algorithm=CRASH.Algorithm.CREAM)
crash.search()
(_, _, depth, threshold) = crash.get_best()[0]

creepy = Extractor.creepy(predictor, depth=depth, error_threshold=threshold, output=Target.REGRESSION,
                          clustering=Clustering.cream)
theory_from_creepy = creepy.extract(train)
evaluate('CReEPy', creepy, true, predicted)
print('CReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 8.47, 2 rules
Algorithm.CREAM. Depth: 1. Threshold = 0.00. MAE = 10.29, 2 rules

Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 6.24, 4 rules
Algorithm.CREAM. Depth: 2. Threshold = 0.00. MAE = 7.77, 4 rules

Algorithm.CREAM. Depth: 3. Threshold = 0.00. MAE = 5.55, 8 rules
Algorithm.CREAM. Depth: 3. Threshold = 0.00. MAE = 5.79, 8 rules

**********************
Best Algorithm.CREAM
**********************
MAE = 6.24, 4 rules
Threshold = 0.00
Depth = 2

**********************
Best   MAE  
**********************
MAE = 5.55, 8 rules
Threshold = 0.00
Depth = 3

**********************
Best N rules
**********************
MAE = 8.47, 2 rules
Threshold = 0.00
Depth = 1

CReEPy performance (4 rules):
MAE = 3.37
MAE fidelity = 3.49
R2 = 0.94
R2 fidelity = 0.93

CReEPy extracted rules:

'PE'(AP, AT, RH, V, PE) :-
    AT in [6.21, 32.45], V in [34.02, 50.16], AP in [997.90, 1026.41], RH in [35.63, 100.10], PE is 502.53 - 2.16 * AP - 0.26 * AT + 0

In [17]:
it = Extractor.iter(predictor, min_update=1.0 / 10, n_points=1, max_iterations=600,
                    min_examples=100, threshold=5)
theory_from_iter = it.extract(train)
evaluate('ITER', it, true, predicted)
print('ITER extracted rules:\n\n' + pretty_theory(theory_from_iter))

ITER performance (61 rules):
MAE = 4.19
MAE fidelity = 4.29
R2 = 0.90
R2 fidelity = 0.89

ITER extracted rules:

'PE'(AP, AT, RH, V, 430.92) :-
    AT in [25.87, 35.77], V in [62.52, 81.56], AP in [992.88, 1012.09], RH in [25.55, 100.15].
'PE'(AP, AT, RH, V, 436.53) :-
    AT in [25.87, 35.77], V in [62.52, 81.56], AP in [1012.09, 1033.25], RH in [25.55, 100.15].
'PE'(AP, AT, RH, V, 442.30) :-
    AT in [22.53, 35.77], V in [25.35, 62.52], AP in [1012.09, 1033.25], RH in [25.55, 100.15].
'PE'(AP, AT, RH, V, 443.12) :-
    AT in [22.53, 25.87], V in [62.52, 81.56], AP in [1012.09, 1033.25], RH in [25.55, 100.15].
'PE'(AP, AT, RH, V, 448.58) :-
    AT in [15.84, 22.53], V in [62.52, 81.56], AP in [1012.09, 1033.25], RH in [25.55, 100.15].
'PE'(AP, AT, RH, V, 452.03) :-
    AT in [19.18, 22.53], V in [25.35, 62.52], AP in [1012.09, 1033.25], RH in [25.55, 100.15].
'PE'(AP, AT, RH, V, 459.14) :-
    AT in [15.84, 19.18], V in [25.35, 62.52], AP in [1012.09, 1033.25], RH in [25.55, 100.15].

In [18]:
pedro = PEDRO(predictor, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,
              max_depth=1, patience=1, algorithm=PEDRO.Algorithm.GRIDEX, objective=Objective.MODEL)
pedro.search()
(_, _, threshold, grid) = pedro.get_best()[0]

gridEx = Extractor.gridex(predictor, grid, threshold=threshold)
theory_from_gridEx = gridEx.extract(train)
evaluate('GridEx', gridEx, true, predicted)
print('GridEx extracted rules:\n\n' + pretty_theory(theory_from_gridEx))

Algorithm.GRIDEX. Grid (1). Fixed (2). Threshold = 0.00. MAE = 6.45, 15 rules
Algorithm.GRIDEX. Grid (1). Fixed (2). Threshold = 0.00. MAE = 6.45, 30 rules

Algorithm.GRIDEX. Grid (1). Fixed (3). Threshold = 0.00. MAE = 6.45, 86 rules
Algorithm.GRIDEX. Grid (1). Fixed (3). Threshold = 0.00. MAE = 6.45, 142 rules

Algorithm.GRIDEX. Grid (1). Adaptive ([(0.99, 2)]). Threshold = 0.00. MAE = 6.45, 144 rules
Algorithm.GRIDEX. Grid (1). Adaptive ([(0.99, 2)]). Threshold = 0.00. MAE = 6.45, 146 rules

Algorithm.GRIDEX. Grid (1). Adaptive ([(0.3, 2)]). Threshold = 0.00. MAE = 6.45, 150 rules
Algorithm.GRIDEX. Grid (1). Adaptive ([(0.3, 2)]). Threshold = 0.00. MAE = 6.45, 154 rules

Algorithm.GRIDEX. Grid (1). Adaptive ([(0.99, 3)]). Threshold = 0.00. MAE = 6.45, 157 rules
Algorithm.GRIDEX. Grid (1). Adaptive ([(0.99, 3)]). Threshold = 0.00. MAE = 6.45, 160 rules

Algorithm.GRIDEX. Grid (1). Adaptive ([(0.3, 3)]). Threshold = 0.00. MAE = 6.45, 169 rules
Algorithm.GRIDEX. Grid (1). Adaptive ([(0

In [19]:
#pedro = PEDRO(predictor, train, max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1,
#              max_depth=2, patience=1, algorithm=PEDRO.Algorithm.GRIDREX, objective=Objective.MODEL)
#pedro.search()
(_, _, threshold, grid) = pedro.get_best()[0]

gridREx = Extractor.gridrex(predictor, grid, threshold=threshold)
theory_from_gridREx = gridREx.extract(train)
evaluate('GridREx', gridREx, true, predicted)
print('GridREx extracted rules:\n\n' + pretty_theory(theory_from_gridREx))

**********************
Best Algorithm.GRIDEX
**********************
MAE = 6.45, 15 rules
Threshold = 0.00
Iterations = 1
Strategy = Fixed (2)

**********************
Best   MAE  
**********************
MAE = 6.45, 256 rules
Threshold = 0.00
Iterations = 1
Strategy = Adaptive ([(0.33, 2), (0.67, 3)])

**********************
Best N rules
**********************
MAE = 6.45, 15 rules
Threshold = 0.00
Iterations = 1
Strategy = Fixed (2)

GridREx performance (292 rules):
MAE = 6.50
MAE fidelity = 6.54
R2 = 0.77
R2 fidelity = 0.77

GridREx extracted rules:

'PE'(AP, AT, RH, V, 474.22) :-
    AT in [2.33, 19.05], V in [25.35, 53.46], AP in [992.88, 1013.07], RH in [25.55, 62.85].
'PE'(AP, AT, RH, V, 468.97) :-
    AT in [2.33, 19.05], V in [25.35, 53.46], AP in [992.88, 1013.07], RH in [62.85, 100.15].
'PE'(AP, AT, RH, V, 471.45) :-
    AT in [2.33, 19.05], V in [25.35, 53.46], AP in [1013.07, 1033.25], RH in [25.55, 62.85].
'PE'(AP, AT, RH, V, 472.61) :-
    AT in [2.33, 19.05], V in [25.35, 5

In [20]:
cart = Extractor.cart(predictor, max_depth=5, max_leaves=6, simplify=True)
theory_from_cart = cart.extract(train)
evaluate('CART', cart, true, predicted)
print('CART extracted rules:\n\n' + pretty_theory(theory_from_cart))

CART performance (6 rules):
MAE = 4.46
MAE fidelity = 4.52
R2 = 0.89
R2 fidelity = 0.88

CART extracted rules:

'PE'(AP, AT, RH, V, 479.15) :-
    AT =< 18.25, AT =< 11.90.
'PE'(AP, AT, RH, V, 435.66) :-
    AT > 18.25, V > 66.20.
'PE'(AP, AT, RH, V, 451.33) :-
    AT > 18.25, V =< 66.20, AT =< 22.89.
'PE'(AP, AT, RH, V, 443.00) :-
    AT > 18.25, V =< 66.20, AT > 22.89.
'PE'(AP, AT, RH, V, 467.45) :-
    AT > 11.90, AT =< 15.64.
'PE'(AP, AT, RH, V, 459.70) :-
    AT > 15.64.
