# PSyKE's demo for regression tasks

Some imports.

In [1]:
from psyke import Extractor
from psyke.optimizer.pedro import PEDRO
from psyke.optimizer import Objective
from psyke.optimizer.crash import CRASH
from psyke.regression import HyperCubeExtractor
from sklearn.neighbors import KNeighborsRegressor
from psyke.utils.logic import pretty_theory
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import pandas as pd

Import an artificial dataset.

In [7]:
# Artificial regression dataset stored with the test resources.
dataset = pd.read_csv(filepath_or_buffer="test/resources/datasets/df.csv")
# Alternative dataset (semicolon-separated, decimal commas); uncomment to use it instead:
# dataset = pd.read_csv("test/resources/datasets/CCPP.csv", sep=";", decimal=",")

Split between train and test set in a reproducible way.

In [8]:
# Keep only the X, Y and Z4 columns and discard rows with missing values.
dataset = dataset.loc[:, ["X", "Y", "Z4"]].dropna()
# A fixed random_state makes the 50/50 train/test split reproducible.
train, test = train_test_split(dataset, random_state=10, test_size=0.5)

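We use a k-nearest-neighbours (KNN) regressor as the predictor: we train it on the training set and print its mean absolute error on the test set.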

In [9]:
# Every column but the last is an input feature; the last column is the regression target.
train_inputs, train_target = train.iloc[:, :-1], train.iloc[:, -1]
predictor = KNeighborsRegressor(n_neighbors=3).fit(train_inputs, train_target)

# Mean absolute error of the fitted predictor on the held-out test split.
test_predictions = predictor.predict(test.iloc[:, :-1]).flatten()
print((abs(test_predictions - test.iloc[:, -1])).mean())

0.03688944007870007


We create several extractors that use the CReEPy, CREAM, ITER, GridEx and GridREx algorithms to extract Prolog rules from the predictor.

In [10]:
# CReEPy extractor with hand-picked depth and error threshold, using the REGRESSION output target.
creepy = Extractor.creepy(
    predictor,
    depth=3,
    error_threshold=0.02,
    output=HyperCubeExtractor.Target.REGRESSION,
)
theory_from_creepy = creepy.extract(train)

# Report the rule count, then MAE on the test set and MAE fidelity w.r.t. the predictor.
creepy_report = 'CReEPy performance ({} rules):\nMAE = {:.2f}\nMAE fidelity = {:.2f}\n'
print(creepy_report.format(creepy.n_rules, creepy.mae(test), creepy.mae(test, predictor)))
print('CReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (4 rules):
MAE = 0.00
MAE fidelity = 0.04

CReEPy extracted rules:

'Z4'(X, Y, Z4) :-
    X in [0.402684, 0.543625], Y in [0.416106, 0.597316], Z4 is -6.805486 + 54.455317 * X + -24.861292 * Y.
'Z4'(X, Y, Z4) :-
    X in [0.402684, 0.597316], Y in [0.402684, 0.597316], Z4 is -6.6031 + 54.353719 * X + -25.132222 * Y.
'Z4'(X, Y, Z4) :-
    X in [0.154361, 0.845639], Y in [0.154361, 0.845639], Z4 is 9.003897 + -11.99489 * X + 14.988029 * Y.
'Z4'(X, Y, Z4) :-
    X in [-0.000001, 1.000001], Y in [-0.000001, 1.000001], Z4 is 1.999564 + 3.999876 * X + -2.999378 * Y.


In [11]:
# CREAM extractor with hand-picked depth and error threshold, using the CONSTANT output target.
cream = Extractor.cream(
    predictor,
    depth=2,
    error_threshold=0.02,
    output=HyperCubeExtractor.Target.CONSTANT,
)
theory_from_cream = cream.extract(train)

# Report the rule count, then MAE on the test set and MAE fidelity w.r.t. the predictor.
cream_report = 'CREAM performance ({} rules):\nMAE = {:.2f}\nMAE fidelity = {:.2f}\n'
print(cream_report.format(cream.n_rules, cream.mae(test), cream.mae(test, predictor)))
print('CREAM extracted rules:\n\n' + pretty_theory(theory_from_cream))

CREAM performance (4 rules):
MAE = 2.39
MAE fidelity = 2.38

CREAM extracted rules:

'Z4'(X, Y, 8.051474) :-
    X in [0.402684, 0.597316], Y in [0.402684, 0.597316].
'Z4'(X, Y, 10.458814) :-
    X in [0.154361, 0.845639], Y in [0.154361, 0.845639].
'Z4'(X, Y, 0.931622) :-
    X in [-0.000001, 0.496645], Y in [-0.000001, 1.000001].
'Z4'(X, Y, 4.024491) :-
    X in [-0.000001, 1.000001], Y in [-0.000001, 1.000001].


In [None]:
# Optional (disabled): side-by-side scatter of the test targets (left) and the
# predictor's output (right). Uncomment to render it.
#f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
#plt.ylim((0, 1))
#ax1.scatter(test.X, test.Y, c=test.iloc[:, -1], s = 0.5, cmap='gist_rainbow')
#ax1.set_xlim((0, 1))
#ax1.set_aspect("equal")
#ax2.scatter(test.X, test.Y, c=predictor.predict(test.iloc[:, :-1]), s = 0.5, cmap='gist_rainbow')
#ax2.set_xlim((0, 1))
#ax2.set_aspect("equal")
#plt.show()

# Let CRASH search for CReEPy's hyper-parameters on the training set.
crash = CRASH(predictor, train, max_depth=5, patience=2, readability_tradeoff=.5, algorithm=CRASH.Algorithm.CReEPy)
crash.search()
# Only the last two entries of the best result (depth, threshold) are used below.
(_, _, depth, threshold) = crash.get_best()[0]

# Re-run CReEPy with the selected depth/threshold and report MAE and MAE fidelity.
creepy = Extractor.creepy(predictor, depth=depth, error_threshold=threshold, output=HyperCubeExtractor.Target.CONSTANT)
theory_from_creepy = creepy.extract(train)
print('CReEPy performance ({} rules):\nMAE = {:.2f}\nMAE fidelity = {:.2f}\n'
      .format(creepy.n_rules, creepy.mae(test), creepy.mae(test, predictor)))
#print('CReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

# Colour each test point by CReEPy's prediction. The colormap is passed by name:
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
plt.scatter(test.X, test.Y, c=creepy.predict(test.iloc[:, :-1]), s=0.5, cmap='gist_rainbow')
plt.xlim((0, 1))
plt.ylim((0, 1))
plt.gca().set_aspect("equal")
plt.show()

In [None]:
# Optional (disabled): side-by-side scatter of the test targets (left) and the
# predictor's output (right). Uncomment to render it.
#f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
#plt.ylim((0, 1))
#ax1.scatter(test.X, test.Y, c=test.iloc[:, -1], s = 0.5, cmap='gist_rainbow')
#ax1.set_xlim((0, 1))
#ax1.set_aspect("equal")
#ax2.scatter(test.X, test.Y, c=predictor.predict(test.iloc[:, :-1]), s = 0.5, cmap='gist_rainbow')
#ax2.set_xlim((0, 1))
#ax2.set_aspect("equal")
#plt.show()

# Let CRASH search for CREAM's hyper-parameters on the training set.
crash = CRASH(predictor, train, max_depth=5, patience=2, readability_tradeoff=.75, algorithm=CRASH.Algorithm.CREAM)
crash.search()
# Only the last two entries of the best result (depth, threshold) are used below.
(_, _, depth, threshold) = crash.get_best()[0]

# Re-run CREAM with the selected depth/threshold and report MAE and MAE fidelity.
cream = Extractor.cream(predictor, depth=depth, error_threshold=threshold, output=HyperCubeExtractor.Target.CONSTANT)
theory_from_cream = cream.extract(train)
print('CREAM performance ({} rules):\nMAE = {:.2f}\nMAE fidelity = {:.2f}\n'
      .format(cream.n_rules, cream.mae(test), cream.mae(test, predictor)))
#print('CREAM extracted rules:\n\n' + pretty_theory(theory_from_cream))

# Colour each test point by CREAM's prediction. The colormap is passed by name:
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
plt.scatter(test.X, test.Y, c=cream.predict(test.iloc[:, :-1]), s=0.5, cmap='gist_rainbow')
plt.xlim((0, 1))
plt.ylim((0, 1))
plt.gca().set_aspect("equal")
plt.show()

In [None]:
# ITER extractor configured with hand-picked hyper-parameters.
it = Extractor.iter(
    predictor,
    min_update=1.0 / 20,
    n_points=1,
    max_iterations=600,
    min_examples=100,
    threshold=1.5,
)
theory_from_iter = it.extract(train)

# Report the rule count, then MAE on the test set and MAE fidelity w.r.t. the predictor.
iter_report = 'ITER performance ({} rules):\nMAE = {:.2f}\nMAE fidelity = {:.2f}\n'
print(iter_report.format(it.n_rules, it.mae(test), it.mae(test, predictor)))
print('ITER extracted rules:\n\n' + pretty_theory(theory_from_iter))

In [None]:
# Let PEDRO search for GridEx's threshold and grid on the training set.
pedro = PEDRO(
    predictor,
    train,
    max_mae_increase=1.2,
    min_rule_decrease=0.9,
    readability_tradeoff=0.1,
    max_depth=5,
    patience=3,
    algorithm=PEDRO.Algorithm.GRIDEX,
    objective=Objective.MODEL,
)
pedro.search()
# Only the last two entries of the best result (threshold, grid) are used below.
best_gridex_params = pedro.get_best()[0]
(_, _, threshold, grid) = best_gridex_params

# Extract with the selected parameters and report MAE and MAE fidelity.
gridEx = Extractor.gridex(predictor, grid, threshold=threshold)
theory_from_gridEx = gridEx.extract(train)
gridex_report = 'GridEx performance ({} rules):\nMAE = {:.2f}\nMAE fidelity = {:.2f}\n'
print(gridex_report.format(gridEx.n_rules, gridEx.mae(test), gridEx.mae(test, predictor)))
print('GridEx extracted rules:\n\n' + pretty_theory(theory_from_gridEx))

In [None]:
# Let PEDRO search for GridREx's threshold and grid on the training set.
pedro = PEDRO(
    predictor,
    train,
    max_mae_increase=1.2,
    min_rule_decrease=0.9,
    readability_tradeoff=0.1,
    max_depth=5,
    patience=3,
    algorithm=PEDRO.Algorithm.GRIDREX,
    objective=Objective.MODEL,
)
pedro.search()
# Only the last two entries of the best result (threshold, grid) are used below.
best_gridrex_params = pedro.get_best()[0]
(_, _, threshold, grid) = best_gridrex_params

# Extract with the selected parameters and report MAE and MAE fidelity.
gridREx = Extractor.gridrex(predictor, grid, threshold=threshold)
theory_from_gridREx = gridREx.extract(train)
gridrex_report = 'GridREx performance ({} rules):\nMAE = {:.2f}\nMAE fidelity = {:.2f}\n'
print(gridrex_report.format(gridREx.n_rules, gridREx.mae(test), gridREx.mae(test, predictor)))
print('GridREx extracted rules:\n\n' + pretty_theory(theory_from_gridREx))