In [37]:
import random
from functools import reduce
try:
    import xml.etree.cElementTree as e
except ImportError:  # cElementTree was removed in Python 3.9; ElementTree is the same API
    import xml.etree.ElementTree as e

import numpy as np
import pandas as pd
from pytest import fixture
from scipy.io.arff import loadarff
from sklearn import datasets
from sklearn.model_selection import train_test_split

from flore.datasets import load_compas, load_beer
from flore.explanation import FID3_factual, m_factual, mr_factual, c_factual
from flore.fuzzy import get_fuzzy_points, get_fuzzy_variables, get_dataset_membership
from flore.tree import ID3, FDT, Rule
from flore.tree.tests.fdt_legacy_tree import FDT_Legacy
from flore.tree.tests.id3_legacy_tree import ID3_Legacy

In [2]:
# Seed both Python's `random` module and NumPy's global RNG with the same
# value so that any stochastic step in the cells below is repeatable.
seed = 0
random.seed(seed)
np.random.seed(seed)

In [3]:
def _get_fuzzy_element(fuzzy_X, idx):
    element = {}
    for feat in fuzzy_X:
        element[feat] = {}
        for fuzzy_set in fuzzy_X[feat]:
            try:
                element[feat][str(fuzzy_set)] = pd.to_numeric(fuzzy_X[feat][fuzzy_set][idx])
            except ValueError:
                element[feat][str(fuzzy_set)] = fuzzy_X[feat][fuzzy_set][idx]

    return element


In [4]:
# Load the beer dataset bundle and pull out the pieces the rest of the
# notebook relies on.
dataset = load_beer()

df = dataset['df']
class_name = dataset['class_name']
X = df.drop(class_name, axis=1)
y = df[class_name]

# Discrete *feature* columns only. Build a filtered copy instead of calling
# `.remove()` on dataset['discrete']: that would mutate the loaded bundle in
# place and raise ValueError if the class column were not listed there.
df_categorical_columns = [col for col in dataset['discrete'] if col != class_name]
df_numerical_columns = dataset['continuous']
all_classes = dataset['possible_outcomes']
# 1-based float code -> class label; `load_arff_df` uses this mapping to turn
# the numeric class column of the ARFF files back into label strings.
all_classes_dict = {float(code): label for code, label in enumerate(all_classes, start=1)}


In [5]:
df.dtypes

beer_style     object
color         float64
bitterness    float64
strength      float64
dtype: object

In [6]:
def load_arff_df(path):
    """Read an ARFF file into a DataFrame with labelled beer styles.

    The four columns are renamed to ``color``, ``bitterness``, ``strength``
    and ``beer_style`` (in that order). ``beer_style`` is converted with
    ``pd.to_numeric`` and its values are then substituted through the
    module-level ``all_classes_dict`` mapping.
    """
    # loadarff returns a (records, metadata) pair; only the records are used.
    records, _meta = loadarff(path)
    frame = pd.DataFrame(records)
    frame.columns = ['color', 'bitterness', 'strength', 'beer_style']
    frame['beer_style'] = pd.to_numeric(frame['beer_style'])
    return frame.replace({'beer_style': all_classes_dict})

In [7]:
# Pre-split train/test ARFF files, addressed relative to the notebook's
# working directory.
# NOTE(review): presumably fold 0 of a cross-validation split, judging by the
# `CV0` directory and the `.0.arff` suffix — confirm against the data source.
train_path = './BEER2/CV0/data/BEER2.txt.aux.train.0.arff'
test_path = './BEER2/CV0/data/BEER2.txt.aux.test.0.arff'

# Feature columns and class column as named by `load_arff_df`.
feats = ['color', 'bitterness', 'strength']
class_val = 'beer_style'

df_train = load_arff_df(train_path)
df_test = load_arff_df(test_path)

# Feature matrix / label vector for each split.
X_train = df_train[feats]
y_train = df_train[class_val]

X_test = df_test[feats]
y_test = df_test[class_val]

In [8]:
df_train.color.max()

45.0

In [9]:
# Build the fuzzy variables from the training split only, then compute the
# membership tables of both splits with those same variables.
# NOTE(review): 'entropy' presumably selects an entropy-based partitioning
# strategy inside get_fuzzy_points — confirm against flore.fuzzy.
fuzzy_points = get_fuzzy_points(df_train, 'entropy', df_numerical_columns, class_name=class_name)
# Observed values of every discrete feature column in the training data.
discrete_fuzzy_values = {col: df_train[col].unique() for col in df_categorical_columns}
fuzzy_variables = get_fuzzy_variables(fuzzy_points, discrete_fuzzy_values)
df_train_membership = get_dataset_membership(df_train, fuzzy_variables)
df_test_membership = get_dataset_membership(df_test, fuzzy_variables)

#fuzzy_element = _get_fuzzy_element(df_test_membership, 48)


In [10]:
fuzzy_variables

[FuzzyVariable(name='color', fuzzy_sets=[FuzzyContinuousSet(name='0.0', fuzzy_points=[0.0, 0.0, 6.0]), FuzzyContinuousSet(name='6.0', fuzzy_points=[0.0, 6.0, 7.0]), FuzzyContinuousSet(name='7.0', fuzzy_points=[6.0, 7.0, 15.0]), FuzzyContinuousSet(name='15.0', fuzzy_points=[7.0, 15.0, 19.0]), FuzzyContinuousSet(name='19.0', fuzzy_points=[15.0, 19.0, 20.0]), FuzzyContinuousSet(name='20.0', fuzzy_points=[19.0, 20.0, 29.0]), FuzzyContinuousSet(name='29.0', fuzzy_points=[20.0, 29.0, 45.0]), FuzzyContinuousSet(name='45.0', fuzzy_points=[29.0, 45.0, 45.0])]),
 FuzzyVariable(name='bitterness', fuzzy_sets=[FuzzyContinuousSet(name='10.0', fuzzy_points=[10.0, 10.0, 19.0]), FuzzyContinuousSet(name='19.0', fuzzy_points=[10.0, 19.0, 24.0]), FuzzyContinuousSet(name='24.0', fuzzy_points=[19.0, 24.0, 33.0]), FuzzyContinuousSet(name='33.0', fuzzy_points=[24.0, 33.0, 36.0]), FuzzyContinuousSet(name='36.0', fuzzy_points=[33.0, 36.0, 63.0]), FuzzyContinuousSet(name='63.0', fuzzy_points=[36.0, 63.0, 250.0])

In [11]:
all_classes_dict

{1.0: 'Blanche',
 2.0: 'Lager',
 3.0: 'Pilsner',
 4.0: 'IPA',
 5.0: 'Stout',
 6.0: 'Barleywine',
 7.0: 'Porter',
 8.0: 'Belgian-Strong-Ale'}

In [12]:
y_train

0      Barleywine
1           Lager
2      Barleywine
3      Barleywine
4         Blanche
          ...    
355       Blanche
356         Lager
357           IPA
358       Blanche
359       Pilsner
Name: beer_style, Length: 360, dtype: object

In [13]:
# Fit the FDT model on the training membership table (its keys are passed as
# the feature names) and convert the fitted tree into a rule collection; the
# rules are exported to XML and indexed positionally further down.
new_fdt = FDT(df_train_membership.keys())
new_fdt.fit(df_train_membership, y_train.to_numpy())
rules = new_fdt.to_rule_based_system()

In [14]:
# Term labels used in the exported XML; `get_fuzzy_variables_xml` assigns them
# to each variable's fuzzy sets by position (first set -> 'MF0', and so on).
membership_function_names = ['MF0', 'MF1', 'MF2', 'MF3', 'MF4', 'MF5', 'MF6', 'MF7', 'MF8', 'MF9']
# Value of the `name` attribute on the exported <fuzzySystem> root element.
xml_name = 'POTATO'

In [15]:
df_train_membership

{'color': {'0.0': array([0.        , 0.        , 0.        , 0.        , 0.5       ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.5       , 0.        , 0.        , 0.5       , 0.33333333,
         0.        , 0.        , 0.        , 0.        , 0.5       ,
         0.5       , 0.        , 0.5       , 0.33333333, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.66666667,
         0.        , 0.        , 0.66666667, 0.        , 0.        ,
         0.        , 0.33333333, 0.        , 0.        , 0.        ,
         0.66666667, 0.        , 0.        , 0.        , 0.        ,
         0.33333333, 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.5       , 0.        , 0.        ,
         0.        , 0.        , 0.66666667, 0.33333333, 0.        ,
         0.       

In [16]:
fuzzy_variables[0]

FuzzyVariable(name='color', fuzzy_sets=[FuzzyContinuousSet(name='0.0', fuzzy_points=[0.0, 0.0, 6.0]), FuzzyContinuousSet(name='6.0', fuzzy_points=[0.0, 6.0, 7.0]), FuzzyContinuousSet(name='7.0', fuzzy_points=[6.0, 7.0, 15.0]), FuzzyContinuousSet(name='15.0', fuzzy_points=[7.0, 15.0, 19.0]), FuzzyContinuousSet(name='19.0', fuzzy_points=[15.0, 19.0, 20.0]), FuzzyContinuousSet(name='20.0', fuzzy_points=[19.0, 20.0, 29.0]), FuzzyContinuousSet(name='29.0', fuzzy_points=[20.0, 29.0, 45.0]), FuzzyContinuousSet(name='45.0', fuzzy_points=[29.0, 45.0, 45.0])])

In [17]:
# DataFrame column name -> variable name written into the exported XML
# (used for both the input variables and the class/output variable).
var_mapping = {
    'color': 'Color',
    'strength': 'Strength',
    'bitterness': 'Bitterness',
    'beer_style': 'Beer-Style'
}

In [18]:
def get_fuzzy_variables_xml(fuzzy_variables, all_classes, class_name,
                            variable_names=None, term_names=None):
    """Build the ``<knowledgeBase>`` XML element for the fuzzy system.

    One ``<fuzzyVariable type="input">`` is emitted per entry of
    `fuzzy_variables`, followed by a single ``type="output"`` variable whose
    terms are the class labels.

    Parameters
    ----------
    fuzzy_variables : iterable
        Objects exposing ``name`` and ``fuzzy_sets``; every fuzzy set exposes
        ``name`` and ``fuzzy_points``. Indices 0, 1 and 2 of ``fuzzy_points``
        are read (assumed to be left foot, peak, right foot — TODO confirm
        against the flore fuzzy-set classes).
    all_classes : sequence of str
        Class labels in code order; the label at position ``k`` (1-based) gets
        a triangle peaking at ``float(k)``. Must not be empty.
    class_name : hashable
        Key of the class column in `variable_names`.
    variable_names : mapping, optional
        Column name -> XML variable name. Defaults to the module-level
        ``var_mapping``.
    term_names : sequence of str, optional
        Positional term labels for the input fuzzy sets. Defaults to the
        module-level ``membership_function_names``. Positions beyond the end
        of this sequence fall back to ``'MF<position>'``.

    Returns
    -------
    (kb, mf_names)
        ``kb`` is the ``knowledgeBase`` element; ``mf_names`` maps
        ``variable name -> fuzzy set name -> XML term label``.

    Raises
    ------
    ValueError
        If `all_classes` is empty.
    KeyError
        If a variable name or `class_name` is missing from `variable_names`.
    """
    if variable_names is None:
        variable_names = var_mapping
    if term_names is None:
        term_names = membership_function_names

    n_classes = len(all_classes)
    if n_classes == 0:
        raise ValueError('all_classes must contain at least one class label')

    kb = e.Element("knowledgeBase")
    mf_names = {}

    # ---- input variables -------------------------------------------------
    for fv in fuzzy_variables:
        ffv = e.SubElement(kb, "fuzzyVariable", attrib={
            'name': variable_names[fv.name],
            'scale': '',
            # Domain spans from the first point of the first set to the last
            # point of the last set.
            'domainleft': str(fv.fuzzy_sets[0].fuzzy_points[0]),
            'domainright': str(fv.fuzzy_sets[-1].fuzzy_points[-1]),
            'type': 'input',
        })
        mf_names[fv.name] = {}
        for i, fs in enumerate(fv.fuzzy_sets):
            # Generate a label when the variable has more sets than there are
            # predefined names instead of failing with IndexError.
            term_label = term_names[i] if i < len(term_names) else f'MF{i}'
            mf_names[fv.name][fs.name] = term_label
            ft = e.SubElement(ffv, "fuzzyTerm", attrib={
                'name': term_label,
                'complement': 'false',
            })
            # The peak is written twice (param2 == param3), so the trapezoid
            # degenerates to the triangle described by the three points.
            e.SubElement(ft, "trapezoidShape", attrib={
                'param1': str(fs.fuzzy_points[0]),
                'param2': str(fs.fuzzy_points[1]),
                'param3': str(fs.fuzzy_points[1]),
                'param4': str(fs.fuzzy_points[2]),
            })

    # ---- output (class) variable ------------------------------------------
    cv = e.SubElement(kb, "fuzzyVariable", attrib={
        'name': variable_names[class_name],
        'scale': '',
        'domainleft': '1.0',
        'domainright': str(float(n_classes)),
        'type': 'output',
        'accumulation': 'MAX',
        'defuzzifier': 'MOM',
        'defaultValue': '1.0',
    })

    # One triangle per class, peaking at its 1-based position and reaching to
    # the neighbouring positions; the feet are clamped to [1, n_classes], which
    # yields the half-triangles of the first and last class and a single
    # well-formed term when there is only one class.
    for position, label in enumerate(all_classes, start=1):
        cft = e.SubElement(cv, "fuzzyTerm", attrib={
            'name': label,
            'complement': 'false',
        })
        e.SubElement(cft, "triangleShape", attrib={
            'param1': str(float(max(position - 1, 1))),
            'param2': str(float(position)),
            'param3': str(float(min(position + 1, n_classes))),
        })

    return kb, mf_names

In [19]:
def get_rules_xml(rule_list, class_name, mf_names, variable_names=None):
    """Build the ``<mamdaniRuleBase>`` XML element for a list of rules.

    Parameters
    ----------
    rule_list : iterable
        Rule objects exposing ``antecedent`` (an iterable of
        ``(variable, term)`` pairs), ``consequent`` and ``weight``.
    class_name : hashable
        Key of the class column in `variable_names`.
    mf_names : mapping
        ``variable -> term -> XML term label``, as returned by
        ``get_fuzzy_variables_xml``.
    variable_names : mapping, optional
        Column name -> XML variable name. Defaults to the module-level
        ``var_mapping``.

    Returns
    -------
    xml.etree.ElementTree.Element
        The ``mamdaniRuleBase`` element with one ``<rule>`` child per rule,
        named ``rule1``, ``rule2``, ... in input order.

    Raises
    ------
    KeyError
        If a variable or term of a rule is missing from `variable_names` or
        `mf_names`.
    """
    if variable_names is None:
        variable_names = var_mapping

    rb = e.Element("mamdaniRuleBase", attrib={
        'name': 'rulebase',
        'activationMethod': 'MIN',
        'andMethod': 'MIN',
        'orMethod': 'MAX',
    })

    for number, rule in enumerate(rule_list, start=1):
        r = e.SubElement(rb, "rule", {
            'name': f'rule{number}',
            'andMethod': 'MIN',
            'connector': 'and',
            'weight': str(rule.weight),
        })

        # Antecedent: one <clause> per (variable, term) pair.
        ante = e.SubElement(r, 'antecedent')
        for var, term in rule.antecedent:
            clause = e.SubElement(ante, 'clause')
            e.SubElement(clause, 'variable').text = variable_names[var]
            e.SubElement(clause, 'term').text = mf_names[var][term]

        # Consequent: a single clause assigning the class variable.
        conse = e.SubElement(r, 'consequent')
        then = e.SubElement(conse, 'then')
        clause = e.SubElement(then, 'clause')
        e.SubElement(clause, 'variable').text = variable_names[class_name]
        # Element text must be a string; a numeric class label would otherwise
        # only fail later, at serialisation time, with a TypeError.
        e.SubElement(clause, 'term').text = str(rule.consequent)

    return rb
    

In [20]:
# Build the two halves of the XML document: the knowledge base (variables and
# their terms) and the rule base. `mf_names` carries the fuzzy-set -> term
# label mapping from the first call into the second.
fv_tree, mf_names = get_fuzzy_variables_xml(fuzzy_variables, all_classes, class_name)
rules_tree = get_rules_xml(rules, class_name, mf_names)

In [21]:
# Assemble the <fuzzySystem> root element and attach the knowledge base and
# the rule base to it, in that order.
root_attrs = {}
root_attrs['xmlns'] = 'http://www.ieee1855.org'
root_attrs['name'] = xml_name
root = e.Element("fuzzySystem", attrib=root_attrs)
root.append(fv_tree)
root.append(rules_tree)

In [22]:
# Serialise the assembled fuzzy system to `test.xml` with an XML declaration.
# ElementTree copies the `encoding` string verbatim into the declaration, so
# use the registered name 'UTF-8' rather than the Python-only alias 'UTF8',
# which stricter XML parsers are not required to recognise.
a = e.ElementTree(root)
a.write('test.xml', xml_declaration=True, encoding='UTF-8')

In [23]:
all_classes_dict

{1.0: 'Blanche',
 2.0: 'Lager',
 3.0: 'Pilsner',
 4.0: 'IPA',
 5.0: 'Stout',
 6.0: 'Barleywine',
 7.0: 'Porter',
 8.0: 'Belgian-Strong-Ale'}

In [41]:
# Scan the test split and report every sample for which `mr_factual` returns
# more than one rule for the model's predicted class.
for i in range(len(df_test)):
    fuzzy_element = _get_fuzzy_element(df_test_membership, i)
    new_fdt_predict = new_fdt.predict(fuzzy_element)[0]
    mr_f = mr_factual(fuzzy_element, rules, new_fdt_predict)
    if len(mr_f) > 1:
        print(f'Index {i} has {len(mr_f)} rules')
        print(fuzzy_element)
        # `i` is a position (it indexes the membership arrays), so fetch the
        # row positionally; `.loc[i]` only matched because the index happens
        # to be the default RangeIndex.
        print(X_test.iloc[i])
        print(mr_f)
        # Matching degree times weight of the first returned rule.
        print(mr_f[0].matching(fuzzy_element) * mr_f[0].weight)

Index 27 has 2 rules
{'color': {'0.0': 0.0, '6.0': 0.0, '7.0': 0.0, '15.0': 0.0, '19.0': 0.0, '20.0': 0.1111111111111111, '29.0': 0.8888888888888888, '45.0': 0.0}, 'bitterness': {'10.0': 0.0, '19.0': 0.2, '24.0': 0.8, '33.0': 0.0, '36.0': 0.0, '63.0': 0.0, '250.0': 0.0}, 'strength': {'0.039': 0.3749999999999999, '0.055': 0.6250000000000001, '0.068': 0.0, '0.078': 0.0, '0.092': 0.0, '0.136': 0.0}}
color         28.000
bitterness    23.000
strength       0.049
Name: 27, dtype: float64
[Rule((('color', '29.0'), ('strength', '0.039')), Porter, 0.8453729012104646), Rule((('color', '20.0'), ('strength', '0.039')), Porter, 1.0)]
0.31701483795392416




In [44]:
y_test.loc[27]

'Porter'

In [43]:
# Re-inspect test sample 27 (the one flagged by the scan above) against
# rule 73.
fuzzy_element = _get_fuzzy_element(df_test_membership, 27)
print(fuzzy_element)
# NOTE(review): this product (matching degree * weight) is evaluated but never
# shown — a cell only displays its last expression, which is the consequent
# below. Wrap it in print()/display() if the value is wanted.
rules[73].matching(fuzzy_element) * rules[73].weight
rules[73].consequent

{'color': {'0.0': 0.0, '6.0': 0.0, '7.0': 0.0, '15.0': 0.0, '19.0': 0.0, '20.0': 0.1111111111111111, '29.0': 0.8888888888888888, '45.0': 0.0}, 'bitterness': {'10.0': 0.0, '19.0': 0.2, '24.0': 0.8, '33.0': 0.0, '36.0': 0.0, '63.0': 0.0, '250.0': 0.0}, 'strength': {'0.039': 0.3749999999999999, '0.055': 0.6250000000000001, '0.068': 0.0, '0.078': 0.0, '0.092': 0.0, '0.136': 0.0}}


'Stout'

In [36]:
rules[73]

Rule((('color', '29.0'), ('strength', '0.055')), Stout, 0.5749603968109693)

In [None]:
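# Note: in the exported XML, the two rules reported for test sample 27 read
# color MF6 / MF5 (fuzzy sets '29.0' / '20.0') AND strength MF0 (fuzzy set '0.039').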

In [28]:
df_train.describe()

Unnamed: 0,color,bitterness,strength
count,360.0,360.0,360.0
mean,16.375,42.866667,0.067528
std,11.489656,29.658108,0.022739
min,0.0,10.0,0.039
25%,7.0,24.75,0.05
50%,13.0,32.0,0.055
75%,27.25,50.0,0.088
max,45.0,250.0,0.136


In [29]:
df_test.describe()

Unnamed: 0,color,bitterness,strength
count,40.0,40.0,40.0
mean,16.5,42.1,0.070675
std,12.276724,30.045094,0.025867
min,0.0,8.0,0.045
25%,5.75,23.0,0.05175
50%,11.5,32.5,0.055
75%,28.25,57.25,0.0915
max,43.0,156.0,0.131
