In [1]:
import os
import pickle
import random

import numpy as np
from joblib import dump, load
from sklearn.model_selection import train_test_split

from teacher.datasets import load_compas, load_german, load_adult, load_heloc, load_iris, load_wine, load_beer
from teacher.fuzzy import get_fuzzy_variables, get_fuzzy_points
from teacher.tree import FDT
from utils import seed


# %%
# Registry of dataset loaders: short dataset key -> loader function.
# train_models / check_models look a loader up by key and call it with no arguments.
DATASETS = dict(
    adult=load_adult,
    compas=load_compas,
    fico=load_heloc,
    german=load_german,
    iris=load_iris,
    wine=load_wine,
    beer=load_beer,
)

In [2]:
def train_models(ds, bb, random_state):
    """Train a fuzzy decision tree on one dataset, save it, and print its accuracy.

    Parameters
    ----------
    ds : str
        Key into ``DATASETS`` selecting which loader to call.
    bb : str
        Model label; only used in the saved file name and the printed message.
    random_state
        Seed passed to ``random.seed``, ``np.random.seed`` and
        ``train_test_split``.

    Side effects
    ------------
    Creates ``./models/`` if it is missing, writes the fitted tree to
    ``./models/<ds>_<bb>.joblib``, and prints progress messages plus the
    accuracy on the 40% of rows held out from training.
    """
    random.seed(random_state)
    np.random.seed(random_state)
    path_models = './models/'

    dataset = DATASETS[ds]()
    class_name = dataset['class_name']
    discrete = dataset['discrete']
    continuous = dataset['continuous']
    df = dataset['df']
    X = df.drop(class_name, axis=1)
    y = df[class_name]

    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6, random_state=random_state)

    # dump() below needs the target directory to exist; create it before the
    # training work so a missing or unwritable folder fails early.
    os.makedirs(path_models, exist_ok=True)

    X_num = X_train[continuous]
    print('Extracting fuzzy points...')
    fuzzy_points = get_fuzzy_points('entropy', continuous, X_num, y_train)
    # The class column is not a feature, so keep it out of the discrete variables.
    cate = [col for col in discrete if col != class_name]

    discrete_fuzzy_values = {col: X_train[col].unique() for col in cate}
    # Position of each feature, following the column order of the training frame.
    fuzzy_variables_order = {col: i for i, col in enumerate(X_train.columns)}
    fuzzy_variables = get_fuzzy_variables(fuzzy_points, discrete_fuzzy_values, fuzzy_variables_order)
    print('Training FDT...')
    fdt = FDT(fuzzy_variables)
    fdt.fit(X_train, y_train)

    dump(fdt, os.path.join(path_models, '%s_%s.joblib' % (ds, bb)))
    score = fdt.score(X_test, y_test)
    print(f'Accuracy for model {bb} and dataset {ds}: {score}')


In [3]:
def check_models(ds, bb, random_state):
    """Reload a saved model and print its accuracy on the test part of the hold-out rows.

    Parameters
    ----------
    ds : str
        Key into ``DATASETS`` selecting which loader to call.
    bb : str
        Model label; together with ``ds`` it names the file to load,
        ``./models/<ds>_<bb>.joblib``.
    random_state
        Seed passed to ``random.seed``, ``np.random.seed`` and both
        ``train_test_split`` calls.

    Side effects
    ------------
    Reads the model file from disk and prints the accuracy. Nothing is returned.
    """
    random.seed(random_state)
    np.random.seed(random_state)
    path_models = './models/'

    dataset = DATASETS[ds]()
    class_name = dataset['class_name']
    df = dataset['df']
    X = df.drop(class_name, axis=1)
    y = df[class_name]

    # First split: 60% train / 40% hold-out, same arguments as in train_models.
    # Only the hold-out part is needed here.
    _, X_holdout, _, y_holdout = train_test_split(X, y, train_size=0.6, random_state=random_state)
    # Second split: the hold-out rows are divided 75% dev / 25% test; only the
    # test part is scored below.
    _, X_test, _, y_test = train_test_split(X_holdout, y_holdout, train_size=0.75, random_state=random_state)

    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model files produced by a trusted run of this notebook.
    fdt = load(f'{path_models}{ds}_{bb}.joblib')
    score = fdt.score(X_test, y_test)
    print(f'Accuracy for model {bb} and dataset {ds}: {score}')

In [4]:
# Train the FDT on the iris dataset, save it under ./models/, and print its hold-out accuracy.
train_models('iris', 'FDT', seed)

Extracting fuzzy points...
Training FDT...
Accuracy for model FDT and dataset iris: 0.9666666666666667


In [5]:
# Train the FDT on the wine dataset, save it under ./models/, and print its hold-out accuracy.
train_models('wine', 'FDT', seed)

Extracting fuzzy points...
Training FDT...
Accuracy for model FDT and dataset wine: 0.9166666666666666


In [6]:
# Train the FDT on the beer dataset, save it under ./models/, and print its hold-out accuracy.
train_models('beer', 'FDT', seed)

Extracting fuzzy points...
Training FDT...
Accuracy for model FDT and dataset beer: 0.43125


In [10]:
# Reload the saved iris model and score it on the test part of the hold-out rows
# (a smaller subset than the one train_models reports on, so the numbers can differ).
check_models('iris', 'FDT', seed)

Accuracy for model FDT and dataset iris: 1.0
