In [None]:
import torch
import numpy as np
import pandas as pd

from datasets.dataset import transform_dataset, kfold_dataset
from DRNet import train, DRNet

In [None]:
def load_NIPS_results(path='NIPS_crips_additive_rules_0999_8_1.csv'):
    """Load a results table from a semicolon-separated CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the results file this
        notebook was originally written against.

    Returns
    -------
    pandas.DataFrame
        The parsed table, indexed by the first column of the file.
    """
    return pd.read_csv(path, index_col=0, sep=';')


def good_NIPS_datasets():
    """Return the index labels of the result rows that have an accuracy value.

    Rows whose ``'Accuracy'`` entry is missing (NaN) are dropped; the index
    labels of the remaining rows are returned in file order.

    Returns
    -------
    list
        Index labels of the rows with a non-missing ``'Accuracy'``.
    """
    df = load_NIPS_results()

    # ``notna()`` states the intent directly; the previous ``col == col``
    # comparison relied on NaN being unequal to itself to do the same filtering.
    return list(df[df['Accuracy'].notna()].index)

In [4]:
# Load the dataset, split it, and wrap the first split in torch datasets.
name = 'iris'
X, Y, X_headers, Y_headers = transform_dataset(
    name,
    method='onehot-compare',
    negations=False,
    labels='binary',
)

# Only the first entry returned by kfold_dataset is used below
# (presumably one entry per fold -- confirm against datasets.dataset).
datasets = kfold_dataset(X, Y, shuffle=1)
X_train, X_test, Y_train, Y_test = datasets[0]


def _as_tensor_dataset(features, targets):
    """Wrap a feature frame and its targets in a ``TensorDataset``."""
    return torch.utils.data.TensorDataset(torch.Tensor(features.to_numpy()), torch.Tensor(targets))


train_set = _as_tensor_dataset(X_train, Y_train)
test_set = _as_tensor_dataset(X_test, Y_test)

In [8]:
# Train DR-Net
# The default learning rate (1e-2), and_lam (1e-2), and or_lam (1e-5) usually work best.
# A large number of epochs (e.g. 10000) is necessary to obtain a sparse rule set.

# Use the GPU when one is available, otherwise fall back to the CPU so the
# cell does not fail on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The first argument is the number of feature columns in the training tensors.
# NOTE(review): 50 and 1 are presumably the number of rules and the output size -- confirm against DRNet.
net = DRNet(train_set[:][0].size(1), 50, 1)
train(net, train_set, test_set=test_set, device=device, lr=1e-2, epochs=1000, batch_size=5,
      and_lam=1e-2, or_lam=1e-5, num_alter=500)

Epoch: 100%|██████████| 1000/1000 [04:57<00:00,  3.36it/s, loss=-15.9, epoch accu=0.667, test accu=0.667, num rules=50, sparsity=0.859] 


In [7]:
# Score the trained network on the held-out split and extract its rules.
predictions = net.predict(np.array(X_test))
accu = (predictions == Y_test).mean()

rules = net.get_rules(X_headers)
num_conditions = sum(len(rule) for rule in rules)
print(f'Accuracy: {accu}, num rules: {len(rules)}, num conditions: {num_conditions}')

Accuracy: 0.6666666666666666, num rules: 50, num conditions: 352


In [None]:
# Summarise the extracted rule set and save the metrics to a CSV file.

# Mean number of conditions per rule; 0.0 when no rules were extracted, so
# np.mean is never called on an empty list.
rule_lengths = [len(rule) for rule in rules]
average_rule_length = np.mean(rule_lengths) if rule_lengths else 0.0

# Distinct conditions across all rules. A condition given as a tuple is
# counted by its first element only.
unique_conditions = {
    condition[0] if isinstance(condition, tuple) else condition
    for rule in rules
    for condition in rule
}

# The average rule length is a scalar, so it is added directly; calling
# len() on it raised a TypeError.
complexity_score = np.log(len(rules) + average_rule_length + len(unique_conditions))
# Keep the previously used (misspelt) name available for any code that still reads it.
compelxity_score = complexity_score

print(f'Complexity score: {complexity_score}')
print(f'Number of unique conditions: {len(unique_conditions)}')
print(f'Average rule length: {average_rule_length}')
print(f'Number of rules: {len(rules)}')

# One-row results table indexed by the dataset name.
res = pd.DataFrame(
    {'Accuracy': [accu],
     'Complexity score': [complexity_score],
     'Average rule length': [average_rule_length],
     'Number of unique conditions': [len(unique_conditions)],
     'Number of rules': [len(rules)]},
    index=[name])
res.to_csv('results_' + name + '.csv', sep=';')

Number of unique conditions: 12
Average rule length: 7.04
Number of rules: 50
