In [1]:
import pandas as pd
from base import Timer
import line_profiler
import pickle

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import precision_score, recall_score, accuracy_score

In [3]:
import ripper

def make_ripper_dataset(dataset_filename, random_state=42):
    # Load df
    df = pd.read_csv(dataset_filename)
    
    # Split
    train, test = train_test_split(df, test_size=.33, random_state=random_state)
    
    return train, test

In [4]:
def make_ripper(train, class_feat, pos_class, k=2, random_state=42, verbosity=0):
    # Train
    irep_clf = ripper.RIPPER(class_feat=class_feat,pos_class=pos_class, k=k, verbosity=verbosity)
    irep_clf.fit(train, n_discretize_bins=5, seed=random_state)
    return irep_clf

In [5]:
def score_ripper(ripper_clf, test):
    X_test = test.drop(ripper_clf.class_feat,axis=1)
    y_test = test[class_feat]

    precision = ripper_clf.score(X_test, y_test, precision_score)
    recall = ripper_clf.score(X_test, y_test, recall_score)
    total_conds = ripper_clf.ruleset_.count_conds()
    return precision, recall, total_conds

In [6]:
from sklearn.tree import DecisionTreeClassifier

def make_tree_dataset(dataset_filename, class_feat, pos_class, n_classes, random_state=42):
    
    # Load df
    df = pd.read_csv(dataset_filename)
    
    # sklearn's Tree (of all things!) doesn't directly take categorical features.
    # One-hot preprocess the data to make it work.
    le = LabelEncoder()
    df_le=df.apply(le.fit_transform)
    enc = OneHotEncoder(sparse=False)
    enc.fit(df_le)
    df_hot=enc.transform(df_le)
    
    # Split
    train, test = train_test_split(df_hot, test_size=.33, random_state=random_state)
    train_X = train[:,n_classes:]
    train_y = train[:,0]
    test_X = test[:,n_classes:]
    test_y = test[:,0]
    
    return train_X, train_y, test_X, test_y

In [7]:
def make_tree(train_X, train_y, max_depth=None, random_state=42):
    tree = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state)
    tree.fit(train_X, train_y)
    return tree

In [8]:
def score_tree(tree_clf, test_X, test_y):
    predictions = tree_clf.predict(test_X)
    precision = precision_score(test_y, predictions)
    recall = recall_score(test_y, predictions)
    return precision, recall, tree_clf.tree_.node_count

In [9]:
import numpy as np
import math

def run_experiment(filename, class_feat, pos_class, n_classes, verbosity=0, seed=42, n_runs=10):
    ripper_precision_list = []
    ripper_recall_list = []
    total_conds_list = []
    ripper_models_list = []
    
    tree_precision_list = []
    tree_recall_list = []
    tree_nodes_list = []
    
    for i in range(n_runs):

        print(f'ripper {i+1} of {n_runs}')

        # Set random_state
        random_state = i+seed
        
        # Run ripper
        train, test = make_ripper_dataset(filename, random_state=random_state)
        ripper_clf = make_ripper(train, class_feat, pos_class, verbosity=verbosity, random_state=random_state)
        ripper_precision, ripper_recall, total_conds = score_ripper(ripper_clf, test)
        
        print(f'tree {i+1} of {n_runs}')
        
        # Run tree
        tree_train_X, tree_train_y, tree_test_X, tree_test_y = make_tree_dataset(filename, class_feat, pos_class, n_classes, random_state=random_state)
        tree_clf = make_tree(tree_train_X, tree_train_y, random_state=random_state)
        tree_precision, tree_recall, tree_nodes = score_tree(tree_clf, tree_test_X, tree_test_y)
        
        ripper_precision_list.append(ripper_precision)
        ripper_recall_list.append(ripper_recall)
        total_conds_list.append(total_conds)
        ripper_models_list.append(ripper_clf) 

        tree_precision_list.append(tree_precision)
        tree_recall_list.append(tree_recall)
        tree_nodes_list.append(tree_nodes)
        
    r_means = (np.mean(ripper_precision_list), np.mean(ripper_recall_list), np.mean(total_conds_list))
    r_meds = (np.median(ripper_precision_list), np.median(ripper_recall_list), np.median(total_conds_list))
    t_means = (np.mean(tree_precision_list), np.mean(tree_recall_list), np.mean(tree_nodes_list))
    t_meds = (np.median(tree_precision_list), np.median(tree_recall_list), np.median(tree_nodes_list))
    print(f'ripper means {r_means}')
    print(f'ripper medians {r_meds}')
    print(f'tree means {t_means}')
    print(f'tree medians {t_meds}')
    
    max_tree_precision_list = []
    max_tree_recall_list = []
    max_tree_nodes_list = []
    
    for i in range(n_runs):
        
        print(f'max-tree {i+1} of {n_runs}')
        
        # Set random state
        random_state = i+seed
        
        # Run max_depth-specified tree
        max_depth = max(1,int(math.log2(np.median(total_conds_list)))) # limit n_nodes to approx n_conds
        tree_train_X, tree_train_y, tree_test_X, tree_test_y = make_tree_dataset(filename, class_feat, pos_class, n_classes, random_state=random_state)
        tree_clf = make_tree(tree_train_X, tree_train_y, max_depth=max_depth, random_state=random_state)
        tree_precision, tree_recall, tree_nodes = score_tree(tree_clf, tree_test_X, tree_test_y)
        tree_precision, tree_recall, tree_nodes

        #irep_precision_list.append(irep_precision)
        #irep_recall_list.append(irep_recall)
        #total_conds_list.append(total_conds)

        max_tree_precision_list.append(tree_precision)
        max_tree_recall_list.append(tree_recall)
        max_tree_nodes_list.append(tree_nodes)
    
    max_t_means = (np.mean(max_tree_precision_list), np.mean(max_tree_recall_list), np.mean(max_tree_nodes_list))
    max_t_meds = (np.median(max_tree_precision_list), np.median(max_tree_recall_list), np.median(max_tree_nodes_list))
 
    print(f'maxtree means {max_t_means}')
    print(f'maxtree medians {max_t_meds}')
    
    print()
    print(ripper_models_list)
    
    return {'r_means':r_means,
            'r_meds':r_meds,
            't_means':t_means,
            't_meds':t_meds,
            'max_t_means':max_t_means,
            'max_t_meds':max_t_meds,
            'r_models':ripper_models_list,
            'n':n_runs
           }

In [10]:
datasets_path = '../datasets/'
random_state = 42

In [None]:
# Set up
dataset = 'balance-scale.csv'
filename = datasets_path + dataset
class_feat = 'class'
pos_class = 'B' #???
n_classes = 3
k=2
verbosity=1
n_runs=1

In [29]:
# Set up
dataset = 'breast-cancer.csv'
filename = datasets_path + dataset
class_feat = 'Recurrence'
pos_class = 'recurrence-events'
n_classes = 2
n_runs=10

timer = Timer()
breast_cancer_results = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, seed=42, n_runs=n_runs)
timer.stop()
pickle.dump(breast_cancer_results,open(filename.replace('.csv','.pkl'),'wb'))
breast_cancer_results

ripper 1 of 10
tree 1 of 10
ripper 2 of 10
tree 2 of 10
ripper 3 of 10
tree 3 of 10
ripper 4 of 10
tree 4 of 10
ripper 5 of 10
tree 5 of 10
ripper 6 of 10
tree 6 of 10
ripper 7 of 10
tree 7 of 10
ripper 8 of 10
tree 8 of 10
ripper 9 of 10
tree 9 of 10
ripper 10 of 10
tree 10 of 10
ripper means (0.6102525742231625, 0.310242006729118, 4.0)
ripper medians (0.6066433566433567, 0.26903225806451614, 4.0)
tree means (0.7573734254388288, 0.7323903419508938, 127.6)
tree medians (0.7573529411764706, 0.74824016563147, 128.0)
max-tree 1 of 10
max-tree 2 of 10
max-tree 3 of 10
max-tree 4 of 10
max-tree 5 of 10
max-tree 6 of 10
max-tree 7 of 10
max-tree 8 of 10
max-tree 9 of 10
max-tree 10 of 10
maxtree means (0.76902223248098, 0.9304058895554647, 7.0)
maxtree medians (0.7810485001401739, 0.9541744402985075, 7.0)


{'max_t_means': (0.76902223248098, 0.9304058895554647, 7.0),
 'max_t_meds': (0.7810485001401739, 0.9541744402985075, 7.0),
 'n': 10,
 'r_means': (0.6102525742231625, 0.310242006729118, 4.0),
 'r_meds': (0.6066433566433567, 0.26903225806451614, 4.0),
 't_means': (0.7573734254388288, 0.7323903419508938, 127.6),
 't_meds': (0.7573529411764706, 0.74824016563147, 128.0)}

In [None]:
# Set up

# Merge good, vgood
dataset = 'car.csv'
filename = datasets_path + dataset
class_feat = 'class'
pos_class = 'recurrence-events'
n_classes = #4, 5
random_state=30

In [None]:
# Set up
dataset = 'breast-cancer.csv'
filename = datasets_path + dataset
class_feat = 'Recurrence'
pos_class = 'recurrence-events'
n_classes = 2
random_state=30

In [38]:
# Set up
dataset = 'connect-4.csv'
filename = datasets_path + dataset
class_feat = 'class'
pos_class = 'win'
n_classes = 3
verbosity=2
n_runs=5

timer = Timer()
connect4 = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, seed=42, n_runs=n_runs)
timer.stop()
pickle.dump(connect4,open(filename.replace('.csv','.pkl'),'wb'))
connect4

ripper 1 of 5
updated dl for rule size 1: 4.2241579747998905
updated dl for rule size 2: 7.942680364767357
updated dl for rule size 3: 11.070560293584009
updated dl for rule size 4: 13.862640183419453
updated dl for rule size 5: 16.4201305410389
updated dl for rule size 6: 18.79710964463375
updated dl for rule size 7: 21.027115617291013
updated dl for rule size 8: 23.13293211968692
updated dl for rule size 9: 25.131022232519527
updated dl for rule size 10: 27.033824630984114
updated dl for rule size 11: 28.851059969264767
updated dl for rule size 12: 30.590528125204354
updated dl for rule size 13: 32.25862194855415
updated dl for rule size 14: 33.860673052123886
updated dl for rule size 15: 35.40119299814999
growing ruleset...

pos_growset 19976 pos_pruneset 9839
neg_growset 10350 neg_pruneset 5098
grew rule: [a1=o^d1=x^d2=b^c2=b^b1=o^g1=b^a3=b]
pruned rule unchanged
updated ruleset: [a1=o^d1=x^d2=b^c2=b^b1=o^g1=b^a3=b]

pos_growset 19870 pos_pruneset 9787
neg_growset 10350 neg_prunese

pos_growset 15401 pos_pruneset 7587
neg_growset 10304 neg_pruneset 5076
grew rule: [a1=o^d2=x^d3=b^a3=b^g3=b^e1=b^c3=b^g1=x]
pruned rule: [a1=o^d2=x^d3=b^a3=b^g3=b^e1=b^c3=b]
updated ruleset: ...[g1=o^g2=b^b1=o^a1=b^b2=o] V [a1=o^d2=x^d3=b^a3=b^g3=b^e1=b^c3=b]

pos_growset 15275 pos_pruneset 7524
neg_growset 10301 neg_pruneset 5074
grew rule: [c2=x^c3=b^d2=b^b1=o^b2=o^a1=b]
pruned rule: [c2=x^c3=b^d2=b^b1=o^b2=o]
updated ruleset: ...[a1=o^d2=x^d3=b^a3=b^g3=b^e1=b^c3=b] V [c2=x^c3=b^d2=b^b1=o^b2=o]

pos_growset 15087 pos_pruneset 7431
neg_growset 10296 neg_pruneset 5072
grew rule: [d1=x^d2=b^b1=o^c1=o^g1=b^a1=b^c3=b^f5=b]
pruned rule: [d1=x^d2=b^b1=o^c1=o^g1=b^a1=b^c3=b]
updated ruleset: ...[c2=x^c3=b^d2=b^b1=o^b2=o] V [d1=x^d2=b^b1=o^c1=o^g1=b^a1=b^c3=b]

pos_growset 15010 pos_pruneset 7394
neg_growset 10295 neg_pruneset 5072
grew rule: [a1=o^b1=b^d1=x^d3=x^c1=o]
pruned rule unchanged
updated ruleset: ...[d1=x^d2=b^b1=o^c1=o^g1=b^a1=b^c3=b] V [a1=o^b1=b^d1=x^d3=x^c1=o]

pos_growset 149

pos_growset 11975 pos_pruneset 5899
neg_growset 10185 neg_pruneset 5017
grew rule: [d1=x^c1=b^d2=b^a2=o^b2=b]
pruned rule: [d1=x^c1=b^d2=b^a2=o]
updated ruleset: ...[f2=x^f3=x^b1=b^c2=b^c1=x] V [d1=x^c1=b^d2=b^a2=o]

pos_growset 11873 pos_pruneset 5848
neg_growset 10181 neg_pruneset 5015
grew rule: [e1=o^c2=x^c3=b^d3=b^e3=b^f3=b^e2=b^a2=b]
pruned rule unchanged
updated ruleset: ...[d1=x^c1=b^d2=b^a2=o] V [e1=o^c2=x^c3=b^d3=b^e3=b^f3=b^e2=b^a2=b]

pos_growset 11767 pos_pruneset 5796
neg_growset 10179 neg_pruneset 5015
grew rule: [f1=o^f2=x^f3=x^c2=b^b2=b^d1=b^e2=b^g3=b]
pruned rule: [f1=o^f2=x^f3=x^c2=b^b2=b^d1=b^e2=b]
updated ruleset: ...[e1=o^c2=x^c3=b^d3=b^e3=b^f3=b^e2=b^a2=b] V [f1=o^f2=x^f3=x^c2=b^b2=b^d1=b^e2=b]

pos_growset 11690 pos_pruneset 5759
neg_growset 10178 neg_pruneset 5014
grew rule: [c3=x^c2=x^d3=b^g1=b^d1=x]
pruned rule: [c3=x^c2=x^d3=b^g1=b]
updated ruleset: ...[f1=o^f2=x^f3=x^c2=b^b2=b^d1=b^e2=b] V [c3=x^c2=x^d3=b^g1=b]

pos_growset 11530 pos_pruneset 5679
neg_grows

pos_growset 10102 pos_pruneset 4976
neg_growset 10100 neg_pruneset 4976
grew rule: [a1=o^c3=x^c4=b^b1=o^b2=b]
pruned rule: [a1=o^c3=x^c4=b^b1=o]
updated ruleset: ...[g1=o^f1=o^g2=b^b1=x^d1=b^b2=b] V [a1=o^c3=x^c4=b^b1=o]

pos_growset 10074 pos_pruneset 4962
neg_growset 10098 neg_pruneset 4975
grew rule: [a1=o^a2=o^d1=b^b1=b^c2=b^f1=b]
pruned rule unchanged
updated ruleset: ...[a1=o^c3=x^c4=b^b1=o] V [a1=o^a2=o^d1=b^b1=b^c2=b^f1=b]

pos_growset 10052 pos_pruneset 4951
neg_growset 10098 neg_pruneset 4975
grew rule: [a1=b^f1=o^e1=o^g1=b^f2=b^c1=x^e5=b]
pruned rule: [a1=b^f1=o^e1=o]
updated ruleset: ...[a1=o^a2=o^d1=b^b1=b^c2=b^f1=b] V [a1=b^f1=o^e1=o]


GREW INITIAL RULESET:
[[a1=o^d1=x^d2=b^c2=b^b1=o^g1=b^a3=b] V
[a1=o^d1=x^a2=o^e1=b^d3=b^b1=b] V
[a1=o^b1=o^g1=b^a2=b^c2=b] V
[g1=o^a1=o^b1=o^d3=b^g2=b] V
[g1=o^g2=o^d1=b^c1=x^g3=b^a2=b] V
[a1=o^d1=x^d2=b^a2=o^a4=b^g2=b] V
[g1=o^a1=o^b1=b^a2=o] V
[d1=x^d3=b^d2=x^c3=b] V
[a1=o^c2=x^c3=b^d3=b^e1=o^e4=b] V
[g1=o^b2=x^a1=o^b3=b^d2=b^f3=b^c3=b] 

KeyboardInterrupt: 

In [11]:
# Set up
dataset = 'house-votes-84.csv'
filename = datasets_path + dataset
class_feat = 'Party'
pos_class = 'democrat'
n_classes = 2
verbosity=0
n_runs=10

timer = Timer()
congress_results = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, seed=42, n_runs=n_runs)
timer.stop()
pickle.dump(congress_results,open(filename.replace('.csv','.pkl'),'wb'))
congress_results

ripper 1 of 10
tree 1 of 10
ripper 2 of 10
tree 2 of 10
ripper 3 of 10
tree 3 of 10
ripper 4 of 10
tree 4 of 10
ripper 5 of 10
tree 5 of 10
ripper 6 of 10
tree 6 of 10
ripper 7 of 10
tree 7 of 10
ripper 8 of 10
tree 8 of 10
ripper 9 of 10
tree 9 of 10
ripper 10 of 10
tree 10 of 10
ripper means (0.9822343858411969, 0.8574556909273443, 2.9)
ripper medians (0.9869275461380724, 0.8539714151827553, 3.0)
tree means (0.9541566437840467, 0.9564409277290078, 39.8)
tree medians (0.9612237420615535, 0.9662878787878788, 40.0)
max-tree 1 of 10
max-tree 2 of 10
max-tree 3 of 10
max-tree 4 of 10
max-tree 5 of 10
max-tree 6 of 10
max-tree 7 of 10
max-tree 8 of 10
max-tree 9 of 10
max-tree 10 of 10
maxtree means (0.9798652374805726, 0.9582411320379064, 3.0)
maxtree medians (0.9826122672508215, 0.9562828475871954, 3.0)

[<RIPPER object fit ruleset=[physician-fee-freeze=n]>, <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^mx-missile=y] V [education-spending=n^physici

{'max_t_means': (0.9798652374805726, 0.9582411320379064, 3.0),
 'max_t_meds': (0.9826122672508215, 0.9562828475871954, 3.0),
 'n': 10,
 'r_means': (0.9822343858411969, 0.8574556909273443, 2.9),
 'r_meds': (0.9869275461380724, 0.8539714151827553, 3.0),
 'r_models': [<RIPPER object fit ruleset=[physician-fee-freeze=n]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^mx-missile=y] V [education-spending=n^physician-fee-freeze=?]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [adoption-of-the-budget-resolution=y^anti-satellite-test-ban=n^Water-project-cost-sharing=y] V [synfuels-corporation-cutback=y^mx-missile=y]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^adoption-of-the-budget-resolution=y^anti-satellite-test-ban=n]>,
  <RIPPER object fit ruleset=[physician-fee-freeze=n] V [synfuels-corporation-cutback=y^adoption-of-the-budget-resolution=y]>,
  <RIPPER object fit ruleset=[physician-fee-freez

In [13]:
congress_results['r_models'][0].ruleset_

<Ruleset object: [physician-fee-freeze=n]>

In [None]:
timer = Timer()
results = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, seed=42, n_runs=1)
pickle.dump(results,open(filename.replace('.csv','.pkl'),'wb'))
timer.buzz()

In [56]:
# Set up
dataset = 'kr-vs-kp.csv'
filename = datasets_path + dataset
class_feat = 'won/lost'
pos_class = 'won'
n_classes = 2
k=2
verbosity=1
n_runs=5

timer = Timer()
chess_results = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, seed=42, n_runs=n_runs)
pickle.dump(chess_results,open(filename.replace('.csv','.pkl'),'wb'))
timer.stop()
chess_results

ripper 1 of 5

GREW INITIAL RULESET:
[[10=f^33=f^35=t^6=t] V
[10=f^33=f^35=t^23=f^13=l^15=n] V
[15=w^31=t^1=f^10=f] V
[10=f^33=f^32=f^15=w] V
[7=f^18=f^26=f^27=f^2=f] V
[7=f^18=f^33=t^2=t^36=t] V
[7=f^31=t^15=w^1=f] V
[7=f^10=f^33=f^35=t^15=n^23=f] V
[8=f^1=t^11=t^34=t^33=f] V
[8=f^18=f^7=f^2=f^10=t] V
[7=f^31=t^18=f^24=f^20=f] V
[8=f^32=f^33=f^10=f^6=t] V
[7=f^32=f^8=f^5=t^10=t] V
[7=f^10=f^34=f^16=f^32=f^27=f^4=f^33=t^30=f^3=f^20=f] V
[7=f^32=f^6=f^17=t^12=t^10=f] V
[34=t^35=t^33=f^10=f^15=n^7=t] V
[7=f^32=f^10=f^18=f^31=t^9=t] V
[7=f^2=f^8=f^24=f^10=t^36=t^33=t] V
[7=f^17=t^33=f^10=f] V
[8=f^36=t^13=l^5=f^18=t^12=f^34=t^23=f^26=f^2=f^10=t] V
[7=f^32=f^31=t^22=t] V
[7=f^36=t^13=l^10=f^9=t^26=t^24=f] V
[36=t^30=t^7=f^34=f^6=t] V
[26=f^34=t^33=f^6=f^5=t^24=t]]

optimization run 1 of 2

OPTIMIZED RULESET:
[[10=f^33=f^35=t^6=t] V
[10=f^33=f^35=t^18=f] V
[15=w^31=t^1=f] V
[10=f^33=f^32=f^15=w] V
[7=f^18=f^36=t^27=f^24=f^10=f] V
[7=f^18=f^33=t^2=t^36=t] V
[7=f^31=t^15=w^1=f] V
[33=f^10=f^3


OPTIMIZED RULESET:
[[33=f^10=f^35=t^6=t] V
[33=f^10=f^35=t^15=n^23=f] V
[15=w^31=t^1=f] V
[10=f^11=t^15=w^33=f] V
[7=f^18=f^10=f^4=f^24=f] V
[7=f^18=f^2=f^9=f^27=f^35=t^24=f] V
[7=f^17=t^10=f^33=f] V
[7=f^18=f^10=f^27=f^33=t^16=f] V
[8=f^1=t^11=t^33=f^14=f] V
[7=f^18=f^10=f^27=f^35=t^16=f] V
[9=f^26=f^10=t^33=t^36=t^7=f] V
[7=f^18=f^10=f^27=f^35=t^16=f] V
[9=f^35=t^2=f^7=f^10=t^5=t] V
[5=f^6=t^16=f^10=f^33=f] V
[9=f^7=f^2=f^18=f^10=t] V
[5=f^33=t^10=f^11=f^13=l^26=t^9=t] V
[9=f^33=t^18=f^7=f^4=f^10=f^27=f^16=f^20=f] V
[9=f^5=f^33=t^10=f^27=f^16=f^34=f^3=f^12=f^7=f] V
[35=t^34=t^5=f^10=f^13=l^9=t^36=t^24=t] V
[9=f^35=t^12=t^10=f^36=n^26=f^13=l]]

GREW FINAL RULES
[[33=f^10=f^35=t^6=t] V
[33=f^10=f^35=t^15=n^23=f] V
[15=w^31=t^1=f] V
[10=f^11=t^15=w^33=f] V
[7=f^18=f^10=f^4=f^24=f] V
[7=f^18=f^2=f^9=f^27=f^35=t^24=f] V
[7=f^17=t^10=f^33=f] V
[7=f^18=f^10=f^27=f^33=t^16=f] V
[8=f^1=t^11=t^33=f^14=f] V
[7=f^18=f^10=f^27=f^35=t^16=f] V
[9=f^26=f^10=t^33=t^36=t^7=f] V
[7=f^18=f^10=f^27=f^35

FINAL RULESET:
[[33=f^10=f^35=t^6=t] V
[33=f^10=f^35=t^15=n^23=f] V
[15=w^10=f^11=t^27=f] V
[18=f^7=f^10=f^24=f^20=f^27=f] V
[7=f^18=f^6=f^9=t^2=t] V
[8=f^18=f^15=w^31=t] V
[8=f^33=f^32=f^1=t^14=f] V
[7=f^35=t^9=f^36=t^10=t^33=t] V
[7=f^17=t^10=f^33=f] V
[7=f^15=w^33=t^8=f] V
[8=f^33=f^6=t^10=f] V
[7=f^35=t^5=t^10=t^8=f] V
[31=t^22=t^33=t^12=f^7=f] V
[6=t^33=f^10=f]]

tree 5 of 5
ripper means (0.9639305558397989, 0.8230942165066917, 60.0)
ripper medians (0.9651639344262295, 0.8456014362657092, 64.0)
tree means (0.9733270743264821, 0.9658035812497255, 279.8)
tree medians (0.9717698154180239, 0.9692144373673036, 279.0)
max-tree 1 of 5
max-tree 2 of 5
max-tree 3 of 5
max-tree 4 of 5
max-tree 5 of 5
maxtree means (0.9452411293887681, 0.9892874726299489, 53.4)
maxtree medians (0.9473684210526315, 0.9893617021276596, 53.0)

[<IREP object fit ruleset=[10=f^33=f^35=t^6=t] V [10=f^33=f^35=t^18=f] V [15=w^31=t^1=f] V [10=f^33=f^32=f^15=w] V [7=f^18=f^36=t^27=f^24=f^10=f] V [33=f^10=f^35=t^15=n^2

{'max_t_means': (0.9452411293887681, 0.9892874726299489, 53.4),
 'max_t_meds': (0.9473684210526315, 0.9893617021276596, 53.0),
 'n': 5,
 'r_means': (0.9639305558397989, 0.8230942165066917, 60.0),
 'r_meds': (0.9651639344262295, 0.8456014362657092, 64.0),
 'r_models': [<IREP object fit ruleset=[10=f^33=f^35=t^6=t] V [10=f^33=f^35=t^18=f] V [15=w^31=t^1=f] V [10=f^33=f^32=f^15=w] V [7=f^18=f^36=t^27=f^24=f^10=f] V [33=f^10=f^35=t^15=n^23=f] V [8=f^1=t^11=t^34=t^33=f] V [7=f^31=t^22=t^12=f^24=t] V [9=f^18=f^2=f^10=t] V [7=f^18=f^2=f^26=f^9=f^16=f^27=f] V [7=f^32=f^8=f^5=t^10=t] V [7=f^10=f^34=f^16=f^32=f^27=f^4=f^33=t^30=f] V [7=f^17=t^33=f^10=f] V [6=t^33=f^10=f]>,
  <IREP object fit ruleset=[33=f^10=f^35=t^6=t] V [10=f^33=f^35=t^13=l^18=f] V [10=f^33=f^32=f^15=w] V [7=f^10=f^33=f^35=t^15=n^23=f] V [18=f^7=f^33=t^16=f^27=f^10=f] V [31=t^15=w] V [8=f^11=t^1=t^33=f] V [7=f^17=t^10=f^33=f] V [18=f^7=f^33=t^24=f^36=t^27=f]>,
  <IREP object fit ruleset=[33=f^10=f^35=t^6=t] V [33=f^10=f^35=t^1

In [57]:
len(chess_results['r_models'])

5

In [35]:
# Set up
dataset = 'mushroom.csv'
filename = datasets_path + dataset
class_feat = 'Poisonous/Edible'
pos_class = 'p'
n_classes = 2
k=2
verbosity=1
n_runs=10

timer = Timer()
mushroom_results = run_experiment(filename, class_feat, pos_class, n_classes, verbosity=verbosity, seed=42, n_runs=n_runs)
timer.stop()
pickle.dump(mushroom_results,open(filename.replace('.csv','.pkl'),'wb'))
mushroom_results

ripper 1 of 10

GREW INITIAL RULESET:
[[Stalk-surface-above-ring=k^Gill-spacing=c] V
[Gill-size=n^Sport-print-color=w^Gill-spacing=c] V
[Gill-size=n^Population=s] V
[Sport-print-color=h^Cap-surface=s] V
[Gill-size=n^Cap-surface=s^Stalk-shape=e] V
[Gill-size=n^Habitat=g] V
[Gill-size=n^Sport-print-color=k^Stalk-root=b] V
[Population=v^Stalk-shape=e^Bruises?=t^Cap-shape=b] V
[Gill-size=n^Cap-surface=y^Bruises?=t] V
[Gill-size=n^Stalk-root=c] V
[Population=v^Stalk-shape=e^Stalk-root=b^Stalk-color-below-ring=w^Cap-surface=f] V
[Ring-number=t^Population=v^Cap-shape=f^Gill-color=g] V
[Ring-number=t^Population=v^Habitat=g] V
[Habitat=m^Cap-shape=f] V
[Habitat=l^Cap-color=w]]

optimization run 1 of 2

OPTIMIZED RULESET:
[[Stalk-surface-above-ring=k^Gill-spacing=c] V
[Gill-size=n^Sport-print-color=w^Gill-spacing=c] V
[Gill-size=n^Population=s] V
[Sport-print-color=h^Cap-surface=s] V
[Gill-size=n^Cap-surface=s^Stalk-shape=e] V
[Gill-size=n^Sport-print-color=k^Stalk-root=b] V
[Gill-size=n^Stalk-s

GREW FINAL RULES
[[Stalk-surface-above-ring=k^Gill-spacing=c] V
[Gill-size=n^Stalk-root=?^Stalk-shape=t] V
[Gill-size=n^Population=s] V
[Sport-print-color=h^Cap-surface=s] V
[Gill-size=n^Cap-surface=s^Stalk-shape=e] V
[Gill-size=n^Habitat=g] V
[Population=v^Stalk-shape=e^Bruises?=t] V
[Gill-size=n^Stalk-root=b^Gill-spacing=c] V
[Gill-size=n^Population=c] V
[Gill-size=n^Cap-color=p] V
[Gill-size=n^Gill-color=u^Cap-surface=f] V
[Gill-size=n^Cap-color=g^Gill-spacing=w] V
[Gill-color=g^Stalk-root=b] V
[Stalk-surface-above-ring=k^Gill-spacing=c] V
[Gill-size=n^Stalk-root=?^Stalk-shape=t] V
[Gill-size=n^Population=s] V
[Sport-print-color=h^Cap-surface=s] V
[Gill-size=n^Cap-surface=s^Stalk-shape=e] V
[Gill-size=n^Habitat=g] V
[Population=v^Stalk-shape=e^Bruises?=t] V
[Gill-size=n^Stalk-root=b^Gill-spacing=c] V
[Gill-size=n^Population=c]]

FINAL RULESET:
[[Stalk-surface-above-ring=k^Gill-spacing=c] V
[Gill-size=n^Stalk-root=?^Stalk-shape=t] V
[Gill-size=n^Population=s] V
[Sport-print-color=h^C

FINAL RULESET:
[[Stalk-surface-above-ring=k^Gill-spacing=c] V
[Gill-size=n^Sport-print-color=w^Gill-spacing=c] V
[Gill-size=n^Population=s] V
[Sport-print-color=h^Cap-surface=s] V
[Gill-size=n^Sport-print-color=k^Bruises?=t] V
[Gill-size=n^Sport-print-color=n^Stalk-shape=e^Bruises?=t] V
[Population=v^Stalk-shape=e^Stalk-root=b^Stalk-color-below-ring=w] V
[Habitat=l^Cap-color=w] V
[Stalk-surface-above-ring=y^Cap-color=y]]

tree 8 of 10
ripper 9 of 10

GREW INITIAL RULESET:
[[Stalk-surface-above-ring=k^Gill-spacing=c] V
[Gill-size=n^Sport-print-color=w^Gill-spacing=c] V
[Gill-size=n^Population=s] V
[Sport-print-color=h^Cap-surface=s] V
[Gill-size=n^Habitat=g] V
[Gill-size=n^Cap-surface=s^Stalk-shape=e] V
[Gill-size=n^Cap-surface=y^Bruises?=t] V
[Population=v^Ring-number=t^Cap-shape=b] V
[Gill-size=n^Population=c] V
[Gill-size=n^Cap-shape=x^Stalk-root=b^Bruises?=f^Cap-surface=f] V
[Ring-number=t^Population=v^Habitat=g] V
[Habitat=m^Cap-shape=f]]

optimization run 1 of 2

OPTIMIZED RULESET

{'max_t_means': (0.9961124907561201, 0.9904823381289631, 18.4),
 'max_t_meds': (0.9992519626887717, 0.9869280976811692, 19.0),
 'n': 10,
 'r_means': (0.9985686906328078, 0.9997701149425288, 28.8),
 'r_meds': (1.0, 1.0, 28.5),
 'r_models': [<IREP object fit ruleset=[Stalk-surface-above-ring=k^Gill-spacing=c] V [Gill-size=n^Sport-print-color=w^Gill-spacing=c] V [Gill-size=n^Population=s] V [Sport-print-color=h^Cap-surface=s] V [Gill-size=n^Cap-surface=s^Stalk-shape=e] V [Gill-size=n^Sport-print-color=k^Stalk-root=b] V [Gill-size=n^Stalk-shape=e^Bruises?=t] V [Ring-number=t^Population=v^Cap-shape=b] V [Gill-size=n^Population=c] V [Population=v^Stalk-shape=e^Stalk-root=b^Stalk-color-below-ring=w^Cap-surface=f] V [Ring-number=t^Population=v^Habitat=g] V [Habitat=m^Cap-shape=f]>,
  <IREP object fit ruleset=[Stalk-surface-above-ring=k^Gill-spacing=c] V [Gill-size=n^Sport-print-color=w^Gill-spacing=c] V [Gill-size=n^Population=s] V [Sport-print-color=h^Cap-surface=s] V [Gill-size=n^Bruises?=t^

In [None]:
# Set up

# Recommend and very recommend

dataset = 'nursery.csv'
filename = datasets_path + dataset
class_feat = 'class'
pos_class = 'p'
n_classes = 2
k=2
verbosity=5
n_runs=1

In [None]:
# Set up

# Has test and train

dataset = 'SPECT.csv'
filename = datasets_path + dataset
class_feat = 'class'
pos_class = '1'
n_classes = 2
k=2
verbosity=5
n_runs=1

In [None]:
# Set up

# Has test and train

dataset = 'tic-tac-toe.csv'
filename = datasets_path + dataset
class_feat = 'Class'
pos_class = 'positive'
n_classes = 2
k=2
verbosity=5
n_runs=1

In [48]:
mushrooms = pd.read_csv(datasets_path+'mushroom.csv')
mushrooms.head()

Unnamed: 0,Poisonous/Edible,Cap-shape,Cap-surface,Cap-color,Bruises?,Odor,Gill-attachment,Gill-spacing,Gill-size,Gill-color,...,Stalk-surface-below-ring,Stalk-color-above-ring,Stalk-color-below-ring,Veil-type,Veil-color,Ring-number,Ring-type,Sport-print-color,Population,Habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [55]:
mushrooms = pd.read_csv(datasets_path+'mushroom.csv')
mushrooms['Gill-size']=mushrooms['Gill-size'].map(lambda x:1 if x=='b' else 0)
mushrooms['Poisonous/Edible']=mushrooms['Poisonous/Edible'].map(lambda x:1 if x=='p' else 0)
mushrooms = mushrooms[['Gill-size','Poisonous/Edible']].dropna()
mushrooms['Gill-size'].corr(mushrooms['Poisonous/Edible'])


-0.5400243574330186