In [1]:
import numpy as np
import pandas as pd
from pandas.io.parsers import read_csv
from BOAmodel import *
from collections import defaultdict


""" parameters """
# Tune the values below to the size and complexity of the data set.
#   N          -- number of rules used by SA_patternbased; also the number of
#                 rules that generate_rules outputs
#   Niteration -- number of iterations in each chain
#   Nchain     -- number of chains in the simulated annealing search algorithm
N, Niteration, Nchain = 2000, 500, 2

#   supp   -- support in percent; 5% is a generally good number, and the
#             higher it is, the 'larger' a pattern is
#   maxlen -- maximum length of a pattern
supp, maxlen = 5, 3

# rho = alpha / (alpha + beta). Choose alpha and beta so that rho is close to one.
alpha_1, beta_1 = 500, 1    # alpha_+, beta_+
alpha_2, beta_2 = 500, 1    # alpha_-, beta_-

""" input file """
# Note that in this example X is already binary coded.
# The data has to be binary coded, and every column name should have the form
# attributename_attributevalue.
filepathX = 'tictactoe_X.txt' # input file X
filepathY = 'tictactoe_Y.txt' # input file Y

# X: space-separated table whose first row holds the column names.
df = read_csv(filepathX,header=0,sep=" ")

# Y: space-separated numeric file. The path is handed to loadtxt directly so
# that NumPy opens and closes the file itself; passing an already opened file
# object here would leave that handle open for the rest of the session.
Y = np.loadtxt(filepathY,delimiter=" ")


# Random 70% / 30% split of the row positions 0 .. lenY-1.
# No random seed is set here, so the split differs from run to run.
lenY = len(Y)
train_index = sample(range(lenY),int(0.70*lenY))

# Everything that was not drawn for training becomes the test set. A set
# difference avoids scanning the train_index list once per row (quadratic in
# lenY); sorting restores the ascending order the list scan used to produce.
test_index = sorted(set(range(lenY)).difference(train_index))

# Build the model from the training rows of X and the matching entries of Y.
model = BOA(df.iloc[train_index],Y[train_index])
# Generate candidate rules from the support (supp), maximum pattern length
# (maxlen) and number of rules to keep (N) configured above.
model.generate_rules(supp,maxlen,N)
# Pass alpha_+/beta_+ and alpha_-/beta_-; the last two arguments are left as
# None. NOTE(review): the run log below reports that default alpha_l / beta_l
# values are used in this case -- confirm against BOAmodel.
model.set_parameters(alpha_1,beta_1,alpha_2,beta_2,None,None)
# Run the simulated annealing search (Nchain chains of Niteration iterations)
# with progress messages enabled; the returned rules are what predict() is
# given in the test step.
rules = model.SA_patternbased(Niteration,Nchain,print_message=True)

# test
# Predict on the held-out rows and compare with the held-out entries of Y.
Yhat = predict(rules,df.iloc[test_index])
TP,FP,TN,FN = getConfusion(Yhat,Y[test_index])

# Denominators of the three ratios. Any of them can be zero when the test
# split contains no positive rows, no negative rows, or no rows at all; in
# that case the ratio is undefined and is reported as nan instead of aborting
# the script with a division by zero.
n_pos = TP+FN
n_neg = FP+TN
n_all = TP+TN+FP+FN
tpr = float(TP)/n_pos if n_pos else float('nan')
fpr = float(FP)/n_neg if n_neg else float('nan')
accuracy = float(TP+TN)/n_all if n_all else float('nan')
print('TP = {}, FP = {}, TN = {}, FN = {} \n accuracy = {}, tpr = {}, fpr = {}'.format(TP,FP,TN,FN, accuracy,tpr,fpr))


	Took 18.280s to generate 19366 rules
Screening rules using information gain


  cond_entropy = -pp*(p1*np.log(p1)+(1-p1)*np.log(1-p1))-(1-pp)*(p2*np.log(p2)+(1-p2)*np.log(1-p2))
  cond_entropy = -pp*(p1*np.log(p1)+(1-p1)*np.log(1-p1))-(1-pp)*(p2*np.log(p2)+(1-p2)*np.log(1-p2))
  cond_entropy[p2*(1-p2)==0] = -(pp*(p1*np.log(p1)+(1-p1)*np.log(1-p1)))[p2*(1-p2)==0]
  cond_entropy[p2*(1-p2)==0] = -(pp*(p1*np.log(p1)+(1-p1)*np.log(1-p1)))[p2*(1-p2)==0]
  alpha = np.exp(float(pt_new -pt_curr)/T)


	Took 3.695s to generate 2000 rules
Computing sizes for pattern space ...
	Took 0.004s to compute patternspace
No or wrong input for alpha_l and beta_l. The model will use default parameters!

** chain = 0, max at iter = 0 ** 
 accuracy = 0.5895522388059702, TP = 208.0,FP = 44.0, TN = 187.0, FN = 231.0
 pt_new is -702.9859319555703, prior_ChsRules=-23.150756822809853, likelihood_1 = -165.1480013493242, likelihood_2 = -514.6871737834363
 
['4_O_neg', '5_X', '6_X']
['3_O_neg', '1_B_neg', '5_O_neg']
[446, 1312]

** chain = 0, max at iter = 1 ** 
 accuracy = 0.6791044776119403, TP = 260.0,FP = 36.0, TN = 195.0, FN = 179.0
 pt_new is -607.5540735519471, prior_ChsRules=-23.150756822809853, likelihood_1 = -144.36109384903466, likelihood_2 = -440.0422228801026
 
['3_O_neg', '1_B_neg', '5_O_neg']
['5_O_neg', '9_O_neg', '1_O_neg']
[1312, 1860]

** chain = 0, max at iter = 2 ** 
 accuracy = 0.735820895522388, TP = 298.0,FP = 36.0, TN = 195.0, FN = 141.0
 pt_new is -556.5933205999108, prior_ChsRul


** chain = 1, max at iter = 22 ** 
 accuracy = 0.9791044776119403, TP = 439.0,FP = 14.0, TN = 217.0, FN = 0.0
 pt_new is -170.0353474994622, prior_ChsRules=-98.29722694484917, likelihood_1 = -71.37765281243537, likelihood_2 = -0.3604677421776614
 
['5_O_neg', '8_X_neg', '2_O']
['7_O_neg', '5_O_neg', '3_O_neg']
['9_X', '3_X', '6_X']
['8_X', '5_X', '2_X']
['8_X', '9_X', '7_X']
['5_X', '4_X', '6_X']
['4_X', '1_X', '7_X']
['2_X', '3_X', '1_X']
['5_X', '1_X', '9_X']
[800, 1897, 1929, 1938, 1930, 1856, 1838, 1927, 1942]

** chain = 1, max at iter = 26 ** 
 accuracy = 1.0, TP = 439.0,FP = 0.0, TN = 231.0, FN = 0.0
 pt_new is -88.76338619541866, prior_ChsRules=-87.75337345330445, likelihood_1 = -0.6302073807864872, likelihood_2 = -0.37980536132772613
 
['7_O_neg', '5_O_neg', '3_O_neg']
['8_X', '9_X', '7_X']
['5_X', '4_X', '6_X']
['4_X', '1_X', '7_X']
['5_X', '1_X', '9_X']
['8_X', '5_X', '2_X']
['9_X', '3_X', '6_X']
['2_X', '3_X', '1_X']
[1897, 1930, 1856, 1838, 1942, 1938, 1929, 1927]
TP = 18