In [2]:
# dataframe management
import pandas as pd
import math
import numpy as np
import sklearn
import json
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from functools import reduce # Valid in Python 2.6+, required in Python 3
import operator
from pyomo.environ import *
from pyomo.opt import SolverFactory

infinity = float('inf')


#model = AbstractModel()

# Preprocessing of dataset
Let's load the Iris dataset:

In [3]:
iris = pd.read_csv('IrisCategorical.csv')
iris = iris.drop('Id', axis=1)
iris_std = iris.copy()
iris.head(5)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
scaler = MinMaxScaler() # also MaxAbsScaler()

In [5]:
#Preprocessing: we get the columns names of features which have to be standardized
columns_names = list(iris)
index_features = list(range(0, 4))
index_instances = list(range(0,150))

#The name of the classes K
classes = iris_std['Species'].unique().tolist()
classes_en = [i for i in range(len(classes))] 

#Encoder processing
le = preprocessing.LabelEncoder()
le.fit(iris_std['Species'])

iris_std['Species'] = le.transform(iris_std['Species']) 

#Scaling phase
iris_std[columns_names[0:4]] = scaler.fit_transform(iris_std[columns_names[0:4]])

iris_std.head(1)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,0.222222,0.625,0.067797,0.041667,0


In [6]:
BF_in_NL_R = {4:[],5:[2],6:[1],7:[1,3]}
BF_in_NL_L = {4:[1,2],5:[1],6:[3],7:[]}
I_in_k = {i : list(iris_std[iris_std['Species']== i].index) for i in range(len(classes))}
my_W = {(i,j): 0.5 if i != j else 0 for i in classes_en for j in classes_en}

x = iris_std.loc[:, columns_names].values
y = iris_std.loc[:,['Species']].values
my_x = {(i,j): x[i,j] for i in index_instances for j in index_features}
print(type([2]))
print(type(I_in_k[0]))

<class 'list'>
<class 'list'>


In [7]:
def B_in_NR(model, i):
    if i==4:
        return []
    elif i==5:
        return [2]
    elif i==6:
        return [1]
    elif i==7:
        return [1,3]
def B_in_NL(model, i):
    if i==4:
        return [1,2]
    elif i==5:
        return [1]
    elif i==6:
        return [3]
    elif i==7:
        return []

def I_k(model,i):
    if i==0:
        return I_in_k[0]
    elif i==1:
        return I_in_k[1]
    elif i==2:
        return I_in_k[2]

# Model definition
We initialize the __model__ and the sets K, N_L, N_B, I, I_k, N_L_L, N_L_R and f_s are declared abstractly using the Set component:

In [43]:
model = ConcreteModel() #ConcretModel()
# Instances & Classes
# Assume a dict I_in_k, with keys k and values of a list of I's in that k

model.I = Set(initialize=set(i for k in I_in_k for i in I_in_k[k]))
model.K = Set(initialize=I_in_k.keys())
model.I_k = Set(model.K,initialize=I_k)    ##########################

# Features
model.f_s =Set(initialize=index_features)

# Nodes Leaf N_L & Nodes Breanch N_B
model.N_B = Set(initialize=set(i for k in BF_in_NL_R for i in BF_in_NL_R[k]))
model.N_L = Set(initialize=BF_in_NL_R.keys())
model.N_L_R = Set(model.N_L,initialize=B_in_NR)
model.N_L_L = Set(model.N_L,initialize=B_in_NL)

Similarly, the model parameters are defined abstractly using the __Param__ component:

In [44]:
# Cost of misclassification
model.W = Param(model.K, model.K, within=NonNegativeReals, initialize=my_W)

# Value for the instance i-th of the feature j-th
model.x = Param(model.I, model.f_s, within=PercentFraction, initialize=my_x)

The __Var__ component is used to define the decision variables:

In [45]:
# The weigths of feature j-th in breanch node t-th

#random initialization
init_a = np.random.uniform(low=-1.0, high=1.0, size=None)
init_mu = np.random.uniform(low=-1.0, high=1.0, size=None)
init_C = np.random.uniform(low=0.0, high=1.0, size=None)
init_P = np.random.uniform(low=0.0, high=1.0, size=None)
init_p = np.random.uniform(low=0.0, high=1.0, size=None)

model.a = Var(model.f_s, model.N_B, within=Reals, bounds = (-1.0,1.0),initialize=init_a)

# The intercepts of the linear combinations correspond to decision variables
model.mu = Var(model.N_B, within = Reals, bounds = (-1.0,1.0),initialize=init_mu)

# The variables thtat take into account if node t is labeled with class k
model.C = Var(model.K, model.N_L, within = PercentFraction,initialize=init_C)

# An auxiliary variables
model.P = Var(model.I,model.N_L,within = PercentFraction,initialize=init_P)
model.p = Var(model.I,model.N_B,within = PercentFraction,initialize=init_p)

Several definition of functions: tools useful to characterize the objective function

In [46]:
def my_sigmoid(a,x,mu,length,scale=512):
    l = length
    val = (sum([a[i]*x   for i, x in enumerate(x)]) / l) - mu 
    # The default value is 512 as suggested in Blanquero et Al.
    return 1 / (1 + math.exp(-scale*val))

# An easy way to manage product within elements of an iterable object
def multiply_numpy(iterable):
    return np.prod(np.array(iterable))

# Calculate the probability of an individual falling into a given leaf node:
def Prob(model,instance_idx, leaf_idx):
    left = [my_sigmoid(model.a[:,tl],model.x[instance_idx,:],model.mu[tl],4) for tl in model.N_L_L[leaf_idx] ]
    right = [1-my_sigmoid(model.a[:,tl],model.x[instance_idx,:],model.mu[tl],4) for tr in model.N_L_R[leaf_idx] ]
    
    return multiply_numpy(left)*multiply_numpy(right)
    

The __Objective component__ is used to define the cost objective. This component uses a rule function to construct the objective expression:

In [46]:
# Minimize the cost of misclassification
def cost_rule(model):
    return sum( sum( sum( model.P[i,t]* sum(model.W[k,j]*model.C[j,t] for j in model.K if k!=j)  for t in model.N_L) for i in model.I_k[k] ) for k in model.K )
model.cost = Objective(rule=cost_rule, sense=minimize)

In [47]:
# We must add the following set of constraints for making a single class prediction at each leaf node:
def Pr(model,i,tl):
    return  reduce(operator.mul,(model.p[i,t] for t in model.N_L_L[tl]),1)*reduce(operator.mul,(1-model.p[i,tr] for tr in model.N_L_R[tl]),1) == model.P[i,tl]
model.Pr = Constraint(model.I,model.N_L, rule=Pr)

def pr(model, i , tb):
    return 1 / (1 + exp(-512*(   (sum(model.x[i,j]*model.a[j,tb]for j in model.f_s)/4)-model.mu[tb]  ))) ==model.p[i,tb]
model.pr = Constraint(model.I,model.N_B, rule=pr)

Similarly, rule functions are used to define constraint expressions in the __Constraint__ component:

In [48]:
# We must add the following set of constraints for making a single class prediction at each leaf node:
def class_in_leaf(model, tl):
    return  sum(model.C[k,tl] for k in model.K) == 1
model.class_in_leaf = Constraint(model.N_L, rule=class_in_leaf)

# We force each class k to be identified by, at least, one terminal node, by adding the set of constraints below:
def leaf_in_class(model,k):
    return sum(model.C[k,tl] for tl in model.N_L) >=1
model.leaf_in_class = Constraint(model.K, rule=leaf_in_class)

In [49]:
opt = SolverFactory('ipopt',executable='C:/Users/antoc/Desktop/Ipopt-3.11.1-win64-intel13.1/bin/ipopt.exe')
# Create a model instance and optimize
#instance = model.create_instance()
results = opt.solve(model,tee=True)
#instance.display()

Ipopt 3.11.1: 

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit http://projects.coin-or.org/Ipopt
******************************************************************************

NOTE: You are using Ipopt by default with the MUMPS linear solver.
      Other linear solvers might be more efficient (see Ipopt documentation).


This is Ipopt version 3.11.1, running with linear solver mumps.

Number of nonzeros in equality constraint Jacobian...:     4485
Number of nonzeros in inequality constraint Jacobian.:       12
Number of nonzeros in Lagrangian Hessian.............:     1545

Total number of variables............................:     1077
                     variables with only lower bounds:        0
                variables with lower and upper bounds:     1077


In [39]:
print(results)
print(value(model.cost))


Problem: 
- Lower bound: -inf
  Upper bound: inf
  Number of objectives: 1
  Number of constraints: 1057
  Number of variables: 1077
  Sense: unknown
Solver: 
- Status: ok
  Message: Ipopt 3.11.1\x3a Optimal Solution Found
  Termination condition: optimal
  Id: 0
  Error rc: 0
  Time: 8.375356197357178
Solution: 
- number of solutions: 0
  number of solutions displayed: 0

0.6469934607242865


In [52]:
for i in model.mu:
    print (str(model.mu[i]), model.mu[i].value)
for i in model.P:
    print (str(model.P[i]), model.P[i].value)
for i in model.p:
    print()

mu[1] 0.06783854613367332
mu[2] -0.23606250969727835
mu[3] 0.8175275849642927
P[0,4] 6.5714894160127096e-15
P[0,5] 3.6734198463196485e-40
P[0,6] 3.776708512595959e-40
P[0,7] 0.9999999999999934
P[1,4] 3.612204558023532e-15
P[1,5] 0.0
P[1,6] 0.0
P[1,7] 0.9999999999999963
P[2,4] 3.864389456103955e-15
P[2,5] 0.0
P[2,6] 6.428484731184809e-40
P[2,7] 0.9999999999999961
P[3,4] 3.5432300013385626e-15
P[3,5] 0.0
P[3,6] 0.0
P[3,7] 0.9999999999999964
P[4,4] 6.904915837209705e-15
P[4,5] 1.8367099231598242e-40
P[4,6] 1.0328856120610461e-41
P[4,7] 0.9999999999999931
P[5,4] 1.398080347150699e-14
P[5,5] 0.0
P[5,6] 2.7550648592893018e-40
P[5,7] 0.999999999999986
P[6,4] 4.894851662167769e-15
P[6,5] 3.6734198463196485e-40
P[6,6] 0.0
P[6,7] 0.9999999999999951
P[7,4] 5.838505151490512e-15
P[7,5] 0.0
P[7,6] 7.450128527783243e-40
P[7,7] 0.9999999999999941
P[8,4] 2.544680437683392e-15
P[8,5] 3.6734198463196485e-40
P[8,6] 7.346839697046882e-40
P[8,7] 0.9999999999999974
P[9,4] 3.894993985021635e-15
P[9,5] 1.8367

P[115,5] 0.0
P[115,6] 0.0
P[115,7] 0.999999999999897
P[116,4] 7.153181710984279e-14
P[116,5] 0.0
P[116,6] 0.0
P[116,7] 0.9999999999999285
P[117,4] 5.284014599339903e-13
P[117,5] 1.8367099231598242e-40
P[117,6] 0.0
P[117,7] 0.9999999999994716
P[118,4] 1.8001806195431504e-13
P[118,5] 0.0
P[118,6] 0.0
P[118,7] 0.99999999999982
P[119,4] 1.8131432378998824e-14
P[119,5] 0.0
P[119,6] 0.0
P[119,7] 0.9999999999999819
P[120,4] 1.5016480582598013e-13
P[120,5] 0.0
P[120,6] 0.0
P[120,7] 0.9999999999998498
P[121,4] 3.454149895868421e-14
P[121,5] 1.8367099231598242e-40
P[121,6] 0.0
P[121,7] 0.9999999999999655
P[122,4] 1.7452103503015975e-13
P[122,5] 1.8367099231598242e-40
P[122,6] 0.0
P[122,7] 0.9999999999998255
P[123,4] 3.972477967177303e-14
P[123,5] 0.0
P[123,6] 0.0
P[123,7] 0.9999999999999603
P[124,4] 1.34232919446722e-13
P[124,5] 0.0
P[124,6] 0.0
P[124,7] 0.9999999999998658
P[125,4] 1.4516375010823657e-13
P[125,5] 0.0
P[125,6] 0.0
P[125,7] 0.9999999999998548
P[126,4] 4.044599023421364e-14
P[126,5

In [31]:
!pyomo solve --solver=ipopt --save-results FILE schifo.py 

[    0.00] Setting up Pyomo environment
[    0.00] Applying Pyomo preprocessing actions
[    1.27] Creating model
[    1.27] Applying solver
[    5.79] Processing results
    Number of solutions: 1
    Solution Information
      Gap: None
      Status: optimal
      Function Value: 1.3401020068962646
    Solver results file: FILE
[    5.87] Applying Pyomo postprocessing actions
[    5.87] Pyomo Finished


In [13]:
json_data=open('C:/Users/antoc/Desktop/AAA/results.json').read()

class Test(object):
    def __init__(self, data):
	    self.__dict__ = json.loads(data)

test1 = Test(json_data)
print(test1.Solution[1]['Variable'])

{'C[0,4]': {'Value': 1.0}, 'C[0,5]': {'Value': 1.0}, 'C[1,6]': {'Value': 1.0}, 'C[2,7]': {'Value': 1.0}, 'P[0,4]': {'Value': 0.49999999999999895}, 'P[0,5]': {'Value': 0.5000000000000007}, 'P[0,6]': {'Value': 3.728889295474199e-16}, 'P[1,4]': {'Value': 0.49999999999996864}, 'P[1,5]': {'Value': 0.4999999999999704}, 'P[1,6]': {'Value': 6.097512027515253e-14}, 'P[10,4]': {'Value': 0.499999999999999}, 'P[10,5]': {'Value': 0.5000000000000008}, 'P[10,6]': {'Value': 1.9802821004874176e-16}, 'P[100,7]': {'Value': 1.0}, 'P[101,6]': {'Value': 4.366486626745179e-08}, 'P[101,7]': {'Value': 0.9999999563351337}, 'P[102,6]': {'Value': 1.3288093276098035e-10}, 'P[102,7]': {'Value': 0.9999999998671191}, 'P[103,6]': {'Value': 8.77424677964913e-09}, 'P[103,7]': {'Value': 0.9999999912257532}, 'P[104,6]': {'Value': 1.0718469252108474e-13}, 'P[104,7]': {'Value': 0.9999999999998929}, 'P[105,6]': {'Value': 1.0100249983175675e-14}, 'P[105,7]': {'Value': 0.9999999999999899}, 'P[106,6]': {'Value': 3.0923831132941

Tools to manage the results:

In [24]:
dict_variable = test1.Solution[1]['Variable']

my_C = ['C['+str(i)+','+str(j)+']'for i in model.K for j in model.N_L ]
res_C = {i: dict_variable[i]['Value']  if i in dict_variable else 0 for i in my_C}

my_mu = ['mu['+str(i)+']' for i in model.N_B]
res_mu = {i: dict_variable[i]['Value'] if i in dict_variable else 0 for i in my_mu}

my_a = ['a['+str(j)+','+str(b)+']' for j in model.f_s for b in model.N_B]
res_a = {i:dict_variable[i]['Value'] if i in dict_variable else 0 for i in my_a}
print(res_a)

{'a[0,1]': -0.02222941652033611, 'a[0,2]': 0, 'a[0,3]': 0.32072580941784795, 'a[1,1]': 0.20217789821921223, 'a[1,2]': 0, 'a[1,3]': 0.10227622047451264, 'a[2,1]': -0.3840075585057884, 'a[2,2]': 0, 'a[2,3]': -1.0, 'a[3,1]': -0.6294631191491633, 'a[3,2]': 0, 'a[3,3]': -0.45937429780914}


In [70]:
model.mu.pprint()

mu : Size=3, Index=N_B
    Key : Lower : Value : Upper : Fixed : Stale : Domain
      1 :  -1.0 :     0 :   1.0 : False : False :  Reals
      2 :  -1.0 :     0 :   1.0 : False : False :  Reals
      3 :  -1.0 :     0 :   1.0 : False : False :  Reals


In [42]:
print(init_p)

0.35553771487419705
