# Decision Analysis - Project 2 #

Daniel Jankowski 148257

Mateusz Małecki 148265

## Introduction ##

In [15]:
import pandas as pd

# Load the table of alternatives; decimal="," makes pandas read comma decimal marks as floats.
data = pd.read_csv("DA_database.csv", decimal=",")

In [16]:
# Display the loaded table of alternatives.
data

Unnamed: 0,name,prestige,power,price,engine_size,colour_preference,fuel_consumption,mileage,production_year,automatic_gear_box
0,Seat Ibiza 1.2 white,1,90,34900,1.2,2,6.0,90000,2015,0
1,Seat Ibiza 1.4 green,1,85,27800,1.4,4,6.9,82000,2015,0
2,Skoda Fabia grey,4,75,31500,1.0,1,5.8,215000,2019,0
3,Nissan Note grey,3,80,28500,1.2,1,6.0,133000,2014,0
4,MINI Cooper 1.6 blue,7,175,27900,1.6,5,8.0,183000,2006,0
5,Opel Corsa white,4,75,28500,1.4,2,6.0,130000,2017,0
6,Toyota Yaris 1.3 grey,5,99,27300,1.3,1,6.5,125000,2012,0
7,Citroen DS3 red,3,82,27900,1.2,5,5.0,130000,2014,0
8,Volkswagen Polo 1.2 white,5,105,37000,1.2,2,6.9,106000,2013,0
9,Toyota Yaris 1.0 white,5,69,34000,1.0,2,5.0,184000,2016,0


<table>
    <tr>
        <th>
            Criterion
        </th>
        <th>Type</th>
        <th>Discrete / Continuous</th>
        <th>Values / Range</th>
    </tr>
    <tr>
        <td>Prestige</td><td>Gain</td><td>Discrete</td><td>{1,2,3,4,5,6,7,8,9,10}</td>
    </tr>
    <tr>
        <td>Power</td><td>Gain</td><td>Continuous</td><td><69;215></td>
    </tr>
    <tr>
        <td>Price</td><td>Cost</td><td>Continuous</td><td><27,300; 40,000></td>
    </tr>
    <tr>
        <td>Engine size</td><td>Cost</td><td>Continuous</td><td><1.0; 4.2></td>
    </tr>
    <tr>
        <td>Colour preference</td><td>Gain</td><td>Discrete</td><td>{1,2,3,4,5}</td>
    </tr>
    <tr>
        <td>Fuel consumption</td><td>Cost</td><td>Continuous</td><td><2.0; 15.0></td>
    </tr>
    <tr>
        <td>Mileage</td><td>Cost</td><td>Continuous</td><td><22,100; 301,000></td>
    </tr>
    <tr>
        <td>Production year</td><td>Gain</td><td>Continuous</td><td><1977; 2020></td>
    </tr>
    <tr>
        <td>Automatic gear box</td><td>Gain</td><td>Discrete (binary in fact)</td><td>{0,1}</td>
    </tr>
</table>

## UTA Method ##

### Preferential information - pairwise comparisons ###

<b>11.</b> MINI Cooper 1.5 grey <span style="color:red"> ></span> <b>4.</b> MINI Cooper 1.6 blue <br>
<b>8.</b> Volkswagen Polo 1.2 white <span style="color:red"> ></span> <b>17.</b> Toyota Prius black <br>
<b>18.</b> Ford Fiesta blue<span style="color:red"> ></span> <b>9.</b> Toyota Yaris 1.0 white  <br>
<b>20.</b> Audi A1 red <span style="color:red"> I</span> <b>15.</b> Audi A1 black <br>
<b>0.</b> Seat Ibiza 1.2 white <span style="color:red"> I</span> <b>1.</b> Seat Ibiza 1.4 green <br>
<b>2.</b> Skoda Fabia grey <span style="color:red"> I</span> <b>3.</b> Nissan Note grey <br>

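Additional comparisons used for testing: <br>
<b>21.</b> Jaguar xj red <span style="color:red"> ></span> <b>14.</b> Peugeot 208 red <br>
<b>0.</b> Seat Ibiza 1.2 white <span style="color:red"> ></span> <b>21.</b> Jaguar xj red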

In [17]:
import numpy as np
from typing import Tuple, List

# Problem dimensions.
NUM_CRITERIA = 9
NUM_ALTERNATIVES = 22

# Row indices of the alternatives that take part in the preference information.
PREF_INFO_INDICES = [
    0, 1, 2, 3, 4, 7, 8, 9, 10,
    11, 13, 14, 15, 17, 18, 20, 21,
]

# Direction of each criterion: 1 = gain (more is better), 0 = cost (less is better).
GAIN_COST_FLAG = np.array([
    1,  # prestige
    1,  # power
    0,  # price
    0,  # engine size
    1,  # colour preference
    0,  # fuel consumption
    0,  # mileage
    1,  # production year
    1,  # automatic gear box
])

# [min, max] value of each criterion, in the same order as GAIN_COST_FLAG.
CRITERIA_RANGES = [
    [1, 10],          # prestige
    [69, 215],        # power
    [27300, 40000],   # price
    [1.0, 4.2],       # engine size
    [1, 5],           # colour preference
    [2.0, 15.0],      # fuel consumption
    [22100, 301000],  # mileage
    [1977, 2020],     # production year
    [0, 1],           # automatic gear box
]

In [18]:
def make_breakpoints(_range, additional_breaks: int) -> List[float]:
    """Generate equally distributed breakpoints spanning a criterion's value range.

    The result always starts exactly at the minimum and ends exactly at the
    maximum of ``_range``, with up to ``additional_breaks`` points in between.
    Interior points are truncated to ``int`` when both bounds are integers and
    rounded to 2 decimal places otherwise. An interior point that collapses
    onto an already generated value (or onto the maximum) is dropped, so the
    returned breakpoints are unique and strictly increasing.

    :param _range: Minimal and maximal value for a criterion (in any order)
    :type _range: Tuple[float, float]
    :param additional_breaks: Number of additional breakpoints between min and max value
    :type additional_breaks: int
    :return: Strictly increasing list of breakpoints
    :rtype: List
    :raises ValueError: If ``additional_breaks`` is negative
    """
    if additional_breaks < 0:
        raise ValueError("additional_breaks must be non-negative")

    low, high = min(_range), max(_range)
    step = (high - low) / (additional_breaks + 1)
    # Integer breakpoints only make sense when BOTH bounds are integers.
    integer_range = all(isinstance(bound, int) for bound in _range)

    points = [low]
    for i in range(1, additional_breaks + 1):
        raw = low + i * step
        point = int(raw) if integer_range else round(raw, 2)
        # Truncation/rounding may repeat a value (e.g. range (0, 1)); keep unique points only.
        if points[-1] < point < high:
            points.append(point)
    if high > low:
        # Pin the last breakpoint to the true maximum instead of a float-derived value.
        points.append(high)
    return points
        
    

In [29]:
# Number of additional (interior) breakpoints per criterion, in criterion order.
NUMBER_ADD_BREAKPOINTS = [
    8,  # prestige
    7,  # power
    7,  # price
    4,  # engine size
    3,  # colour preference
    4,  # fuel consumption
    4,  # mileage
    4,  # production year
    1,  # automatic gear box
]

In [30]:
from pulp import *

# Ordinal-regression linear program; the objective below is minimised.
model = LpProblem(name="Ordinal_Regression", sense=LpMinimize)

# Non-negative over-estimation ("plus") and under-estimation ("minus") error
# variables, one pair for every index in PREF_INFO_INDICES.
error_plus_variables = LpVariable.dicts("plus", PREF_INFO_INDICES, lowBound=0)
error_minus_variables = LpVariable.dicts("minus", PREF_INFO_INDICES, lowBound=0)

# Objective function: the sum of all error variables, each with weight 1.
error_plus_sum = lpSum(list(error_plus_variables.values()))
error_minus_sum = lpSum(list(error_minus_variables.values()))

model += error_plus_sum + error_minus_sum

In [31]:
# Declare the marginal-utility variables for the breakpoints of every criterion

# variables holding the utility of each criterion's best performance;
# their sum is forced to 1 by the normalization constraint at the end of this cell
normalization_constraint_variables = []

# maps "u<criterion number>" -> {breakpoint value: LpVariable}
u_variables = dict()

# NOTE(review): never referenced in the cells shown; the (misspelled) name is kept unchanged.
contraints = []

# sorted breakpoint values per criterion, in criterion order
BREAKPOINTS = []

for c in range(NUM_CRITERIA):
    criterion_key = "u" + str(c + 1)
    breakpoints = make_breakpoints(tuple(CRITERIA_RANGES[c]), NUMBER_ADD_BREAKPOINTS[c])
    variables = LpVariable.dicts(criterion_key, breakpoints, lowBound=0, upBound=1)
    if GAIN_COST_FLAG[c]:
        # gain-type criterion: the smallest value is the worst, the largest the best
        worst, best = min(breakpoints), max(breakpoints)
    else:
        # cost-type criterion: the largest value is the worst, the smallest the best
        worst, best = max(breakpoints), min(breakpoints)
    # the worst performance gets utility 0
    model += (variables[worst] == 0)
    # the best performance takes part in the normalization constraint
    normalization_constraint_variables.append(variables[best])
    BREAKPOINTS.append(breakpoints)
    u_variables[criterion_key] = variables

# utilities of the best performances must sum to exactly 1
normalization_expr = lpSum(normalization_constraint_variables)
model += LpConstraint(e=normalization_expr, sense=LpConstraintEQ, name="Normalization_constraint", rhs=1)

In [32]:
# Pairwise comparisons (first index, second index, relation):
#   "P" - the first alternative is strictly preferred to the second,
#   "I" - the two alternatives are indifferent.
PREFERENCES = [
    (11, 4, "P"),   # MINI Cooper 1.5 grey > MINI Cooper 1.6 blue
    (8, 17, "P"),   # Volkswagen Polo 1.2 white > Toyota Prius black
    (18, 9, "P"),   # Ford Fiesta blue > Toyota Yaris 1.0 white
    (20, 15, "I"),  # Audi A1 red ~ Audi A1 black
    (0, 1, "I"),    # Seat Ibiza 1.2 white ~ Seat Ibiza 1.4 green
    (2, 3, "I"),    # Skoda Fabia grey ~ Nissan Note grey
    (21, 14, "P"),  # Jaguar xj red > Peugeot 208 red
    (0, 21, "P"),   # Seat Ibiza 1.2 white > Jaguar xj red
    (1, 10, "I"),   # Seat Ibiza 1.4 green ~ Abarth Grande Punto white
    (13, 9, "I"),   # Hyundai i20 white ~ Toyota Yaris 1.0 white
    (10, 7, "P"),   # Abarth Grande Punto white > Citroen DS3 red
]
    

In [33]:
# Add to the model all "main" constraints w.r.t. the preference information

# Minimal difference of comprehensive values that encodes a strict preference.
PREFERENCE_EPSILON = 1e-04


def _utility_variable(criterion_inx, value):
    """Return the marginal-utility variable for `value` on the given criterion.

    If `value` is not yet a key of that criterion's variable dictionary (it is
    not a breakpoint value), a new variable bounded to [0, 1] is created and
    the value is inserted into the criterion's sorted breakpoint list.
    """
    key = "u" + str(criterion_inx + 1)
    if value not in u_variables[key]:
        u_variables[key][value] = LpVariable(key + "_" + str(value), lowBound=0, upBound=1)
        BREAKPOINTS[criterion_inx].append(value)
        BREAKPOINTS[criterion_inx].sort()
    return u_variables[key][value]


# `relation` (not `type`) so the builtin `type` is not shadowed for later cells.
for alt1_inx, alt2_inx, relation in PREFERENCES:
    # performances of both alternatives on every column except the name
    alt1 = data.loc[alt1_inx, data.columns != 'name'].values.flatten().tolist()
    alt2 = data.loc[alt2_inx, data.columns != 'name'].values.flatten().tolist()

    a1_variables = []
    a2_variables = []
    for inx, (a1, a2) in enumerate(zip(alt1, alt2)):
        a1_variables.append(_utility_variable(inx, a1))
        a2_variables.append(_utility_variable(inx, a2))
    a1_sum = lpSum(a1_variables)
    a2_sum = lpSum(a2_variables)

    # (U(a1) - plus_a1 + minus_a1) - (U(a2) - plus_a2 + minus_a2)
    utility_difference = (
        a1_sum - error_plus_variables[alt1_inx] + error_minus_variables[alt1_inx]
        - a2_sum + error_plus_variables[alt2_inx] - error_minus_variables[alt2_inx]
    )
    if relation == "P":
        model += utility_difference >= PREFERENCE_EPSILON
    elif relation == "I":
        model += utility_difference == 0
    else:
        # fail loudly instead of silently treating an unknown code as indifference
        raise ValueError(f"Unknown preference relation {relation!r} for pair ({alt1_inx}, {alt2_inx})")

In [34]:
# Monotonicity: along the sorted breakpoints of a criterion the marginal utility
# must not decrease (gain criterion) or must not increase (cost criterion).
for inx, points in enumerate(BREAKPOINTS):
    marginal = u_variables["u" + str(inx + 1)]
    is_gain = GAIN_COST_FLAG[inx]
    # walk over consecutive pairs of breakpoints
    for lower, upper in zip(points, points[1:]):
        if is_gain:
            model += marginal[upper] - marginal[lower] >= 0
        else:
            model += marginal[lower] - marginal[upper] >= 0

In [36]:
# Display the assembled linear program (objective and constraints) for inspection.
model

Ordinal_Regression:
MINIMIZE
1*minus_0 + 1*minus_1 + 1*minus_10 + 1*minus_11 + 1*minus_13 + 1*minus_14 + 1*minus_15 + 1*minus_17 + 1*minus_18 + 1*minus_2 + 1*minus_20 + 1*minus_21 + 1*minus_3 + 1*minus_4 + 1*minus_7 + 1*minus_8 + 1*minus_9 + 1*plus_0 + 1*plus_1 + 1*plus_10 + 1*plus_11 + 1*plus_13 + 1*plus_14 + 1*plus_15 + 1*plus_17 + 1*plus_18 + 1*plus_2 + 1*plus_20 + 1*plus_21 + 1*plus_3 + 1*plus_4 + 1*plus_7 + 1*plus_8 + 1*plus_9 + 0
SUBJECT TO
_C1: u1_1 = 0

_C2: u2_69 = 0

_C3: u3_40000 = 0

_C4: u4_4.2 = 0

_C5: u5_1 = 0

_C6: u6_15.0 = 0

_C7: u7_301000 = 0

_C8: u8_1977 = 0

_C9: u9_0 = 0

Normalization_constraint: u1_10 + u2_215 + u3_27300 + u4_1.0 + u5_5 + u6_2.0
 + u7_22100 + u8_2020 + u9_1 = 1

_C10: minus_11 - minus_4 - plus_11 + plus_4 + 0 u1_7 + u2_102 - u2_175
 - u3_27900 + u3_40000 + u4_1.5 - u4_1.6 + u5_1 - u5_5 + u6_7.3 - u6_8.0
 - u7_183000 + u7_54500 - u8_2006 + u8_2018 + 0 u9_0 >= 0.0001

_C11: - minus_17 + minus_8 + plus_17 - plus_8 + 0 u1_5 + u2_105 - u2_99
 + u3

In [37]:
# Solve the linear program (no solver is passed, so PuLP picks its default) and keep the returned status code.
status = model.solve()

In [38]:
# Report the solver status (numeric code and its LpStatus label) and the objective value.
print(f"status: {model.status}, {LpStatus[model.status]}")
print(f"objective: {model.objective.value()}")

status: 1, Optimal
objective: 0.0


In [39]:
# Dump every variable of the model together with the value assigned by the solver.
for variable in model.variables():
    print(f"{variable.name} = {variable.varValue}")

minus_0 = 0.0
minus_1 = 0.0
minus_10 = 0.0
minus_11 = 0.0
minus_13 = 0.0
minus_14 = 0.0
minus_15 = 0.0
minus_17 = 0.0
minus_18 = 0.0
minus_2 = 0.0
minus_20 = 0.0
minus_21 = 0.0
minus_3 = 0.0
minus_4 = 0.0
minus_7 = 0.0
minus_8 = 0.0
minus_9 = 0.0
plus_0 = 0.0
plus_1 = 0.0
plus_10 = 0.0
plus_11 = 0.0
plus_13 = 0.0
plus_14 = 0.0
plus_15 = 0.0
plus_17 = 0.0
plus_18 = 0.0
plus_2 = 0.0
plus_20 = 0.0
plus_21 = 0.0
plus_3 = 0.0
plus_4 = 0.0
plus_7 = 0.0
plus_8 = 0.0
plus_9 = 0.0
u1_1 = 0.0
u1_10 = 0.0007
u1_2 = 0.0
u1_3 = 0.0
u1_4 = 0.0
u1_5 = 0.0
u1_6 = 0.0
u1_7 = 0.0
u1_8 = 0.0
u1_9 = 0.0
u2_100 = 0.0
u2_101 = 0.0
u2_102 = 0.0
u2_105 = 0.0
u2_122 = 0.0
u2_123 = 0.0
u2_142 = 0.0
u2_160 = 0.0
u2_175 = 0.0
u2_178 = 0.0
u2_196 = 0.0
u2_200 = 0.0
u2_215 = 0.0009
u2_69 = 0.0
u2_75 = 0.0
u2_80 = 0.0
u2_82 = 0.0
u2_84 = 0.0
u2_85 = 0.0
u2_87 = 0.0
u2_90 = 0.0
u2_99 = 0.0
u3_27300 = 0.0008
u3_27800 = 0.0008
u3_27900 = 0.0
u3_28000 = 0.0
u3_28500 = 0.0
u3_28887 = 0.0
u3_29900 = 0.0
u3_30475 = 0.0
u3_

In [47]:
def create_ranking(variables, alternatives, dataset=None):
    """Compute the comprehensive (summed marginal) utility of the given alternatives.

    For every alternative, each column of the table except ``name`` is treated
    as one criterion (in column order); the value found there is looked up as
    the variable named ``u<criterion number>_<value>`` and the variable values
    are added up.

    :param variables: Solved variables exposing ``name`` and ``varValue``
    :param alternatives: Row labels of the alternatives to score
    :param dataset: Table of alternatives; defaults to the notebook-level ``data``
    :return: Scores in the same order as ``alternatives``
    :rtype: List
    :raises KeyError: If a performance value has no matching variable
    """
    if dataset is None:
        # backward-compatible default: use the notebook's global table
        dataset = data

    var_values = {v.name: v.varValue for v in variables}
    criteria_columns = dataset.columns != 'name'

    scores = []
    for i in alternatives:
        alt = dataset.loc[i, criteria_columns].values.flatten().tolist()
        score = 0
        for inx, a in enumerate(alt):
            key = "u" + str(inx + 1) + "_" + str(a)
            if key not in var_values:
                raise KeyError(f"No utility variable {key!r} for alternative {i!r}")
            score += var_values[key]
        scores.append(score)

    return scores
    
        

In [49]:
# Score the alternatives listed in PREF_INFO_INDICES using the solved model variables.
ranking_ref = create_ranking(model.variables(), PREF_INFO_INDICES)

In [50]:
# Print the name of every scored alternative next to its score.
for inx, score in zip(PREF_INFO_INDICES, ranking_ref):
    # look the name up by column label rather than by its position in the row
    print(data.loc[inx, "name"], ":", score)

Seat Ibiza 1.2 white : 0.0009
Seat Ibiza 1.4 green : 0.0009000000000000001
Skoda Fabia grey : 0.0007999999999999999
Nissan Note grey : 0.0007999999999999999
MINI Cooper 1.6 blue : 0.0
Citroen DS3 red : 0.0007999999999999999
Volkswagen Polo 1.2 white : 0.0005
Toyota Yaris 1.0 white : 0.0007999999999999999
Abarth Grande Punto white : 0.0009
MINI Cooper 1.5 grey : 0.0001
Hyundai i20 white : 0.0007999999999999999
Peugeot 208 red : 0.0007000000000000001
Audi A1 black : 0.0001
Toyota Prius black : 0.00039999999999999996
Ford Fiesta blue : 0.0009
Audi A1 red : 0.0001
Jaguar xj red : 0.0008
