## Automatic creator of pareto tables for multiple dataframes
*created by Aitor Ochotorena*

## Concepts

L'optimum de *Pareto* permet de diviser en deux l'ensemble des données :
- D'un côté, les points qui sont améliorables (*non dominants*).
- De l'autre, ceux qui ne sont pas améliorables et qui sont donc optimaux selon les critères de sélection (*dominants*).

Dans ce deuxième cas, on les désigne comme *optima au sens de Pareto*.

**Exemple de frontière d'efficacité de Pareto :**

Si les situations préférables sont celles où f1 et f2 sont les plus faibles, le point C n'est pas sur la frontière de Pareto parce qu'il est dominé par les points A et B. Les points A et B sont tous les deux efficaces.

![paretofrontiere](./img/300px-Front_pareto.svg.png)

## Application

Upload all *csv* files you wish to filter from your local system:

There are some example files you can upload in the [GitHub repository](https://github.com/aitorochotorena/Automatic-Pareto-Creator/tree/master/testPareto).

(wait until a button of 'Upload' shows, otherwise refresh the page)

In [1]:
# %matplotlib widget
# to plot the scatter matrix

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# download link

import base64
from IPython.display import HTML

# required to obtain the widgets
from ipywidgets import *
from IPython.display import display,clear_output
from io import StringIO
from ipywidgets import FileUpload, Output


In [2]:
# FUNCTION: create_download_link
# goal: build a clickable link that downloads a dataframe as a CSV file

def create_download_link(df, title="Download CSV file", filename="data.csv"):
    """Return an IPython ``HTML`` anchor that downloads *df* as a CSV file.

    The dataframe is serialised with ``df.to_csv()``, base64-encoded and
    embedded in a ``data:text/csv`` URI, so no file is written to disk.

    Parameters:
        df: object exposing ``to_csv()`` (a pandas dataframe in this notebook).
        title: text shown for the link.
        filename: value of the anchor's ``download`` attribute.
    """
    encoded_csv = base64.b64encode(df.to_csv().encode()).decode()
    anchor_template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(anchor_template.format(payload=encoded_csv, title=title, filename=filename))


In [3]:
# convert the raw bytes of an uploaded CSV file into a dataframe
def printing(df_bits):
    """Decode *df_bits* as UTF-8 and parse it as semicolon-separated CSV.

    Parameters:
        df_bits: bytes-like content of the CSV file.

    Returns:
        The dataframe produced by ``pd.read_csv`` with ``sep=';'``.
    """
    csv_text = str(df_bits, encoding='utf-8')
    return pd.read_csv(StringIO(csv_text), sep=';')


In [4]:
# dominated function. Inputs (positional): reference point, data columns, selection criteria.
# Counts how many rows of the data beat the reference point on every criterion;
# a result greater than 0 means the reference point is dominated.

def dominated(*args):
    """Count the rows that are strictly better than the reference point on all criteria.

    Positional arguments:
        args[0]: reference point, one value per criterion.
        args[1]: iterable of columns (one per criterion); the columns are
            zipped together so that each step yields one row.
        args[2]: criteria, one per value; ``'max.'`` means a larger value is
            better, any other entry means a smaller value is better.

    Returns:
        The number of rows that beat the reference point on every criterion
        (0 when no row does). Comparisons are strict, so a row that merely
        ties on some criterion is not counted.
    """
    reference, columns, criteria = args[0], args[1], args[2]
    n_criteria = len(reference)
    dominating_rows = 0

    for candidate in zip(*columns):
        better = sum(
            1
            for ref_val, cand_val, crit in zip(reference, candidate, criteria)
            if (cand_val > ref_val if crit == 'max.' else cand_val < ref_val)
        )
        # With zero criteria nothing is ever counted.
        if n_criteria and better == n_criteria:
            dominating_rows += 1

    return dominating_rows

In [5]:
def pareto(*args):
    """Flag dominated rows of a dataframe, plot it and offer the Pareto set as CSV.

    Positional arguments:
        args[0]: dataframe to analyse. A ``'Dominated'`` column is added (or
            overwritten) in place: 1 for dominated rows, 0 otherwise.
        args[1]: string holding a Python expression over the names ``df`` and
            ``row`` that evaluates to the reference point of one row.
        args[2]: string holding a Python expression over the name ``df`` that
            evaluates to the columns to compare against.
        args[3]: selection criteria, passed through to ``dominated``.
        args[4]: label used in the plot title and in the download link text.

    Rows are addressed by the integers ``0 .. len(df) - 1`` through
    ``df.loc``, i.e. a default integer index is assumed.
    """
    df = args[0]
    outputV = args[1]
    outputdf = args[2]
    chars = args[3]
    description_name = args[4]  # e.g. list(upload_button.value)[idx]

    df['Dominated'] = 0

    # SECURITY NOTE: the two expression strings are executed with eval().
    # They are built from column names found in user-supplied files, so they
    # must come from a trusted builder. The evaluation is given an explicit
    # namespace (only `df` and `row`, no builtins) instead of relying on the
    # local variable names of this function.
    eval_globals = {'__builtins__': {}}
    eval_locals = {'df': df}

    # The column expression does not depend on the row: evaluate it once.
    columns = eval(outputdf, eval_globals, eval_locals)

    # Test every row, including the last one.
    for row in range(len(df)):
        eval_locals['row'] = row
        point = eval(outputV, eval_globals, eval_locals)
        if dominated(point, columns, chars) > 0:
            df.loc[row, 'Dominated'] = 1

    # red = kept (not dominated), blue = dominated
    color_wheel = {0: 'r',
                   1: 'b'}
    colorsM = df['Dominated'].map(color_wheel.get)

    pd.plotting.scatter_matrix(df, color=colorsM, figsize=[15, 10], s=200)

    plt.suptitle('{}'.format(description_name))
    plt.tick_params(axis='both', labelsize=14)
    # NOTE(review): rcParams are updated after the figure has been created, so
    # presumably this only affects figures drawn later -- confirm intent.
    plt.rcParams.update({'font.size': 14})
    display(create_download_link(
        df[df['Dominated'] == 0],
        title='Download CSV file for {}'.format(description_name),
        filename='pareto.csv',
    ))

    plt.show()

In [6]:
# FUNCTION: renameVDF (create strings to be evaluated later on). Input: list of column names chosen in the widgets
# the goal of this function is to transform the selected column names into expressions which can be evaluated in the code
# i.e. for ['Thrust', 'DIAMETER'] we create the strings
# df['Thrust'][row],df['DIAMETER'][row] and df['Thrust'],df['DIAMETER'], which are applied in the loop that calculates the pareto.

def renameVDF(mylist):
    """Build the two expression strings used to evaluate the Pareto criteria.

    Parameters:
        mylist: iterable of column names.

    Returns:
        A tuple ``(argsV, argsDF)`` of comma-joined expressions: ``argsV``
        addresses one cell per column (``df[<name>][row]``) and ``argsDF``
        addresses the whole columns (``df[<name>]``). Both are empty strings
        when *mylist* is empty.

    Column names are embedded as quoted literals (``repr``) and accessed with
    item syntax rather than attribute syntax. This keeps the expressions valid
    for names that are not Python identifiers (spaces, leading digits, ...) or
    that clash with dataframe attributes, and it prevents a column name from
    being interpreted as code when the strings are evaluated.
    """
    argsV = ','.join('df[{!r}][row]'.format(name) for name in mylist)
    argsDF = ','.join('df[{!r}]'.format(name) for name in mylist)
    return argsV, argsDF

In [7]:
# Output area that receives everything display_grid shows
Output2 = Output()


@Output2.capture()
def display_grid(change):
    """Rebuild the grid of parameter/criterion selectors and the 'Plot' button.

    Used both as a direct call and as an observer callback; the *change*
    argument itself is not read. The function relies on the module-level
    names ``labelWidgets1`` (one criteria-count widget per uploaded file),
    ``dfContents`` (raw content of each file) and ``upload_button``.

    For every file it shows one row holding, per requested criterion, a
    dropdown with the file's columns and a max./min. radio selector. Clicking
    'Plot' runs ``pareto`` once per file with the current selections.
    """
    clear_output()
    plot_area = widgets.Output()
    plot_button = widgets.Button(
        description='Plot',
        disabled=False,
        button_style='info',
        tooltip='Click me',
        icon='check',
    )

    # one parsed dataframe per uploaded file
    frames = [printing(dfContents[pos]) for pos in range(len(labelWidgets1))]

    # number of criteria requested per file, e.g. [3, 2, 2]
    criteria_counts = [spinner.value for spinner in labelWidgets1]

    param_widgets = {}  # "dict<i>" -> {"string<j>": Dropdown}
    crit_widgets = {}   # "dict<i>" -> {"string<j>": RadioButtons}

    for pos, n_crit in enumerate(criteria_counts):
        group_key = "dict{}".format(pos)
        row_box = HBox(children=[])
        param_widgets[group_key] = {}
        crit_widgets[group_key] = {}
        for slot in range(n_crit):
            slot_key = "string{0}".format(slot)
            column_picker = widgets.Dropdown(
                options=frames[pos].columns.values,
                description='Parameter of {}'.format(list(upload_button.value)[pos]),
                style={'description_width': '170pt'},
            )
            criterion_picker = widgets.RadioButtons(
                options=['max.', 'min.'],
                description='Critery: ',
                style={'description_width': '130pt'},
                disabled=False,
            )
            param_widgets[group_key][slot_key] = column_picker
            crit_widgets[group_key][slot_key] = criterion_picker
            row_box.children += (VBox([column_picker, criterion_picker]),)

        display(row_box)

    def on_button_clicked(b):
        # Collect the current selections and draw one Pareto plot per file.
        with plot_area:
            clear_output()
            n_files = len(dfContents)
            chosen_params = [[] for _ in range(n_files)]
            chosen_crits = [[] for _ in range(n_files)]

            for pos, group in enumerate(param_widgets.values()):
                for picker in group.values():
                    chosen_params[pos].append(picker.value)

            for pos, group in enumerate(crit_widgets.values()):
                for picker in group.values():
                    chosen_crits[pos].append(picker.value)

            for pos, (params, crits) in enumerate(zip(chosen_params, chosen_crits)):
                point_expr, columns_expr = renameVDF(params)
                pareto(frames[pos], point_expr, columns_expr, crits,
                       list(upload_button.value)[pos])

    plot_button.on_click(on_button_clicked)
    display(plot_button)
    display(plot_area)


In [10]:
# Output area that receives everything show_content shows
sortie = Output()


@sortie.capture()
def show_content(change):
    """React to a change of the uploaded files: store them and show the selectors.

    Parameters:
        change: change notification of ``upload_button``; only the number of
            entries in ``change['new']`` is used.

    Side effects: resets and fills the module-level lists ``dfContents`` (raw
    content of each file), ``criteriapercomp`` (initial criteria count per
    file) and ``labelWidgets1`` (one criteria-count widget per file), displays
    those widgets, renders the selector grid through ``display_grid`` and
    displays ``Output2``.

    NOTE(review): ``upload_button.value`` is read as a mapping
    ``filename -> {'content': ...}``; presumably this matches the ipywidgets
    version the notebook was written for -- confirm before upgrading.
    """
    clear_output()
    global dfContents, criteriapercomp, labelWidgets1
    dfContents = []        # raw content of each uploaded file
    criteriapercomp = []   # number of criteria per file
    labelWidgets1 = []     # criteria-count widgets, one per file

    uploads = upload_button.value
    filenames = list(uploads)[:len(change['new'])]

    for uploaded_filename in filenames:
        NrCrits = widgets.BoundedIntText(
            value=3,
            min=2,
            max=5,
            step=1,  # integer widget: a fractional step is not meaningful
            description='number of criteria for {}:'.format(uploaded_filename),
            disabled=False,
            style={'description_width': '200pt'},
            layout={'width': '250pt'},
        )
        criteriapercomp.append(NrCrits.value)
        labelWidgets1.append(NrCrits)
        dfContents.append(uploads[uploaded_filename]['content'])

    for widget in labelWidgets1:
        display(widget)
        # rebuild the grid whenever the requested number of criteria changes
        widget.observe(display_grid, 'value')

    # display_grid rebuilds the grid for every file at once, so a single call
    # is enough (it is skipped when nothing was uploaded).
    if labelWidgets1:
        display_grid(change)
    display(Output2)
        
# Create the multi-file upload button and show it.
upload_button = FileUpload(multiple=True)
display(upload_button)
# Call show_content whenever the button's value (the uploaded files) changes.
upload_button.observe(show_content,'value')
# Show the output area that captures what show_content displays.
display(sortie)

FileUpload(value={}, description='Upload', multiple=True)

Output()