### Fase 3 - Punto 4

In [1]:
from sys import maxsize
from itertools import permutations
from math import sqrt
import random

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

In [3]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

### Importación del Dataset

In [4]:
# Load the dataset from the CSV file and display it
data = pd.read_csv(filepath_or_buffer='dataset.csv')
data

Unnamed: 0,embalaje,largo,ancho,alto,peso,procedencia,temperatura,manipulacion,protocolo
0,A,34.687708,51.459835,42.095164,23.857571,D,refrigerado,fragil,protocolo_1
1,A,49.464008,50.378068,38.858520,32.119336,A,ambiente,normal,protocolo_2
2,B,66.617850,52.215517,27.181471,23.359145,C,ambiente,fragil,protocolo_1
3,C,24.215506,41.945443,51.965421,20.012881,C,ambiente,normal,protocolo_2
4,C,56.804813,60.880069,56.895350,23.560809,C,ambiente,normal,protocolo_2
...,...,...,...,...,...,...,...,...,...
9995,A,60.906016,52.911421,24.009152,20.668354,C,ambiente,normal,protocolo_2
9996,C,50.839682,53.362869,49.981335,19.933259,C,ambiente,normal,protocolo_2
9997,B,64.661100,67.924710,37.296223,18.554790,D,ambiente,fragil,protocolo_1
9998,B,42.017064,72.635800,61.344982,31.943001,D,ambiente,normal,protocolo_2


### Conversión de datos categóricos en números

In [5]:
# Encode every categorical column as integer codes.
#
# The result is assigned back to the column instead of calling
# `data[col].replace(..., inplace=True)`: the in-place form operates on an
# intermediate Series, which pandas 2.x flags with a FutureWarning and which
# leaves `data` unchanged once Copy-on-Write is enabled.
# `.astype(int)` makes the integer dtype explicit rather than relying on
# `replace` to downcast the object column implicitly.
# Re-running this cell is safe: already-encoded values are left untouched.
data['embalaje'] = data['embalaje'].replace(['A', 'B', 'C'], [0, 1, 2]).astype(int)
data['procedencia'] = data['procedencia'].replace(['A', 'B', 'C', 'D'], [0, 1, 2, 3]).astype(int)
data['temperatura'] = data['temperatura'].replace(['ambiente', 'refrigerado'], [0, 1]).astype(int)
data['manipulacion'] = data['manipulacion'].replace(['normal', 'fragil'], [0, 1]).astype(int)
data['protocolo'] = data['protocolo'].replace(['protocolo_1', 'protocolo_2'], [1, 2]).astype(int)
data

Unnamed: 0,embalaje,largo,ancho,alto,peso,procedencia,temperatura,manipulacion,protocolo
0,0,34.687708,51.459835,42.095164,23.857571,3,1,1,1
1,0,49.464008,50.378068,38.858520,32.119336,0,0,0,2
2,1,66.617850,52.215517,27.181471,23.359145,2,0,1,1
3,2,24.215506,41.945443,51.965421,20.012881,2,0,0,2
4,2,56.804813,60.880069,56.895350,23.560809,2,0,0,2
...,...,...,...,...,...,...,...,...,...
9995,0,60.906016,52.911421,24.009152,20.668354,2,0,0,2
9996,2,50.839682,53.362869,49.981335,19.933259,2,0,0,2
9997,1,64.661100,67.924710,37.296223,18.554790,3,0,1,1
9998,1,42.017064,72.635800,61.344982,31.943001,3,0,0,2


### Creación del modelo de Regresión Logística

In [6]:
# Model evaluation
def EvaluacionModelo(y_test, y_pred, average='micro'):
    """Compute precision, recall, F1 and accuracy for a set of predictions.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Labels predicted by the model.
    average : str, default 'micro'
        Averaging strategy forwarded to ``precision_score``, ``recall_score``
        and ``f1_score``. With the default ``'micro'`` on a single-label
        problem, the three scores are all equal to the accuracy; pass e.g.
        ``'macro'`` to obtain per-class averages that can differ.

    Returns
    -------
    tuple
        ``(precision, recall, f1, accuracy)``.
    """
    averaged_scorers = (
        metrics.precision_score,
        metrics.recall_score,
        metrics.f1_score,
    )
    pre_s, rec_s, f1_s = (
        scorer(y_test, y_pred, average=average) for scorer in averaged_scorers
    )
    accu_s = metrics.accuracy_score(y_test, y_pred)
    return (pre_s, rec_s, f1_s, accu_s)

In [7]:
# Every column except the last one is a model input; the last column is the label
features, etiquetas = data.iloc[:, :-1], data.iloc[:, -1]

In [8]:
# Rescale the feature columns; the fitted scaler stays available as `scaler`
scaler = StandardScaler()
# Fit on the feature columns, then transform those same columns
features_norm = scaler.fit(features).transform(features)

In [9]:
# Split the data into train (80%) and test sets.
# A fixed random_state makes the split, and every metric computed from it,
# reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    features_norm, etiquetas, train_size=0.8, random_state=123
)

In [10]:
# Create the logistic-regression model and fit it on the training split
modelo = LogisticRegression()
LR_modelo_entrenado = modelo.fit(x_train, y_train)
# Predicted labels for the test split
y_pred = LR_modelo_entrenado.predict(X=x_test)

### Modelo No Supervisado K-Means

In [11]:
class KMeanProductOrganizer:
    """One K-Means model per (area, protocol) partition of the dataset.

    The dataset is partitioned by the ``embalaje`` (area) and ``protocolo``
    columns. For each partition a scaler and a 4-cluster K-Means model are
    fitted and stored under the key ``"<area letter>_<protocol>"``
    (for example ``"A_1"``).
    """

    def __init__(self, original_dataset):
        """Partition ``original_dataset`` and train one model per partition.

        ``original_dataset`` must provide the columns ``embalaje`` (coded
        0-2), ``protocolo`` (coded 1-2) and every column named in
        ``self.features``.
        """
        self.area_num_to_letter = {0: 'A', 1: 'B', 2: 'C'}
        self.features = ['largo', 'ancho', 'alto', 'peso', 'procedencia', 'temperatura', 'manipulacion']
        self.area_list = range(0, 3)
        self.protocol_list = range(1, 3)

        self.clustering_datasets = self._get_dataset_for_clustering(original_dataset)
        self.clustering_models_trained = self._get_trained_models()

    def _get_dataset_for_clustering(self, data):
        """Split ``data`` into one sub-frame per area+protocol combination.

        Returns a dict mapping ``"<area letter>_<protocol>"`` to the rows of
        ``data`` whose ``embalaje`` and ``protocolo`` match that combination.
        """
        clustering_datasets = {}
        for area in self.area_list:
            for protocol in self.protocol_list:
                key = f"{self.area_num_to_letter[area]}_{protocol}"
                clustering_datasets[key] = data[(data['embalaje'] == area) & (data['protocolo'] == protocol)]
        return clustering_datasets

    def _get_trained_models(self):
        """Train a model on the feature columns of every partition.

        Returns a dict with the same keys as ``self.clustering_datasets``.
        """
        return {
            key: self._kmeans_model_train(dataset[self.features])
            for key, dataset in self.clustering_datasets.items()
        }

    def _kmeans_model_train(self, dataset):
        """Fit a scaler and a K-Means model on ``dataset``.

        Returns a dict with the fitted ``'model'``, the fitted ``'scaler'``
        (needed to transform samples at prediction time) and the
        ``'silhouette_score'`` of the resulting clustering.
        """
        scaler = StandardScaler()
        # Fit on a plain float array rather than the DataFrame slice:
        # prediction-time samples arrive as NumPy arrays, so both stages see
        # the same kind of input.
        data_scaled = scaler.fit_transform(np.asarray(dataset, dtype=float))
        kmeans = KMeans(n_clusters=4, n_init=25, random_state=123)
        kmeans.fit(data_scaled)
        return {
            'model': kmeans,
            'scaler': scaler,
            'silhouette_score': silhouette_score(data_scaled, kmeans.labels_),
        }

    def predict_product_sample_cluster(self, product_data, product_area, product_protocol):
        """Predict the cluster of a single product.

        Parameters
        ----------
        product_data : array-like
            Raw (unscaled) feature values of one product, in the order of
            ``self.features``.
        product_area : str
            Area letter used in the model key (``'A'``, ``'B'`` or ``'C'``).
        product_protocol : int
            Protocol number used in the model key.

        Returns
        -------
        The cluster index predicted by the matching area+protocol model, or
        ``None`` when no model exists for that combination.
        """
        key = f"{product_area}_{product_protocol}"
        trained = self.clustering_models_trained.get(key)
        if trained is None:
            return None

        sample = np.asarray(product_data, dtype=float).reshape(1, -1)
        # The model was fitted on standardised data, so the sample has to go
        # through the scaler fitted for this partition before predicting.
        scaler = trained.get('scaler')
        if scaler is not None:
            sample = scaler.transform(sample)
        return trained['model'].predict(sample)[0]

In [12]:
# Build the organizer from the encoded dataset; its constructor trains one
# K-Means model per area+protocol partition.
productClusterOrganizerObject = KMeanProductOrganizer(data)



### Ruta Óptima Para Distribuir Los Productos

In [13]:
# Location name -> [x, y, belongs_to_protocol_1].
# The first two entries are the coordinates read by getDistance; the boolean
# tells splitAreasByProtocol whether the area belongs to protocol 1 or not.
# 'A' is the origin used by GetBestRoute.
area_data = {
    'A': [0,0,False],
    'EA1': [100,300, True],
    'EA2': [400,300,False],
    'EB1': [700,-300, True],
    'EB2': [400,-300,False],
    'EC1': [1000,200, True],
    'EC2': [1000,0,False],
}

# Numeric area code -> area letter
area_num_to_letter = {0:'A', 1:'B', 2:'C'}

In [14]:
def getCostMatrix(areaList):
    """Build the square matrix of pairwise distances between the given areas.

    Entry ``[row][col]`` holds the distance from ``areaList[row]`` to
    ``areaList[col]``, computed from the coordinates stored in ``area_data``.
    """
    points = [area_data[name] for name in areaList]
    return [[getDistance(src, dst) for dst in points] for src in points]

In [15]:
def getDistance(p1, p2):
    """Return the Euclidean distance between two points.

    Only the first two entries of each point are read (x and y); any further
    entries are ignored.
    """
    dx = p2[0] - p1[0]
    dy = p2[1] - p1[1]
    return sqrt(dx**2 + dy**2)

def pathIndexTranslate(areaList, pathIndexes):
    """Translate a path of ``(from_index, to_index)`` edges into area names.

    Returns the name of the destination of every edge, in order; the origin
    of the first edge is therefore not part of the result.
    """
    return [areaList[edge[1]] for edge in pathIndexes]

In [16]:
def splitAreasByProtocol(areaList):
    """Partition area names by the protocol flag stored in ``area_data``.

    Returns ``(protocol_1_areas, protocol_2_areas)``: the areas whose third
    ``area_data`` entry is truthy, then the remaining ones. The input order
    is preserved inside each list.
    """
    by_flag = {True: [], False: []}
    for name in areaList:
        by_flag[bool(area_data[name][2])].append(name)
    return by_flag[True], by_flag[False]

In [17]:
def travellingBestRoute(graph, s, arealist):
    """Find, by exhaustive search, the shortest open path starting at node ``s``.

    Every ordering of the remaining nodes is evaluated; the path does not
    return to the start.

    Parameters
    ----------
    graph : cost matrix, ``graph[a][b]`` is the cost of going from a to b.
    s : index of the start node.
    arealist : area names, indexed like ``graph``.

    Returns
    -------
    tuple
        ``(min_distance, edges, last_area_name, best_path)`` where ``edges``
        is the list of ``(from_index, to_index)`` hops and ``best_path`` is
        the same path translated to area names.
    """
    candidates = [idx for idx in range(len(arealist)) if idx != s]

    min_distance = maxsize
    min_path_i = []
    min_last_node_idx = None

    for order in permutations(candidates):
        # Walk s -> order[0] -> order[1] -> ... accumulating cost and hops
        weight = 0
        hops = []
        prev = s
        for nxt in order:
            weight += graph[prev][nxt]
            hops.append((prev, nxt))
            prev = nxt

        # Keep the first ordering that achieves the lowest total cost
        if weight < min_distance:
            min_distance, min_path_i, min_last_node_idx = weight, hops, prev

    # Translate the winning path from indexes to area names
    best_path = pathIndexTranslate(arealist, min_path_i)

    return min_distance, min_path_i, arealist[min_last_node_idx], best_path

In [18]:
def calculateBestPath(areaProto1List, areaProto2List, originNode):
    """Plan a route that visits all protocol-1 areas first, then protocol-2 areas.

    The protocol-1 leg starts at ``originNode``. The protocol-2 leg starts at
    the last node of the protocol-1 leg, or at ``originNode`` when there are
    no protocol-1 areas. ``originNode`` is a one-element list.

    Returns a dict with ``'total_distance'`` (sum of both legs) and
    ``'best_path'`` (visited area names in order, without the origin).
    """
    best_path = []
    total_distance = 0

    # Start node of the next leg, always kept as a one-element list
    leg_start = originNode

    for stops in (areaProto1List, areaProto2List):
        if len(stops) == 0:
            continue

        nodes = leg_start + stops
        leg = travellingBestRoute(getCostMatrix(nodes), 0, nodes)

        best_path += leg[3]
        total_distance += leg[0]

        # The following leg continues from where this one ended
        leg_start = [leg[2]]

    return {'total_distance': total_distance, 'best_path': best_path}

In [19]:
 def GetBestRoute(areas_to_go):
    """Compute the delivery route for ``areas_to_go`` starting from area 'A'.

    The areas are split by protocol and handed to calculateBestPath, whose
    result dict is returned unchanged.
    """
    protocol_1_areas, protocol_2_areas = splitAreasByProtocol(areas_to_go)
    return calculateBestPath(protocol_1_areas, protocol_2_areas, ['A'])

### Generador de Productos Aleatorios

In [20]:
def get_gaussian_value(min_num, max_num, media, std_dev):
    """Draw a Gaussian sample restricted to ``[min_num, max_num]``.

    Values are drawn from ``random.gauss(media, std_dev)`` until one falls
    inside the closed interval (rejection sampling), so the call can take
    long when the interval lies far from ``media``.

    Raises
    ------
    ValueError
        If ``min_num > max_num``. No value can ever be accepted in that case
        and the sampling loop would never terminate.
    """
    if min_num > max_num:
        raise ValueError(
            f"empty interval: min_num ({min_num}) is greater than max_num ({max_num})"
        )

    while True:
        val = random.gauss(media, std_dev)
        if min_num <= val <= max_num:
            return val

In [21]:
# Random product generator
def products_generator(n):
    """Generate ``n`` random products.

    Each product is a NumPy array laid out as
    ``[embalaje, largo, ancho, alto, peso, procedencia, temperatura, manipulacion]``.
    The three dimensions and the weight are bounded Gaussian draws; the
    remaining entries are picked uniformly from their integer codes.
    """
    def _random_product():
        embalaje = random.choice([0, 1, 2])
        # largo, ancho and alto share the same distribution and are drawn in that order
        largo, ancho, alto = [get_gaussian_value(5, 100, 50, 15) for _ in range(3)]
        peso = get_gaussian_value(1, 50, 25, 5)
        procedencia = random.choice([0, 1, 2, 3])
        temperatura = random.choice([0, 1])
        manipulacion = random.choice([0, 1])
        return np.array([embalaje, largo, ancho, alto, peso, procedencia, temperatura, manipulacion])

    return [_random_product() for _ in range(n)]

### Lógica del Vehículo

In [22]:
# Protocol prediction using the Logistic Regression model
def predict_protocol(p):
    """Predict the protocol of one product and append it to the product array.

    Parameters
    ----------
    p : numpy.ndarray
        Raw (unscaled) 1-D feature vector of a product, in the same column
        order as the ``features`` frame used to fit ``scaler``.

    Returns
    -------
    numpy.ndarray
        A copy of ``p`` with the predicted protocol appended as last element.
    """
    # The classifier was trained on features standardised by `scaler`, so the
    # raw sample has to go through the same transformation before predicting;
    # feeding it unscaled makes the prediction meaningless.
    sample = scaler.transform(p.reshape(1, -1))
    y = LR_modelo_entrenado.predict(sample)
    proto = int(y[0])
    print("> Protocolo: ", proto)
    # Attach the predicted protocol to the (unscaled) product array
    return np.append(p, [proto])

In [23]:
# Determine which areas to visit and how many products to deliver in each one
def get_building_areas_to_go(products):
    """Group products by destination area.

    The area name is ``"E<area letter><protocol>"``, built from the first
    element of each product (area code) and its last element (protocol).

    Returns a dict mapping each area name to ``{'qty': <count>, 'products': [...]}``,
    with areas in order of first appearance.
    """
    grouped = {}
    for product in products:
        letter = area_num_to_letter[product[0]]
        protocol = int(product[-1])

        # Create the entry on first sight, then count and store the product
        entry = grouped.setdefault(f"E{letter}{protocol}", {'qty': 0, 'products': []})
        entry['qty'] += 1
        entry['products'].append(product)
    return grouped

In [24]:
# Uses GetBestRoute to determine the order in which the areas are visited
def get_bestroute(areas_visit):
    """Return the result of ``GetBestRoute`` for ``areas_visit``, unchanged."""
    return GetBestRoute(areas_visit)

In [25]:
# Run the vehicle cycle iteratively
def vehicle_process(iterations):
    """Simulate ``iterations`` delivery rounds, printing every step.

    Each round generates 3 random products, predicts their protocol, groups
    them by destination area, computes the delivery route and, for every
    delivered product, predicts its cluster with the K-Means organizer.
    """
    for _ in range(iterations):
        print("=== Inicio del Recorrido ===")
        print("Recogiendo 3 productos del Almacen...")
        picked_products = products_generator(3)

        # Logistic Regression model: attach the predicted protocol to each product
        print("Prediccion del protocolo de cada producto...")
        labelled_products = list(map(predict_protocol, picked_products))

        # Unique areas to visit and the product quantity to deliver in each
        print("Determinando cuantos productos entregar por cada area")
        deliveries_by_area = get_building_areas_to_go(labelled_products)
        print([f"Area: {k} - Product Qty: {v['qty']}" for k,v in deliveries_by_area.items()])

        print("Determinando la mejor ruta de entrega")
        route = get_bestroute(deliveries_by_area.keys())
        print(route['best_path'])

        for area in route['best_path']:
            delivery = deliveries_by_area[area]

            print(f"Entregando {delivery['qty']} producto(s) en Area {area}...")

            for idx, product in enumerate(delivery['products'], start=1):
                # Single-row 2-D view of the product
                row = np.array([product.T])

                # First column is the area code, last column is the protocol
                prod_area = area_num_to_letter[row[0][0]]
                prod_protocol = int(row[0][-1])

                # Everything in between is what the clustering model receives
                cluster_input = row[:, 1:-1]

                # Unsupervised K-Means model: predict the product cluster
                print("> Prediccion del Deposito...")
                prod_cluster_result = productClusterOrganizerObject.predict_product_sample_cluster(cluster_input, prod_area, prod_protocol)

                print(f"> Entregando producto #{idx} en Area: {prod_area}, Protocolo: {prod_protocol}, Deposito: {prod_cluster_result}")
        print("=== Fin del Recorrido ===\n")
        

### Ejecución del Vehículo

In [27]:
# Seed Python's RNG so the randomly generated products, and therefore the
# printed protocols and routes, are the same on every run; then run the
# vehicle cycle 5 times.
random.seed(123)
vehicle_process(5)

=== Inicio del Recorrido ===
Recogiendo 3 productos del Almacen...
Prediccion del protocolo de cada producto...
> Protocolo:  2
> Protocolo:  1
> Protocolo:  2
Determinando cuantos productos entregar por cada area
['Area: EA2 - Product Qty: 1', 'Area: EA1 - Product Qty: 1', 'Area: EC2 - Product Qty: 1']
Determinando la mejor ruta de entrega
['EA1', 'EA2', 'EC2']
Entregando 1 producto(s) en Area EA1...
> Prediccion del Deposito...
> Entregando producto #1 en Area: A, Protocolo: 1, Deposito: 3
Entregando 1 producto(s) en Area EA2...
> Prediccion del Deposito...
> Entregando producto #1 en Area: A, Protocolo: 2, Deposito: 3
Entregando 1 producto(s) en Area EC2...
> Prediccion del Deposito...
> Entregando producto #1 en Area: C, Protocolo: 2, Deposito: 2
=== Fin del Recorrido ===

=== Inicio del Recorrido ===
Recogiendo 3 productos del Almacen...
Prediccion del protocolo de cada producto...
> Protocolo:  1
> Protocolo:  2
> Protocolo:  1
Determinando cuantos productos entregar por cada are

### Evaluación de los Modelos

#### Modelo Regresión Logística

In [28]:
# Confusion matrix of the logistic-regression predictions on the test split
cnf_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
print(f"Matriz de Confusion: \n{cnf_matrix}")
# Precision, recall, F1 and accuracy as returned by EvaluacionModelo
precision, recall, f1, accuracy = EvaluacionModelo(y_test=y_test, y_pred=y_pred)
print(f"Precision: {precision}, Recall: {recall}, F1: {f1}, Accuracy: {accuracy}\n\n")

Matriz de Confusion: 
[[ 688    0]
 [   0 1312]]
Precision: 1.0, Recall: 1.0, F1: 1.0, Accuracy: 1.0




#### Modelo K-Means

In [29]:
# Trained models keyed by "<area letter>_<protocol>"
clustering_models_dic = productClusterOrganizerObject.clustering_models_trained

# Report the silhouette score stored alongside each area+protocol model
for k, v in clustering_models_dic.items():
    print(f"Modelo Area: {k}, Silhouette Score: {v['silhouette_score']}")


Modelo Area: A_1, Silhouette Score: 0.15342519960556503
Modelo Area: A_2, Silhouette Score: 0.14432842299220988
Modelo Area: B_1, Silhouette Score: 0.1523273163858303
Modelo Area: B_2, Silhouette Score: 0.14011914558268063
Modelo Area: C_1, Silhouette Score: 0.15345683607590863
Modelo Area: C_2, Silhouette Score: 0.142090414968825
