# PIA
Proyecto para verificar el correcto despacho de órdenes de marketplaces (Amazon, Mercado Libre) mediante métodos de visión por computadora (*object counting*) con una versión afinada de *YOLO*.

## Entrenamiento de modelo
Hay dos maneras en que se planea obtener datos (dependiendo del tiempo que se disponga):
1. **Post-despacho**
    - Obtener imágenes de órdenes mediante ERP
    - Obtener detalles de las órdenes mediante reportes de plataformas
2. **Real time**
    - Obtener imágenes con empleo de cámara
    - Obtener detalles de las órdenes mediante petición a API de plataformas

### Librerías

In [1]:
# Para conexion con ERP
import xmlrpc.client
# Para extraer imagenes
from PIL import Image
import io
import base64

import pandas as pd
from datetime import datetime, timedelta
import json
import os
import re

### Petición a ERP
Se realizará una conexión a la API del ERP que emplea la empresa y se obtendrán las últimas imágenes tomadas a cada pedido, así como la información del contenido de cada pedido.

In [2]:
# Access information: the JSON file provides the 'url', 'db', 'user' and
# 'password' entries used below.
with open('./secrets/access_keys.json') as file:
    db_info = json.load(file)

# Authentication against the ERP's XML-RPC "common" endpoint
common = xmlrpc.client.ServerProxy('{}/xmlrpc/2/common'.format(db_info['url']))
uid = common.authenticate(db_info['db'], db_info['user'], db_info['password'], {})
# A falsy uid means the ERP rejected the credentials (see the ERP's external
# API documentation). Stop here instead of failing later with an obscure
# error on the first execute_kw call.
if not uid:
    raise RuntimeError('No se pudo autenticar con el ERP: revisar ./secrets/access_keys.json')
# Proxy for the "object" endpoint, used by every later execute_kw call
models = xmlrpc.client.ServerProxy('{}/xmlrpc/2/object'.format(db_info["url"]))

In [None]:
# Fetch image attachments from the ERP (at most `limite` records)
limite = 100
fields = ['res_id', 'res_name', 'datas']
filters = [
    ['&',
        ('create_date', '>=', '2025-10-14'), # Created on or after this date
        ('res_model', '=', 'sale.order'), # Attached to the sales order model
        ('mimetype', '=', 'image/jpeg') # File type: JPEG images
    ]
]
attachments = models.execute_kw(db_info['db'], uid, db_info['password'], 'ir.attachment', 'search_read', filters, {'fields': fields, 'limit': limite})

In [None]:
## Order names taken from the label files (file name up to the first dot)
# NOTE(review): this reads './training_data/labels' while a later cell reads
# './datasets/training_data/labels' - confirm which path is current.
orders = [file.split('.')[0] for file in os.listdir('./training_data/labels') if file.endswith('.txt')]

# Fetch the sales orders whose name matches one of the label files
fields = ['name', 'order_line']
filters = [[("name", "in", orders)]]
sales = models.execute_kw(db_info['db'], uid, db_info['password'], 'sale.order', 'search_read', filters, {'fields': fields})
## Flat list with the line ids of every fetched order
lines = [line for record in sales for line in record['order_line']]

# Fetch product and quantity of those order lines
fields = ['product_id', 'product_uom_qty']
filters = [[("id", "in", lines)]]
order_lines = models.execute_kw(db_info["db"], uid, db_info['password'], 'sale.order.line', 'search_read', filters, {'fields': fields})

# Fetch product kits
## Kits (no domain is passed - presumably this returns every 'mrp.bom' record; confirm)
fields = ['product_tmpl_id', 'bom_line_ids']
filters = []
kits = models.execute_kw(db_info["db"], uid, db_info['password'], 'mrp.bom', 'search_read', filters, {'fields': fields})
## Kit components (again no domain is passed)
fields = ['product_id', 'product_qty']
filters = []
components = models.execute_kw(db_info["db"], uid, db_info['password'], 'mrp.bom.line', 'search_read', filters, {'fields': fields})

### Procesamiento

In [5]:
# Fetch the attachments that belong to a known list of record ids
limite = 100
fields = ['res_id', 'res_name', 'datas']
# Order names derived from the label files; only needed by the alternative
# 'res_name' filter mentioned below.
ventas = [img.split('.')[0] for img in os.listdir('./datasets/training_data/labels')]
# One record id per line. strip() removes the line terminator without eating a
# real character when the last line has no trailing newline, and blank lines
# are skipped so they never reach the ERP as empty ids.
with open('ids.txt', 'r') as file:
    ids = [line.strip() for line in file if line.strip()]
filters = [
    [
        # Alternative filters tried earlier: ('res_name', 'in', ventas),
        # ('mimetype', '=', 'image/jpeg'), ('res_model', '=', 'sale.order')
        ('res_id', 'in', ids), # Attachments whose owning record id is listed in ids.txt
    ]
]
print("Antes de solicitud")
attachments = models.execute_kw(db_info['db'], uid, db_info['password'], 'ir.attachment', 'search_read', filters, {'fields': fields, 'limit': limite})
print(len(attachments))
# Save images
for record in attachments:
    image = Image.open(io.BytesIO(base64.b64decode(record['datas'])))
    # Landscape pictures are rotated so that every saved image is portrait
    if image.size[0] > image.size[1]:
        image = image.rotate(-90, expand=True)
    image.save(f"./datasets/training_data/images/{record['res_name']}.jpg")

Antes de solicitud
100


Guardar las imágenes en una carpeta y rotarlas en caso de ser necesario para mantener proporciones adecuadas.

In [None]:
# Save every fetched attachment as a JPEG named after its 'res_name'
for record in attachments:
    # 'datas' holds the file content encoded as base64
    image = Image.open(io.BytesIO(base64.b64decode(record['datas'])))
    # Landscape pictures are rotated so that every saved image is portrait
    if image.size[0] > image.size[1]:
        image = image.rotate(-90, expand=True)
    image.save(f'./images/{record['res_name']}.jpg')


KeyboardInterrupt



De los datos obtenidos, calcular la cantidad de productos por venta.

In [None]:
# Utilidad
def extract_sku(name: str) -> str:
    """Return the SKU written between square brackets at the start of *name*.

    Only the first bracketed group is taken, so later brackets in the product
    name (for example a variant suffix) are not swallowed into the SKU.

    Args:
        name: Product display name expected to look like ``"[SKU] Name"``.

    Returns:
        The text found between the leading ``[`` and the first ``]``.

    Raises:
        ValueError: If *name* does not start with a bracketed SKU.
    """
    match = re.match(r'\[([^\]]*)\]', name)
    if match is None:
        raise ValueError(f'No se encontro un SKU entre corchetes en: {name!r}')
    return match.group(1)

# Kits
# Build one row per (kit, BOM line id); the template reference is replaced by
# the SKU extracted from its second element.
kits_df = pd.DataFrame.from_records(kits)
kits_df.loc[:,'product_tmpl_id'] = kits_df['product_tmpl_id'].apply(lambda row: extract_sku(row[1]))
kits_df = kits_df.explode('bom_line_ids')

# Components: same SKU extraction applied to the product reference
components_df = pd.DataFrame.from_records(components)
components_df.loc[:,'product_id'] = components_df['product_id'].apply(lambda row: extract_sku(row[1]))

# Kit -> component table: each BOM line id of a kit is matched to its component row
packs_df = kits_df.merge(components_df, how='left', left_on='bom_line_ids', right_on='id')
packs_df = (packs_df[['product_tmpl_id', 'product_id', 'product_qty']]
            .rename(columns={
                'product_tmpl_id': 'pack_id',
                'product_id': 'component_id',
                'product_qty': 'cantidad'
                })
            )

# Sales
# One row per (order, order line id)
sales_df = (pd.DataFrame.from_records(sales)
            .explode('order_line'))

# Order lines: keep only lines that reference a product and reduce the
# product reference to its SKU
order_lines_df = pd.DataFrame.from_records(order_lines)
## Ignore empty lines (product_id is False)
order_lines_df = order_lines_df[order_lines_df['product_id'] != False]
order_lines_df.loc[:,'product_id'] = order_lines_df['product_id'].apply(lambda l: extract_sku(l[1]))

# Expand kits into individual pieces: a line whose SKU matches a pack_id gets
# one row per component, with the ordered quantity multiplied by the component
# quantity; lines without a match keep their own SKU and quantity.
order_lines_df = order_lines_df.merge(packs_df, how='left', left_on='product_id', right_on='pack_id')
order_lines_df.loc[:,'product_id'] = order_lines_df.apply(lambda row: row['product_id'] if pd.isna(row['pack_id']) else row['component_id'], axis=1)
order_lines_df.loc[:,'cantidad'] = order_lines_df.apply(lambda row: row['product_uom_qty'] if pd.isna(row['pack_id']) else row['product_uom_qty']*row['cantidad'], axis=1)
order_lines_df = order_lines_df.loc[:,['id', 'product_id', 'cantidad']]

# Join orders with their (expanded) lines
orders_df = sales_df.merge(order_lines_df, how='left', left_on='order_line', right_on='id')
# Keep only order name, SKU and quantity
orders_df = orders_df[['name', 'product_id', 'cantidad']]

## Pipeline
El _pipeline_ a realizar consiste en los siguientes pasos:
1. Obtener imágenes
    1. Acceder a carpeta (posiblemente de Drive) y obtener todos los archivos de imagen.
    2. Aplicar transformaciones a las imágenes
2. Aplicar modelo
    - De cada producto detectado, determinar su SKU. (1)
    - De la guía detectada, obtener el número de orden. (2)
3. Separar por confianza en la predicción:
    - Si la confianza es alta, cargar a ERP.
    - Si la confianza es baja, enviar a carpeta para revisión. (3)

(1) Utilizar modelo aparte  
(2) Tratar de emplear API de Mercado Libre; en caso contrario, descargar reporte  
(3) Agregar flujo de trabajo de Label Studio

### 1. Obtención de imágenes
Estableceremos una conexión con la API de Drive y obtendremos el contenido de una carpeta

In [1]:
# Librerias
## Obtencion de imagenes
import io
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
from PIL import Image
## Transformacion de imagenes
import os
# Parameters
# Service-account credentials loaded from a local key file, requesting the Drive scope
CREDENTIALS = service_account.Credentials.from_service_account_file(
   './secrets/access_keys_drive_api.json',
   scopes=['https://www.googleapis.com/auth/drive']
   )
# Drive folder to read images from
FOLDER_ID = '1H6miVnKLQh8AzWEWH1Yq2fL273XTvCFa'
# Local folder where the downloaded images are written
DEST_FOLDER = './datasets/downloaded_images'

In [2]:
# Obtencion de imagenes
## TO-DO: Logs con logger
## TO-DO: Documentacion docstring
# Obtener imagenes de archivo
def get_images(folder_id: str, credentials):
    """List the non-trashed JPEG files stored directly inside a Drive folder.

    Args:
        folder_id: ID of the Drive folder to list.
        credentials: Credentials object handed to the Drive client.

    Returns:
        A dict with a ``files`` key holding one ``{'id', 'name'}`` entry per
        file, or an empty dict when the Drive API reports an error.
    """
    files = []
    try:
        # Build the client with the credentials received as argument
        service = build("drive", "v3", credentials=credentials)
        # The API answers in pages; follow nextPageToken until it is absent so
        # that large folders are listed completely.
        page_token = None
        while True:
            response = (service.files()
                        .list(q=f"trashed=false and '{folder_id}' in parents and mimeType = 'image/jpeg'",
                              spaces='drive',
                              fields='nextPageToken, files(id, name)',
                              pageToken=page_token
                              )
                        .execute()
                        )
            files.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if page_token is None:
                break
    except HttpError as error:
        print(f"Ocurrio un error: {error}")
        return {}
    return {'files': files}

def download_images(response: dict, dest_folder: str, credentials=None):
    """Download every file listed in *response* into *dest_folder*.

    Args:
        response: Dict with a ``files`` list of ``{'id', 'name'}`` entries;
            a missing key means there is nothing to download.
        dest_folder: Local folder where the files are written; it is created
            when it does not exist yet.
        credentials: Credentials for the Drive client. Defaults to the
            notebook-level ``CREDENTIALS``.

    A file whose download fails is reported and skipped, so one error does not
    abort the remaining downloads.
    """
    if credentials is None:
        credentials = CREDENTIALS
    os.makedirs(dest_folder, exist_ok=True)
    try:
        # Create the client
        service = build("drive", "v3", credentials=credentials)
    except HttpError as error:
        print(f"Ocurrio un error: {error}")
        return
    # Download each image
    for image in response.get('files', []):
        print(f"Descargando {image['name']}")
        file = io.BytesIO()
        try:
            request = service.files().get_media(fileId=image['id'])
            downloader = MediaIoBaseDownload(file, request)
            # Pull chunks until the downloader reports completion
            done = False
            while not done:
                status, done = downloader.next_chunk()
                print(f"Download {int(status.progress() * 100)}.")
        except HttpError as error:
            print(f"Ocurrio un error: {error}")
            continue
        # Write the fully downloaded content to the destination folder
        with open(os.path.join(dest_folder, image['name']), 'wb') as img_file:
            img_file.write(file.getbuffer())


# Run the download step: list the JPEG files of FOLDER_ID and store them in DEST_FOLDER
download_images(get_images(FOLDER_ID, CREDENTIALS), DEST_FOLDER)


Descargando 701-0285686-8888270.jpg
Download 100.


In [None]:
# Transformacion
## ¿Sigue siendo necesario? YOLO ya reescala
## TO-DO: ¿Cambiar a blanco y negro?
def transform_images(origin_folder: str, width: int, height: int):
    """Write a portrait, resized RGB copy of every JPEG in *origin_folder*.

    The copies are saved under ``<origin_folder>/transformed`` with the same
    file name; the originals are left untouched.

    Args:
        origin_folder: Folder holding the source images.
        width: Width in pixels of the saved images.
        height: Height in pixels of the saved images.
    """
    # Create the output folder once, before the loop
    out_folder = os.path.join(origin_folder, 'transformed')
    os.makedirs(out_folder, exist_ok=True)

    for file in os.listdir(origin_folder):
        # Accept the usual JPEG extensions regardless of case
        if not file.lower().endswith(('.jpg', '.jpeg')):
            continue
        # The context manager releases the source file handle once converted
        with Image.open(os.path.join(origin_folder, file)) as source:
            img = source.convert('RGB')

        # Rotate landscape images so that they end up portrait
        if img.size[0] > img.size[1]:
            img = img.rotate(-90, expand=True)

        # Rescale to the requested size
        img = img.resize((width, height), resample=Image.LANCZOS)

        img.save(os.path.join(out_folder, file))
        img.close()

# Target size: 640 px high with a 3:4 width-to-height ratio (480 px wide)
HEIGHT = 640
WIDTH = int(3/4 * HEIGHT)
# Transform every image previously downloaded into DEST_FOLDER
transform_images(DEST_FOLDER, WIDTH, HEIGHT)

### 2. Detección de objetos

In [3]:
# Librerias
from ultralytics import YOLO
from pyzbar.pyzbar import decode
from PIL import Image
import numpy as np
import os

# Parameters
# Folder with the images to analyse
ORIGIN_FOLDER = './datasets/downloaded_images/'
# Detection model loaded from a local weights file
MODEL = YOLO('./models/modelo_2025-11-13.pt')

In [4]:
def detect_objects(origin_folder: str, model):
    """Run *model* over *origin_folder* and group the detections per image.

    Args:
        origin_folder: Source handed to ``model.predict``.
        model: Object exposing ``predict(source, verbose=...)``; each result
            must provide ``path`` and ``summary()``.

    Returns:
        One dict per prediction with the keys ``path``, ``products``
        (detections named 'Producto') and ``guides`` (detections named 'Guia').
    """
    def _named(detections, label):
        # Keep only the detections whose class name equals *label*
        return [det for det in detections if det['name'] == label]

    results = []
    for prediction in model.predict(origin_folder, verbose=False):
        detections = prediction.summary()
        results.append({
            'path': prediction.path,
            'products': _named(detections, 'Producto'),
            'guides': _named(detections, 'Guia'),
        })
    return results

def process_products(products: list, threshold: float = 0.7) -> int:
    """Count the detected products whose confidence exceeds *threshold*.

    Args:
        products: Detections, each a dict with a ``confidence`` entry.
        threshold: Minimum confidence (exclusive) for a detection to count.

    Returns:
        Number of detections with ``confidence`` strictly above *threshold*.
    """
    return sum(1 for product in products if product['confidence'] > threshold)

def region_properties(box: dict) -> dict:
    """Describe the four-corner region stored in *box*.

    Args:
        box: Mapping with the corner coordinates under the keys
            ``x1``/``y1`` ... ``x4``/``y4``.

    Returns:
        A dict with:
            - ``quad``: the eight coordinates ordered as corners 4, 1, 2, 3;
            - ``width``: the shorter side, truncated to an integer;
            - ``height``: the longer side, truncated to an integer;
            - ``area``: product of both sides before truncation.
    """
    # Corners of the region, flattened in the order 4, 1, 2, 3
    quad = (box['x4'], box['y4'], box['x1'], box['y1'], box['x2'], box['y2'], box['x3'], box['y3'])
    # Lengths of the sides joining corners 1-2 and 2-3
    side_12 = ((box['x1']-box['x2'])**2 + (box['y1']-box['y2'])**2)**0.5
    side_23 = ((box['x3']-box['x2'])**2 + (box['y3']-box['y2'])**2)**0.5
    # The shorter side is always reported as the width.
    # NOTE(review): the quad order is not changed when the sides are swapped -
    # confirm this is what the caller of the quad expects.
    width, height = (side_12, side_23) if side_12 <= side_23 else (side_23, side_12)
    # Area from the untruncated side lengths
    area = width*height
    return {'quad': quad, 'width': int(width), 'height': int(height), 'area': area}

def process_guides(guides: list, img_path: str) -> str:
    """Read the tracking number from the largest detected guide of an image.

    Args:
        guides: Guide detections, each a dict with a ``box`` entry accepted by
            ``region_properties``.
        img_path: Path of the image the detections belong to.

    Returns:
        The first 11-character CODE128 value found inside the guide region, or
        an empty string when no guide was detected or no such code was read.
    """
    # No guide detected: nothing to crop or decode
    if not guides:
        return ''

    # Keep the guide with the largest area (the first one wins on ties)
    region = max((region_properties(guide['box']) for guide in guides),
                 key=lambda props: props['area'])

    # Cut the guide region out of the image
    with Image.open(img_path) as img:
        guide_img = img.transform((region['width'], region['height']), Image.QUAD, region['quad'])

    # Look for barcodes in the cropped region
    code_list = [code.data.decode() for code in decode(guide_img) if code.type=='CODE128']
    # Only 11-character values are treated as tracking numbers
    return next((code for code in code_list if len(code)==11), '')

def process_image(imgs_info: list):
    """Replace the raw detections of each image with summarised values.

    Every dict in *imgs_info* is updated in place: its ``products`` and
    ``guides`` entries are removed and replaced by ``n_products`` and
    ``tracking_id``. The same list is returned.
    """
    for entry in imgs_info:
        detected_products = entry.pop('products', [])
        entry['n_products'] = process_products(detected_products)
        detected_guides = entry.pop('guides', [])
        entry['tracking_id'] = process_guides(detected_guides, entry['path'])
    return imgs_info

# Run detection over ORIGIN_FOLDER and summarise every image
imgs_info = process_image(detect_objects(ORIGIN_FOLDER, MODEL))
# Display the result in the notebook
imgs_info
# Earlier debugging calls, kept for reference (they index imgs_info by file name):
#print(process_products(imgs_info['2000009651078123.jpg']['products']))
#print(process_guides(imgs_info['2000009651078123.jpg']['img_path'], imgs_info['2000009651078123.jpg']['guides']))
#MODEL.predict(imgs_info['2000009651078123.jpg']['img_path'])[0].show()
#process_guides(imgs_info['2000009651078123.jpg']['img_path'], imgs_info['2000009651078123.jpg']['guides'])


[{'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\2000009651078123.jpg',
  'n_products': 1,
  'tracking_id': '45707191070'},
 {'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\2000009765719417.jpg',
  'n_products': 2,
  'tracking_id': '45769083914'},
 {'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\701-0285686-8888270.jpg',
  'n_products': 8,
  'tracking_id': ''}]

### 3. Cargar imagen a ERP

In [5]:
# Librerias
import xmlrpc.client
import base64
import json
import os
from PIL import Image
# CSV file read by get_tracking_info to map tracking ids to order names
TRACK_FILE = './datasets/tracks_id.csv'

In [20]:
# Access information for the test instance: the JSON file provides the 'url',
# 'db', 'user' and 'password' entries used below.
with open('./secrets/access_keys_test.json') as file:
    db_info = json.load(file)

# Authentication against the ERP's XML-RPC "common" endpoint
common = xmlrpc.client.ServerProxy('{}/xmlrpc/2/common'.format(db_info['url']))
uid = common.authenticate(db_info['db'], db_info['user'], db_info['password'], {})
# A falsy uid means the ERP rejected the credentials (see the ERP's external
# API documentation). Stop here instead of failing later with an obscure
# error on the first execute_kw call.
if not uid:
    raise RuntimeError('No se pudo autenticar con el ERP: revisar ./secrets/access_keys_test.json')
# Proxy for the "object" endpoint, used by every later execute_kw call
models = xmlrpc.client.ServerProxy('{}/xmlrpc/2/object'.format(db_info["url"]))

In [30]:
# TO-DO: Funciones de pipeline
# Decorador para debugg
def debug(func):
    """Decorator that prints a line before and after each call to *func*.

    Positional and keyword arguments are forwarded unchanged and the wrapped
    function's return value is returned. The end line is not printed when
    *func* raises.
    """
    import functools  # local import keeps this cell self-contained

    @functools.wraps(func)  # preserve the name and docstring of the wrapped function
    def wrapper(*args, **kwargs):
        print(f'Inicio de funcion: {func.__name__}')
        y = func(*args, **kwargs)
        print(f'Fin de funcion: {func.__name__}')
        return y
    return wrapper

# Agregar informacion de seguimiento
@debug
def get_tracking_info(imgs_info: list, tracking_file: str) -> list:
    """Add the order name that corresponds to each image's tracking id.

    Args:
        imgs_info: List of dicts, each with a ``tracking_id`` entry (tracking
            number of the order).
        tracking_file: Path of a CSV file with a header row followed by
            ``tracking id, order name`` rows.

    Returns:
        The same list; every dict gains an ``order_name`` entry, which is an
        empty string when the tracking id is not present in the file.
    """
    import csv  # local import keeps this cell self-contained

    track_order = {}
    with open(tracking_file, newline='') as file:
        reader = csv.reader(file)
        # Skip the header; the default avoids StopIteration on an empty file
        next(reader, None)
        for row in reader:
            # Blank or incomplete rows carry no mapping
            if len(row) < 2:
                continue
            track_order[row[0].strip()] = row[1].strip()
    for info in imgs_info:
        info['order_name'] = track_order.get(info['tracking_id'], '')
    return imgs_info

# Obtener id de venta
@debug
def get_sale_order(imgs_info: list) -> list:
    """Add the ERP id of the sales order referenced by each image.

    Args:
        imgs_info: List of dicts, each with an ``order_name`` entry (sales
            order number, possibly an empty string).

    Returns:
        The same list; every dict gains an ``order_id`` entry, which is an
        empty string when the order name is empty or was not returned by the
        ERP.
    """
    # Only real, distinct order names are worth asking the ERP about
    order_names = sorted({info['order_name'] for info in imgs_info if info['order_name']})

    order_id = {}
    if order_names:
        # Ask the ERP for the ids of those orders
        fields = ['name']
        filters = [[("name", "in", order_names)]]
        order_info = models.execute_kw(db_info["db"], uid, db_info['password'], 'sale.order', 'search_read', filters, {'fields': fields})
        # Map order number -> id
        order_id = {record['name']: record['id'] for record in order_info}

    # Attach the id to every image
    for info in imgs_info:
        info['order_id'] = order_id.get(info['order_name'], '')

    return imgs_info

# Cargar archivo
@debug
def upload_image(imgs_info: list, db_info: dict = db_info, user_id: int = uid) -> list:
    """Attach each image to its sales order in the ERP.

    Args:
        imgs_info: List of dicts with the entries:
            - ``order_id``: id of the order in the ERP (falsy when unknown);
            - ``path``: path of the image file.
        db_info: Connection data providing the ``db`` and ``password`` entries.
        user_id: Authenticated user id used for the request.

    Returns:
        The same list; every dict gains a ``file_id`` entry with the id of the
        created attachment, or an empty string for images without an order.
    """
    # Only images linked to an order are uploaded
    targets = [info for info in imgs_info if info['order_id']]

    # Build the attachment records
    records = []
    for info in targets:
        with open(info['path'], "rb") as img:
            datas = base64.b64encode(img.read()).decode('utf-8')
        records.append({
            'name': os.path.basename(info['path']), # File name
            'res_model': 'sale.order',
            'res_id': info['order_id'],
            'datas': datas,
        })

    # Create the attachments; the request is skipped when there is nothing to upload
    records_id = []
    if records:
        records_id = models.execute_kw(db_info["db"], user_id, db_info['password'],
                                       'ir.attachment', 'create',
                                       [records])

    # Pair each created id with its image by position. Pairing by file name
    # would mix up images that share a base name or give an id to an image
    # that was never uploaded.
    for info in imgs_info:
        info['file_id'] = ''
    for info, file_id in zip(targets, records_id):
        info['file_id'] = file_id
    return imgs_info

# Crear mensaje
@debug
def create_message(imgs_info: list, db_info: dict = db_info, user_id: int = uid) -> list:
    """Post a message on each sales order summarising what was detected.

    Args:
        imgs_info: List of dicts with the entries:
            - ``path``: path of the image file;
            - ``order_id``: id of the order in the ERP (falsy when unknown);
            - ``file_id``: id of the uploaded image in the ERP;
            - ``order_name``: order number;
            - ``tracking_id``: detected tracking number;
            - ``n_products``: number of detected products.
        db_info: Connection data providing the ``db`` and ``password`` entries.
        user_id: Authenticated user id used for the request and as author.

    Returns:
        The same list, unchanged, so the step can be chained like the others.
    """
    # Message body shared by every order
    msg_template = "<b>Numero de orden:</b> {order_name}<br>" + \
    "<b>Numero de rastreo:</b> {tracking_id}<br>" + \
    "<b>Numero de products:</b> {n_products}"
    records = []
    for info in imgs_info:
        # Images without an order have nowhere to post to
        if not info['order_id']:
            continue
        records.append({
            'model': 'sale.order',
            'res_id': info['order_id'],
            # NOTE(review): author_id receives the user id - confirm the ERP
            # expects a user id here and not a partner/contact id.
            'author_id': user_id,
            # NOTE(review): hard-coded subtype id - confirm it is the intended
            # message subtype on the target database.
            'subtype_id': 2,
            'body': msg_template.format(**info),
            'attachment_ids': [info['file_id']]
        })
    # Create the messages with the same user id the caller passed in; the
    # request is skipped when there is nothing to post.
    if records:
        models.execute_kw(db_info['db'], user_id, db_info['password'], 'mail.message', 'create',
                          [records])
    return imgs_info

# Sample input from an earlier version of the pipeline; only the commented-out calls below use it
ordenes = {'701-0285686-8888270': {'img_path': './downloaded_images/701-0285686-8888270.jpg'},
            '701-0573713-1097802':{'img_path': './downloaded_images/701-0573713-1097802.jpg'}
           }
# Full upload pipeline: tracking id -> order name -> order id -> attachment -> message
create_message(upload_image(get_sale_order(get_tracking_info(imgs_info, TRACK_FILE))))
# Earlier debugging calls, kept for reference:
#imgs_info
#create_message(upload_image(get_sale_order(ordenes)))
#upload_image(get_sale_order(ordenes))

Inicio de funcion: get_tracking_info
Fin de funcion: get_tracking_info
Inicio de funcion: get_sale_order
Fin de funcion: get_sale_order
Inicio de funcion: upload_image
Fin de funcion: upload_image
Inicio de funcion: create_message
Fin de funcion: create_message


In [31]:
# Display the image records after the upload pipeline has updated them in place
imgs_info

[{'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\2000009651078123.jpg',
  'n_products': 1,
  'tracking_id': '45707191070',
  'order_name': '2000009651078123',
  'order_id': 124995,
  'file_id': 121007},
 {'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\2000009765719417.jpg',
  'n_products': 2,
  'tracking_id': '45769083914',
  'order_name': '2000009765719417',
  'order_id': 129511,
  'file_id': 121008},
 {'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\701-0285686-8888270.jpg',
  'n_products': 8,
  'tracking_id': '',
  'order_name': '',
  'order_id': '',
  'file_id': ''}]

In [104]:
# Debug check: show the records and report which ones carry a tracking id
print(imgs_info)
for entry in imgs_info:
    tracking = entry['tracking_id']
    if not tracking:
        print(f'Salio esto {tracking}.')
    else:
        print('Verdadero')

[{'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\2000009651078123.jpg', 'n_products': 1, 'tracking_id': '45707191070'}, {'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\2000009765719417.jpg', 'n_products': 2, 'tracking_id': '45769083914'}, {'path': 'd:\\alan_\\Documents\\Alan\\Facultad\\Tetramestre 4\\Procesamiento y Clasificacion de Datos\\PIA\\datasets\\downloaded_images\\701-0285686-8888270.jpg', 'n_products': 8, 'tracking_id': ''}]
Verdadero
Verdadero
Salio esto .


In [None]:
# Obtener ventas asociadas
## https://www.odoo.com/es/forum/ayuda-1/odoo-14-external-api-how-to-add-notescommentsmessageslogs-in-contact-194917
# fields = ['name', 'order_line']
# filters = [[("name", "=", '701-0285686-8888270')]]
# sales = models.execute_kw(db_info['db'], uid, db_info['password'],
#                           'sale.order',
#                           'search_read',
#                           filters,
#                           {'fields': fields})
# sales
#info = models.execute_kw(db_info['db'], uid, db_info['password'], 'mail.message', 'fields_get', [], {'attributes': ['string', 'help', 'type']})
#list(info.keys())[100:]
# models.execute_kw(db_info['db'], uid, db_info['password'], 'mail.message', 'create',
#                   [{'model': 'sale.order',
#                     'res_id': id,
#                     'body': "This note was made using the API 2",
#                     'author_id': uid,
#                     'create_date': datetimestring,
#                     'date': datetimestring,
#                     'write_date': datetimestring
#                     'attachment_ids':[]}]
#                     )

[{'id': 128733, 'name': '701-0285686-8888270', 'order_line': [181534]}]