In [1]:
# import requests
# from requests.auth import HTTPBasicAuth

# JBPM_HOST = "http://localhost:8080"
# USERNAME = "wbadmin"
# PASSWORD = "wbadmin"
# CONTAINER_ID = "Publica_In_Out_1.0.0-SNAPSHOT"

# headers = {
#     "Accept": "application/json"
# }

# # Paso 1: Obtener todas las instancias activas del contenedor
# url = f"{JBPM_HOST}/kie-server/services/rest/server/queries/processes/instances?page=0&pageSize=10&status=1"
# response = requests.get(url, headers=headers, auth=HTTPBasicAuth(USERNAME, PASSWORD))

# if response.status_code == 200:
#     data = response.json()
#     process_instances = data.get("process-instance", [])
    
#     if not process_instances:
#         print("No hay procesos activos.")
#     else:
#         # Tomamos el primero 
#         latest_instance = process_instances[0]
#         process_instance_id = latest_instance.get("process-instance-id")
#         print(f"Último processInstanceId activo: {process_instance_id}")

#         # Ahora puedes llamar tu función get_documents(process_instance_id)
# else:
#     print(f"Error al consultar procesos: {response.status_code}")
#     print(response.text)


In [2]:
# # ID de la instancia del proceso recuperada

# url = f"{JBPM_HOST}/kie-server/services/rest/server/containers/{CONTAINER_ID}/processes/instances/{process_instance_id}/variables"

# response = requests.get(url, headers=headers, auth=HTTPBasicAuth(USERNAME, PASSWORD))
# documentos = response.text
# print(documentos)



In [3]:
import os

import pandas as pd
import requests
from requests.auth import HTTPBasicAuth

# Connection parameters for the jBPM KIE Server.
# Every value can be overridden through an environment variable so that real
# credentials never have to be saved inside the notebook; the fallbacks are the
# local development values this notebook has always used.
JBPM_HOST = os.environ.get("JBPM_HOST", "http://localhost:8080")
USERNAME = os.environ.get("JBPM_USERNAME", "wbadmin")
PASSWORD = os.environ.get("JBPM_PASSWORD", "wbadmin")
CONTAINER_ID = os.environ.get("JBPM_CONTAINER_ID", "Publica_In_Out_1.0.0-SNAPSHOT")

# Sent with every request so the server answers in JSON.
headers = {
    "Accept": "application/json"
}

# Step 1: get the id of the first active process instance reported by the server
def get_latest_process_instance(timeout=30):
    """Return the id of the first process instance listed by the KIE Server.

    Queries the process-instance listing with ``status=1`` (the filter this
    notebook uses for active instances), first page, up to 10 entries, and
    returns the ``process-instance-id`` of the first entry.

    NOTE(review): no sort order is requested, so "latest" relies on the
    server's default ordering -- confirm that this is the intended instance.

    Args:
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout the call could block the notebook forever.

    Returns:
        The ``process-instance-id`` value of the first listed instance, or
        ``None`` when the server answers with a non-200 status or lists no
        instances.

    Raises:
        requests.RequestException: if the server cannot be reached or the
            timeout expires.
    """
    url = f"{JBPM_HOST}/kie-server/services/rest/server/queries/processes/instances"
    params = {"status": 1, "page": 0, "pageSize": 10}
    response = requests.get(
        url,
        params=params,
        headers=headers,
        auth=HTTPBasicAuth(USERNAME, PASSWORD),
        timeout=timeout,
    )

    if response.status_code != 200:
        # Report the failure the same way get_all_documents does instead of
        # silently pretending there are no active processes.
        print(f"Error al consultar procesos: {response.status_code}")
        print(response.text)
        return None

    # "process-instance" may be missing or null when nothing matches.
    instances = response.json().get("process-instance") or []
    if not instances:
        return None
    return instances[0].get("process-instance-id")

# Step 2: extract every document stored in any variable of the process instance
def _extract_documents(variables, process_instance_id):
    """Flatten the document collections of a variables payload into row dicts.

    Only variables whose value is a dict containing a ``"documents"`` key are
    considered. Malformed entries (null lists, non-dict documents, missing
    wrappers) are tolerated instead of raising.
    """
    rows = []
    if not isinstance(variables, dict):
        return rows

    for var_name, var_value in variables.items():
        if not isinstance(var_value, dict) or "documents" not in var_value:
            continue

        # The key can be present with a null value; treat that as "no documents".
        for doc in var_value["documents"] or []:
            if not isinstance(doc, dict):
                continue

            doc_data = doc.get("org.jbpm.document.service.impl.DocumentImpl")
            if not isinstance(doc_data, dict):
                doc_data = {}

            name = doc_data.get("name", "sin_nombre")
            identifier = doc_data.get("identifier", "sin_id")

            # lastModified is normally wrapped as {"java.util.Date": <value>};
            # any other shape (including null) is passed through unchanged.
            last_modified = doc_data.get("lastModified")
            if isinstance(last_modified, dict):
                date = last_modified.get("java.util.Date", None)
            else:
                date = last_modified

            rows.append({
                "processinstanceid": process_instance_id,
                "value": f"{name}####{identifier}",
                "lastModified": date,
                "variable": var_name,
                "identifier": identifier
            })
    return rows


def get_all_documents(process_instance_id, timeout=30):
    """Fetch the variables of a process instance and list their documents.

    Args:
        process_instance_id: Id of the process instance to inspect.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of dicts with the keys ``processinstanceid``, ``value``
        (``"<name>####<identifier>"``), ``lastModified``, ``variable`` and
        ``identifier`` -- one per document found. The list is empty when the
        request fails (the status and body are printed) or when no variable
        holds documents.

    Raises:
        requests.RequestException: if the server cannot be reached or the
            timeout expires.
    """
    url = f"{JBPM_HOST}/kie-server/services/rest/server/containers/{CONTAINER_ID}/processes/instances/{process_instance_id}/variables"
    response = requests.get(
        url,
        headers=headers,
        auth=HTTPBasicAuth(USERNAME, PASSWORD),
        timeout=timeout,
    )

    if response.status_code != 200:
        print(f"Error al obtener variables del proceso {process_instance_id}: {response.status_code}")
        print(response.text)
        return []

    return _extract_documents(response.json(), process_instance_id)

# Run the full flow: find the active process instance, then collect its documents.
# dfCollect is always defined, with a fixed set of columns, so that later cells
# can use it even when no process or no document was found (previously it was
# left undefined, or created without an 'identifier' column).
DOCUMENT_COLUMNS = ["processinstanceid", "value", "lastModified", "variable", "identifier"]

process_instance_id = get_latest_process_instance()

# Compare against None explicitly so a falsy-but-valid id is not discarded.
if process_instance_id is not None:
    docs = get_all_documents(process_instance_id)
else:
    docs = []

dfCollect = pd.DataFrame(docs, columns=DOCUMENT_COLUMNS)

if process_instance_id is not None:
    print(dfCollect)
else:
    print("No se encontró ningún proceso activo.")


   processinstanceid                        value   lastModified  \
0                 87  afirme (1).pdf####951176812  1747153925793   
1                 87      afirme.pdf####453402779  1747153528332   
2                 87  afirme (1).pdf####329411496  1747153925793   

          variable identifier  
0   documentosSETN  951176812  
1   documentosSETN  453402779  
2  documentosSRTIC  329411496  


In [4]:
from flask import Flask, render_template, jsonify, send_from_directory
from sqlalchemy import create_engine, inspect
import pandas as pd

app = Flask(__name__)

# PostgreSQL connection settings. Each one can be overridden with an
# environment variable; the fallbacks are the local development values.
# A password containing URL-reserved characters (e.g. '@' or '/') must be
# supplied already URL-encoded, because it is interpolated into the DSN.
user = os.environ.get("JBPM_DB_USER", "jbpm")
password = os.environ.get("JBPM_DB_PASSWORD", "jbpm")
host = os.environ.get("JBPM_DB_HOST", "localhost")
port = os.environ.get("JBPM_DB_PORT", "5432")
database = os.environ.get("JBPM_DB_NAME", "jbpm")
engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}')

TABLE_NAME = 'tabla_document_collections'

# dfCollect is produced by the collection cell. Look it up defensively so this
# cell reports a clear message instead of raising NameError when that cell did
# not run or found nothing.
docs_to_store = globals().get("dfCollect")

if docs_to_store is None or docs_to_store.empty:
    # Nothing to write; also avoids creating a table without columns.
    print("No hay documentos recolectados para guardar.")
else:
    # A single transaction covers the existence check, the duplicate lookup
    # and the insert; it is committed on success and rolled back on error.
    with engine.begin() as conn:
        tables = inspect(conn).get_table_names()

        if TABLE_NAME not in tables:
            # First run: create the table from the collected documents.
            docs_to_store.to_sql(
                name=TABLE_NAME,
                con=conn,
                if_exists='replace',
                index=False
            )
            print("Tabla creada e información insertada.")
        else:
            # Identifiers already stored, compared as strings on both sides.
            existing_identifiers = set(
                pd.read_sql(
                    'SELECT identifier FROM tabla_document_collections',
                    con=conn
                )['identifier'].astype(str)
            )

            # Keep only the rows whose identifier is not in the table yet.
            is_new = ~docs_to_store['identifier'].astype(str).isin(existing_identifiers)
            df_nuevos = docs_to_store[is_new]

            if not df_nuevos.empty:
                df_nuevos.to_sql(
                    name=TABLE_NAME,
                    con=conn,
                    if_exists='append',
                    index=False
                )
                print(f"{len(df_nuevos)} documento(s) insertado(s) exitosamente.")
            else:
                print("No hay nuevos documentos para insertar (identifiers ya existentes).")

# Verify the table contents. The table may still not exist when nothing has
# ever been collected, so fall back to an empty frame in that case.
if TABLE_NAME in inspect(engine).get_table_names():
    df_verificacion = pd.read_sql('SELECT * FROM tabla_document_collections', engine)
else:
    df_verificacion = pd.DataFrame()
df_verificacion.tail()


3 documento(s) insertado(s) exitosamente.


Unnamed: 0,processinstanceid,value,lastModified,variable,identifier
6,77,formulario_carga_archivos-taskform (1).frm####...,1745863528002,documentosSETN,571058653
7,77,ambienteAnaconda.rtf####949245523,1745519828070,documentosSETN,949245523
8,87,afirme (1).pdf####951176812,1747153925793,documentosSETN,951176812
9,87,afirme.pdf####453402779,1747153528332,documentosSETN,453402779
10,87,afirme (1).pdf####329411496,1747153925793,documentosSRTIC,329411496


In [7]:
# Leftover scratch code, kept commented out: it printed the string form of the
# 'value' column of dfCollect.
# pidos = str(dfCollect['value'])
# print (pidos)
# Display up to the last 50 rows of df_verificacion. The frame is not re-queried
# here, so this shows whatever the earlier read of the table left in memory.
df_verificacion.tail(50)

Unnamed: 0,processinstanceid,value,lastModified,variable,identifier
0,76,CSD_FUNK671228PH6_20230509_130458.sdg####42227453,1683659102000,documentosSETN,42227453
1,76,CSD_Sucursal_1_FUNK671228PH6_20230509_130451.c...,1684423334000,documentosSETN,451591489
2,76,CSD_Sucursal_1_FUNK671228PH6_20230509_130451.k...,1683659102000,documentosSETN,622413742
3,76,CSD_FUNK671228PH6_20230509_130458.sdg####26811...,1683659102000,documentosSRTIC,268117661
4,76,CSD_Sucursal_1_FUNK671228PH6_20230509_130451.c...,1684423334000,documentosSRTIC,37307117
5,76,CSD_Sucursal_1_FUNK671228PH6_20230509_130451.k...,1683659102000,documentosSRTIC,569366915
6,77,formulario_carga_archivos-taskform (1).frm####...,1745863528002,documentosSETN,571058653
7,77,ambienteAnaconda.rtf####949245523,1745519828070,documentosSETN,949245523
8,87,afirme (1).pdf####951176812,1747153925793,documentosSETN,951176812
9,87,afirme.pdf####453402779,1747153528332,documentosSETN,453402779
