## 1. Carregamento e preparacao dos dados

In [3]:
import pandas as pd
import numpy as np
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

pd.set_option("display.max_columns", None)
pd.options.display.float_format = "{:,.2f}".format

sns.set_theme(style="whitegrid")

In [4]:
DATA_DIR = Path('../../data/02 - trusted/parquet')

SENTINEL_INT = [-1]
SENTINEL_STR = ['-1', '-1.0']

In [5]:
df_tarefcon_raw = pd.read_parquet(DATA_DIR / 'tb_tarefcon.parquet')
df_paradas_raw = pd.read_parquet(DATA_DIR / 'tb_paradas.parquet')
lista_paradas = pd.read_csv('lista_cds_tarefcon.csv', dtype={'CD_CODIGOPARADAOUCONV': 'string'}).drop_duplicates('CD_CODIGOPARADAOUCONV')



In [6]:
df_tarefcon_raw.CD_CODIGOPARADAOUCONV.fillna('-1.0')

0           -1.0
1           -1.0
2           -1.0
3           -1.0
4          138.0
           ...  
1604557     -1.0
1604558    130.0
1604559    140.0
1604560    140.0
1604561    140.0
Name: CD_CODIGOPARADAOUCONV, Length: 1604562, dtype: string

In [7]:


def preparar_tarefcon(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()

    datetime_cols = ['DT_DIADATURMA', 'DT_INICIO', 'DT_FIM']
    for col in datetime_cols:
        if col in df.columns:
            df[col] = df[col].fillna('2999-01-01')
            df[col] = pd.to_datetime(df[col])

    id_cols = ['ID_PEDIDO', 'ID_ITEM', 'ID_IDCLIENTE', 'ID_USUARIO']
    df['ID_IDCLIENTE']= df.ID_IDCLIENTE.fillna('-1.0').astype(float).astype(int).astype(str)
    for col in id_cols:
        if col in df.columns:
            df[col] = df[col].fillna('-1')

    numeric_cols = [
        'QT_CHAPASALIMENTADAS', 'QT_QUANTIDADEAJUSTE',
        'QT_QUANTIDADEPRODUZIDA', 'QT_QUANTIDADEPROGRAMADA'
    ]
    for col in numeric_cols:
        df[col] = df[col].fillna(int(0))
        if col in df.columns:
            df[col] = pd.to_numeric(df[col]).astype('Int64')

    float_cols = ['VL_ARRANJO', 'VL_DURACAOPREVISTA', 'VL_GRAMATURA','VL_DURACAO']
    for col in float_cols:
        df[col] = df[col].fillna(float(0))
        if col in df.columns:
            df[col] = pd.to_numeric(df[col])

    code_cols = ['CD_CODIGOPARADAOUCONV', 'CD_FACA', 'CD_OP', 'CD_OPONDULADA', 'CD_MAQUINA','CD_ORIGEMREGISTRO', 'CD_TURMA', 'TX_DESCORIGEMREGISTRO']
    df['CD_CODIGOPARADAOUCONV']= df.CD_CODIGOPARADAOUCONV.fillna('-1.0').astype(float).astype(int).astype(str)
    
    for col in code_cols:
        if col in df.columns:
            df[col] = df[col].fillna('-1')


    bool_cols = ['FL_FLAGPARADA', 'FL_SKIPFEED', 'FL_REPROGRAMACAO']
    for col in bool_cols:
        if col in df.columns:
            df[col] = df[col].fillna(False)
            df[col] = df[col].replace({1: True, 0: False, '1': True, '0': False, 'True': True, 'False': False})
            df[col] = df[col].astype('boolean')

    return df

def preparar_paradas(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    rename_map = {
        'TX_DESCRICAO': 'TX_DESCRICAO_PARADA',
        'FL_FLAGEXTERNA': 'FL_MOTIVO_EXTERNO_PARADA'
    }
    df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})

    if 'CD_PARADA' in df.columns:
        df['CD_PARADA'] = df['CD_PARADA'].astype('string')
    if 'TX_DESCRICAO_PARADA' in df.columns:
        df['TX_DESCRICAO_PARADA'] = df['TX_DESCRICAO_PARADA'].astype('string')
    if 'FL_MOTIVO_EXTERNO_PARADA' in df.columns:
        df['FL_MOTIVO_EXTERNO_PARADA'] = df['FL_MOTIVO_EXTERNO_PARADA'].replace({1: True, 0: False, 'True': True, 'False': False}).astype('boolean')


    return df

In [8]:
df_tarefcon = preparar_tarefcon(df_tarefcon_raw)
df_tarefcon = df_tarefcon[sorted(df_tarefcon.columns)]
df_paradas = preparar_paradas(df_paradas_raw)
df_paradas_filtro = df_paradas[df_paradas['CD_PARADA'].isin(lista_paradas['CD_CODIGOPARADAOUCONV'])]

In [9]:
print('Quantidade de ID_pedidos ausentes e CD_OP ausentes juntos:',df_tarefcon[(df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] == '-1')].shape[0])
print('Quantidade de ID_pedidos ausentes',df_tarefcon[(df_tarefcon['ID_PEDIDO'] == '-1')].shape[0])
print('Quantidade de cd_op ausentes',df_tarefcon[(df_tarefcon['CD_OP'] == '-1')].shape[0])

Quantidade de ID_pedidos ausentes e CD_OP ausentes juntos: 65710
Quantidade de ID_pedidos ausentes 1284911
Quantidade de cd_op ausentes 65710


Ou seja, sempre que temos CD_OP = -1 tmbm temos id_pedido = -1, mas a reciproca não é verdadeira, nem sempre que temos ID_Pedidos ausente, quer dizer que temos CD_OP ausente

### 1.1.2 Vamos analisar quando o ID_PEDIDO é faltante mas existe o código CD_OP

In [10]:
df_tarefcon[['CD_OP','ID_PEDIDO','ID_ITEM']]

Unnamed: 0,CD_OP,ID_PEDIDO,ID_ITEM
0,598850-2/659652,598850-2,659652
1,598850-2/659652,598850-2,659652
2,598850-2/659652,598850-2,659652
3,598771-1/398692,598771-1,398692
4,-1,-1,-1
...,...,...,...
1604557,717003-6/350001,717003-6,350001
1604558,710976-28/798711,-1,-1
1604559,710976-28/798711,-1,-1
1604560,710976-28/798711,-1,-1


Ou seja, o CD_OP é formado pela string df['ID_PEDIDO] + "/" + df['ID_ITEM]

In [19]:
import pandas as pd

def corrigir_tarefcon_relacoes(df_tarefcon: pd.DataFrame) -> pd.DataFrame:
    """
    Corrige e infere relacionamentos de pedido/item/cliente na tabela TAREFCON.
    
    Regras:
      - Se CD_OP contém 'pedido/item' e ID_PEDIDO e ID_ITEM estão '-1',
        extrai os valores de CD_OP e preenche.
      - Atualiza CD_OP no formato 'ID_PEDIDO/ID_ITEM' (quando ambos válidos).
      - Propaga ID_IDCLIENTE a partir de combinações únicas (pedido + item).
    """

    df = df_tarefcon.copy()

    # === 1️⃣ Corrigir ID_PEDIDO e ID_ITEM com base em CD_OP ===
    mask = (
        (df['CD_OP'] != '-1')
        & (df['ID_PEDIDO'] == '-1')
        & (df['ID_ITEM'] == '-1')
    )

    # Separar CD_OP no formato PEDIDO/ITEM
    split_df = df.loc[mask, 'CD_OP'].astype(str).str.split('/', expand=True)

    if not split_df.empty and split_df.shape[1] >= 2:
        df.loc[mask, 'ID_PEDIDO'] = split_df[0]
        df.loc[mask, 'ID_ITEM'] = split_df[1]

    # === 2️⃣ Atualizar CD_OP para refletir o formato correto ===
    mask_op_valid = df['CD_OP'] != '-1'
    df.loc[mask_op_valid, 'CD_OP'] = (
        df['ID_PEDIDO'].astype(str) + '/' + df['ID_ITEM'].astype(str)
    )

    # === 3️⃣ Mapear cliente por (ID_PEDIDO, ID_ITEM) ===
    map_clientes = (
        df.loc[df['ID_IDCLIENTE'] != -1, ['ID_PEDIDO', 'ID_ITEM', 'ID_IDCLIENTE']]
        .drop_duplicates(subset=['ID_PEDIDO', 'ID_ITEM'])
        .set_index(['ID_PEDIDO', 'ID_ITEM'])['ID_IDCLIENTE']
        .to_dict()
    )

    # === 4️⃣ Preencher ID_IDCLIENTE ausentes com base no mapa ===
    df['ID_IDCLIENTE'] = df.apply(
        lambda row: map_clientes.get((row['ID_PEDIDO'], row['ID_ITEM']), row['ID_IDCLIENTE']),
        axis=1
    )

    return df
df_tarefcon_corrigido = corrigir_tarefcon_relacoes(df_tarefcon)


In [12]:
df_tarefcon[['CD_OP','ID_PEDIDO','ID_ITEM','ID_IDCLIENTE']]

Unnamed: 0,CD_OP,ID_PEDIDO,ID_ITEM,ID_IDCLIENTE
0,598850-2/659652,598850-2,659652,1428
1,598850-2/659652,598850-2,659652,1428
2,598850-2/659652,598850-2,659652,1428
3,598771-1/398692,598771-1,398692,9758
4,-1,-1,-1,-1
...,...,...,...,...
1604557,717003-6/350001,717003-6,350001,1774
1604558,710976-28/798711,710976-28,798711,-1
1604559,710976-28/798711,710976-28,798711,-1
1604560,710976-28/798711,710976-28,798711,-1


In [14]:
df_tarefcon[['CD_OP','ID_PEDIDO','ID_ITEM','ID_IDCLIENTE']]

Unnamed: 0,CD_OP,ID_PEDIDO,ID_ITEM,ID_IDCLIENTE
0,598850-2/659652,598850-2,659652,1428
1,598850-2/659652,598850-2,659652,1428
2,598850-2/659652,598850-2,659652,1428
3,598771-1/398692,598771-1,398692,9758
4,-1,-1,-1,-1
...,...,...,...,...
1604557,717003-6/350001,717003-6,350001,-1
1604558,710976-28/798711,710976-28,798711,13854
1604559,710976-28/798711,710976-28,798711,13854
1604560,710976-28/798711,710976-28,798711,13854


### Tarefcon x Itens

In [15]:

df_itens = pd.read_parquet('../../data/02 - trusted/parquet/tb_itens.parquet')


In [17]:
df_itens = df_itens[sorted(df_itens.columns)]

In [18]:
df_itens.columns

Index(['BL_AMARRADO', 'CD_CODIGOREFERENCIA', 'CD_COMPOSICAO', 'CD_TIPOABNT',
       'COMMPALETEFECHADO', 'FL_ESPELHO', 'FL_EXIGELAUDO', 'FL_FILME',
       'FL_LAPINTERNO', 'FL_LAPNOCOMP', 'FL_PALETIZADO', 'FL_REFILADO',
       'FL_RESINAINTERNA', 'ID_FACA', 'ID_IDCLIENTE', 'ID_IDFAMILIA',
       'ID_IDPALETE', 'ID_IDTIPOFT2', 'ID_ITEM', 'QT_ARRANJO', 'QT_NRCORES',
       'QT_PACOTESPORPALETE', 'QT_PECASPORPACOTE', 'QT_PECASPORPALETE',
       'QT_UNIDADESPORPALETE', 'ST_ESTADOFT_DETEC', 'ST_STATUSFT', 'TX_COR1',
       'TX_COR2', 'TX_COR3', 'TX_COR4', 'TX_PATHFIGURADOLASTRO',
       'TX_REFERENCIA', 'TX_TEXTOESTADOFT_DETEC', 'TX_TEXTOSTATUSFT',
       'VL_ALTURAINTERNA', 'VL_ALTURAPACOTE', 'VL_ALTURAPALETEFECHADO',
       'VL_AREABRUTACHAPA', 'VL_AREABRUTAPECA', 'VL_AREABRUTAPECACOMREFILOS',
       'VL_AREALIQUIDACHAPA', 'VL_AREALIQUIDAPECA', 'VL_COBBINTMAXIMO',
       'VL_COLUNAMINIMO', 'VL_COMPPACOTE', 'VL_COMPPECA', 'VL_COMPRESSAO',
       'VL_COMPRIMENTO', 'VL_COMPRIMENTOINTERNO', '

In [135]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] != '-1')]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA


In [90]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] != '-1')].CD_TURMA.value_counts()

CD_TURMA
C    413029
B    409127
A    397045
Name: count, dtype: Int64

In [None]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] != '-1')].FL_FLAGPARADA.value_counts()

Temos evidências de que sempre que há ausencia de ID_PEDIDO E POSSUI CÓDIGO DE OP, é um parada... Mas sempre que é uma parada tmbm tem esse comportamento?

In [66]:
df_tarefcon[(df_tarefcon['FL_FLAGPARADA'] == True)].shape[0]

1284911

In [83]:
df_tarefcon[ (df_tarefcon['CD_CODIGOPARADAOUCONV'] != '-1')].shape[0]

1284911

Já vimos que é diferente, ou seja, nem toda parada tem ausencia de id_pedido e codigo op

In [84]:
df_tarefcon[(df_tarefcon['CD_CODIGOPARADAOUCONV'] != '-1') & (df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] == '-1')].FL_FLAGPARADA.value_counts()

FL_FLAGPARADA
True    65710
Name: count, dtype: Int64

In [85]:
df_tarefcon[(df_tarefcon['CD_CODIGOPARADAOUCONV'] != '-1') & (df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] == '-1')]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
4,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191.0,0,0,0,0,Manual padrão,0.00,149,0.00,0.00
39,138,-1,ACB,-1,-1,0.0,B,2022-07-08,2022-07-08,2022-07-08,True,False,False,-1,-1,-1,191.0,0,0,0,0,Manual padrão,0.00,164,0.00,0.00
69,138,-1,ACB,-1,-1,0.0,B,2022-07-22,2022-07-22,2022-07-22,True,False,False,-1,-1,-1,191.0,0,0,0,0,Manual padrão,0.00,325,0.00,0.00
86,139,-1,ACB,-1,-1,0.0,C,2022-08-05,2022-08-05,2022-08-05,True,False,False,-1,-1,-1,631.0,0,0,0,0,Manual padrão,0.00,150,0.00,0.00
120,107,-1,ACB,-1,-1,0.0,A,2022-01-18,2022-01-18,2022-01-18,True,False,False,-1,-1,-1,87.0,0,0,0,0,Manual padrão,0.00,40,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1603875,1,-1,WAR3,-1,-1,1.0,B,2025-08-18,2025-08-18,2025-08-18,True,False,False,-1,-1,-1,445.0,0,0,0,0,Online CLP,0.00,20,0.00,0.00
1603910,1,-1,WAR3,-1,-1,1.0,C,2025-08-23,2025-08-24,2025-08-24,True,False,False,-1,-1,-1,445.0,0,0,0,0,Online CLP,0.00,26,0.00,0.00
1604332,1,-1,WAR3,-1,-1,1.0,C,2025-01-31,2025-02-01,2025-02-01,True,False,False,-1,-1,-1,223.0,0,0,0,0,Online CLP,0.00,10,0.00,0.00
1604423,1,-1,WAR3,-1,-1,1.0,B,2025-08-09,2025-08-09,2025-08-09,True,False,False,-1,-1,-1,445.0,0,0,0,0,Online CLP,0.00,21,0.00,0.00


In [77]:
lista_cds_parada_com_op_sem_pedido = df_tarefcon[(df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] != '-1')].CD_CODIGOPARADAOUCONV.unique().tolist()

In [78]:
len(lista_cds_parada_com_op_sem_pedido)

39

In [88]:
lista_cds_parada_sem_op_sem_pedido = df_tarefcon[(df_tarefcon['CD_CODIGOPARADAOUCONV'] != '-1') & (df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['CD_OP'] == '-1')].CD_CODIGOPARADAOUCONV.unique().tolist()

In [89]:
len(lista_cds_parada_sem_op_sem_pedido)

39

In [57]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == '-1') & (df_tarefcon['ID_ITEM'] == '-1')]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
4,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191.0,0,0,0,0,Manual padrão,0.00,149,0.00,0.00
7,1,-1,ACB,599653-1/804302,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191.0,0,0,0,0,Manual padrão,0.00,10,0.00,0.00
8,127,-1,ACB,599653-1/804302,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191.0,0,0,0,0,Manual padrão,0.00,10,0.00,0.00
9,107,-1,ACB,599653-1/804302,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191.0,0,0,0,0,Manual padrão,0.00,40,0.00,0.00
11,116,-1,ACB,599804-2/659652,-1,0.0,C,2022-06-27,2022-06-28,2022-06-28,True,False,False,-1,-1,-1,429.0,0,0,0,0,Manual padrão,0.00,10,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1604555,146,-1,WAR3,713455-3/723140,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,True,False,False,-1,-1,-1,445.0,0,0,0,0,Online CLP,0.00,10,0.00,0.00
1604558,130,-1,WAR3,710976-28/798711,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445.0,0,0,0,0,Online CLP,0.00,2,0.00,0.00
1604559,140,-1,WAR3,710976-28/798711,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445.0,0,0,0,0,Online CLP,0.00,1,0.00,0.00
1604560,140,-1,WAR3,710976-28/798711,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445.0,0,0,0,0,Online CLP,0.00,2,0.00,0.00


Os que tem ID_pedido ausente 300k tem id_item e id_cliente

In [98]:
df_tarefcon[(df_tarefcon['ID_IDCLIENTE'] !='-1') & (df_tarefcon['ID_ITEM'] !='-1')]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
0,-1,6111-2,ACB,598850-2/659652,-1,0.0,C,2022-06-20,2022-06-21,2022-06-20,False,True,False,1428.0,659652,598850-2,191.0,800,0,800,800,Manual padrão,1.00,180,3161.00,364.00
1,-1,6111-2,ACB,598850-2/659652,-1,0.0,C,2022-06-21,2022-06-21,2022-06-21,False,True,False,1428.0,659652,598850-2,191.0,700,0,700,700,Manual padrão,1.00,60,2338.00,364.00
2,-1,6111-2,ACB,598850-2/659652,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,False,True,False,1428.0,659652,598850-2,630.0,2100,0,2100,2365,Manual padrão,1.00,80,572.00,364.00
3,-1,4756,ACB,598771-1/398692,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,False,True,False,9758.0,398692,598771-1,191.0,14650,0,14650,15596,Manual padrão,1.00,251,3773.00,475.00
5,-1,6111-2,ACB,599653-1/804302,PRD054144/Várias FTs,0.0,B,2022-06-27,2022-06-27,2022-06-27,False,True,False,8399.0,804302,599653-1,191.0,3300,0,3300,3300,Manual padrão,1.00,480,2395.00,353.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1604524,-1,3332-1,WAR3,718088-1/822811,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,False,True,False,11124.0,822811,718088-1,1012.0,1847,81,5500,5582,Online CLP,3.00,31,0.00,458.00
1604530,-1,3332-1,WAR3,716925-1/868031,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,False,True,False,9292.0,868031,716925-1,1012.0,10026,69,30000,31194,Online CLP,3.00,92,155.00,458.00
1604551,-1,8455,WAR3,713455-3/723140,-1,1.0,C,2025-07-30,2025-07-31,2025-07-30,False,True,False,1792.0,723140,713455-3,1012.0,14334,50,28147,26634,Online CLP,2.00,215,194.00,603.00
1604556,-1,7585-1,WAR3,717101-3/791791,PRD099451/791791,1.0,C,2025-07-30,2025-07-31,2025-07-31,False,True,False,13602.0,791791,717101-3,445.0,7000,75,21000,21000,Online CLP,3.00,86,99.00,481.00


In [22]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_SKIPFEED'] == False)]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
4,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,149,0.0,0.0
7,1,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
8,127,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
9,107,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,40,0.0,0.0
11,116,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-28,2022-06-28,True,False,False,-1,-1,-1,429,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1604555,146,-1,WAR3,-1,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,10,0.0,0.0
1604558,130,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0
1604559,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,1,0.0,0.0
1604560,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0


In [29]:
display(df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_FLAGPARADA'] == True)].head())
print(df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_FLAGPARADA'] == True)].shape[0]/df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1)].shape[0])

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
4,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,149,0.0,0.0
7,1,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
8,127,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
9,107,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,40,0.0,0.0
11,116,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-28,2022-06-28,True,False,False,-1,-1,-1,429,0,0,0,0,Manual padrão,0.0,10,0.0,0.0


0.9812016237911675


98% dos que não possuem id_pedido e nem id_item, está com parada

Vamos analisar quantos dados desses possuem id_cliente

In [33]:
display(df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_FLAGPARADA'] == True) & (df_tarefcon['ID_IDCLIENTE'] == -1)].head())
print(df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_FLAGPARADA'] == True) & (df_tarefcon['ID_IDCLIENTE'] == -1)].shape[0]/df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_FLAGPARADA'] == True)].shape[0])

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
4,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,149,0.0,0.0
7,1,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
8,127,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
9,107,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,40,0.0,0.0
11,116,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-28,2022-06-28,True,False,False,-1,-1,-1,429,0,0,0,0,Manual padrão,0.0,10,0.0,0.0


1.0


100% dos dados que não possuem id_pedido, nem id_item e paradas, não possuem id_cliente

Mas será se todos os dados que n possuem ID_Cliente possui parada?

In [40]:
df_tarefcon[(df_tarefcon['ID_IDCLIENTE'] == -1)].shape[0]

1284911

In [None]:
df_tarefcon[df_tarefcon['FL_FLAGPARADA'] == True].shape[0]

1284911

In [41]:
df_tarefcon[(df_tarefcon['FL_FLAGPARADA'] == True) & (df_tarefcon['ID_ITEM'] == -1)  & (df_tarefcon['ID_IDCLIENTE'] == -1)].shape[0]

1284911

Ou seja, todos as informações que possuem ID_CLIENTE AUSENTE, é parada, ou seja tmbm nao tem id_pedido.., id_item..

Agora vamos analisar aquele registros que n tem id_pedido, mas tem id_cliente

In [44]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_IDCLIENTE'] != -1)].shape[0]

319651

In [46]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_IDCLIENTE'] != -1) & (df_tarefcon['FL_REPROGRAMACAO'] == True)].shape[0]

312919

In [47]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_IDCLIENTE'] != -1) & (df_tarefcon['FL_REPROGRAMACAO'] == True)].shape[0]/df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_IDCLIENTE'] != -1)].shape[0]

0.9789395309259161

98% dos que tem id_cliente e n tem id_pedido é reporgramação

In [48]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_IDCLIENTE'] != -1) & (df_tarefcon['FL_REPROGRAMACAO'] == True)]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
0,-1,-1,ACB,-1,-1,0.0,C,2022-06-20,2022-06-21,2022-06-20,False,True,False,1428,659652,-1,191,800,0,800,800,Manual padrão,1.0,180,3161.0,364.0
1,-1,-1,ACB,-1,-1,0.0,C,2022-06-21,2022-06-21,2022-06-21,False,True,False,1428,659652,-1,191,700,0,700,700,Manual padrão,1.0,60,2338.0,364.0
2,-1,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,False,True,False,1428,659652,-1,630,2100,0,2100,2365,Manual padrão,1.0,80,572.0,364.0
3,-1,4756,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,False,True,False,9758,398692,-1,191,14650,0,14650,15596,Manual padrão,1.0,251,3773.0,475.0
5,-1,-1,ACB,-1,-1,0.0,B,2022-06-27,2022-06-27,2022-06-27,False,True,False,8399,804302,-1,191,3300,0,3300,3300,Manual padrão,1.0,480,2395.0,353.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1604524,-1,-1,WAR3,-1,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,False,True,False,11124,822811,-1,1012,1847,81,5500,5582,Online CLP,3.0,31,0.0,458.0
1604530,-1,-1,WAR3,-1,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,False,True,False,9292,868031,-1,1012,10026,69,30000,31194,Online CLP,3.0,92,155.0,458.0
1604551,-1,8455,WAR3,-1,-1,1.0,C,2025-07-30,2025-07-31,2025-07-30,False,True,False,1792,723140,-1,1012,14334,50,28147,26634,Online CLP,2.0,215,194.0,603.0
1604556,-1,-1,WAR3,-1,-1,1.0,C,2025-07-30,2025-07-31,2025-07-31,False,True,False,13602,791791,-1,445,7000,75,21000,21000,Online CLP,3.0,86,99.0,481.0


In [49]:
df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_IDCLIENTE'] != -1) & (df_tarefcon['FL_REPROGRAMACAO'] == False)]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
78,-1,-1,ACB,-1,-1,0.0,A,2022-08-04,2022-08-04,2022-08-04,False,False,False,11572,788942,-1,247,1,0,1,2160,Manual padrão,1.0,480,523.0,356.0
378,-1,-1,ACB,-1,-1,0.0,B,2023-05-11,2023-05-11,2023-05-11,False,False,False,8399,804302,-1,401,4500,0,4500,4998,Manual padrão,1.0,168,1209.0,364.0
396,-1,-1,ACB,-1,-1,0.0,A,2023-06-09,2023-06-09,2023-06-09,False,False,False,1428,659652,-1,247,3600,0,3600,3700,Manual padrão,1.0,60,0.0,365.0
507,-1,-1,ACB,-1,-1,0.0,A,2022-10-27,2022-10-27,2022-10-27,False,False,False,8399,804302,-1,59,2700,0,2700,2796,Manual padrão,1.0,195,676.0,371.0
509,-1,-1,ACB,-1,-1,0.0,A,2022-10-27,2022-10-27,2022-10-27,False,False,False,1867,822092,-1,59,3200,0,3200,3192,Manual padrão,1.0,195,772.0,371.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1600935,-1,7927,WAR3,-1,-1,1.0,C,2025-01-29,2025-01-30,2025-01-30,False,False,False,10555,817980,-1,1012,944,172,3750,3704,Online CLP,4.0,31,27.0,419.0
1602149,-1,6844,WAR3,-1,-1,1.0,C,2025-08-27,2025-08-28,2025-08-28,False,False,False,13854,798710,-1,1096,2775,20,8320,8328,Online CLP,3.0,62,49.0,415.0
1602490,-1,5618,WAR3,-1,-1,1.0,C,2025-02-03,2025-02-04,2025-02-03,False,False,False,5465,622630,-1,1012,4576,44,18040,17500,Online CLP,4.0,109,65.0,400.0
1602847,-1,4121,WAR3,-1,-1,1.0,C,2025-07-24,2025-07-25,2025-07-25,False,False,False,9758,864041,-1,1012,916,0,3512,2400,Online CLP,4.0,12,5.0,446.0


In [55]:
df_tarefcon[(df_tarefcon['ID_IDCLIENTE'] == 11572) & (df_tarefcon['ID_ITEM'] == 788942) & (df_tarefcon['DT_DIADATURMA'] >= '2022-08-01')  ].sort_values(by=['DT_DIADATURMA'])

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
85339,-1,-1,CUR2,-1,-1,1.0,C,2022-08-01,2022-08-02,2022-08-02,False,True,False,11572,788942,-1,80,31,186,180,28080,Online CLP,6.0,51,146.0,369.0
85938,-1,-1,CUR2,-1,-1,1.0,A,2022-08-02,2022-08-02,2022-08-02,False,True,False,11572,788942,-1,188,5229,0,31351,27900,Online CLP,6.0,441,50.0,369.0
78,-1,-1,ACB,-1,-1,0.0,A,2022-08-04,2022-08-04,2022-08-04,False,False,False,11572,788942,-1,247,1,0,1,2160,Manual padrão,1.0,480,523.0,356.0
86379,-1,-1,CUR2,-1,-1,1.0,C,2022-08-10,2022-08-11,2022-08-11,False,True,False,11572,788942,-1,90,11,66,66,7020,Online CLP,6.0,18,0.0,369.0
85951,-1,-1,CUR2,-1,-1,1.0,C,2022-08-10,2022-08-11,2022-08-11,False,True,False,11572,788942,-1,90,1330,38,7980,15000,Online CLP,6.0,101,87.0,369.0
86559,-1,-1,CUR2,-1,-1,1.0,A,2022-08-11,2022-08-11,2022-08-11,False,False,False,11572,788942,-1,90,155,1,930,2160,Online CLP,6.0,2,10.0,369.0
86558,-1,-1,CUR2,-1,-1,1.0,A,2022-08-11,2022-08-11,2022-08-11,False,True,False,11572,788942,-1,90,2400,18,14400,15000,Online CLP,6.0,105,34.0,369.0
86545,-1,-1,CUR2,-1,-1,1.0,A,2022-08-11,2022-08-11,2022-08-11,False,True,False,11572,788942,-1,90,2008,20,12000,11046,Online CLP,6.0,96,14.0,369.0
86380,-1,-1,CUR2,-1,-1,1.0,A,2022-08-11,2022-08-11,2022-08-11,False,True,False,11572,788942,-1,90,1200,0,7200,6954,Online CLP,6.0,73,0.0,369.0
86381,-1,-1,CUR2,-1,-1,1.0,A,2022-08-11,2022-08-11,2022-08-11,False,True,False,11572,788942,-1,90,259,0,1554,12600,Online CLP,6.0,4,76.0,369.0


In [None]:
display(df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_FLAGPARADA'] == True)].head())
print(df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1) & (df_tarefcon['FL_FLAGPARADA'] == True)].shape[0]/df_tarefcon[(df_tarefcon['ID_PEDIDO'] == -1) & (df_tarefcon['ID_ITEM'] == -1)].shape[0])

In [7]:
df_tarefcon[df_tarefcon['CD_CODIGOPARADAOUCONV'] == '-1'].FL_REPROGRAMACAO.value_counts()

FL_REPROGRAMACAO
True     312919
False      6732
Name: count, dtype: Int64

Quando o código de parada é

In [24]:
df_tarefcon[df_tarefcon['CD_OP']== '-1']

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
0,-1,-1,ACB,-1,-1,0.0,C,2022-06-20,2022-06-21,2022-06-20,False,True,False,1428,659652,-1,191,800,0,800,800,Manual padrão,1.0,180,3161.0,364.0
1,-1,-1,ACB,-1,-1,0.0,C,2022-06-21,2022-06-21,2022-06-21,False,True,False,1428,659652,-1,191,700,0,700,700,Manual padrão,1.0,60,2338.0,364.0
2,-1,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,False,True,False,1428,659652,-1,630,2100,0,2100,2365,Manual padrão,1.0,80,572.0,364.0
3,-1,4756,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,False,True,False,9758,398692,-1,191,14650,0,14650,15596,Manual padrão,1.0,251,3773.0,475.0
4,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,149,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1604557,-1,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,False,True,False,1774,350001,-1,445,4800,0,19200,19200,Online CLP,4.0,60,98.0,416.0
1604558,130,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0
1604559,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,1,0.0,0.0
1604560,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0


In [26]:
df_tarefcon[df_tarefcon['FL_REPROGRAMACAO'] == False]

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA
4,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,149,0.0,0.0
7,1,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
8,127,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
9,107,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,40,0.0,0.0
11,116,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-28,2022-06-28,True,False,False,-1,-1,-1,429,0,0,0,0,Manual padrão,0.0,10,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1604555,146,-1,WAR3,-1,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,10,0.0,0.0
1604558,130,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0
1604559,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,1,0.0,0.0
1604560,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0


In [27]:
cds_paradas_taref = list(df_tarefcon.CD_CODIGOPARADAOUCONV.unique())

In [49]:
import pandas as pd
df_paradas = pd.read_parquet('../../data/02 - trusted/parquet/tb_paradas.parquet')



FL_FLAGEXTERNA = Flag indica que a parada é externa, ou seja, não relacionada ao produto.. Como refeição, manutenção..

In [50]:
df_paradas.rename(columns ={'TX_DESCRICAO':'TX_DESCRICAO_PARADA','FL_FLAGEXTERNA':'FL_MOTIVO_EXTERNO_PARADA'}, inplace =True)

In [51]:
print(df_paradas[df_paradas.CD_PARADA.isin(cds_paradas_taref)].FL_USADACONVERSAO.value_counts())
print(df_paradas[df_paradas.CD_PARADA.isin(cds_paradas_taref)].FL_DESATIVADA.value_counts())

FL_USADACONVERSAO
True    38
Name: count, dtype: Int64
FL_DESATIVADA
False    38
Name: count, dtype: Int64


Como todo o histórico que está sendo usado é True para USADACONVERSAO e False para desativada, podemos excluir

In [52]:
df_paradas.drop(columns = ['FL_DESATIVADA','FL_USADACONVERSAO'],inplace=True)

In [56]:
print(df_paradas[df_paradas.CD_PARADA.isin(cds_paradas_taref)].FL_MOTIVO_EXTERNO_PARADA.value_counts())

FL_MOTIVO_EXTERNO_PARADA
False    28
True     10
Name: count, dtype: Int64


In [57]:
lista_paradas_cds_tarefcon =pd.read_csv('lista_cds_tarefcon.csv')
lista_paradas_cds_tarefcon['CD_CODIGOPARADAOUCONV'] = lista_paradas_cds_tarefcon['CD_CODIGOPARADAOUCONV'].astype(str)
lista_paradas_cds_tarefcon = (lista_paradas_cds_tarefcon['CD_CODIGOPARADAOUCONV'].unique().tolist())

In [58]:
df_merge = df_tarefcon.merge(df_paradas, how='inner', left_on='CD_CODIGOPARADAOUCONV', right_on='CD_PARADA')

In [59]:
df_merge

Unnamed: 0,CD_CODIGOPARADAOUCONV,CD_FACA,CD_MAQUINA,CD_OP,CD_OPONDULADA,CD_ORIGEMREGISTRO,CD_TURMA,DT_DIADATURMA,DT_FIM,DT_INICIO,FL_FLAGPARADA,FL_REPROGRAMACAO,FL_SKIPFEED,ID_IDCLIENTE,ID_ITEM,ID_PEDIDO,ID_USUARIO,QT_CHAPASALIMENTADAS,QT_QUANTIDADEAJUSTE,QT_QUANTIDADEPRODUZIDA,QT_QUANTIDADEPROGRAMADA,TX_DESCORIGEMREGISTRO,VL_ARRANJO,VL_DURACAO,VL_DURACAOPREVISTA,VL_GRAMATURA,CD_PARADA,TX_DESCRICAO_PARADA,FL_MOTIVO_EXTERNO_PARADA
0,138,-1,ACB,-1,-1,0.0,B,2022-06-22,2022-06-22,2022-06-22,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,149,0.0,0.0,138,FALTA DE PROGRAMAÇÃO,True
1,1,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0,1,AJUSTE,False
2,127,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,10,0.0,0.0,127,GINÁSTICA LABORAL,True
3,107,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-27,2022-06-27,True,False,False,-1,-1,-1,191,0,0,0,0,Manual padrão,0.0,40,0.0,0.0,107,REFEIÇÃO,True
4,116,-1,ACB,-1,-1,0.0,C,2022-06-27,2022-06-28,2022-06-28,True,False,False,-1,-1,-1,429,0,0,0,0,Manual padrão,0.0,10,0.0,0.0,116,LIMPEZA GERAL,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1273265,146,-1,WAR3,-1,-1,1.0,C,2025-07-30,2025-07-30,2025-07-30,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,10,0.0,0.0,146,MANUTENÇÃO OPERACIONAL,False
1273266,130,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0,130,CRAVAMENTO NO EMPILHADOR,False
1273267,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,1,0.0,0.0,140,CRAVAMENTO NA ALIMENTAÇÃO,False
1273268,140,-1,WAR3,-1,-1,1.0,C,2025-08-04,2025-08-05,2025-08-05,True,False,False,-1,-1,-1,445,0,0,0,0,Online CLP,0.0,2,0.0,0.0,140,CRAVAMENTO NA ALIMENTAÇÃO,False


# 1. PREPARAÇÃO E LIMPEZA DOS DADOS

## 1.1 Verificar tipos de dados e completude

In [12]:
print(f"Total de registros: {len(df_merge):,}")
print(f"Período: {df_merge['DT_INICIO'].min()} até {df_merge['DT_FIM'].max()}")


Total de registros: 1,604,562
Período: 2021-12-27 00:00:00 até 2025-09-24 00:00:00


In [13]:
df_merge['DURACAO_CALCULADA'] = (
    (pd.to_datetime(df_merge['DT_FIM']) - pd.to_datetime(df_merge['DT_INICIO']))
    .dt.total_seconds() / 60
).round(2)

inconsistencias = (
    (df_merge['DURACAO_CALCULADA'] - df_merge['VL_DURACAO']).abs() > 1
).sum()

display(inconsistencias)

np.int64(1402448)

In [18]:
print(f"\nInconsistências temporais (>1 min diferença): {inconsistencias} ({inconsistencias/len(df_merge)*100:.2f}%)")


Inconsistências temporais (>1 min diferença): 1402448 (87.40%)


In [21]:
colunas_criticas = [
 'FL_REPROGRAMACAO',
 'ID_IDCLIENTE',
 'ID_ITEM',
 'ID_PEDIDO',
 'ID_USUARIO',
 'QT_CHAPASALIMENTADAS',
 'QT_QUANTIDADEAJUSTE',
 'QT_QUANTIDADEPRODUZIDA',
 'QT_QUANTIDADEPROGRAMADA',
 'TX_DESCORIGEMREGISTRO',
 'VL_ARRANJO',
 'VL_DURACAO',
 'VL_DURACAOPREVISTA',
 'VL_GRAMATURA',
 'CD_PARADA',
 'TX_DESCRICAO_PARADA',
 'FL_USADACONVERSAO',
 'FL_DESATIVADA',
 'FL_FLAGEXTERNA',
 'DURACAO_CALCULADA']

for col in colunas_criticas:
    missing = df_merge[col].isna().sum()
    print(f"  {col:30} | Missing: {missing:6,} ({missing/len(df_merge)*100:5.2f}%)")

  FL_REPROGRAMACAO               | Missing: 1,291,643 (80.50%)
  ID_IDCLIENTE                   | Missing: 1,284,911 (80.08%)
  ID_ITEM                        | Missing: 1,284,911 (80.08%)
  ID_PEDIDO                      | Missing: 1,284,911 (80.08%)
  ID_USUARIO                     | Missing: 85,304 ( 5.32%)
  QT_CHAPASALIMENTADAS           | Missing: 1,284,911 (80.08%)
  QT_QUANTIDADEAJUSTE            | Missing: 1,284,911 (80.08%)
  QT_QUANTIDADEPRODUZIDA         | Missing: 1,284,911 (80.08%)
  QT_QUANTIDADEPROGRAMADA        | Missing: 1,284,911 (80.08%)
  TX_DESCORIGEMREGISTRO          | Missing:      0 ( 0.00%)
  VL_ARRANJO                     | Missing: 1,284,911 (80.08%)
  VL_DURACAO                     | Missing:      0 ( 0.00%)
  VL_DURACAOPREVISTA             | Missing: 1,318,618 (82.18%)
  VL_GRAMATURA                   | Missing: 1,284,911 (80.08%)
  CD_PARADA                      | Missing: 331,292 (20.65%)
  TX_DESCRICAO_PARADA            | Missing: 331,292 (20.65%)
  FL_

In [1]:
df_merge.info()

NameError: name 'df_merge' is not defined

In [26]:
df_merge.CD_FACA.fillna(str(0))

0          6111-2
1          6111-2
2          6111-2
3            4756
4            <NA>
            ...  
1604557    6844-1
1604558      <NA>
1604559      <NA>
1604560      <NA>
1604561      <NA>
Name: CD_FACA, Length: 1604562, dtype: object

In [24]:
df_merge[df_merge['ID_PEDIDO'].isna()].isna().sum()

CD_CODIGOPARADAOUCONV            0
CD_FACA                          0
CD_MAQUINA                       0
CD_OP                            0
CD_OPONDULADA                    0
CD_ORIGEMREGISTRO                0
CD_TURMA                         0
DT_DIADATURMA                    0
DT_FIM                           0
DT_INICIO                        0
FL_FLAGPARADA                    0
FL_REPROGRAMACAO           1284911
FL_SKIPFEED                      0
ID_IDCLIENTE               1284911
ID_ITEM                    1284911
ID_PEDIDO                  1284911
ID_USUARIO                   62351
QT_CHAPASALIMENTADAS       1284911
QT_QUANTIDADEAJUSTE        1284911
QT_QUANTIDADEPRODUZIDA     1284911
QT_QUANTIDADEPROGRAMADA    1284911
TX_DESCORIGEMREGISTRO            0
VL_ARRANJO                 1284911
VL_DURACAO                       0
VL_DURACAOPREVISTA         1284911
VL_GRAMATURA               1284911
CD_PARADA                    11641
TX_DESCRICAO_PARADA          11641
FL_USADACONVERSAO   

In [None]:

# ========================================
# 2. ANÁLISE DESCRITIVA DE PARADAS
# ========================================

print("\n" + "="*80)
print("2. ANÁLISE DESCRITIVA DAS PARADAS")
print("-"*80)

# Filtrar apenas paradas
df_paradas = df_merge[df_merge['FL_FLAGPARADA'] == 1].copy()
df_producao = df_merge[df_merge['FL_FLAGPARADA'] == 0].copy()

print(f"\nDistribuição Produção vs Parada:")
print(f"  Eventos de Produção: {len(df_producao):,} ({len(df_producao)/len(df_merge)*100:.1f}%)")
print(f"  Eventos de Parada:   {len(df_paradas):,} ({len(df_paradas)/len(df_merge)*100:.1f}%)")

# Tempo total
tempo_total_producao = df_producao['VL_DURACAO'].sum()
tempo_total_parada = df_paradas['VL_DURACAO'].sum()
tempo_total = tempo_total_producao + tempo_total_parada

print(f"\nDistribuição Temporal:")
print(f"  Tempo em Produção: {tempo_total_producao:,.0f} min ({tempo_total_producao/tempo_total*100:.1f}%)")
print(f"  Tempo em Parada:   {tempo_total_parada:,.0f} min ({tempo_total_parada/tempo_total*100:.1f}%)")

# OEE simplificado (disponibilidade)
disponibilidade = tempo_total_producao / tempo_total * 100
print(f"\nDisponibilidade (OEE parcial): {disponibilidade:.2f}%")


2. ANÁLISE DESCRITIVA DAS PARADAS
--------------------------------------------------------------------------------

Distribuição Produção vs Parada:
  Eventos de Produção: 319,651 (19.9%)
  Eventos de Parada:   1,284,911 (80.1%)

Distribuição Temporal:
  Tempo em Produção: 23,178,823 min (63.3%)
  Tempo em Parada:   13,450,479 min (36.7%)

Disponibilidade (OEE parcial): 63.28%


In [None]:


# ========================================
# 3. ANÁLISE POR TIPO DE PARADA
# ========================================

print("\n" + "="*80)
print("3. RANKING DE PARADAS POR IMPACTO")
print("-"*80)

# Agregar por tipo de parada
paradas_agg = df_paradas.groupby('TX_DESCRICAO').agg({
    'VL_DURACAO': ['count', 'sum', 'mean', 'median', 'std', 'min', 'max'],
    'CD_MAQUINA': 'nunique',  # Quantas máquinas diferentes
    'FL_ORIGEMREGISTRO': lambda x: (x == 1).sum()  # Quantos automáticos
}).round(2)

paradas_agg.columns = ['Ocorrências', 'Tempo_Total_min', 'Tempo_Médio_min', 
                       'Tempo_Mediano_min', 'Desvio_Padrão', 'Tempo_Min', 
                       'Tempo_Max', 'Num_Maquinas', 'Apont_Automaticos']

# Calcular métricas adicionais
paradas_agg['Tempo_Total_h'] = paradas_agg['Tempo_Total_min'] / 60
paradas_agg['Pct_Tempo_Parada'] = (
    paradas_agg['Tempo_Total_min'] / tempo_total_parada * 100
).round(2)

paradas_agg['Coef_Variacao'] = (
    paradas_agg['Desvio_Padrão'] / paradas_agg['Tempo_Médio_min'] * 100
).round(2)

# Ordenar por impacto (tempo total)
paradas_agg = paradas_agg.sort_values('Tempo_Total_min', ascending=False)

# Calcular Pareto
paradas_agg['Tempo_Acum_Pct'] = (
    paradas_agg['Tempo_Total_min'].cumsum() / 
    paradas_agg['Tempo_Total_min'].sum() * 100
).round(2)

paradas_agg['Ocorrencias_Acum_Pct'] = (
    paradas_agg['Ocorrências'].cumsum() / 
    paradas_agg['Ocorrências'].sum() * 100
).round(2)

print("\nTOP 20 PARADAS POR TEMPO TOTAL:")
print(paradas_agg[['Ocorrências', 'Tempo_Total_h', 'Tempo_Médio_min', 
                   'Pct_Tempo_Parada', 'Tempo_Acum_Pct', 'Coef_Variacao']].head(20))

# Identificar paradas críticas (Pareto 80%)
paradas_criticas = paradas_agg[paradas_agg['Tempo_Acum_Pct'] <= 80]

print(f"\n{'='*80}")
print(f"PRINCÍPIO DE PARETO (Regra 80/20)")
print("-"*80)
print(f"\n{len(paradas_criticas)} tipos de parada ({len(paradas_criticas)/len(paradas_agg)*100:.1f}% do total)")
print(f"representam 80% do tempo total de parada")
print(f"\nParadas críticas que devem ser priorizadas:")
print(paradas_criticas[['Ocorrências', 'Tempo_Total_h', 'Tempo_Médio_min', 
                        'Pct_Tempo_Parada']].head(15))

ANÁLISE ESTATÍSTICA DE PARADAS - TX_DESCRICAO

1. QUALIDADE DOS DADOS
--------------------------------------------------------------------------------
Total de registros: 1,604,562
Período: 2021-12-27 00:00:00 até 2025-09-24 00:00:00

Inconsistências temporais (>1 min diferença): 1402448 (87.40%)

Completude das colunas críticas:
  CD_MAQUINA                     | Missing:      0 ( 0.00%)
  FL_FLAGPARADA                  | Missing:      0 ( 0.00%)
  TX_DESCRICAO                   | Missing: 331,292 (20.65%)
  VL_DURACAO                     | Missing:      0 ( 0.00%)
  CD_TURMA                       | Missing:      0 ( 0.00%)
  QT_QUANTIDADEPRODUZIDA         | Missing: 1,284,911 (80.08%)

2. ANÁLISE DESCRITIVA DAS PARADAS
--------------------------------------------------------------------------------

Distribuição Produção vs Parada:
  Eventos de Produção: 319,651 (19.9%)
  Eventos de Parada:   1,284,911 (80.1%)

Distribuição Temporal:
  Tempo em Produção: 23,178,823 min (63.3%)
  Temp

KeyError: "Column(s) ['FL_ORIGEMREGISTRO'] do not exist"