## Utilizando as informações de torques

No nosso modelo principal (que está no notebook main.py), não utilizamos algumas features, como os valores de torque. Neste notebook, exploramos e testamos manipulações desses dados para que seja possível usar essas features.

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [20]:
# Input files, both read relative to the notebook's working directory.
resultados_csv = "./data/df_resultados_full.csv"
merge_csv = './data/Merge_Falhas_Resultados.csv'

# Long-format measurement rows (one row per KNR / NAME / UNIT / VALUE reading).
df_resultados = pd.read_csv(resultados_csv)
# Table that is later left-joined onto the per-KNR unit features via 'KNR'.
df_merge = pd.read_csv(merge_csv)

In [5]:
# Preview the first rows of the raw results table.
df_resultados.head()

Unnamed: 0,KNR,NAME,ID,STATUS,UNIT,VALUE_ID,VALUE,DATA
0,2023-2056234,42000,1,10,Deg,5.0,15601 ...,2024-05-16 13:21:03
1,2023-2056234,42000,1,10,mm,3.0,1.69 ...,2024-05-16 13:21:03
2,2023-2056234,42000,1,10,mm,2.0,1.98 ...,2024-05-16 13:21:03
3,2023-2056234,42000,1,10,mm,1.0,3.67 ...,2024-05-16 13:21:03
4,2023-2056234,42000,1,10,Nm,4.0,2.93 ...,2024-05-16 13:21:03


In [6]:

# Show every row recorded for one sample KNR (2023-2056234).
knr_mask = df_resultados["KNR"].eq("2023-2056234")
df_resultados.loc[knr_mask]

Unnamed: 0,KNR,NAME,ID,STATUS,UNIT,VALUE_ID,VALUE,DATA
0,2023-2056234,042000,1,10,Deg,5.0,15601 ...,2024-05-16 13:21:03
1,2023-2056234,042000,1,10,mm,3.0,1.69 ...,2024-05-16 13:21:03
2,2023-2056234,042000,1,10,mm,2.0,1.98 ...,2024-05-16 13:21:03
3,2023-2056234,042000,1,10,mm,1.0,3.67 ...,2024-05-16 13:21:03
4,2023-2056234,042000,1,10,Nm,4.0,2.93 ...,2024-05-16 13:21:03
...,...,...,...,...,...,...,...,...
841,2023-2056234,SECTION_ALS_00000002,718,10,%,21.0,-2.23 ...,2024-05-20 21:10:52
842,2023-2056234,SECTION_ALS_00000002,718,10,%,19.0,-2.33 ...,2024-05-16 17:02:05
843,2023-2056234,SECTION_ALS_00000002,718,10,%,19.0,-2.44 ...,2024-05-20 21:10:52
844,2023-2056234,SECTION_ALS_00000002,718,10,%,21.0,-2.5 ...,2024-05-16 17:02:05


In [7]:
# Count how many rows fall under each UNIT label.
df_resultados["UNIT"].value_counts()

UNIT
Grad          19377844
Nm            19365690
min             856386
%               451572
mm              101706
\u00B0           95105
Clicks           94930
                 93671
kg               56490
V                53502
Deg              33902
Unnamed: 5           2
Name: count, dtype: int64

In [8]:
# Trim surrounding whitespace from every UNIT label first, so that a blank
# label of any width (not only the exact 10-space string) collapses to ''.
# Doing this on the unfiltered frame also avoids assigning a column onto a
# freshly filtered slice, which is what triggers SettingWithCopyWarning.
df_resultados["UNIT"] = df_resultados["UNIT"].str.strip()

# Drop the rows whose UNIT is blank. Rows with a missing (NaN) UNIT compare
# as "not equal" and are therefore kept, exactly as before.
df_resultados = df_resultados[df_resultados["UNIT"] != ""]

In [9]:
# List the distinct UNIT labels currently present in the column.
df_resultados["UNIT"].unique()

array(['Deg', 'mm', 'Nm', 'Grad', 'Clicks', 'min', '\\u00B0', '%', 'V',
       'Unnamed: 5', 'kg'], dtype=object)

In [10]:
# Summarise row count, column dtypes and memory usage of the results table.
df_resultados.info()

<class 'pandas.core.frame.DataFrame'>
Index: 40487129 entries, 0 to 40580799
Data columns (total 8 columns):
 #   Column    Dtype  
---  ------    -----  
 0   KNR       object 
 1   NAME      object 
 2   ID        int64  
 3   STATUS    int64  
 4   UNIT      object 
 5   VALUE_ID  float64
 6   VALUE     object 
 7   DATA      object 
dtypes: float64(1), int64(2), object(5)
memory usage: 2.7+ GB


In [11]:
# Normalise the textual VALUE column before parsing it as a number.
# NOTE(review): `.str` methods yield NaN for non-string cells; this assumes
# every VALUE was read as text — TODO confirm.
value_text = (
    df_resultados["VALUE"]
    .str.strip()                         # drop leading/trailing whitespace
    .replace('', pd.NA)                  # empty strings become missing values
    .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
)

# Parse to numeric; anything that still cannot be parsed is coerced to NaN.
df_resultados["VALUE"] = pd.to_numeric(value_text, errors='coerce')

In [12]:
# Count how many VALUE entries are NaN after the numeric conversion.
df_resultados["VALUE"].isna().sum()

15844

In [13]:
# Keep only the rows whose VALUE is present (i.e. discard NaN readings).
has_value = df_resultados["VALUE"].notna()
df_resultados = df_resultados[has_value]

In [14]:
# Average all readings for each (KNR, UNIT) pair; the result stays in long
# format with the columns KNR, UNIT and VALUE.
df_grouped = df_resultados.groupby(['KNR', 'UNIT'], as_index=False)['VALUE'].mean()

# Reshape to wide format: one row per KNR and one column per unit, then
# replace the gaps (units that have no reading for a KNR) with 0.
# NOTE(review): a 0 here is indistinguishable from a genuine 0 reading and is
# rescaled like one by the later normalisation — confirm this is intended.
df_pivot = (
    df_grouped
    .pivot(index='KNR', columns='UNIT', values='VALUE')
    .reset_index()
    .fillna(0)
)
df_pivot.head()

UNIT,KNR,%,Clicks,Deg,Grad,Nm,V,\u00B0,kg,min,mm
0,2023-2016173,0.0,0.0,0.0,59.666667,6.606667,12.357,0.0,0.0,0.0,0.0
1,2023-2026098,0.0,0.0,0.0,104.25,8.29,12.848,0.0,0.0,0.0,0.0
2,2023-2026162,0.0,0.0,0.0,74.5,8.3275,12.871,0.0,0.0,0.0,0.0
3,2023-2026175,0.0,0.0,0.0,98.25,8.3275,12.489,0.0,0.0,0.0,0.0
4,2023-2026215,0.0,0.0,0.0,67.75,8.3275,12.737,0.0,0.0,0.0,0.0


In [15]:
# Upper-case every column label so the feature names follow one convention.
# (The previous `columns = df_pivot[[...]]` selection was never used; it only
# copied data and would raise KeyError whenever one of the listed units was
# missing from the pivot, so it has been removed.)
# Note: the label '\u00B0' is plain text, so it becomes '\U00B0' here.
df_pivot.columns = df_pivot.columns.str.upper()

df_pivot.head()

UNIT,KNR,%,CLICKS,DEG,GRAD,NM,V,\U00B0,KG,MIN,MM
0,2023-2016173,0.0,0.0,0.0,59.666667,6.606667,12.357,0.0,0.0,0.0,0.0
1,2023-2026098,0.0,0.0,0.0,104.25,8.29,12.848,0.0,0.0,0.0,0.0
2,2023-2026162,0.0,0.0,0.0,74.5,8.3275,12.871,0.0,0.0,0.0,0.0
3,2023-2026175,0.0,0.0,0.0,98.25,8.3275,12.489,0.0,0.0,0.0,0.0
4,2023-2026215,0.0,0.0,0.0,67.75,8.3275,12.737,0.0,0.0,0.0,0.0


In [16]:
# Print the full list of column labels of the pivoted table.
all_columns = list(df_pivot.columns)
print("Todas as colunas:", all_columns)

Todas as colunas: ['KNR', '%', 'CLICKS', 'DEG', 'GRAD', 'NM', 'V', '\\U00B0', 'KG', 'MIN', 'MM']


In [17]:
# Labels that must not be rescaled. 'KNR' is the identifier column; 'UNIT' is
# only the name of the columns axis, so listing it here has no effect.
columns_to_exclude = ['KNR', 'UNIT']

# Every remaining column is a candidate for normalisation.
columns_to_normalize = df_pivot.columns.difference(columns_to_exclude)

# Restrict the candidates to float/int columns so the scaler only sees numbers.
numeric_columns = (
    df_pivot.loc[:, columns_to_normalize]
    .select_dtypes(include=[float, int])
    .columns
)

# Min-max scale each numeric column (default range [0, 1]) and write the
# scaled values back over the originals.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df_pivot[numeric_columns])
df_pivot[numeric_columns] = scaled_values

df_pivot.head()

UNIT,KNR,%,CLICKS,DEG,GRAD,NM,V,\U00B0,KG,MIN,MM
0,2023-2016173,0.616167,0.0,0.0,0.264989,0.074849,0.884665,0.470523,0.0,0.811406,0.0
1,2023-2026098,0.616167,0.0,0.0,0.46299,0.09392,0.919817,0.470523,0.0,0.811406,0.0
2,2023-2026162,0.616167,0.0,0.0,0.330866,0.094345,0.921463,0.470523,0.0,0.811406,0.0
3,2023-2026175,0.616167,0.0,0.0,0.436343,0.094345,0.894115,0.470523,0.0,0.811406,0.0
4,2023-2026215,0.616167,0.0,0.0,0.300888,0.094345,0.91187,0.470523,0.0,0.811406,0.0


In [22]:
# Left-join the Merge_Falhas_Resultados table onto the per-KNR unit features.
# Every KNR from df_pivot is kept; KNRs with no match in df_merge get NaN in
# the joined columns.
df_final = pd.merge(df_pivot, df_merge, how='left', on='KNR')
df_final.head()

Unnamed: 0,KNR,%,CLICKS,DEG,GRAD,NM,V,\U00B0,KG,MIN,...,COR_6K6K,COR_6KA1,COR_6UA1,COR_A1A1,COR_K2A1,COR_K2K2,MOTOR_CWL,MOTOR_CWS,MOTOR_DHS,MOTOR_DRP
0,2023-2016173,0.616167,0.0,0.0,0.264989,0.074849,0.884665,0.470523,0.0,0.811406,...,,,,,,,,,,
1,2023-2026098,0.616167,0.0,0.0,0.46299,0.09392,0.919817,0.470523,0.0,0.811406,...,,,,,,,,,,
2,2023-2026162,0.616167,0.0,0.0,0.330866,0.094345,0.921463,0.470523,0.0,0.811406,...,,,,,,,,,,
3,2023-2026175,0.616167,0.0,0.0,0.436343,0.094345,0.894115,0.470523,0.0,0.811406,...,,,,,,,,,,
4,2023-2026215,0.616167,0.0,0.0,0.300888,0.094345,0.91187,0.470523,0.0,0.811406,...,,,,,,,,,,


In [24]:
# List the columns of the merged table before dropping the unused ones.
df_final.columns

Index(['KNR', '%', 'CLICKS', 'DEG', 'GRAD', 'NM', 'V', '\U00B0', 'KG', 'MIN',
       'MM', 'ID1NAME', 'ID1SOK', 'ID1SNOK', 'ID1DATA', 'ID2NAME', 'ID2SOK',
       'ID2SNOK', 'ID2DATA', 'ID718NAME', 'ID718SOK', 'ID718SNOK', 'ID718DATA',
       'S_GROUP_ID_-2', 'S_GROUP_ID_1', 'S_GROUP_ID_2', 'S_GROUP_ID_4',
       'S_GROUP_ID_5', 'S_GROUP_ID_133', 'S_GROUP_ID_137', 'S_GROUP_ID_140',
       'S_GROUP_ID_9830946', 'COR_0Q0Q', 'COR_0QA1', 'COR_2R2R', 'COR_2RA1',
       'COR_5T5T', 'COR_5TA1', 'COR_6K6K', 'COR_6KA1', 'COR_6UA1', 'COR_A1A1',
       'COR_K2A1', 'COR_K2K2', 'MOTOR_CWL', 'MOTOR_CWS', 'MOTOR_DHS',
       'MOTOR_DRP'],
      dtype='object')

In [25]:
# Columns that will not be used downstream, grouped by prefix.
cor_columns = [
    'COR_0Q0Q', 'COR_0QA1', 'COR_2R2R', 'COR_2RA1',
    'COR_5T5T', 'COR_5TA1', 'COR_6K6K', 'COR_6KA1',
    'COR_6UA1', 'COR_A1A1', 'COR_K2A1', 'COR_K2K2',
]
motor_columns = ['MOTOR_CWL', 'MOTOR_CWS', 'MOTOR_DHS', 'MOTOR_DRP']
unused_columns = cor_columns + motor_columns + ['S_GROUP_ID_-2']

# Remove them from the merged table.
df_final = df_final.drop(columns=unused_columns)

In [28]:
# List the columns that remain after dropping the unused ones.
df_final.columns

Index(['KNR', '%', 'CLICKS', 'DEG', 'GRAD', 'NM', 'V', '\U00B0', 'KG', 'MIN',
       'MM', 'ID1NAME', 'ID1SOK', 'ID1SNOK', 'ID1DATA', 'ID2NAME', 'ID2SOK',
       'ID2SNOK', 'ID2DATA', 'ID718NAME', 'ID718SOK', 'ID718SNOK', 'ID718DATA',
       'S_GROUP_ID_1', 'S_GROUP_ID_2', 'S_GROUP_ID_4', 'S_GROUP_ID_5',
       'S_GROUP_ID_133', 'S_GROUP_ID_137', 'S_GROUP_ID_140',
       'S_GROUP_ID_9830946'],
      dtype='object')

In [27]:
# Show the (rows, columns) dimensions of the final table.
df_final.shape

(49191, 31)

In [None]:
# Export the final DataFrame to a CSV file, without writing the index column.
# NOTE(review): the inputs are read from './data/...' but this writes to
# '../data/...', and this cell has no execution count — confirm the target
# directory is the intended one and that it exists.
df_final.to_csv('../data/df_torques_falhas.csv', index=False)
