In [2]:
import os
import sys

# The notebook is executed from a sub-directory, so the parent of the current
# working directory is treated as the project root. It is appended to the module
# search path (only once) so that the `src` package imported afterwards resolves.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# Third-party imports.
import pandas as pd

# Project-local imports; they rely on the project root having been added to
# sys.path earlier in this cell. (`os` is already imported at the top of the
# cell, so it is not imported a second time here.)
from src.preprocessing import load_data, clean_data, feature_engineering, scale_and_split
from src.config import RAW_DATA_PATH, PROCESSED_DATA_PATH
from src.logger import logger
from src.utils import validar_dataframe

# Do not truncate columns when a DataFrame is rendered in the notebook.
pd.set_option('display.max_columns', None)


In [3]:
# Load the raw dataset from the path configured in src.config.
df = load_data(RAW_DATA_PATH)
logger.info("Datos crudos cargados.")
# Run the project's validation helper on the loaded frame; its return value is not used here.
# NOTE(review): presumably it raises or logs when the dataset is malformed — confirm in src.utils.
validar_dataframe(df)
# Last expression of the cell: preview the first rows of the raw frame.
df.head()


2025-04-02 22:22:28,400 [INFO] Datos crudos cargados.
2025-04-02 22:22:28,404 [INFO] Dataset validado correctamente.


Unnamed: 0,timestamp,temperatura,vibracion,presion,falla
0,2024-01-01 00:00:00,72.48,2.79,28.64,0
1,2024-01-01 01:00:00,69.31,2.77,29.39,0
2,2024-01-01 02:00:00,73.24,2.1,28.81,0
3,2024-01-01 03:00:00,77.62,2.83,30.22,0
4,2024-01-01 04:00:00,68.83,3.37,32.39,0


In [4]:
# Stage 1 - cleaning: pass the frame from the previous cell through the project's
# cleaning helper and keep the result under its own stage-specific name.
df_clean = clean_data(df)
logger.info("Datos limpios (sin filas nulas).")

# Stage 2 - feature engineering on the cleaned frame. The result is bound to `df`,
# which is the name the following cells consume.
df = feature_engineering(df_clean)
logger.info("Feature engineering aplicado.")

# Preview the engineered frame.
df.head()


2025-04-02 22:22:37,015 [INFO] Datos limpios (sin filas nulas).
2025-04-02 22:22:37,032 [INFO] Feature engineering aplicado.


Unnamed: 0,temperatura,vibracion,presion,falla,hora,dia_semana
0,72.48,2.79,28.64,0,0,0
1,69.31,2.77,29.39,0,1,0
2,73.24,2.1,28.81,0,2,0
3,77.62,2.83,30.22,0,3,0
4,68.83,3.37,32.39,0,4,0


In [5]:
# Build the train/test feature sets and targets, using the `falla` column as the target.
# NOTE(review): the helper's name suggests it both scales and splits; confirm in
# src.preprocessing that the scaler is fit on the training portion only and that the
# split uses a fixed random seed, otherwise reruns will not reproduce the saved files.
X_train_scaled, X_test_scaled, y_train, y_test = scale_and_split(df, target='falla')
logger.info("Escalado y división realizados.")


2025-04-02 22:22:44,939 [INFO] Escalado y división realizados.


In [6]:
# Make sure the output directory exists before anything is written to it.
os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)

# Output file name -> object to persist. Entries are written in this order,
# each as CSV without the index column.
processed_outputs = {
    "X_train_scaled.csv": X_train_scaled,
    "X_test_scaled.csv": X_test_scaled,
    "y_train.csv": y_train,
    "y_test.csv": y_test,
}
for csv_name, dataset in processed_outputs.items():
    dataset.to_csv(os.path.join(PROCESSED_DATA_PATH, csv_name), index=False)

logger.info("Conjuntos guardados en 'processed/'.")


2025-04-02 22:22:56,633 [INFO] Conjuntos guardados en 'processed/'.
