In [17]:
import os

import pandas as pd
from dotenv import load_dotenv



### 1. Definición de variables a utilizar

In [18]:
# Load key/value pairs from a .env file (if one is found) into the process
# environment, then read the two Azure Storage settings used by the upload cell.
# Either value is None when the corresponding variable is not set.
load_dotenv()
AZURE_CONNECTION_STRING = os.environ.get("AZURE_CONNECTION_STRING")
AZURE_CONTAINER_NAME = os.environ.get("AZURE_CONTAINER_NAME")

In [None]:
# Export the raw Excel workbook to CSV (without the index column), then work
# from the CSV copy so `df` reflects exactly what the CSV file contains.
pd.read_excel("dataset/data_raw.xlsx").to_csv("dataset/data_raw.csv", index=False)
df = pd.read_csv("dataset/data_raw.csv")

In [20]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,sensor_name,event_date_reg,avg_ax,avg_ay,avg_az,avg_gx,avg_gy,avg_gz,min_ax,min_ay,...,max_gx,max_gy,max_gz,std_ax,std_ay,std_az,std_gx,std_gy,std_gz,value_predict
0,sensor_01,2025-10-15 22:11:55,-0.046,0.016,1.003,-2.25,0.148,-1.007,-0.052,0.009,...,-1.908,0.336,-0.573,0.003,0.003,0.005,0.111,0.102,0.142,5
1,sensor_01,2025-10-15 22:11:53,-0.047,0.016,1.002,-2.234,0.127,-1.026,-0.056,0.007,...,-1.885,0.74,-0.626,0.003,0.003,0.008,0.106,0.161,0.138,5
2,sensor_01,2025-10-15 22:11:50,-0.047,0.016,1.001,-2.246,0.156,-1.005,-0.055,0.008,...,-2.038,0.458,-0.718,0.004,0.003,0.005,0.087,0.111,0.146,5
3,sensor_01,2025-10-15 22:11:43,-0.039,0.006,1.002,-2.238,0.145,-1.013,-0.047,-0.002,...,-2.023,0.397,-0.763,0.003,0.003,0.004,0.108,0.114,0.122,5
4,sensor_01,2025-10-15 22:11:41,-0.039,0.005,1.002,-2.261,0.127,-1.011,-0.045,-0.009,...,-1.29,1.458,-0.687,0.003,0.004,0.008,0.284,0.208,0.133,5


### 2. Transformación de datos: Extracción de características y división en entrenamiento/prueba

In [21]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Target vector: the class label the model has to predict.
y = df["value_predict"]

# Feature matrix: every column except the label, the sensor name and the
# event timestamp.
X = df.drop(["value_predict", "sensor_name", "event_date_reg"], axis=1)

# NOTE(review): the preview of `df` shows rows from the same sensor only a few
# seconds apart; a random split may put near-identical neighbours in both
# train and test, which could make the scores optimistic -- confirm.

# Hold out 20% of the rows for testing, keeping the class proportions of `y`
# in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.2
)


### 2.1. Entrenamiento del modelo

In [22]:
# Random forest with 200 trees and no depth limit; the seed is fixed for
# repeatability and n_jobs=-1 lets scikit-learn use all available cores.
clf = RandomForestClassifier(n_jobs=-1, random_state=42, max_depth=None, n_estimators=200)

# Fit on the training partition; the fitted estimator is the cell's output.
clf.fit(X_train, y_train)


0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [23]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict the labels of the held-out partition with the fitted classifier.
y_pred = clf.predict(X_test)

# Print the per-class precision/recall/F1 report followed by the confusion
# matrix, each on its own line.
print(classification_report(y_test, y_pred), confusion_matrix(y_test, y_pred), sep="\n")


              precision    recall  f1-score   support

           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00        10
           3       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         2

    accuracy                           1.00        17
   macro avg       1.00      1.00      1.00        17
weighted avg       1.00      1.00      1.00        17

[[ 3  0  0  0]
 [ 0 10  0  0]
 [ 0  0  2  0]
 [ 0  0  0  2]]


In [24]:
# Label each of the forest's feature_importances_ scores with its column name.
importances = pd.Series(data=clf.feature_importances_, index=X.columns)

# Display the 15 highest scores, largest first.
importances.sort_values(ascending=False).iloc[:15]


avg_ay    0.093759
min_az    0.081267
max_ay    0.080196
max_gx    0.072593
std_ax    0.071789
std_gx    0.059737
min_ay    0.057193
min_ax    0.056661
std_gz    0.053520
min_gz    0.052861
max_gz    0.051410
avg_az    0.042029
std_az    0.041091
max_ax    0.038514
min_gx    0.033567
dtype: float64

In [None]:
import joblib

# Reuse the classifier already fitted in the training cell. This cell used to
# rebuild and refit a RandomForestClassifier with the same hyperparameters,
# the same random_state and the same X_train / y_train, which repeated the
# training work and is expected to yield the same model.

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs("models", exist_ok=True)

# Persist the fitted model to disk. joblib.dump returns the list of file names
# it wrote, which the notebook displays as this cell's output.
joblib.dump(clf, "models/modelo_rf.pkl")

['modelo_rf.pkl']

### 3. Implementación: Despliegue del modelo

In [26]:
from azure.storage.blob import BlobServiceClient
import joblib
import io

# Azure Storage target. The connection string and container name come from
# the environment variables read in the configuration cell.
connection_string = AZURE_CONNECTION_STRING
container_name = AZURE_CONTAINER_NAME
blob_name = "modelo_rf.pkl"

# os.getenv returns None for an unset variable. Stop here with an explicit
# message instead of handing a None/empty value to the Azure client.
missing_settings = [
    env_name
    for env_name, env_value in (
        ("AZURE_CONNECTION_STRING", connection_string),
        ("AZURE_CONTAINER_NAME", container_name),
    )
    if not env_value
]
if missing_settings:
    raise ValueError(
        "Faltan variables de entorno para Azure: "
        + ", ".join(missing_settings)
        + ". Defínelas en el entorno o en el archivo .env."
    )

# Build the service client, then a client bound to the destination blob.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)

# Serialize the fitted model into an in-memory buffer; the context manager
# releases the buffer once the upload call has returned.
with io.BytesIO() as model_bytes:
    joblib.dump(clf, model_bytes)
    # Rewind so the upload reads from the first byte of the serialized model.
    model_bytes.seek(0)

    # Upload straight from memory, replacing any existing blob with this name.
    blob_client.upload_blob(model_bytes, overwrite=True)

# NOTE(review): the blob is a joblib/pickle payload -- whoever downloads it
# should only load it from a container they trust.
print(f"Modelo subido a: {container_name}/{blob_name}")


Modelo subido a: unfv/modelo_rf.pkl
