In [10]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report, confusion_matrix

In [11]:
window_size = 112
n_components = 2

In [12]:
# Cargar las variables independientes (X_train) sin cabecera desde archivo
X_train = np.loadtxt(f'../data/v6/transformed_train_data_comp_{n_components}.csv', delimiter=',')

# Cargar las variables dependientes (y_train) con cabecera desde archivo
y_train = pd.read_csv(f'../data/train_value_min_label_windows_{window_size}_llm.csv')

# Cargar el conjunto de test (X_test sin cabeceras y y_test con cabeceras)
X_test = np.loadtxt(f'../data/v6/transformed_test_data_comp_{n_components}.csv', delimiter=',')
y_test = pd.read_csv(f'../data/test_value_min_label_windows_{window_size}_llm.csv')

# Eliminar la columna "row" que es solo un índice
y_train = y_train.drop(columns=['row'])
y_test = y_test.drop(columns=['row'])


In [13]:
# Escalar los datos de entrenamiento y test
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [14]:
X_train.shape, X_test.shape

((23876, 2), (10682, 2))

In [15]:
# Bucle para realizar clasificación binaria para cada columna (clase)
for col in y_train.columns:
    y_train_bin = y_train[col]
    y_test_bin = y_test[col]
    
    # Verificar que haya más de una clase en el conjunto de entrenamiento
    if len(np.unique(y_test_bin)) > 1:
        # Entrenar modelo SVM
        svm_clf = SVC(kernel='linear', probability=True)
        svm_clf.fit(X_train_scaled, y_train_bin)
    
        # Predecir en el conjunto de test
        y_pred_bin = svm_clf.predict(X_test_scaled)
        y_pred_proba = svm_clf.predict_proba(X_test_scaled)[:, 1]  # Para calcular AUC
    
        # Calcular métricas
        accuracy = accuracy_score(y_test_bin, y_pred_bin)
        f1 = f1_score(y_test_bin, y_pred_bin)
        auc = roc_auc_score(y_test_bin, y_pred_proba)
    
        # Mostrar resultados
        print(f"Resultados para la clase {col}:")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print(f"AUC: {auc:.4f}")
    else:
        # En caso de tener solo una clase
        accuracy = accuracy_score(y_test_bin, np.full_like(y_test_bin, y_train_bin.iloc[0]))
        
        # Mostrar solo accuracy, el resto no se puede calcular
        print(f"Resultados para la clase {col}:")
        print(f"Accuracy (una sola clase): {accuracy:.4f}")
        print("F1 Score: No calculable (una sola clase)")
        print("AUC: No calculable (una sola clase)")

Resultados para la clase Sockets01:
Accuracy: 0.9857
F1 Score: 0.0000
AUC: 0.9850
Resultados para la clase Sockets02:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)
Resultados para la clase Light01:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)
Resultados para la clase CE appliance01:
Accuracy: 0.9387
F1 Score: 0.9404
AUC: 0.9449
Resultados para la clase Fridge01:
Accuracy: 0.7922
F1 Score: 0.0000
AUC: 0.2034
Resultados para la clase Waste disposal unit01:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)
Resultados para la clase Dish washer01:
Accuracy: 0.9938
F1 Score: 0.0000
AUC: 0.5924
Resultados para la clase Electric furnace01:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)
Resultados para la clase Light02:
Accuracy: 0.7813
F1 Sco

In [16]:
from xgboost import XGBClassifier

# Bucle para realizar clasificación binaria para cada columna (clase)
for col in y_train.columns:
    y_train_bin = y_train[col]
    y_test_bin = y_test[col]
    
    # Verificar que haya más de una clase en el conjunto de entrenamiento
    if len(np.unique(y_test_bin)) > 1:
        # Calcular la proporción de clases
        neg_count = np.sum(y_train_bin == 0)
        pos_count = np.sum(y_train_bin == 1)
        
        # Ajustar el peso para desbalanceo de clases
        scale_pos_weight = neg_count / pos_count if pos_count > 0 else 1
        
        # Entrenar modelo XGBoost
        xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss', scale_pos_weight=scale_pos_weight)
        xgb_clf.fit(X_train_scaled, y_train_bin)
    
        # Predecir en el conjunto de test
        y_pred_bin = xgb_clf.predict(X_test_scaled)
        y_pred_proba = xgb_clf.predict_proba(X_test_scaled)[:, 1]  # Para calcular AUC
    
        # Calcular métricas
        accuracy = accuracy_score(y_test_bin, y_pred_bin)
        f1 = f1_score(y_test_bin, y_pred_bin)
        auc = roc_auc_score(y_test_bin, y_pred_proba)
        report = classification_report(y_test_bin, y_pred_bin)
        cm = confusion_matrix(y_test_bin, y_pred_bin)
    
        # Mostrar resultados
        print(f"Resultados para la clase {col}:")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print(f"AUC: {auc:.4f}")
        print("Classification Report:\n", report)
        print(f"Confusion Matrix:\n{cm}")
    else:
        # En caso de tener solo una clase
        accuracy = accuracy_score(y_test_bin, np.full_like(y_test_bin, y_train_bin.iloc[0]))
        
        # Mostrar solo accuracy, el resto no se puede calcular
        print(f"Resultados para la clase {col}:")
        print(f"Accuracy (una sola clase): {accuracy:.4f}")
        print("F1 Score: No calculable (una sola clase)")
        print("AUC: No calculable (una sola clase)")

Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Sockets01:
Accuracy: 0.9859
F1 Score: 0.5648
AUC: 0.9761
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99     10529
           1       0.51      0.64      0.56       153

    accuracy                           0.99     10682
   macro avg       0.75      0.82      0.78     10682
weighted avg       0.99      0.99      0.99     10682

Confusion Matrix:
[[10433    96]
 [   55    98]]
Resultados para la clase Sockets02:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)
Resultados para la clase Light01:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)
Resultados para la clase CE appliance01:
Accuracy: 0.9378
F1 Score: 0.9395
AUC: 0.9444
Classification Report:
               precision    recall  f1-score   support

           0       0.88      1.00      0.94      4862
      

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Fridge01:
Accuracy: 0.7974
F1 Score: 0.6230
AUC: 0.8400
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.80      0.86      8462
           1       0.51      0.81      0.62      2220

    accuracy                           0.80     10682
   macro avg       0.72      0.80      0.74     10682
weighted avg       0.85      0.80      0.81     10682

Confusion Matrix:
[[6730 1732]
 [ 432 1788]]
Resultados para la clase Waste disposal unit01:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)
Resultados para la clase Dish washer01:
Accuracy: 0.9756
F1 Score: 0.0578
AUC: 0.8715
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.99     10616
           1       0.04      0.12      0.06        66

    accuracy                           0.98     10682
   macro avg       0.52      0.55   

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Light02:
Accuracy: 0.8134
F1 Score: 0.6144
AUC: 0.8461
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.85      0.88      8346
           1       0.56      0.68      0.61      2336

    accuracy                           0.81     10682
   macro avg       0.73      0.77      0.75     10682
weighted avg       0.83      0.81      0.82     10682

Confusion Matrix:
[[7101 1245]
 [ 748 1588]]
Resultados para la clase Sockets03:
Accuracy: 0.7576
F1 Score: 0.0464
AUC: 0.7660
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.76      0.86     10558
           1       0.02      0.51      0.05       124

    accuracy                           0.76     10682
   macro avg       0.51      0.63      0.45     10682
weighted avg       0.98      0.76      0.85     10682

Confusion Matrix:
[[8030 2528]
 [  61   63]]
Resultados para la clase Light03:
Accuracy (un

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Microwave01:
Accuracy: 0.9937
F1 Score: 0.1928
AUC: 0.9309
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     10653
           1       0.15      0.28      0.19        29

    accuracy                           0.99     10682
   macro avg       0.57      0.64      0.59     10682
weighted avg       1.00      0.99      0.99     10682

Confusion Matrix:
[[10607    46]
 [   21     8]]


Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Light04:
Accuracy: 0.8404
F1 Score: 0.6181
AUC: 0.8773
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.85      0.90      8942
           1       0.51      0.79      0.62      1740

    accuracy                           0.84     10682
   macro avg       0.73      0.82      0.76     10682
weighted avg       0.88      0.84      0.85     10682

Confusion Matrix:
[[7597 1345]
 [ 360 1380]]
Resultados para la clase Smoke alarm01:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)


Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Light05:
Accuracy: 0.8211
F1 Score: 0.5262
AUC: 0.8715
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.83      0.89      9244
           1       0.41      0.74      0.53      1438

    accuracy                           0.82     10682
   macro avg       0.68      0.79      0.71     10682
weighted avg       0.88      0.82      0.84     10682

Confusion Matrix:
[[7710 1534]
 [ 377 1061]]


Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Unknown01:
Accuracy: 0.9488
F1 Score: 0.0585
AUC: 0.6881
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.95      0.97     10641
           1       0.03      0.41      0.06        41

    accuracy                           0.95     10682
   macro avg       0.51      0.68      0.52     10682
weighted avg       0.99      0.95      0.97     10682

Confusion Matrix:
[[10118   523]
 [   24    17]]
Resultados para la clase Sockets04:
Accuracy (una sola clase): 1.0000
F1 Score: No calculable (una sola clase)
AUC: No calculable (una sola clase)


Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Sockets05:
Accuracy: 0.8737
F1 Score: 0.0160
AUC: 0.7396
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.88      0.93     10632
           1       0.01      0.22      0.02        50

    accuracy                           0.87     10682
   macro avg       0.50      0.55      0.47     10682
weighted avg       0.99      0.87      0.93     10682

Confusion Matrix:
[[9322 1310]
 [  39   11]]
Resultados para la clase Washer dryer01:
Accuracy: 0.9967
F1 Score: 0.7953
AUC: 0.9987
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     10603
           1       0.74      0.86      0.80        79

    accuracy                           1.00     10682
   macro avg       0.87      0.93      0.90     10682
weighted avg       1.00      1.00      1.00     10682

Confusion Matrix:
[[10579    24]
 [   11    68]]


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Resultados para la clase Washer dryer02:
Accuracy: 0.9773
F1 Score: 0.6411
AUC: 0.9847
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99     10417
           1       0.53      0.82      0.64       265

    accuracy                           0.98     10682
   macro avg       0.76      0.90      0.81     10682
weighted avg       0.98      0.98      0.98     10682

Confusion Matrix:
[[10222   195]
 [   48   217]]
