In [1]:
import warnings
warnings.filterwarnings(action="ignore")

import os
import pandas as pd
import numpy as np
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from PIL import Image
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [2]:
# Cargar el DataFrame
df_train = pd.read_csv('../data/train_value_mmc_label_windows_1792.csv')
df_test = pd.read_csv('../data/test_value_mmc_label_windows_1792.csv')

In [3]:
# Modelo preentrenado (usar VGG16 o ResNet50)
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)

In [4]:
# Función para cargar y procesar imágenes
def load_and_process_image(image_folder, img_name):
    img_r_path = os.path.join(image_folder, f"{img_name}_mains.png")
    img_g_path = os.path.join(image_folder, f"{img_name}_amplitude_spectrum.png")
    img_b_path = os.path.join(image_folder, f"{img_name}_phase_spectrum.png")
    
    img_r = Image.open(img_r_path).convert('L')
    img_g = Image.open(img_g_path).convert('L')
    img_b = Image.open(img_b_path).convert('L')
    
    img_r = img_r.resize((224, 224))
    img_g = img_g.resize((224, 224))
    img_b = img_b.resize((224, 224))
    
    img_rgb = Image.merge("RGB", (img_r, img_g, img_b))
    img_array = img_to_array(img_rgb)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = vgg_preprocess(img_array)  # Normalizar las imágenes en el rango esperado por VGG16
    return img_array


In [5]:
# Extraer características
features = []
for img_name in df_train['row']:
    img_array = load_and_process_image('../data/v2/window_size_1792/train/all',img_name)
    feature = model.predict(img_array)
    features.append(feature.flatten())

In [6]:
# Convertir a array numpy
features_train = np.array(features)

In [7]:
# Extraer características
features = []
for img_name in df_test['row']:
    img_array = load_and_process_image('../data/v2/window_size_1792/test/all',img_name)
    feature = model.predict(img_array)
    features.append(feature.flatten())

In [8]:
# Convertir a array numpy
features_test = np.array(features)

In [9]:
# Separar las columnas de etiquetas
labels_train = df_train.drop(columns=['row'])
labels_test = df_test.drop(columns=['row'])
# Diccionario para almacenar los resultados
appliances = labels_train.columns
results = {}

In [10]:
# Entrenar y evaluar modelos para cada columna
for a in tqdm(appliances):
    y_train = labels_train[a]
    y_test = labels_test[a]
    X_train = features_train
    X_test = features_test
    
    # MLP
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    mlp = MLPClassifier(hidden_layer_sizes=(128, 64),  # Configuración con dos capas ocultas
                        max_iter=500,
                        learning_rate_init=0.001,
                        solver='adam',
                        alpha=0.01,
                        random_state=42)
    mlp.fit(X_train_scaled, y_train)
    y_pred_mlp = mlp.predict(X_test_scaled)
    
    # XGBoost
    xgb = XGBClassifier(random_state=42)
    xgb.fit(X_train, y_train)
    y_pred_xgb = xgb.predict(X_test)

    # Evaluación
    metrics = {}
    
    # Función para manejar errores en AUC
    def safe_roc_auc_score(y_true, y_pred):
        try:
            return roc_auc_score(y_true, y_pred)
        except ValueError as e:
            #print(f"ROC AUC score error: {e}")
            return np.nan  # Retorna NaN si ocurre un error
        
    metrics['MLP'] = {
        'accuracy': accuracy_score(y_test, y_pred_mlp),
        'auc': safe_roc_auc_score(y_test, y_pred_mlp),
        'f1_score': f1_score(y_test, y_pred_mlp),
        'confusion_matrix': confusion_matrix(y_test, y_pred_mlp)
    }
    
    metrics['XGBoost'] = {
        'accuracy': accuracy_score(y_test, y_pred_xgb),
        'auc': safe_roc_auc_score(y_test, y_pred_xgb),
        'f1_score': f1_score(y_test, y_pred_xgb),
        'confusion_matrix': confusion_matrix(y_test, y_pred_xgb)
    }
    
    results[a] = metrics

100%|██████████| 20/20 [12:46<00:00, 38.32s/it]


In [11]:
# Mostrar resultados
for column, metrics in results.items():
    print(f"Column: {column}")
    for model_name, scores in metrics.items():
        print(f"  Model: {model_name}")
        print(f"    Accuracy: {scores['accuracy']:.4f}")
        print(f"    AUC: {scores['auc']:.4f}")
        print(f"    F1 Score: {scores['f1_score']:.4f}")
        print(f"    Confusion Matrix:\n{scores['confusion_matrix']}")

Column: Sockets01
  Model: MLP
    Accuracy: 0.9610
    AUC: 0.5241
    F1 Score: 0.0714
    Confusion Matrix:
[[640  12]
 [ 14   1]]
  Model: XGBoost
    Accuracy: 0.9775
    AUC: 0.5000
    F1 Score: 0.0000
    Confusion Matrix:
[[652   0]
 [ 15   0]]
Column: Sockets02
  Model: MLP
    Accuracy: 1.0000
    AUC: nan
    F1 Score: 0.0000
    Confusion Matrix:
[[667]]
  Model: XGBoost
    Accuracy: 1.0000
    AUC: nan
    F1 Score: 0.0000
    Confusion Matrix:
[[667]]
Column: Light01
  Model: MLP
    Accuracy: 0.9865
    AUC: nan
    F1 Score: 0.0000
    Confusion Matrix:
[[658   9]
 [  0   0]]
  Model: XGBoost
    Accuracy: 1.0000
    AUC: nan
    F1 Score: 0.0000
    Confusion Matrix:
[[667]]
Column: CE appliance01
  Model: MLP
    Accuracy: 0.8996
    AUC: 0.9050
    F1 Score: 0.9039
    Confusion Matrix:
[[285  13]
 [ 54 315]]
  Model: XGBoost
    Accuracy: 0.8951
    AUC: 0.9029
    F1 Score: 0.8974
    Confusion Matrix:
[[291   7]
 [ 63 306]]
Column: Fridge01
  Model: MLP
    Accu