In [1]:
import warnings
warnings.filterwarnings(action="ignore")

import os
import json
import pandas as pd
import numpy as np
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from PIL import Image
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import cv2
from concurrent.futures import ThreadPoolExecutor

In [2]:

# Verificar que TensorFlow puede detectar la GPU
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


Num GPUs Available:  0


In [3]:
# Cargar el DataFrame
df_train = pd.read_csv('../data/train_value_min_label_windows_112.csv')
df_test = pd.read_csv('../data/test_value_min_label_windows_112.csv')
#labels = [c for c in df_train.columns.values if not c in ['row']]
appliances = ['Fridge01','Dish washer01','Microwave01','Washer dryer01','Washer dryer02']

df_train['valid'] = 0
for label in appliances:
    df_train.loc[df_train[label] == 1, 'valid'] = 1

df_filtered_train = df_train[df_train['valid'] == 1]

In [4]:
df_filtered_train = df_filtered_train.sample(frac=0.5, random_state=42)
df_filtered_test = df_filtered_train.sample(frac=0.1, random_state=42)

In [5]:
# Modelo preentrenado (usar VGG16 o ResNet50)
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)

In [6]:
# Función para cargar y procesar imágenes
def load_and_process_image(image_folder, img_name):
    # Paths to images
    img_r_path = os.path.join(image_folder, f"{img_name}_rp.png")
    img_g_path = os.path.join(image_folder, f"{img_name}_gasf.png")
    img_b_path = os.path.join(image_folder, f"{img_name}_gadf.png")
    
    # Load and resize images using OpenCV for speed
    img_r = cv2.imread(img_r_path, cv2.IMREAD_GRAYSCALE)
    img_g = cv2.imread(img_g_path, cv2.IMREAD_GRAYSCALE)
    img_b = cv2.imread(img_b_path, cv2.IMREAD_GRAYSCALE)
    
    # Resize images to 224x224
    img_r = cv2.resize(img_r, (224, 224))
    img_g = cv2.resize(img_g, (224, 224))
    img_b = cv2.resize(img_b, (224, 224))
    
    # Merge channels
    img_rgb = cv2.merge([img_r, img_g, img_b])
    
    # Convert to array and expand dimensions
    img_array = img_to_array(img_rgb)

    # Reduce memory usage by converting to uint8
    img_array = img_array.astype(np.float16)
    
    img_array = np.expand_dims(img_array, axis=0)
    
    # Preprocess for VGG16
    img_array = vgg_preprocess(img_array)
    
    return img_array


In [7]:
# Batch processing with threading to load and process multiple images in parallel
def load_and_process_images_in_parallel(image_folder, image_names, max_workers=1):
    processed_images = []
    
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(load_and_process_image, image_folder, img_name) for img_name in image_names]
        for future in futures:
            processed_images.append(future.result())
    
    return np.vstack(processed_images)  # Combine all processed images into a single array

In [8]:
# Extraer características
features = []
#img_array = []
for image_name in df_filtered_train['row']:
    img_array = load_and_process_image('../data/v5/window_size_112/train/all',image_name)
    feature = model.predict(img_array)
    features.append(feature.flatten())
# Convertir a array numpy
X = np.array(features)
#img_array_train = np.array(img_array)
#X = load_and_process_images_in_parallel('../data/v5/window_size_112/train/all', df_filtered_train['row'])

In [9]:
# Extraer características
features = []
for image_name in df_filtered_test['row']:
    img_array = load_and_process_image('../data/v5/window_size_112/test/all',image_name)
    feature = model.predict(img_array)
    features.append(feature.flatten())
# Convertir a array numpy
X_test = np.array(features)
#X_test = load_and_process_images_in_parallel('../data/v5/window_size_112/test/all', df_filtered_test['row'])

In [32]:
# Separar las columnas de etiquetas
labels_train = df_filtered_train.drop(columns=['row'])
labels_test = df_filtered_test.drop(columns=['row'])
# Diccionario para almacenar los resultados
results = {}

In [33]:
# Archivo para guardar los resultados
results_file = '../data/v5/model_results_window_size_112_vgg.json'

In [34]:
# Cargar resultados existentes
if os.path.exists(results_file):
    with open(results_file, 'r') as f:
        results = json.load(f)

In [35]:
# Función para guardar los resultados
def save_results(results):
    with open(results_file, 'w') as f:
        json.dump(results, f, indent=4)

In [36]:
X.shape, X_test.shape

((19581, 25088), (1958, 25088))

In [37]:
from sklearn.utils import resample

# Function to handle errors in AUC
def safe_roc_auc_score(y_true, y_pred):
    try:
        return roc_auc_score(y_true, y_pred)
    except ValueError as e:
        return np.nan

# Entrenar y evaluar modelos para cada columna
X_test = X_test
for a in appliances:
    if a in results:
        print(f"Results for appliance {a} already exist. Skipping training.")
        continue
    else:
        print(f"Training process for appliance {a}.")

    y = labels_train[a]
    y_test = labels_test[a]    
    
    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)
    
    # Initialize MLPClassifier
    mlp = MLPClassifier(hidden_layer_sizes=(16, 8),  # Configuración con dos capas ocultas
                        max_iter=1,  # Start with 1 iteration and increase iteratively
                        learning_rate_init=0.001,
                        solver='adam',
                        alpha=0.01,
                        random_state=42,
                        warm_start=True)  # To continue training in subsequent iterations
    
    # Early stopping parameters
    patience = 20
    best_val_score = -np.inf
    epochs_no_improve = 0
    max_epochs = 100  # Total number of epochs to train
    
    for epoch in range(max_epochs):
        # Train the model
        mlp.fit(X_train_scaled, y_train)
        
        # Evaluate on validation set
        val_predictions = mlp.predict(X_val_scaled)
        val_accuracy = accuracy_score(y_val, val_predictions)
        val_auc = safe_roc_auc_score(y_val, val_predictions)
        
        print(f"Epoch {epoch+1}/{max_epochs} - Validation Accuracy: {val_accuracy:.4f} - Validation AUC: {val_auc:.4f}")
        
        # Check for improvement
        if val_auc > best_val_score:
            best_val_score = val_auc
            epochs_no_improve = 0
            # Save the best model weights
            best_model = mlp
        else:
            epochs_no_improve += 1
        
        # Check early stopping condition
        if epochs_no_improve >= patience:
            print("Early stopping triggered.")
            break
    
    # Use the best model for prediction
    y_pred_mlp = best_model.predict(X_test_scaled)
    
    # Evaluación
    metrics = {
        'MLP': {
            'accuracy': accuracy_score(y_test, y_pred_mlp),
            'auc': safe_roc_auc_score(y_test, y_pred_mlp),
            'f1_score': f1_score(y_test, y_pred_mlp),
            'confusion_matrix': confusion_matrix(y_test, y_pred_mlp).tolist()  # Convertir a lista para guardar en JSON
        }
    }

    # Store results
    results[a] = metrics

    # Save results after each column
    save_results(results)  # Assuming this function is defined elsewhere

Training process for appliance Fridge01.
Epoch 1/100 - Validation Accuracy: 0.9139 - Validation AUC: 0.5291
Epoch 2/100 - Validation Accuracy: 0.9197 - Validation AUC: 0.6032
Epoch 3/100 - Validation Accuracy: 0.9142 - Validation AUC: 0.6203
Epoch 4/100 - Validation Accuracy: 0.9098 - Validation AUC: 0.6179
Epoch 5/100 - Validation Accuracy: 0.9079 - Validation AUC: 0.6019
Epoch 6/100 - Validation Accuracy: 0.9180 - Validation AUC: 0.5973
Epoch 7/100 - Validation Accuracy: 0.9265 - Validation AUC: 0.6480
Epoch 8/100 - Validation Accuracy: 0.9304 - Validation AUC: 0.6301
Epoch 9/100 - Validation Accuracy: 0.9319 - Validation AUC: 0.6372
Epoch 10/100 - Validation Accuracy: 0.9253 - Validation AUC: 0.6424
Epoch 11/100 - Validation Accuracy: 0.9256 - Validation AUC: 0.6563
Epoch 12/100 - Validation Accuracy: 0.9140 - Validation AUC: 0.6575
Epoch 13/100 - Validation Accuracy: 0.9180 - Validation AUC: 0.6509
Epoch 14/100 - Validation Accuracy: 0.9231 - Validation AUC: 0.6374
Epoch 15/100 - V

In [38]:
# Mostrar resultados
for column, metrics in results.items():
    print(f"Column: {column}")
    for model_name, scores in metrics.items():
        print(f"  Model: {model_name}")
        print(f"    Accuracy: {scores['accuracy']:.4f}")
        print(f"    AUC: {scores['auc']:.4f}")
        print(f"    F1 Score: {scores['f1_score']:.4f}")
        print(f"    Confusion Matrix:\n{scores['confusion_matrix']}")

Column: Fridge01
  Model: MLP
    Accuracy: 0.9142
    AUC: 0.5047
    F1 Score: 0.9551
    Confusion Matrix:
[[4, 131], [37, 1786]]
Column: Dish washer01
  Model: MLP
    Accuracy: 0.9673
    AUC: 0.5065
    F1 Score: 0.0303
    Confusion Matrix:
[[1893, 10], [54, 1]]
Column: Microwave01
  Model: MLP
    Accuracy: 0.8611
    AUC: 0.5124
    F1 Score: 0.0355
    Confusion Matrix:
[[1681, 244], [28, 5]]
Column: Washer dryer01
  Model: MLP
    Accuracy: 0.7799
    AUC: 0.5159
    F1 Score: 0.1113
    Confusion Matrix:
[[1500, 331], [100, 27]]
Column: Washer dryer02
  Model: MLP
    Accuracy: 0.8386
    AUC: 0.5478
    F1 Score: 0.1856
    Confusion Matrix:
[[1606, 157], [159, 36]]
