In [1]:
import warnings
warnings.filterwarnings(action="ignore")
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

In [2]:
window_size = 112
n_components = 30

In [3]:
# Cargar las variables independientes (X_train) sin cabecera desde archivo
X_train = np.loadtxt(f'../data/v8.1/transformed_train_data_comp_{n_components}.csv', delimiter=',')

# Cargar las variables dependientes (y_train) con cabecera desde archivo
y_train = pd.read_csv(f'../data/train_value_min_label_windows_{window_size}_llm.csv')

# Cargar el conjunto de test (X_test sin cabeceras y y_test con cabeceras)
X_test = np.loadtxt(f'../data/v8.1/transformed_test_data_comp_{n_components}.csv', delimiter=',')
y_test = pd.read_csv(f'../data/test_value_min_label_windows_{window_size}_llm.csv')

# Eliminar la columna "row" que es solo un índice
y_train = y_train.drop(columns=['row'])
y_test = y_test.drop(columns=['row'])


In [4]:
# Escalar los datos de entrenamiento y test
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Function to generate the class for each appliance
def generate_class(row, appliance):
    appliance_value = row[appliance]
    other_values = row.drop(appliance).values  # Values for the other appliances

    if appliance_value == 0 and not any(other_values):
        return 'off'
    elif appliance_value == 1 and not any(other_values):
        return 'on'
    elif appliance_value == 0 and any(other_values):
        return 'off w int'
    elif appliance_value == 1 and any(other_values):
        return 'on w int'

In [6]:
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import (
    accuracy_score, f1_score, roc_auc_score, classification_report, confusion_matrix
)
import pandas as pd
import numpy as np

# Initialize DataFrames to store results
metrics_df = pd.DataFrame(columns=[
    "Class", "Window_Size", "Accuracy", "F1_Macro", "F1_Weighted", "AUC_Macro", "AUC_Weighted"
])
all_reports_df = pd.DataFrame()
all_confusion_matrices_df = pd.DataFrame()

# Bucle para realizar clasificación binaria para cada columna (clase)
for col in y_train.columns:
    # Create the class labels for each appliance
    y_train_bin = y_train.apply(lambda row: generate_class(row, col), axis=1)
    y_test_bin = y_test.apply(lambda row: generate_class(row, col), axis=1)

    label_encoder = LabelEncoder()
    y_train_bin_encoded = label_encoder.fit_transform(y_train_bin)
    y_test_bin_encoded = label_encoder.transform(y_test_bin)

    # Get unique classes for the current appliance
    unique_classes = np.unique(y_train_bin_encoded)

    # Handle class imbalance by computing sample weights
    class_weights = compute_class_weight('balanced', classes=unique_classes, y=y_train_bin_encoded)
    class_weight_dict = {cls: weight for cls, weight in zip(unique_classes, class_weights)}
    sample_weight = np.array([class_weight_dict[cls] for cls in y_train_bin_encoded])

    # Train XGBoost with class weights
    xgb_clf = XGBClassifier(
        eval_metric='mlogloss',
        tree_method='hist',
        device='cuda'
    )

    xgb_clf.fit(X_train_scaled, y_train_bin_encoded, sample_weight=sample_weight)

    # Create mapping for target names based on the fitted label encoder
    class_mapping = {i: label for i, label in enumerate(label_encoder.classes_)}

    # Make predictions
    y_pred_bin = xgb_clf.predict(X_test_scaled)
    y_pred_proba = xgb_clf.predict_proba(X_test_scaled)

    # Verificar que haya más de una clase en el conjunto de entrenamiento
    if len(np.unique(y_test_bin_encoded)) > 1:
        # Calculate metrics
        accuracy = accuracy_score(y_test_bin_encoded, y_pred_bin)
        f1_macro = f1_score(y_test_bin_encoded, y_pred_bin, average="macro")
        f1_weighted = f1_score(y_test_bin_encoded, y_pred_bin, average="weighted")

        # Check if there are more than two classes to calculate AUC
        if len(np.unique(y_test_bin_encoded)) > 2:
            auc_macro = roc_auc_score(y_test_bin_encoded, y_pred_proba, multi_class="ovr", average="macro")
            auc_weighted = roc_auc_score(y_test_bin_encoded, y_pred_proba, multi_class="ovr", average="weighted")
        else:
            # If only two classes, calculate AUC differently
            auc_macro = roc_auc_score(y_test_bin_encoded, y_pred_proba[:, 1])  # Use probabilities of the positive class
            auc_weighted = auc_macro

        # Get unique classes in the predictions
        unique_pred_classes = np.unique(y_pred_bin)

        # Create the classification report using the unique classes found
        report = classification_report(
            y_test_bin_encoded,
            y_pred_bin,
            target_names=[class_mapping[label] for label in unique_pred_classes],
            labels=unique_pred_classes,
            output_dict=True
        )

        cm = confusion_matrix(y_test_bin_encoded, y_pred_bin)

        # Save results to DataFrame
        new_metrics_row = {
            "Class": col,
            "Window_Size": "Your_Window_Size",  # Replace with actual window size variable if available
            "Accuracy": accuracy,
            "F1_Macro": f1_macro,
            "F1_Weighted": f1_weighted,
            "AUC_Macro": auc_macro,
            "AUC_Weighted": auc_weighted
        }

        metrics_df = pd.concat([metrics_df, pd.DataFrame([new_metrics_row])], ignore_index=True)

        # Append classification report and confusion matrix to combined DataFrames
        report_df = pd.DataFrame(report).transpose()
        report_df.insert(0, "Class", col)
        all_reports_df = pd.concat([all_reports_df, report_df], ignore_index=True)

        cm_df = pd.DataFrame(cm)
        cm_df.insert(0, "Class", col)
        all_confusion_matrices_df = pd.concat([all_confusion_matrices_df, cm_df], ignore_index=True)


# Display or save the results
print(metrics_df)
metrics_df.to_csv(f"../data/metrics_results_{window_size}_{n_components}.csv", index=False)
all_reports_df.to_csv(f"../data/combined_classification_reports_{window_size}_{n_components}.csv", index=False)
all_confusion_matrices_df.to_csv(f"../data/combined_confusion_matrices_{window_size}_{n_components}.csv", index=False)

                    Class       Window_Size  Accuracy  F1_Macro  F1_Weighted  \
0               Sockets01  Your_Window_Size  0.932597  0.883265     0.932228   
1               Sockets02  Your_Window_Size  0.937933  0.937883     0.938041   
2                 Light01  Your_Window_Size  0.938027  0.937976     0.938135   
3          CE appliance01  Your_Window_Size  0.865194  0.817860     0.858521   
4                Fridge01  Your_Window_Size  0.828777  0.794739     0.823332   
5   Waste disposal unit01  Your_Window_Size  0.937933  0.937883     0.938041   
6           Dish washer01  Your_Window_Size  0.931942  0.642530     0.929274   
7      Electric furnace01  Your_Window_Size  0.935780  0.624664     0.937035   
8                 Light02  Your_Window_Size  0.820071  0.768782     0.808161   
9               Sockets03  Your_Window_Size  0.863883  0.621769     0.883789   
10                Light03  Your_Window_Size  0.937933  0.937883     0.938041   
11            Microwave01  Your_Window_S