In [4]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import (
    accuracy_score,
    average_precision_score,
    classification_report,
    f1_score,
    multilabel_confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [None]:
# Configuration: locations of the input CSV files and of the saved model.
DATA_DIR = "house_1_chunks"  # directory expected to contain train.csv and test.csv
TRAIN_FILE, TEST_FILE = (
    os.path.join(DATA_DIR, split_csv) for split_csv in ("train.csv", "test.csv")
)
MODEL_SAVE_PATH = "models/eco_multilabel_model.pth"  # relative to the working directory

In [9]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. Batches are moved to this device inside the epoch loops.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"Using device: {DEVICE}")

Using device: cuda


In [None]:
# Names of the measurement columns (presumably passed as `feature_cols` to
# EcoDataset -- confirm at the call site). The groups below are concatenated
# in order, so the position of every column in the final list is fixed.
FEATURE_COLS = (
    # power readings
    ["powerallphases", "powerl1", "powerl2", "powerl3"]
    # current readings
    + ["currentneutral", "currentl1", "currentl2", "currentl3"]
    # voltage readings
    + ["voltagel1", "voltagel2", "voltagel3"]
    # phase-angle readings
    + [
        "phaseanglevoltagel2l1",
        "phaseanglevoltagel3l1",
        "phaseanglecurrentvoltagel1",
        "phaseanglecurrentvoltagel2",
        "phaseanglecurrentvoltagel3",
    ]
)

In [8]:
# Label column names. test_epoch passes this list as `target_names` to the
# classification report, so its order must match the order of the label
# columns in the target arrays.
TARGET_COLS = [
    "Fridge", "Dryer", "Coffee machine", "Kettle",
    "Washing machine", "PC (including router)", "Freezer",
]

Dataset

In [15]:
class EcoDataset(Dataset):
    """Map-style dataset over one CSV file with scaled features and raw targets.

    The CSV is read with pandas; ``feature_cols`` and ``target_cols`` are
    extracted as float32 arrays, and the features are passed through a scaler.

    Args:
        csv_file: Path (or anything ``pd.read_csv`` accepts) of the CSV to load.
        feature_cols: Column names used as model inputs.
        target_cols: Column names used as labels.
        scaler: An already-fitted object exposing ``transform``. When given it
            is applied as is (``fit_scaler`` is ignored), which is how a
            held-out split reuses the statistics of the training split.
        fit_scaler: Only consulted when ``scaler`` is None. If True, a new
            ``StandardScaler`` is fitted on this file's features and kept in
            ``self.scaler`` so it can be handed to other datasets.

    Raises:
        ValueError: If ``scaler`` is None and ``fit_scaler`` is False. There
            would be no fitted scaler to apply in that case.
    """

    def __init__(
        self, csv_file, feature_cols, target_cols, scaler=None, fit_scaler=False
    ):
        # Fail fast, before any file I/O: a brand-new scaler cannot transform
        # data, so this combination could never succeed.
        if scaler is None and not fit_scaler:
            raise ValueError(
                "EcoDataset needs either a fitted `scaler` or `fit_scaler=True`; "
                "got scaler=None and fit_scaler=False."
            )

        self.df = pd.read_csv(csv_file)
        features = self.df[feature_cols].values.astype(np.float32)
        self.targets = self.df[target_cols].values.astype(np.float32)

        if scaler is None:
            self.scaler = StandardScaler()
            features = self.scaler.fit_transform(features)
        else:
            self.scaler = scaler
            features = self.scaler.transform(features)

        # Keep float32 even if the scaler returned another dtype, so the
        # tensors produced by __getitem__ always have the same dtype.
        self.features = np.asarray(features, dtype=np.float32)

    def __len__(self):
        """Return the number of rows loaded from the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(features, targets)`` tensor pair for row ``idx``."""
        x = torch.tensor(self.features[idx])
        y = torch.tensor(self.targets[idx])
        return x, y

Model

In [14]:
class MultiLabelClassifier(nn.Module):
    """Fully connected network that emits one raw score per label.

    Layout: Linear(input_dim, 128) -> BatchNorm -> ReLU -> Dropout(0.3)
    -> Linear(128, 64) -> BatchNorm -> ReLU -> Dropout(0.2)
    -> Linear(64, output_dim). No activation follows the last layer, so
    ``forward`` returns unbounded scores.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of labels, i.e. the width of the output.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        first_width, second_width = 128, 64
        # Modules are created in forward order; their positions inside the
        # Sequential determine the parameter names (net.0, net.1, net.4, ...).
        stack = [
            nn.Linear(input_dim, first_width),
            nn.BatchNorm1d(first_width),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(first_width, second_width),
            nn.BatchNorm1d(second_width),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(second_width, output_dim),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the per-label scores."""
        scores = self.net(x)
        return scores


Metrics and training/evaluation loops

In [12]:
def _mean_average_precision(y_true, y_score):
    """Average the per-label average precision over labels that have positives.

    Average precision is undefined for a label with no positive sample, so
    such labels are left out of the mean. Returns 0.0 when no label has any
    positive sample.
    """
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)
    if y_true.ndim == 1:
        # Treat a flat vector as a single label column.
        y_true = y_true.reshape(-1, 1)
        y_score = y_score.reshape(-1, 1)

    per_label_ap = [
        average_precision_score(y_true[:, j], y_score[:, j])
        for j in range(y_true.shape[1])
        if y_true[:, j].any()
    ]
    return float(np.mean(per_label_ap)) if per_label_ap else 0.0


def calculate_metrics(y_true, y_pred, threshold=0.5):
    """Compute multi-label classification metrics from predicted scores.

    Args:
        y_true: Ground-truth 0/1 indicators; callers in this notebook pass an
            array with one row per sample and one column per label.
        y_pred: Predicted scores with the same layout as ``y_true``.
        threshold: Scores greater than or equal to this value count as a
            positive prediction.

    Returns:
        Tuple ``(f1, recall, precision, accuracy, mAP, y_pred_bin)`` where
        ``f1``/``recall``/``precision`` are macro-averaged over labels at
        ``threshold`` (undefined ratios count as 0), ``accuracy`` is
        ``accuracy_score`` on the binarised predictions, ``mAP`` is the mean
        of the per-label average precision computed from the raw scores
        (threshold-independent), and ``y_pred_bin`` is the integer 0/1
        prediction array.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_pred_bin = (y_pred >= threshold).astype(int)

    f1 = f1_score(y_true, y_pred_bin, average="macro", zero_division=0)
    recall = recall_score(y_true, y_pred_bin, average="macro", zero_division=0)
    precision = precision_score(y_true, y_pred_bin, average="macro", zero_division=0)
    accuracy = accuracy_score(y_true, y_pred_bin)

    # Mean average precision needs the ranking given by the continuous
    # scores, not the thresholded predictions.
    mAP = _mean_average_precision(y_true, y_pred)

    return f1, recall, precision, accuracy, mAP, y_pred_bin

In [16]:
def train_epoch(model, dataloader, criterion, optimizer):
    """Train ``model`` for one pass over ``dataloader`` and report metrics.

    For every batch the model output is passed through a sigmoid, the loss is
    computed on those probabilities, and one optimizer step is taken. Targets
    and probabilities of all batches are collected and scored with
    ``calculate_metrics`` at its default threshold.

    Args:
        model: Module mapping a feature batch to one raw score per label.
        dataloader: Iterable yielding ``(features, targets)`` tensor batches.
        criterion: Loss called as ``criterion(probabilities, targets)``.
            NOTE(review): because the sigmoid is applied before the loss, this
            must be a loss that expects probabilities (e.g. ``nn.BCELoss``),
            not logits -- confirm where the criterion is created.
        optimizer: Optimizer holding the parameters of ``model``.

    Returns:
        Tuple ``(epoch_loss, f1, recall, precision, accuracy, mAP)``;
        ``epoch_loss`` is the sample-weighted mean of the batch losses.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    all_targets = []
    all_preds = []
    for x, y in tqdm(dataloader, desc="Training", leave=False):
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        raw_outputs = model(x)
        outputs = torch.sigmoid(raw_outputs)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # Weight each batch loss by its size (assumes the criterion returns a
        # per-batch mean) so a smaller last batch is not over-counted.
        batch_size = x.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
        all_targets.append(y.detach().cpu().numpy())
        all_preds.append(outputs.detach().cpu().numpy())

    if samples_seen == 0:
        raise ValueError("train_epoch: dataloader yielded no samples")

    # Normalise by the samples actually processed; this differs from
    # len(dataloader.dataset) when the loader drops or subsamples data.
    epoch_loss = running_loss / samples_seen
    all_targets = np.vstack(all_targets)
    all_preds = np.vstack(all_preds)
    f1, recall, precision, accuracy, mAP, _ = calculate_metrics(all_targets, all_preds)

    return epoch_loss, f1, recall, precision, accuracy, mAP

In [17]:
def test_epoch(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    Runs in eval mode with gradients disabled, applies a sigmoid to the model
    output, accumulates the loss, scores the collected predictions with
    ``calculate_metrics`` at its default threshold, and prints a per-label
    classification report labelled with ``TARGET_COLS``.

    Args:
        model: Module mapping a feature batch to one raw score per label.
        dataloader: Iterable yielding ``(features, targets)`` tensor batches.
        criterion: Loss called as ``criterion(probabilities, targets)``.
            NOTE(review): because the sigmoid is applied before the loss, this
            must be a loss that expects probabilities (e.g. ``nn.BCELoss``),
            not logits -- confirm where the criterion is created.

    Returns:
        Tuple ``(epoch_loss, f1, recall, precision, accuracy, mAP)``;
        ``epoch_loss`` is the sample-weighted mean of the batch losses.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    samples_seen = 0
    all_targets = []
    all_preds = []
    with torch.no_grad():
        for x, y in tqdm(dataloader, desc="Testing", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            raw_outputs = model(x)
            outputs = torch.sigmoid(raw_outputs)

            loss = criterion(outputs, y)
            # Weight each batch loss by its size (assumes the criterion
            # returns a per-batch mean).
            batch_size = x.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

            all_targets.append(y.detach().cpu().numpy())
            all_preds.append(outputs.detach().cpu().numpy())

    if samples_seen == 0:
        raise ValueError("test_epoch: dataloader yielded no samples")

    # Normalise by the samples actually processed; this differs from
    # len(dataloader.dataset) when the loader drops or subsamples data.
    epoch_loss = running_loss / samples_seen
    all_targets = np.vstack(all_targets)
    all_preds = np.vstack(all_preds)
    f1, recall, precision, accuracy, mAP, y_pred_bin = calculate_metrics(
        all_targets, all_preds
    )

    # The report expects integer indicator matrices of identical shape.
    true_bin = all_targets.astype(int)
    pred_bin = y_pred_bin.astype(int)

    assert true_bin.shape == pred_bin.shape
    print("\n=== Classification Report (Test) ===")
    print(
        classification_report(
            true_bin, pred_bin, target_names=TARGET_COLS, zero_division=0
        )
    )

    return epoch_loss, f1, recall, precision, accuracy, mAP