In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset, random_split
import pandas as pd


data = pd.read_csv('data/EPC_Catalonia.csv')

  data = pd.read_csv('data/EPC_Catalonia.csv')


In [2]:
df = data.copy()
# Define target variable (Energy Consumption)
target = "Consum d'energia final"

# Extract date features
df["DATA_ENTRADA"] = pd.to_datetime(df["DATA_ENTRADA"], errors="coerce")
df["YEAR"] = df["DATA_ENTRADA"].dt.year
df["MONTH"] = df["DATA_ENTRADA"].dt.month

# Drop original date column
df.drop(columns=["DATA_ENTRADA"], inplace=True)

# Numerical features
numeric_features = [
    "METRES_CADASTRE", "ANY_CONSTRUCCIO", "Energia primària no renovable", "Emissions de CO2",
    "Energia calefacció", "Energia refrigeració", "Energia ACS", "Energia enllumenament",
    "Energia calefacció demanda", "Energia refrigeració demanda", "VALOR AILLAMENTS",
    "VALOR FINESTRES", "YEAR", "MONTH", "Cost anual aproximat d'energia per habitatge"
]

# Categorical features (to encode)
categorical_features = [
    "POBLACIO", "COMARCA", "NOM_PROVINCIA", "CODI_POBLACIO", "CODI_COMARCA", "CODI_PROVINCIA",
    "ZONA CLIMATICA", "US_EDIFICI", "VEHICLE ELECTRIC", "SOLAR TERMICA", "SOLAR FOTOVOLTAICA",
    "SISTEMA BIOMASSA", "XARXA DISTRICTE", "ENERGIA GEOTERMICA", "REHABILITACIO_ENERGETICA",
    "Qualificació de consum d'energia primaria no renovable", "Qualificacio d'emissions de CO2"
]

# Convert binary categorical features to numeric (Yes/No -> 1/0)
binary_features = ["VEHICLE ELECTRIC", "SOLAR TERMICA", "SOLAR FOTOVOLTAICA", 
                   "SISTEMA BIOMASSA", "XARXA DISTRICTE", "ENERGIA GEOTERMICA", 
                   "REHABILITACIO_ENERGETICA"]

for col in binary_features:
    df[col] = df[col].str.lower().map({'si': 1, 'no': 0}) 

  df["DATA_ENTRADA"] = pd.to_datetime(df["DATA_ENTRADA"], errors="coerce")


In [6]:
# Encode categorical features
label_encoders = {}
for cat in categorical_features:
    le = LabelEncoder()
    df[cat] = le.fit_transform(df[cat])
    label_encoders[cat] = le  # Save for later
df = df[numeric_features + categorical_features + [target]].dropna()
# Normalize numerical features
scaler = StandardScaler()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

X = df.drop(columns=[target]).values
y = df[target].values.reshape(-1,1)

In [25]:
# Convert to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# Create dataset
dataset = TensorDataset(X_tensor, y_tensor)

# Split into train and test (80-20)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [40]:
class EnergyNN(nn.Module):
    def __init__(self, input_dim):
        super(EnergyNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.relu(self.fc3(x))
        x = self.output(x)
        return x


# Initialize model
input_dim = X.shape[1]
model = EnergyNN(input_dim)

# Define loss & optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# -------------- TRAINING LOOP --------------
num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    if epoch % 10 == 0:
        print(f"Epoch [{epoch}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

# -------------- EVALUATION --------------
model.eval()
predictions = []
actuals = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        preds = model(X_batch)
        predictions.append(preds.numpy())
        actuals.append(y_batch.numpy())


Epoch [0/200], Loss: 13549.2870
Epoch [10/200], Loss: 11172.6763
Epoch [20/200], Loss: 13958.7698
Epoch [30/200], Loss: 9758.9553
Epoch [40/200], Loss: 10449.9565
Epoch [50/200], Loss: 8091.6381
Epoch [60/200], Loss: 8764.5358
Epoch [70/200], Loss: 5834.7030
Epoch [80/200], Loss: 5567.7651
Epoch [90/200], Loss: 6224.0936
Epoch [100/200], Loss: 5620.0012
Epoch [110/200], Loss: 4551.8710
Epoch [120/200], Loss: 6315.9488
Epoch [130/200], Loss: 5243.3092
Epoch [140/200], Loss: 7805.0443
Epoch [150/200], Loss: 4805.5783
Epoch [160/200], Loss: 6461.4075
Epoch [170/200], Loss: 13927.9698
Epoch [180/200], Loss: 4758.1559
Epoch [190/200], Loss: 4624.1373


In [41]:
import numpy as np
predictions = np.vstack(predictions)
actuals = np.vstack(actuals)

# Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
print(f"\nNeural Network Metrics:")
print(f"MAE: {mean_absolute_error(actuals, predictions):.2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(actuals, predictions)):.2f}")
print(f"R² Score: {r2_score(actuals, predictions):.2f}")


Neural Network Metrics:
MAE: 36.65
RMSE: 48.18
R² Score: 0.65
