This is done as a notebook, which works best with Databricks. Otherwise, it would be split into two Python files: ``nn_model.py``, with the model class definition, and ``train_nn_model.py``, where the data is loaded and the model is trained.

# nn_model.py

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import mlflow.pyfunc
import numpy as np
import pandas as pd


# Neural network class
class Net(nn.Module):
    """Feed-forward regressor fed with a country embedding and a year value.

    Each input row holds a country index and a year. The country index is
    looked up in a learned embedding table, concatenated with the year, and
    pushed through a stack of ``Linear -> ReLU [-> Dropout]`` layers that ends
    in a single-unit linear output.

    Args:
        hidden_sizes: width of each hidden layer, in order.
        dropout: dropout probability added after every ReLU; no dropout
            layers are created when it is not greater than 0.
        country_idx: column of the input tensor holding the country index.
        year_idx: column of the input tensor holding the year.
        n_countries: number of rows of the country embedding table (required).
        embed_dim: size of each country embedding vector.

    Raises:
        TypeError: if ``n_countries`` is left as ``None``.
    """

    def __init__(self,
                 hidden_sizes=(32, 32),
                 dropout=0.2,
                 country_idx=0,
                 year_idx=1,
                 n_countries=None,
                 embed_dim=4):
        super().__init__()

        # Fail early with a readable message instead of letting nn.Embedding
        # choke on None while allocating its weight tensor.
        if n_countries is None:
            raise TypeError(
                "n_countries is required: it sets the number of rows of the country embedding"
            )

        # Columns where year and country are
        self.year_idx, self.country_idx = year_idx, country_idx

        # Embeddings for the countries (created before the linear layers so
        # seeded initialisation order stays stable)
        self.country_embed = nn.Embedding(n_countries, embed_dim)

        # Layer setup: the first layer sees the embedding plus the year value
        widths = [embed_dim + 1, *hidden_sizes]
        layers = []
        for n_in, n_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(widths[-1], 1))
        self.fc_layers = nn.Sequential(*layers)

    def forward(self, x):
        """
            Predicts the TOE_HAB/MTOE given the country and year.
            x: 2-D tensor whose column ``country_idx`` holds the encoded
               country (cast to long for the embedding lookup) and whose
               column ``year_idx`` holds the standardized year.
            output: tensor of shape (rows, 1) with the predicted
               standardized TOE_HAB/MTOE
        """
        year = x[:, self.year_idx].reshape(-1, 1)
        country = self.country_embed(x[:, self.country_idx].long())
        # Year first, then the embedding: the order the first Linear was sized for
        return self.fc_layers(torch.cat([year, country], dim=1))


# Neural network model class
class NNModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around ``Net`` with built-in standardization.

    Owns the network, the scalers used to standardize the year input and the
    target, and the optimizer/loss used by ``fit``. ``predict`` applies the
    fitted scalers so callers work with raw years and targets in their
    original units.

    Args:
        hidden_sizes: width of each hidden layer of the network.
        lr: learning rate of the Adam optimizer.
        dropout: dropout probability passed to the network.
        n_countries: number of distinct encoded countries.
        embed_dim: size of the country embedding vectors.
    """

    def __init__(self,
                 hidden_sizes=(32, 32),
                 lr=0.01,
                 dropout=0.2,
                 n_countries=37,
                 embed_dim=4):

        # Column layout of the tensors handed to the network; passed to Net
        # explicitly so both sides cannot drift apart.
        self.country_idx, self.year_idx = 0, 1

        # Custom neural network
        self.net = Net(hidden_sizes=hidden_sizes,
                       dropout=dropout,
                       country_idx=self.country_idx,
                       year_idx=self.year_idx,
                       n_countries=n_countries,
                       embed_dim=embed_dim)

        # Scalers for standardizing the input and output values
        self.year_scaler, self.output_scaler = StandardScaler(), StandardScaler()

        # Optimizer and loss function for training
        self.optimizer = optim.Adam(self.net.parameters(), lr=lr)
        self.mse = nn.MSELoss()

    def _encode_features(self, X):
        """ Builds the network input from a DataFrame using the fitted year scaler.
            X: pd.DataFrame with "country_encoded" and "year" columns.
            output: float32 torch.Tensor of shape (rows, 2).
        """
        year = self.year_scaler.transform(X["year"].to_numpy().reshape(-1, 1))
        # Columns are picked by name and placed at the indices the network
        # reads, so DataFrame column order and extra columns do not matter.
        features = np.empty((len(X), 2), dtype=np.float32)
        features[:, self.country_idx] = X["country_encoded"].to_numpy()
        features[:, self.year_idx] = year.ravel()
        return torch.tensor(features, dtype=torch.float32)

    def _prepare_training_data(self, X, y):
        """ Fits both scalers and prepares the training tensors.
            X: pd.DataFrame[country_encoded (int), year (int)].
            y: pd.DataFrame[TOE_HAB/MTOE (float)].
            output: torch.Tensor(X), torch.Tensor(y), both float32 and standardized
                    (the country column of X is left as the encoded index).
            Neither input DataFrame is modified.
        """
        assert isinstance(X, pd.DataFrame) and isinstance(y, pd.DataFrame), "Train data must be DataFrames"
        assert "country_encoded" in X.columns,  "X DataFrame must contain country_encoded column"
        assert "year" in X.columns, "X DataFrame must contain year column"
        self.year_scaler.fit(X["year"].to_numpy().reshape(-1, 1))
        self.output_scaler.fit(y.to_numpy())
        X_tensor = self._encode_features(X)
        y_tensor = torch.tensor(self.output_scaler.transform(y.to_numpy()), dtype=torch.float32)
        return X_tensor, y_tensor

    def fit(self, X, y, epochs=100, test_frac=0, mlflow_run=False, verbose=False):
        """
            Trains the neural network with full-batch gradient steps.
            X: pd.DataFrame[country_encoded (int), year (int)].
            y: pd.DataFrame[TOE_HAB/MTOE (float)].
            epochs: number of optimizer steps.
            test_frac: fraction of the last rows held out for a test MSE;
                       values <= 0, or fractions that round down to zero rows,
                       disable the hold-out and train on everything.
            mlflow_run: log the per-epoch losses to the active MLflow run.
            verbose: print the losses every 10 epochs.
            Raises ValueError if the hold-out would leave no training rows.
            The network is left in eval mode when training ends.
        """
        X, y = self._prepare_training_data(X, y)

        # A fraction that rounds to zero rows must not split: X[:-0] would be
        # an EMPTY training set, not the full one.
        n_test = int(test_frac * len(X)) if test_frac > 0 else 0
        if n_test >= len(X) and n_test > 0:
            raise ValueError("test_frac leaves no samples to train on")
        has_test = n_test > 0

        if has_test:
            X_train, X_test = X[:-n_test], X[-n_test:]
            y_train, y_test = y[:-n_test], y[-n_test:]
        else:
            X_train, y_train = X, y

        # Train loop
        for epoch in range(epochs):
            # Re-enter train mode every epoch: the hold-out evaluation below
            # (and a previous fit call) leave the network in eval mode.
            self.net.train()
            self.optimizer.zero_grad()
            y_hat = self.net(X_train)
            loss = self.mse(y_hat, y_train)
            loss.backward()
            self.optimizer.step()

            train_loss = loss.item()

            # If there is test data, compute test MSE
            if has_test:
                # Evaluate without dropout and without building a graph
                self.net.eval()
                with torch.no_grad():
                    val_loss = self.mse(self.net(X_test), y_test).item()
                if mlflow_run:
                    mlflow.log_metrics({"train_mse": train_loss, "test_mse": val_loss}, step=epoch+1)
                if (epoch+1) % 10 == 0 and verbose:
                    print(f"Epoch {epoch+1}, Train loss: {train_loss:.4f}, Test loss: {val_loss:.4f}")
            else:
                if mlflow_run:
                    mlflow.log_metric("train_mse", train_loss, step=epoch+1)
                if (epoch+1) % 10 == 0 and verbose:
                    print(f"Epoch {epoch+1}, Train loss: {train_loss:.4f}")

        self.net.eval()

    def predict(self, context, model_input, params=None):
        """
            Predicts the target in its original units (pyfunc entry point).
            context: MLflow model context (unused).
            model_input: pd.DataFrame with "country_encoded" and raw "year" columns.
            params: optional inference parameters (unused).
            output: np.ndarray of shape (rows, 1), de-standardized with the
                    scaler fitted on the training target.
            Requires fit to have been called so both scalers are fitted.
        """
        if not isinstance(model_input, pd.DataFrame):
            raise TypeError("model_input must be a DataFrame with country_encoded and year columns")
        features = self._encode_features(model_input)
        self.net.eval()
        with torch.no_grad():
            scaled = self.net(features)
        return self.output_scaler.inverse_transform(scaled.numpy())

# train_nn_model.py

In [0]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler, LabelEncoder
import mlflow.pyfunc
import mlflow
import torch
import torch.nn as nn
import torch.optim as optim
import joblib
import random

# Seed the libraries for reproducibility
# (sklearn.utils.shuffle below draws from the seeded global NumPy state)
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Load table
# NOTE: `spark` is not defined in this notebook; it is expected to be
# provided by the notebook runtime.
print("Loading table")
df = spark.read.table("energy_clean")
df = df.toPandas()

# Feature engineering: map each country name to an integer index
print("\nApplying feature engineering")
country_encoder = LabelEncoder()
df.insert(1, "country_encoded", country_encoder.fit_transform(df["country"]), allow_duplicates=True)

# Prepare training data
print("\nPreparing training data")
df = sklearn.utils.shuffle(df)
target_col = ["TOE_HAB"]
feature_cols = ["country_encoded", "year"]
X_train = df[feature_cols].astype("float32")
y_train = df[target_col].astype("float32")
# Take the count from the encoder so it matches the embedding indices by construction
n_countries = len(country_encoder.classes_)
print(f"Data shapes: X_train: {X_train.shape}, y_train: {y_train.shape}")

with mlflow.start_run(run_name="train_nn"):
    print("\nTraining neural network")

    # Initialize model
    params = {
        "hidden_sizes": (32, 32),
        "lr": 0.05,
        "dropout": 0.0,
        "fit_epochs": 100,
        "n_countries": n_countries,
        "embed_dim": 4
    }
    model = NNModel(hidden_sizes=params["hidden_sizes"],
                    lr=params["lr"],
                    dropout=params["dropout"],
                    n_countries=params["n_countries"],
                    embed_dim=params["embed_dim"])

    # Train model (the last 5% of the shuffled rows are held out for the test MSE)
    model.fit(X_train, y_train, epochs=params["fit_epochs"], test_frac=0.05, mlflow_run=True, verbose=True)

    # Log with MLFlow and save model
    print("\nSaving neural network model")
    mlflow.log_params(params)

    # The pyfunc wrapper gets its own artifact path: it used to share
    # "neural_network" with the PyTorch flavour logged below, so the two
    # models collided. "neural_network" stays the PyTorch model.
    example_row = X_train.iloc[0:1]
    mlflow.pyfunc.log_model("neural_network_pyfunc",
                            python_model=model,
                            input_example=example_row,
                            )

    # Local copies of every piece needed to rebuild the pipeline by hand
    joblib.dump(country_encoder, "country_encoder.pkl")
    joblib.dump(model.year_scaler, "year_scaler.pkl")
    joblib.dump(model.output_scaler, "output_scaler.pkl")
    torch.save(model.net.state_dict(), "neural_network.pt")
    joblib.dump(params, "parameters.pkl")

    # The raw network consumes [country index, standardized year], so its
    # input example is a real training row with the year scaled, rather
    # than a hard-coded country index and an unscaled year.
    net_input_example = np.column_stack([
        example_row["country_encoded"].to_numpy(),
        model.year_scaler.transform(example_row["year"].to_numpy().reshape(-1, 1)).ravel(),
    ]).astype("float32")
    mlflow.pytorch.log_model(model.net,
                             artifact_path="neural_network",
                             input_example=net_input_example)

    for artifact_file in ("country_encoder.pkl",
                          "year_scaler.pkl",
                          "output_scaler.pkl",
                          "neural_network.pt",
                          "parameters.pkl"):
        mlflow.log_artifact(artifact_file)

Loading table

Applying feature engineering

Preparing training data
Data shapes: X_train: (813, 2), y_train: (813, 1)

Training neural network
Epoch 10, Train loss: 0.5638, Test loss: 0.3893
Epoch 20, Train loss: 0.0880, Test loss: 0.0593
Epoch 30, Train loss: 0.0272, Test loss: 0.0223
Epoch 40, Train loss: 0.0112, Test loss: 0.0131
Epoch 50, Train loss: 0.0071, Test loss: 0.0092
Epoch 60, Train loss: 0.0055, Test loss: 0.0069
Epoch 70, Train loss: 0.0045, Test loss: 0.0061
Epoch 80, Train loss: 0.0040, Test loss: 0.0059
Epoch 90, Train loss: 0.0037, Test loss: 0.0051


2025/09/30 18:30:46 INFO mlflow.pyfunc: Inferring model signature from input example


Epoch 100, Train loss: 0.0034, Test loss: 0.0054

Saving neural network model
