# Train a classic NN and use its point estimates as a warm start for the MCMC

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import sys
sys.path.append('../')
from experiments.fcn_bnns.utils.analysis_utils import *

In [None]:
# Experiment description handed to load_data (a helper from the project's
# analysis utilities) to select the dataset.
exp_info = dict(data="airfoil.data", replications=1)

# Fetch the training and validation splits from the shared data directory.
X_train, Y_train = load_data(exp_info, splittype="train", data_path="../data/")
X_val, Y_val = load_data(exp_info, splittype="val", data_path="../data/")

# Drop singleton dimensions from the targets.
Y_train, Y_val = Y_train.squeeze(), Y_val.squeeze()


In [None]:
from tqdm import tqdm
# Define the fully connected regression model
class RegressionModel(nn.Module):
    """Fully connected regression network with two ReLU hidden layers.

    The network maps ``in_features`` inputs through two hidden layers of
    ``hidden_features`` units each to ``out_features`` outputs. With the
    default of two outputs, one column is the predicted value and the other
    is the log-variance of a Gaussian.

    Args:
        in_features: Number of input features (default 5).
        hidden_features: Width of both hidden layers (default 16).
        out_features: Number of output units (default 2).
    """

    def __init__(
        self,
        in_features: int = 5,
        hidden_features: int = 16,
        out_features: int = 2,
    ) -> None:
        super().__init__()
        # The attribute names fc1/fc2/fc3 are read when the trained weights
        # are exported, so they are kept stable.
        self.fc1 = nn.Linear(in_features=in_features, out_features=hidden_features)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_features, out_features=hidden_features)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(in_features=hidden_features, out_features=out_features)

    def forward(self, x):
        """Return the raw (un-activated) output of the last linear layer."""
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        # No activation on the output: one column is the prediction, the
        # other the log-variance of a Gaussian.
        return self.fc3(x)

# Instantiate the model, loss function, and optimizer
model = RegressionModel()
# GaussianNLLLoss is called as criterion(input, target, var): the third
# argument is a variance, not a standard deviation.
criterion = nn.GaussianNLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Convert the training data to PyTorch tensors in the model's parameter dtype.
# The dtype returned by load_data is not visible here; the cast is a no-op
# when it already matches and avoids a dtype-mismatch error otherwise.
param_dtype = next(model.parameters()).dtype
X_train = torch.from_numpy(np.array(X_train)).to(param_dtype)
y_train = torch.from_numpy(np.array(Y_train)).to(param_dtype)

# Training loop (full-batch gradient steps)
num_epochs = 10000
with tqdm(total=num_epochs, desc="Training Progress") as pbar:
    for epoch in range(num_epochs):
        # Forward pass: column 0 is the mean, column 1 the log-variance,
        # so exponentiating column 1 yields a strictly positive variance.
        outputs = model(X_train)
        mean_pred = outputs[:, 0]
        var_pred = torch.exp(outputs[:, 1])
        loss = criterion(mean_pred, y_train, var_pred)

        # Stop on NaN/inf or on an implausibly negative loss.
        if not torch.isfinite(loss).all() or loss.item() < -1e6:
            print("Loss exploded, breaking")
            break

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.update(1)
        pbar.set_postfix_str(f"Loss: {loss.item():.4f}")

# Save the final weights with custom names as NumPy arrays.
# detach() drops the autograd link; copy() makes the arrays a snapshot that
# no longer shares memory with the live model parameters.
final_weights = {}
for layer_idx, layer in enumerate((model.fc1, model.fc2, model.fc3), start=1):
    final_weights[f"W{layer_idx}"] = layer.weight.detach().numpy().copy()
    final_weights[f"b{layer_idx}"] = layer.bias.detach().numpy().copy()

# print shapes of the weights
for key, value in final_weights.items():
    print(key, value.shape)


In [None]:
# predict on the validation set
# Cast to the model's parameter dtype (a no-op when it already matches) so
# the forward pass cannot fail on a dtype mismatch.
param_dtype = next(model.parameters()).dtype
X_val = torch.from_numpy(np.array(X_val)).to(param_dtype)
y_val = torch.from_numpy(np.array(Y_val)).to(param_dtype)

# Evaluation needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    outputs = model(X_val)
mean_pred = outputs[:, 0]
print(mean_pred[:10])
print(outputs[:10, 1])  # second output column (log-variance)
print(y_val[:10])

rmse = torch.sqrt(torch.mean((y_val - mean_pred) ** 2))
print(f"RMSE: {rmse.item():.4f}")

# rmse of constant 0 predictor, as a trivial baseline
rmse_0 = torch.sqrt(torch.mean(y_val ** 2))
print(f"RMSE_0: {rmse_0.item():.4f}")

# train a linear regression model as a second baseline
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(X_train, y_train)
rmse_lin = np.sqrt(np.mean((np.array(y_val) - reg.predict(X_val)) ** 2))
print(f"RMSE_lin: {rmse_lin:.4f}")

In [None]:
# save the final weights dict
# os is imported here explicitly: the notebook's visible import cell does not
# import it by name.
import os

# transpose all weight matrices (nn.Linear stores them as
# (out_features, in_features); after .T they are (in_features, out_features))
# NOTE(review): the transpose is applied to final_weights in place, so running
# this cell a second time flips the matrices back - re-run the training cell
# first if this cell has to be repeated.
for key in final_weights:
    if key.startswith('W'):
        final_weights[key] = final_weights[key].T
    print(key, final_weights[key].shape)

# make dir results/de/airfoil.data|16-16|relu|1
exp_name = "airfoil.data|16-16|relu|1"
out_dir = f"../results/de/{exp_name}"
# exist_ok=True creates the directory if needed without a separate
# existence check.
os.makedirs(out_dir, exist_ok=True)
np.savez(f"{out_dir}/{exp_name}|0.npz", **final_weights)


In [None]:
# NOTE(review): this path ("airfoil_data|...|1.npz", directly under
# results/de) is not the file written by the save cell
# ("../results/de/{exp_name}/{exp_name}|0.npz" with exp_name
# "airfoil.data|16-16|relu|1") - confirm which archive is meant.
with np.load("../results/de/airfoil_data|16-16|relu|1.npz") as archive:
    # convert to dict: this reads every array out of the archive, so the data
    # stays usable after the file handle is closed by the with-block
    weight_dict = dict(archive)
print(type(weight_dict))
weight_dict["W2"].shape
