## Imports

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

## Loading Data

In [8]:
background = np.load("background.npz")
background_wash = pd.DataFrame()
background_lou = pd.DataFrame()
def normalize(data):
    stds = np.std(data, axis=-1, keepdims=True)
    return data / stds
for key in background:
    background_wash = background[key][:, 0, :]
    background_wash = torch.tensor(normalize(background_wash), dtype=torch.float32)
    background_lou = background[key][:, 1, :]
    background_lou = torch.tensor(normalize(background_lou), dtype=torch.float32)

bbh_for_challenge = np.load("bbh_for_challenge.npy")
bbh_wash = bbh_for_challenge[:, 0, :]
bbh_wash = torch.tensor(normalize(bbh_wash), dtype=torch.float32)
bbh_lou = bbh_for_challenge[:, 1, :]
bbh_lou = torch.tensor(normalize(bbh_lou), dtype=torch.float32)

sglf_for_challenge = np.load("sglf_for_challenge.npy")
sglf_wash = sglf_for_challenge[:, 0, :]
sglf_wash = torch.tensor(normalize(sglf_wash), dtype=torch.float32)
sglf_lou = sglf_for_challenge[:, 1, :]
sglf_lou = torch.tensor(normalize(sglf_lou), dtype=torch.float32)

https://arxiv.org/pdf/2106.02770 

For my current lab project i'm utilizing this neural process model used in spatiotemporal ML/climate visualizations, which also has a (latent) encoder and a decoder. 

## AutoEncoder

In [9]:
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 4), 
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(4, input_dim),  
            nn.Sigmoid() 
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

## Energy-Based Model

In [10]:
class EnergyModel(nn.Module):
    def __init__(self, input_dim):
        super(EnergyModel, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        return self.network(x)

## Running the Models

In [None]:
train_wash, test_wash = random_split(background_wash, [80000, 20000])
train_lou, test_lou = random_split(background_wash, [80000, 20000])
batch_size = 128
train_loader_wash = DataLoader(train_wash, batch_size=batch_size, shuffle=True)
test_loader_wash = DataLoader(test_wash, batch_size=batch_size, shuffle=False)

train_loader_lou = DataLoader(train_lou, batch_size=batch_size, shuffle=True)
test_loader_lou = DataLoader(test_lou, batch_size=batch_size, shuffle=False)

def train_model(model, train_loader, criterion, optimizer, epochs=20):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in train_loader:
            inputs = batch[0]
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

input_dim = background_wash.shape[1]
autoencoder_wash = Autoencoder(input_dim)
criterion_ae = nn.MSELoss()
optimizer_ae_wash = optim.Adam(autoencoder_wash.parameters(), lr=0.001)

print("\nTraining Autoencoder for Wash:")
train_model(autoencoder_wash, train_loader_wash, criterion_ae, optimizer_ae_wash, epochs=20)

autoencoder_lou = Autoencoder(input_dim)
optimizer_ae_lou = optim.Adam(autoencoder_lou.parameters(), lr=0.001)

print("\nTraining Autoencoder for Lou:")
train_model(autoencoder_lou, train_loader_lou, criterion_ae, optimizer_ae_lou, epochs=20)

energy_model_wash = EnergyModel(input_dim)
criterion_energy = nn.MSELoss()
optimizer_energy_wash = optim.Adam(energy_model_wash.parameters(), lr=0.001)

print("\nTraining Energy-Based Model for Wash:")
train_model(energy_model_wash, train_loader_wash, criterion_energy, optimizer_energy_wash, epochs=20)

# 4. Energy-Based Model for lou
energy_model_lou = EnergyModel(input_dim)
optimizer_energy_lou = optim.Adam(energy_model_lou.parameters(), lr=0.001)

print("\nTraining Energy-Based Model for Lou:")
train_model(energy_model_lou, train_loader_lou, criterion_energy, optimizer_energy_lou, epochs=20)

def evaluate_model(model, test_loader):
    model.eval()
    results = []
    with torch.no_grad():
        # If test_loader is a tensor, convert it to a DataLoader
        if isinstance(test_loader, torch.Tensor):
            test_loader = DataLoader(test_loader, batch_size=128, shuffle=False)
        
        for batch in test_loader:
            # Ensure inputs have the right shape
            inputs = batch if isinstance(batch, torch.Tensor) else batch[0]
            if isinstance(model, Autoencoder):
                reconstructed = model(inputs)
                error = torch.mean((reconstructed - inputs) ** 2, dim=1)
                results.extend(error.numpy())
            else:
                energy = model(inputs).squeeze()
                results.extend(energy.numpy())
    return results

datasets = [
    ("Background Wash", background_wash, autoencoder_wash, energy_model_wash),
    ("Background Lou", background_lou, autoencoder_lou, energy_model_lou),
    ("BBH Wash", bbh_wash, autoencoder_wash, energy_model_wash),
    ("BBH Lou", bbh_lou, autoencoder_lou, energy_model_lou),
    ("SGLF Wash", sglf_wash, autoencoder_wash, energy_model_wash),
    ("SGLF Lou", sglf_lou, autoencoder_lou, energy_model_lou),
]

for name, data, autoencoder, energy_model in datasets:
    print(f"\nEvaluating {name} with Autoencoder:")
    reconstruction_errors = evaluate_model(autoencoder, data)
    print(f"Reconstruction Error - Mean: {np.mean(reconstruction_errors):.4f}, Std: {np.std(reconstruction_errors):.4f}")

    print(f"Evaluating {name} with Energy-Based Model:")
    energy_scores = evaluate_model(energy_model, data)
    print(f"Energy Scores - Mean: {np.mean(energy_scores):.4f}, Std: {np.std(energy_scores):.4f}")


Training Autoencoder for Wash:
Epoch 1/20, Loss: 1.2019
Epoch 2/20, Loss: 1.0701
Epoch 3/20, Loss: 1.0170
Epoch 4/20, Loss: 1.0021
Epoch 5/20, Loss: 0.9973
Epoch 6/20, Loss: 0.9952
Epoch 7/20, Loss: 0.9936
Epoch 8/20, Loss: 0.9932
Epoch 9/20, Loss: 0.9919
Epoch 10/20, Loss: 0.9912
Epoch 11/20, Loss: 0.9911
Epoch 12/20, Loss: 0.9909
Epoch 13/20, Loss: 0.9909
Epoch 14/20, Loss: 0.9907
Epoch 15/20, Loss: 0.9905
Epoch 16/20, Loss: 0.9897
Epoch 17/20, Loss: 0.9913
Epoch 18/20, Loss: 0.9899
Epoch 19/20, Loss: 0.9884
Epoch 20/20, Loss: 0.9890

Training Autoencoder for Lou:
Epoch 1/20, Loss: 1.2031
Epoch 2/20, Loss: 1.0800
Epoch 3/20, Loss: 1.0201
Epoch 4/20, Loss: 1.0028
Epoch 5/20, Loss: 0.9973
Epoch 6/20, Loss: 0.9946
Epoch 7/20, Loss: 0.9939
Epoch 8/20, Loss: 0.9932
Epoch 9/20, Loss: 0.9918
Epoch 10/20, Loss: 0.9915
Epoch 11/20, Loss: 0.9914
Epoch 12/20, Loss: 0.9903
Epoch 13/20, Loss: 0.9913
Epoch 14/20, Loss: 0.9911
Epoch 15/20, Loss: 0.9903
Epoch 16/20, Loss: 0.9885
Epoch 17/20, Loss: 