In [None]:
### Train autoencoders: 2 series -> 2D latent space, 3 series -> 3D latent space, etc.
### Update cell 3 to match.

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from itertools import combinations
import os

# Choose the compute device: CUDA when it is available, otherwise CPU.
# The GPU count is kept so later cells can decide whether to use nn.DataParallel.
num_gpus = torch.cuda.device_count()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using {num_gpus} GPU(s)")

# Open the simulation archive and list the series it contains.
npz_path = "/Data/MI_PHYSICS/Double-Pendulum-Simulation/test1/pendulum_data360.npz"
data = np.load(npz_path)
keys = [name for name in data.keys()]
print("Keys in the .npz file:", keys)


Using 2 GPU(s)
Keys in the .npz file: ['Time', 'Theta1', 'Theta2', 'X1', 'Y1', 'X2', 'Y2', 'Energy']


In [12]:
import torch
import torch.nn as nn

class AutoencoderDynamic(nn.Module):
    """Fully connected autoencoder whose bottleneck width follows the number of series.

    The encoder maps ``input_dim -> 128 -> 64 -> latent_dim`` and the decoder
    mirrors it back to ``input_dim``, with ReLU between the linear layers and
    no activation on either output layer.

    Args:
        input_dim: Number of features per sample (width of the input tensor).
        num_series: Number of series combined into the input. Used as the
            latent width when ``latent_dim`` is not given.
        latent_dim: Optional explicit width of the latent space. Defaults to
            ``num_series``, which reproduces the original behaviour.

    Raises:
        ValueError: If ``input_dim`` or the resolved latent width is below 1.
    """

    def __init__(self, input_dim, num_series, latent_dim=None):
        super().__init__()

        # By default the latent space has one dimension per input series.
        if latent_dim is None:
            latent_dim = num_series
        if input_dim < 1:
            raise ValueError(f"input_dim must be >= 1, got {input_dim}")
        if latent_dim < 1:
            raise ValueError(f"latent_dim must be >= 1, got {latent_dim}")

        # Plain attributes (not parameters/buffers), so state_dict keys are unaffected.
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        print(f"latent_dim = {latent_dim}")

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, latent_dim),  # Bottleneck: latent representation
        )

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),  # Reconstruction has the same width as the input
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encode(x))

    def encode(self, x):
        """Return the latent representation of ``x`` produced by the encoder."""
        return self.encoder(x)


In [13]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from itertools import combinations
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset

# Train one autoencoder per combination of series and visualise its latent space.
#
# Relies on names defined in earlier cells: `keys`, `data`, `device`, `num_gpus`
# and `AutoencoderDynamic`.

# PCA is only used for the 5-series plots, but importing it here makes a missing
# scikit-learn fail immediately instead of after the 2-, 3- and 4-series runs.
from sklearn.decomposition import PCA

NUM_EPOCHS = 15
BATCH_SIZE = 256


def _scatter_2d(position, points, title, axis_labels, colorbar_label):
    """Draw one panel of a 1x2 pyplot figure: first two columns, coloured by the first."""
    plt.subplot(1, 2, position)
    plt.scatter(points[:, 0], points[:, 1], s=10, alpha=0.7, c=points[:, 0], cmap='viridis')
    plt.title(title)
    plt.xlabel(axis_labels[0])
    plt.ylabel(axis_labels[1])
    plt.colorbar(label=colorbar_label)
    plt.grid(True)
    plt.tight_layout()


def _scatter_3d(fig, position, points, title, axis_labels, colorbar_label):
    """Add one 3D scatter panel to `fig`: first three columns, coloured by the first."""
    ax = fig.add_subplot(position, projection='3d')
    # Keep the scatter handle so the colorbar reuses it instead of drawing the points twice.
    sc = ax.scatter(points[:, 0], points[:, 1], points[:, 2], s=10, alpha=0.7, c=points[:, 0], cmap='viridis')
    ax.set_title(title)
    ax.set_xlabel(axis_labels[0])
    ax.set_ylabel(axis_labels[1])
    ax.set_zlabel(axis_labels[2])
    fig.colorbar(sc, label=colorbar_label)
    ax.grid(True)


# Iterate over key combinations
for r in range(2, len(keys) + 1):
    for combo in combinations(keys, r):
        try:
            columns = []
            for k in combo:
                series = data[k]  # read each array once per combo rather than twice
                columns.append(series.reshape(series.shape[0], -1))
            selected_data = np.hstack(columns)
        except Exception as e:
            print(f"Skipping {combo} due to shape mismatch: {e}")
            continue

        # Standardise every column; the epsilon avoids division by zero for constant columns.
        selected_data = (selected_data - selected_data.mean(axis=0)) / (selected_data.std(axis=0) + 1e-8)
        tensor_data = torch.tensor(selected_data, dtype=torch.float32)
        dataset = TensorDataset(tensor_data)
        loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

        input_dim = selected_data.shape[1]
        num_series = len(combo)  # The number of series is the number of selected keys

        # `core_model` is the bare network; `model` is what training calls and may be
        # a DataParallel wrapper around it. Both share the same parameters.
        core_model = AutoencoderDynamic(input_dim, num_series)
        model = nn.DataParallel(core_model) if num_gpus > 1 else core_model
        model.to(device)

        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=1e-3)

        print(f"\nTraining on combo: {combo}")
        for epoch in range(NUM_EPOCHS):
            total_loss = 0
            for batch in loader:
                x = batch[0].to(device)
                optimizer.zero_grad()
                output = model(x)
                loss = criterion(output, x)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            print(f"Epoch {epoch+1}/{NUM_EPOCHS} - Loss: {total_loss/len(loader):.6f}")

            # Checkpoint after every epoch (overwriting the previous one) so an
            # interrupted run keeps its latest weights. Saving the unwrapped model
            # keeps the keys free of DataParallel's "module." prefix, so the file
            # loads into a plain AutoencoderDynamic regardless of GPU count.
            torch.save(core_model.state_dict(), f"autoencoder1.1_{'_'.join(combo)}.pt")

        # Visualization after training
        with torch.no_grad():
            model.eval()
            encoded = core_model.encode(tensor_data.to(device)).cpu().numpy()

        num_keys = len(combo)

        # Plotting input data (before encoding) and encoded data (Z)
        if num_keys == 2:
            fig = plt.figure(figsize=(12, 5))
            _scatter_2d(1, selected_data, f"Input Data - Combo: {combo}",
                        ("Feature 1", "Feature 2"), "Feature 1 values")
            _scatter_2d(2, encoded, f"2D Encoded Space - Combo: {combo}",
                        ("Z1", "Z2"), "Z1 values")
            plt.show()
            plt.close(fig)  # release the figure; this loop creates many of them

        elif num_keys == 3:
            fig = plt.figure(figsize=(14, 6))
            _scatter_3d(fig, 121, selected_data, f"Input Data - Combo: {combo}",
                        ("Feature 1", "Feature 2", "Feature 3"), "Feature 1 values")
            _scatter_3d(fig, 122, encoded, f"3D Encoded Space - Combo: {combo}",
                        ("Z1", "Z2", "Z3"), "Z1 values")
            plt.tight_layout()
            plt.show()
            plt.close(fig)

        elif num_keys == 5:
            # 5D data cannot be plotted directly, so both the inputs and the latent
            # codes are projected to 3D with PCA (each fitted independently).
            reduced_data = PCA(n_components=3).fit_transform(selected_data)
            reduced_encoded = PCA(n_components=3).fit_transform(encoded)

            fig = plt.figure(figsize=(14, 6))
            _scatter_3d(fig, 121, reduced_data, f"Input Data (PCA Reduced) - Combo: {combo}",
                        ("PCA 1", "PCA 2", "PCA 3"), "PCA 1 values")
            _scatter_3d(fig, 122, reduced_encoded, f"5D Encoded Space (PCA Reduced) - Combo: {combo}",
                        ("PCA 1", "PCA 2", "PCA 3"), "PCA 1 values")
            plt.tight_layout()
            plt.show()
            plt.close(fig)

        else:
            print(f"Skipping visualization for combo {combo} — not 2D, 3D, or 5D.")


2

Training on combo: ('Time', 'Theta1')


KeyboardInterrupt: 

In [None]:
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd

# Interactive 3D view of the latent space.
#
# This cell reuses `encoded` and `combo` as left behind by the last iteration of
# the training loop cell, so the latent width depends on which combo ran last.
LATENT_AXES = ["Z1", "Z2", "Z3"]

if encoded.ndim != 2 or encoded.shape[1] < len(LATENT_AXES):
    raise ValueError(
        f"Need a 2D array with at least {len(LATENT_AXES)} latent dimensions for a 3D plot, "
        f"got shape {encoded.shape} for combo {combo}"
    )

n_latent = encoded.shape[1]

# Convert encoded data to a DataFrame for Plotly. Only the first three latent
# dimensions can be shown; wider latent spaces are truncated rather than rejected.
df = pd.DataFrame(encoded[:, :len(LATENT_AXES)], columns=LATENT_AXES)

# Optional: Add index or time labels
df['Index'] = df.index

plot_title = f"Interactive 3D Latent Space - Combo: {' + '.join(combo)}"
if n_latent > len(LATENT_AXES):
    # Make the truncation visible to whoever reads the figure.
    plot_title += f" (first {len(LATENT_AXES)} of {n_latent} latent dimensions)"

# Create Plotly 3D scatter plot
fig = px.scatter_3d(
    df,
    x="Z1", y="Z2", z="Z3",
    color="Index",  # or use another variable if available
    title=plot_title,
    opacity=0.8,
    width=800,
    height=700
)

fig.update_traces(marker=dict(size=4))
fig.update_layout(scene=dict(
    xaxis_title='Z1',
    yaxis_title='Z2',
    zaxis_title='Z3'
))

fig.show()
