In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns
import umap

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load Data
X_flux = np.load("X_flux_aligned.npy")
y = np.load("y.npy")

X_flux_full = np.load("X_flux_aligned.npy")  # full dataset
X_tensor = torch.tensor(X_flux_full, dtype=torch.float32)

X_tensor = torch.tensor(X_flux, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

print(f"X_tensor shape: {X_tensor.shape}")
print(f"y_tensor shape: {y_tensor.shape}")

# Positional Encoding
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=2000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :]

# Custom Multi-Head Attention
class CustomMultiHeadAttention(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super().__init__()
        assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads"
        
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        self.q_linear = nn.Linear(embed_dim, embed_dim)
        self.k_linear = nn.Linear(embed_dim, embed_dim)
        self.v_linear = nn.Linear(embed_dim, embed_dim)
        self.out_linear = nn.Linear(embed_dim, embed_dim)

        self.dropout = nn.Dropout(dropout)
        self.scale = torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))

    def forward(self, query, key, value, mask=None):
        batch_size, seq_len, _ = query.size()

        Q = self.q_linear(query)
        K = self.k_linear(key)
        V = self.v_linear(value)

        Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)

        scores = torch.matmul(Q, K.transpose(-2, -1)) / self.scale

        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))

        attn = torch.softmax(scores, dim=-1)
        attn = self.dropout(attn)

        context = torch.matmul(attn, V)
        context = context.transpose(1, 2).contiguous().view(batch_size, seq_len, self.embed_dim)

        output = self.out_linear(context)
        return output, attn



# Transformer Block
class TransformerEncoderBlock(nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward=256, dropout=0.1):
        super().__init__()
        self.self_attn = CustomMultiHeadAttention(d_model, nhead, dropout)
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, src):
        src2, _ = self.self_attn(src, src, src)
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(torch.relu(self.linear1(src))))
        src = src + self.dropout2(src2)
        return self.norm2(src)

# Autoencoder with encoder-only output
class FluxTransformerAutoencoder(nn.Module):
    def __init__(self, input_dim=1, d_model=64, nhead=4, num_layers=2, seq_len=2000):
        super().__init__()
        self.input_projection = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, seq_len)
        self.encoder_blocks = nn.Sequential(*[TransformerEncoderBlock(d_model, nhead) for _ in range(num_layers)])
        self.decoder = nn.Linear(d_model, input_dim)

    def forward(self, x):
        x = x.unsqueeze(-1)
        x = self.input_projection(x)
        x = self.pos_encoder(x)
        x = self.encoder_blocks(x)
        x = self.decoder(x)
        return x.squeeze(-1)

    def encode(self, x):
        x = x.unsqueeze(-1)
        x = self.input_projection(x)
        x = self.pos_encoder(x)
        x = self.encoder_blocks(x)
        return x[:, 0, :]  # First token as embedding

# Hard Triplet Loss
def batch_hard_triplet_mining(embeddings, labels, margin=1.0):
    dist_matrix = torch.cdist(embeddings, embeddings, p=2)
    labels = labels.unsqueeze(1)
    matches = labels == labels.T
    diffs = ~matches
    hardest_pos = torch.where(matches, dist_matrix, torch.tensor(float('-inf')).to(embeddings.device)).max(dim=1)[0]
    hardest_neg = torch.where(diffs, dist_matrix, torch.tensor(float('inf')).to(embeddings.device)).min(dim=1)[0]
    triplet_loss = F.relu(hardest_pos - hardest_neg + margin)
    return triplet_loss.mean()

# Training setup
model = FluxTransformerAutoencoder().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 30
batch_size = 8
virtual_batch_size = 32  # effective batch size
accum_steps = virtual_batch_size // batch_size

model.train()
for epoch in range(epochs):
    total_loss = 0.0
    total_samples = 0

    loader = DataLoader(TensorDataset(X_tensor, y_tensor), batch_size=batch_size, shuffle=True)
    optimizer.zero_grad()

    for i, (X_batch, y_batch) in enumerate(loader):
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        embeddings = model.encode(X_batch)
        embeddings = F.normalize(embeddings, p=2, dim=1)

        loss = batch_hard_triplet_mining(embeddings, y_batch)
        loss = loss / accum_steps
        loss.backward()

        if (i + 1) % accum_steps == 0 or (i + 1) == len(loader):
            optimizer.step()
            optimizer.zero_grad()

        total_loss += loss.item() * batch_size * accum_steps
        total_samples += batch_size

        # 🧹 Optional: cleanup to help long loops
        del X_batch, y_batch, embeddings, loss
        torch.cuda.empty_cache() if device.type == "cuda" else None

    avg_loss = total_loss / total_samples
    print(f"Epoch {epoch+1}/{epochs} - Triplet Loss: {avg_loss:.4f}, Samples: {total_samples}")


# Generate embeddings
print("Generating final embeddings...")
model.eval()
embeddings = []
with torch.no_grad():
    for batch in loader_full:
        X_batch = batch[0].to(device)
        emb = model.encode(X_batch)
        embeddings.append(emb.cpu().numpy())
        del X_batch, emb
        torch.cuda.empty_cache() if device.type == "cuda" else None
flux_embeddings = np.concatenate(embeddings, axis=0)
print("✅ flux_embeddings shape:", flux_embeddings.shape)

# Save embeddings
np.save("flux_embeddings.npy", flux_embeddings)

# UMAP Visualization
reducer = umap.UMAP(random_state=42)
embedding_umap = reducer.fit_transform(flux_embeddings)
plt.figure(figsize=(8, 6))
scatter = plt.scatter(embedding_umap[:, 0], embedding_umap[:, 1], c=y, cmap='viridis')
plt.colorbar(scatter)
plt.title("UMAP Visualization of Flux Embeddings")
plt.show()

# t-SNE Visualization
tsne = TSNE(n_components=2, random_state=42)
embedding_tsne = tsne.fit_transform(flux_embeddings)
plt.figure(figsize=(8, 6))
scatter = plt.scatter(embedding_tsne[:, 0], embedding_tsne[:, 1], c=y, cmap='viridis')
plt.colorbar(scatter)
plt.title("t-SNE Visualization of Flux Embeddings")
plt.show()


Using device: cpu
X_tensor shape: torch.Size([1088, 2000])
y_tensor shape: torch.Size([1088])
Epoch 1/30 - Triplet Loss: 1.0581, Samples: 1088
Epoch 2/30 - Triplet Loss: 1.0298, Samples: 1088
Epoch 3/30 - Triplet Loss: 1.0199, Samples: 1088
Epoch 4/30 - Triplet Loss: 0.9972, Samples: 1088
Epoch 5/30 - Triplet Loss: 1.0222, Samples: 1088
Epoch 6/30 - Triplet Loss: 1.0056, Samples: 1088
Epoch 7/30 - Triplet Loss: 1.0120, Samples: 1088
Epoch 8/30 - Triplet Loss: 0.9987, Samples: 1088
Epoch 9/30 - Triplet Loss: 1.0164, Samples: 1088
Epoch 10/30 - Triplet Loss: 1.0010, Samples: 1088
Epoch 11/30 - Triplet Loss: 0.9990, Samples: 1088
Epoch 12/30 - Triplet Loss: 1.0001, Samples: 1088
Epoch 13/30 - Triplet Loss: 1.0052, Samples: 1088
Epoch 14/30 - Triplet Loss: 1.0074, Samples: 1088
Epoch 15/30 - Triplet Loss: 0.9917, Samples: 1088
Epoch 16/30 - Triplet Loss: 1.0148, Samples: 1088
Epoch 17/30 - Triplet Loss: 0.9967, Samples: 1088
Epoch 18/30 - Triplet Loss: 0.9988, Samples: 1088
Epoch 19/30 - T

NameError: name 'loader_full' is not defined

In [None]:
np.save("flux_embeddings.npy", flux_embeddings)
print(f"✅ flux_embeddings.npy saved: {flux_embeddings.shape}")


NameError: name 'loader_full' is not defined