In [82]:
import numpy as np
import pandas as pd

def load_features(model_name):
    return np.load(f'{model_name}_features.npy')

In [83]:
A = load_features('albert')

In [84]:
A.shape

(48714, 768)

In [12]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Create a dummy numpy array for demonstration
# Uncomment this line if A is not defined
# A = np.random.rand(48714, 4096)

# Convert the numpy array to PyTorch tensor
A_tensor = torch.tensor(A, dtype=torch.float32)

# Specify the CUDA device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Define the AutoEncoder model
class ComplexAutoEncoder(nn.Module):
    def __init__(self):
        super(ComplexAutoEncoder, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(4096, 2048),
            nn.BatchNorm1d(2048),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            nn.Linear(2048, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            
            nn.Linear(1024, 256),
            nn.BatchNorm1d(256),
            nn.ReLU()
        )
        
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(256, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            
            nn.Linear(1024, 2048),
            nn.BatchNorm1d(2048),
            nn.ReLU(),
            
            nn.Linear(2048, 4096),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


# Initialize the model, optimizer, and loss function
model = ComplexAutoEncoder().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
loss_function = nn.MSELoss()

# Learning rate scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for batch_data, in train_data:
        batch_data = batch_data.to(device)
        optimizer.zero_grad()
        output = model(batch_data)
        loss = loss_function(output, batch_data)
        loss.backward()
        optimizer.step()
        
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")
    
    # Update the learning rate
    scheduler.step(loss)


Epoch 1, Loss: 0.024480124935507774
Epoch 2, Loss: 0.02436743676662445
Epoch 3, Loss: 0.024535322561860085
Epoch 4, Loss: 0.02442612498998642
Epoch 5, Loss: 0.024631595239043236
Epoch 6, Loss: 0.02449626848101616
Epoch 7, Loss: 0.024525536224246025
Epoch 8, Loss: 0.024733291938900948
Epoch 9, Loss: 0.024591457098722458
Epoch 10, Loss: 0.02454942651093006
Epoch 11, Loss: 0.024509530514478683
Epoch 12, Loss: 0.024539312347769737
Epoch 13, Loss: 0.02451564557850361
Epoch 14, Loss: 0.02467780001461506
Epoch 15, Loss: 0.02446221187710762
Epoch 16, Loss: 0.024628471583127975
Epoch 17, Loss: 0.024357981979846954
Epoch 18, Loss: 0.024424651637673378
Epoch 19, Loss: 0.02459125593304634
Epoch 20, Loss: 0.02471555396914482
Epoch 21, Loss: 0.024535659700632095
Epoch 22, Loss: 0.024541249498724937
Epoch 23, Loss: 0.024585653096437454
Epoch 24, Loss: 0.024570763111114502
Epoch 25, Loss: 0.024509068578481674
Epoch 26, Loss: 0.024706760421395302
Epoch 27, Loss: 0.024555418640375137
Epoch 28, Loss: 0.0

KeyboardInterrupt: 

In [36]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

# For reparameterization trick
def reparameterize(mu, logvar):
    std = torch.exp(0.5*logvar)
    eps = torch.randn_like(std)
    return mu + eps*std

# VAE model
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(4096, 2048),
            nn.LayerNorm(2048),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.2),
            
            nn.Linear(2048, 1024),
            nn.LayerNorm(1024),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.2)
        )
        
        self.fc_mu = nn.Linear(1024, 64)
        self.fc_logvar = nn.Linear(1024, 64)

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(64, 1024),
            nn.LayerNorm(1024),
            nn.LeakyReLU(0.1),
            
            nn.Linear(1024, 2048),
            nn.LayerNorm(2048),
            nn.LeakyReLU(0.1),
            
            nn.Linear(2048, 4096),
            nn.Tanh()
        )
        
    def forward(self, x):
        x = self.encoder(x)
        mu = self.fc_mu(x)
        logvar = self.fc_logvar(x)
        z = reparameterize(mu, logvar)
        x_recon = self.decoder(z)
        return x_recon, mu, logvar
    
    def encode(self, x):
        x = self.encoder(x)
        mu = self.fc_mu(x)
        return mu

# Loss function
def vae_loss(recon_x, x, mu, logvar):
    recon_loss = nn.MSELoss()(recon_x, x)
    kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + 1e-5 * kl_loss


def vae_loss(recon_x, x, mu, logvar):
    recon_loss = nn.MSELoss(reduction='sum')(recon_x, x)  # or use nn.BCEWithLogitsLoss() for binary data
    kl_div = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    
    # Normalizing by the same number for a fair comparison between reconstruction and KL-divergence loss
    batch_size = x.size(0)
    kl_div /= batch_size * 4096  # 4096 is the feature dimension, adjust as needed
    
    return recon_loss + kl_div


# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def weights_init(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.001)

model.apply(weights_init)

# Initialize VAE model
model = VAE().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Data preparation
# Initialize StandardScaler
scaler = StandardScaler()

# Fit and transform the data
A_scaled = scaler.fit_transform(A)

# Convert to tensor
A_tensor_scaled = torch.tensor(A_scaled, dtype=torch.float32)
train_data = DataLoader(TensorDataset(A_tensor), batch_size=128, shuffle=True)

# Training
num_epochs = 20
model.train()
for epoch in range(num_epochs):
    for batch_data, in train_data:
        batch_data = batch_data.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(batch_data)
        loss = vae_loss(recon_batch, batch_data, mu, logvar)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

# Extract reduced-dimension features
model.eval()
all_latents = []

with torch.no_grad():
    for batch_data, in train_data:
        batch_data = batch_data.to(device)
        latent_mu = model.encode(batch_data)
        all_latents.append(latent_mu.cpu().numpy())

# Concatenate all the latent features into a single numpy array
all_latents_np = np.concatenate(all_latents, axis=0)


Epoch 1, Loss: 1335.4993896484375
Epoch 2, Loss: 1058.6412353515625
Epoch 3, Loss: 890.7281494140625
Epoch 4, Loss: 751.6736450195312
Epoch 5, Loss: 698.8134765625
Epoch 6, Loss: 680.2098999023438
Epoch 7, Loss: 609.8023681640625
Epoch 8, Loss: 551.2883911132812
Epoch 9, Loss: 556.24951171875
Epoch 10, Loss: 553.1036987304688
Epoch 11, Loss: 520.4054565429688
Epoch 12, Loss: 490.642578125
Epoch 13, Loss: 467.3554382324219
Epoch 14, Loss: 442.7686767578125
Epoch 15, Loss: 457.72906494140625
Epoch 16, Loss: 423.1995544433594
Epoch 17, Loss: 404.68890380859375
Epoch 18, Loss: 428.3824768066406
Epoch 19, Loss: 381.81256103515625
Epoch 20, Loss: 387.40362548828125


In [37]:
all_latents_np.shape

(48714, 64)

In [38]:
np.save('TART_features.npy', all_latents_np)

In [44]:
import numpy as np
from deap import base, creator, tools, algorithms
from sklearn.preprocessing import StandardScaler

# Generate some sample data for demonstration
n_samples, n_features = A.shape[0], A.shape[1]
X = A

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the autoencoder
def autoencoder(individual, data):
    n_inputs = n_features
    n_hidden = 64  # Number of components (latent features)
    n_outputs = n_features

    # Split individual into encoder and decoder parts
    encoder_weights = np.array(individual[:n_inputs * n_hidden]).reshape((n_inputs, n_hidden))
    decoder_weights = np.array(individual[n_inputs * n_hidden:]).reshape((n_hidden, n_outputs))

    # Encoding
    encoded = np.dot(data, encoder_weights)
    
    # Decoding
    decoded = np.dot(encoded, decoder_weights)
    
    mse = np.mean((data - decoded)**2)
    print(f"Evaluated individual with MSE: {mse}")
    # Reconstruction loss (MSE)
#     mse = np.mean((data - decoded)**2)
    
    return mse,
def autoencoder(individual, data):
    n_inputs = n_features
    n_hidden1 = 128  # Intermediate layer
    n_hidden2 = 64  # Number of components (latent features)
    n_outputs = n_features
    
    split1 = n_inputs * n_hidden1
    split2 = split1 + n_hidden1 * n_hidden2
    split3 = split2 + n_hidden2 * n_outputs
    
    # Split individual into encoder and decoder parts
    encoder_weights1 = np.array(individual[:split1]).reshape((n_inputs, n_hidden1))
    encoder_weights2 = np.array(individual[split1:split2]).reshape((n_hidden1, n_hidden2))
    decoder_weights = np.array(individual[split2:split3]).reshape((n_hidden2, n_outputs))
    
    # Encoding
    encoded1 = np.dot(data, encoder_weights1)
    encoded2 = np.dot(encoded1, encoder_weights2)
    
    # Decoding
    decoded = np.dot(encoded2, decoder_weights)
    
    mse = np.mean((data - decoded)**2)
    print(f"Evaluated individual with MSE: {mse}")

    return mse,

# Define the fitness and individual
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Define gene and individual creation operations
toolbox.register("attr_float", np.random.uniform, -10, 10)
# toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=n_features * 64 * 2)

individual_length = n_features * 128 + 128 * 64 + 64 * n_features

toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=individual_length)

toolbox.register("population", tools.initRepeat, list, toolbox.individual)





# Define genetic operators
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
# toolbox.register("evaluate", autoencoder, data=X_scaled)
toolbox.register("evaluate", autoencoder, data=X_scaled)
# Create population and run the evolution
population = toolbox.population(n=30)
ngen = 50  # Number of generations
cxpb, mutpb = 0.5, 0.2  # Probabilities of crossing and mutating

# Statistics to be collected
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("min", np.min)

# Run the evolution
# Run the evolution
algorithms.eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=stats, verbose=True)


# Extract the best individual
best_ind = tools.selBest(population, 1)[0]
best_encoder_weights = np.array(best_ind[:n_features * 64]).reshape((n_features, 64))

# Encode the data to 64 dimensions
encoded_data = np.dot(X_scaled, best_encoder_weights)




Evaluated individual with MSE: 1273159096860.8179
Evaluated individual with MSE: 1485244983226.3015
Evaluated individual with MSE: 1261011239759.8057
Evaluated individual with MSE: 1059702279442.5791
Evaluated individual with MSE: 1154058876050.2278
Evaluated individual with MSE: 1253208780725.2744
Evaluated individual with MSE: 1255138046655.85
Evaluated individual with MSE: 1246633388559.9958
Evaluated individual with MSE: 1154948689169.7205
Evaluated individual with MSE: 1210825509547.1968
Evaluated individual with MSE: 1224336568960.082
Evaluated individual with MSE: 1237972452887.659
Evaluated individual with MSE: 1237733924374.3284
Evaluated individual with MSE: 1249910045244.051
Evaluated individual with MSE: 1145707283460.2961
Evaluated individual with MSE: 1245988821975.8538
Evaluated individual with MSE: 1218188489281.1978
Evaluated individual with MSE: 1083864051363.5217
Evaluated individual with MSE: 1260131409481.2144
Evaluated individual with MSE: 1085024810400.0134
Evalu

KeyboardInterrupt: 

In [50]:
import numpy as np
from scipy.linalg import eigh
from sklearn.feature_selection import mutual_info_regression
from sklearn.preprocessing import QuantileTransformer

def compute_mutual_info_matrix(X):
    n_features = X.shape[1]
    mi_matrix = np.zeros((n_features, n_features))
    
    for i in range(n_features):
        print(i, n_features)
        for j in range(i, n_features):
            mi = mutual_info_regression(X[:, [i]], X[:, j])[0]
            mi_matrix[i, j] = mi
            mi_matrix[j, i] = mi  # The matrix is symmetric
            
    return mi_matrix

# Generate some example data
np.random.seed(0)
n_samples, n_features = A.shape[0], A.shape[1]
X = A#np.random.randn(n_samples, n_features)

# Apply the Quantile Transformer
quantile_transformer = QuantileTransformer(n_quantiles=20, random_state=0)
X_transformed = quantile_transformer.fit_transform(X)

# Compute the mutual information matrix
mi_matrix = compute_mutual_info_matrix(X_transformed)

# Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = eigh(mi_matrix)

# Sort eigenvalues and corresponding eigenvectors in descending order
sorted_index = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_index]
sorted_eigenvectors = eigenvectors[:, sorted_index]

# Choose the top k eigenvalues (and their corresponding eigenvectors)
k = 32  # Number of components to keep
top_eigenvectors = sorted_eigenvectors[:, :k]

# Project the data into lower-dimensional space
X_mi_based = np.dot(X_transformed, top_eigenvectors)

# Print first 5 rows of transformed data
print("Transformed Data (First 5 Rows):")
print(X_mi_based[:5])


0 4096


KeyboardInterrupt: 

In [97]:
import torch

# Function to calculate mutual information
def calc_mutual_information(x, y, bins=900, eps=1e-10):
    joint_hist = torch.histc(x * 10 + y, bins=bins, min=0, max=400).cuda()
    joint_hist += eps  # Add epsilon to avoid zero values
    
    joint_prob = joint_hist / joint_hist.sum()
    
    x_hist = torch.histc(x, bins=30, min=0, max=20).cuda()
    x_hist += eps  # Add epsilon to avoid zero values
    x_prob = x_hist / x_hist.sum()
    
    y_hist = torch.histc(y, bins=30, min=0, max=20).cuda()
    y_hist += eps  # Add epsilon to avoid zero values
    y_prob = y_hist / y_hist.sum()
    
    outer_x_y = torch.ger(x_prob, y_prob).cuda()
    outer_x_y = outer_x_y.flatten()

    mi = torch.sum(joint_prob * (torch.log2(joint_prob + eps) - torch.log2(outer_x_y + eps)))
    
    if torch.isnan(mi):
        print("NaN detected in mutual information")
    
    return mi



# Function to compute the mutual information matrix
def compute_mutual_info_matrix(X):
    X_torch = torch.Tensor(X).cuda()
    n_features = X.shape[1]
    mi_matrix = torch.zeros((n_features, n_features)).cuda()
    
    for i in range(n_features):
        print(i, n_features)
        for j in range(i, n_features):
            mi = calc_mutual_information(X_torch[:, i], X_torch[:, j])
            mi_matrix[i, j] = mi
            mi_matrix[j, i] = mi  # The matrix is symmetric
            
    return mi_matrix.cpu().numpy()  # Convert back to numpy array if necessary

# Make sure your data 'A' is a NumPy array
A = A #.cpu().numpy()
from sklearn.preprocessing import StandardScaler

# Scale the features
scaler = StandardScaler()
A_transformed = scaler.fit_transform(A)


# Calculate and print the mutual information matrix
mi_matrix = compute_mutual_info_matrix(A_transformed)
print(mi_matrix)


0 768
1 768
2 768
3 768
4 768
5 768
6 768
7 768
8 768
9 768
10 768
11 768
12 768
13 768
14 768
15 768
16 768
17 768
18 768
19 768
20 768
21 768
22 768
23 768
24 768
25 768
26 768
27 768
28 768
29 768
30 768
31 768
32 768
33 768
34 768
35 768
36 768
37 768
38 768
39 768
40 768
41 768
42 768
43 768
44 768
45 768
46 768
47 768
48 768
49 768
50 768
51 768
52 768
53 768
54 768
55 768
56 768
57 768
58 768
59 768
60 768
61 768
62 768
63 768
64 768
65 768
66 768
67 768
68 768
69 768
70 768
71 768
72 768
73 768
74 768
75 768
76 768
77 768
78 768
79 768
80 768
81 768
82 768
83 768
84 768
85 768
86 768
87 768
88 768
89 768
90 768
91 768
92 768
93 768
94 768
95 768
96 768
97 768
98 768
99 768
100 768
101 768
102 768
103 768
104 768
105 768
106 768
107 768
108 768
109 768
110 768
111 768
112 768
113 768
114 768
115 768
116 768
117 768
118 768
119 768
120 768
121 768
122 768
123 768
124 768
125 768
126 768
127 768
128 768
129 768
130 768
131 768
132 768
133 768
134 768
135 768
136 768
137 768
138 76

In [98]:
eigenvalues, eigenvectors = eigh(mi_matrix)

# Sort eigenvalues and corresponding eigenvectors in descending order
sorted_index = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_index]
sorted_eigenvectors = eigenvectors[:, sorted_index]

# Choose the top k eigenvalues (and their corresponding eigenvectors)
k = 128  # Number of components to keep
top_eigenvectors = sorted_eigenvectors[:, :k]


# quantile_transformer = QuantileTransformer(n_quantiles=20, random_state=0)
# A_transformed = quantile_transformer.fit_transform(A)

# Project the data into lower-dimensional space
X_mi_based = np.dot(A_transformed, top_eigenvectors)

# Print first 5 rows of transformed data
print("Transformed Data (First 5 Rows):")
print(X_mi_based[:5])

Transformed Data (First 5 Rows):
[[-6.09434187e-01  1.02105319e-01  1.15341961e+00  1.19685374e-01
   4.08342540e-01 -1.38089991e+00  3.09183407e+00 -1.22194946e+00
  -3.84110212e-03  5.57337642e-01 -1.53433070e-01 -3.02158213e+00
  -1.21030760e+00  4.74165231e-01  3.18425512e+00 -2.53713250e+00
  -4.20108259e-01  3.41934711e-02  2.18286586e+00 -1.67183590e+00
   4.64980930e-01 -6.26017809e-01  4.36029792e-01  7.30823755e-01
   1.43280625e+00  1.55591750e+00  1.66121721e-01 -1.61234140e+00
  -5.64761460e-01  1.95986402e+00 -8.88687015e-01 -3.62872154e-01]
 [-8.87036562e-01 -2.36873794e+00 -3.77493203e-02 -5.49288630e-01
   1.78220093e+00 -2.98791504e+00  4.97397995e+00 -4.69161749e+00
   1.12975442e+00 -2.12534726e-01 -1.04841971e+00 -4.67019367e+00
  -7.06262589e-01  1.28673565e+00  5.74570656e+00 -4.97799206e+00
   9.60308909e-02  3.27860618e+00 -1.33796859e+00 -2.92274237e+00
   5.18779576e-01 -1.15604615e+00 -7.65139937e-01 -2.84464121e-01
   1.16572464e+00  1.50683165e+00  1.21811

In [99]:
np.save('TART_features.npy', X_mi_based)