In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import torch.nn.functional as F
from typing import Optional
from unicore.modules import TransformerEncoderLayer, LayerNorm
import pandas as pd
import os
import numpy as np
from ast import literal_eval

# Define the dataset
class CustomDataset(Dataset):
    def __init__(self, matrix_data, vector_data):
        self.matrix_data = matrix_data
        self.vector_data = vector_data

    def __len__(self):
        return len(self.matrix_data)

    def __getitem__(self, idx):
        return self.matrix_data[idx], self.vector_data[idx]

# Define the 2D matrix encoder (similar to an image encoder)
class MatrixEncoder(nn.Module):
    def __init__(self, input_channels, output_dim):
        super(MatrixEncoder, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(256 * 38 * 38, output_dim),  # Adjusted for input size 621x621 with pooling
            nn.Dropout(0.3)  # Add Dropout layer
        )

    def forward(self, x):
        # Ensure the input has the correct shape (batch_size, channels, height, width)
        if len(x.shape) == 3:
            x = x.unsqueeze(1)  # Add channel dimension if missing
        return self.cnn(x)

# Define the 1D vector encoder (similar to a text encoder)
class VectorEncoder(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(VectorEncoder, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.3),  # Add Dropout layer
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),  # Add Dropout layer
            nn.Linear(512, output_dim)
        )

    def forward(self, x):
        return self.fc(x)

# Define the Transformer encoder with pair
class TransformerEncoderWithPair(nn.Module):
    def __init__(
        self,
        encoder_layers: int = 4,
        embed_dim: int = 512,
        ffn_embed_dim: int = 2048,
        attention_heads: int = 4,
        emb_dropout: float = 0.1,
        dropout: float = 0.1,
        attention_dropout: float = 0.1,
        activation_dropout: float = 0.0,
        max_seq_len: int = 256,
        activation_fn: str = "gelu",
        post_ln: bool = False,
        no_final_head_layer_norm: bool = False,
    ) -> None:

        super().__init__()
        self.emb_dropout = emb_dropout
        self.max_seq_len = max_seq_len
        self.embed_dim = embed_dim
        self.attention_heads = attention_heads
        self.emb_layer_norm = LayerNorm(self.embed_dim)
        if not post_ln:
            self.final_layer_norm = LayerNorm(self.embed_dim)
        else:
            self.final_layer_norm = None

        if not no_final_head_layer_norm:
            self.final_head_layer_norm = LayerNorm(attention_heads)
        else:
            self.final_head_layer_norm = None

        self.layers = nn.ModuleList(
            [
                TransformerEncoderLayer(
                    embed_dim=self.embed_dim,
                    ffn_embed_dim=ffn_embed_dim,
                    attention_heads=attention_heads,
                    dropout=dropout,
                    attention_dropout=attention_dropout,
                    activation_dropout=activation_dropout,
                    activation_fn=activation_fn,
                    post_ln=post_ln,
                )
                for _ in range(encoder_layers)
            ]
        )

    def forward(
        self,
        emb: torch.Tensor,
        attn_mask: Optional[torch.Tensor] = None,
        padding_mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:

        bsz = emb.size(0)
        seq_len = emb.size(1)
        x = self.emb_layer_norm(emb)
        x = F.dropout(x, p=self.emb_dropout, training=self.training)

        # account for padding while computing the representation
        if padding_mask is not None:
            x = x * (1 - padding_mask.unsqueeze(-1).type_as(x))

        if attn_mask is None:
            attn_mask = torch.zeros((bsz, 1, seq_len, seq_len), device=emb.device).repeat(1, self.attention_heads, 1, 1).view(-1, seq_len, seq_len)

        for i in range(len(self.layers)):
            x, attn_mask, _ = self.layers[i](
                x, padding_mask=padding_mask, attn_bias=attn_mask, return_attn=True
            )

        if self.final_layer_norm is not None:
            x = self.final_layer_norm(x)

        return x, attn_mask

# Define the CLIP model
class CLIPModel(nn.Module):
    def __init__(self, matrix_encoder, vector_encoder, transformer_encoder):
        super(CLIPModel, self).__init__()
        self.matrix_encoder = matrix_encoder
        self.vector_encoder = vector_encoder
        self.transformer_encoder = transformer_encoder

    def forward(self, matrix, vector):
        matrix_features = self.matrix_encoder(matrix)
        vector_features = self.vector_encoder(vector)
        transformer_input = torch.cat((matrix_features.unsqueeze(1), vector_features.unsqueeze(1)), dim=1)
        transformer_output, _ = self.transformer_encoder(transformer_input)
        return transformer_output[:, 0, :], transformer_output[:, 1, :]



In [3]:
# Data preparation function
def prepare_data(data_path_csv, data_path_npy):
    # Load encoded vector data from CSV file
    encoded_df = pd.read_csv(data_path_csv, keep_default_na=False)

    # Load matrix data from a separate .npy file
    matrix_data = np.load(data_path_npy)
    matrix_data = torch.tensor(matrix_data, dtype=torch.float32)
    
    # Print matrix data size for verification
    print(f"Matrix Data Size: {matrix_data.size()}")

    # Convert the lists in 'rna_embedding' and 'smiles_embedding' columns to NumPy arrays
    loaded_rna_data = np.stack(encoded_df['rna_embedding'].apply(lambda x: np.array(eval(x))).values)
    loaded_smiles_data = np.stack(encoded_df['smiles_embedding'].apply(lambda x: np.array(eval(x))).values)

    # Convert loaded data to tensors
    loaded_rna_data = torch.tensor(loaded_rna_data, dtype=torch.float32)
    loaded_smiles_data = torch.tensor(loaded_smiles_data, dtype=torch.float32)

    # Create combined data
    combined_data = torch.cat((loaded_rna_data, loaded_smiles_data), dim=1)

    # Print sizes for verification
    print(f"Loaded RNA Data Size: {loaded_rna_data.size()}")
    print(f"Loaded SMILES Data Size: {loaded_smiles_data.size()}")
    print(f"Combined Data Size: {combined_data.size()}")

    

    return matrix_data, combined_data


In [4]:
def load_and_freeze_model(model_path, output_dim=768):
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize model
    matrix_encoder = MatrixEncoder(input_channels=1, output_dim=output_dim).to(device)
    vector_encoder = VectorEncoder(input_dim=2048, output_dim=output_dim).to(device)  # Assuming input_dim is 2048 for inference
    transformer_encoder = TransformerEncoderWithPair(embed_dim=output_dim).to(device)
    model = CLIPModel(matrix_encoder, vector_encoder, transformer_encoder).to(device)

    # Load the trained model weights
    model.load_state_dict(torch.load(model_path, map_location=device))

    # Freeze model weights
    for param in model.parameters():
        param.requires_grad = False

    model.eval()
    return model

In [5]:
def get_encodings(model, data_loader, device):
    matrix_encodings = []
    vector_encodings = []
    with torch.no_grad():
        for matrix, vector in data_loader:
            matrix, vector = matrix.to(device), vector.to(device)
            matrix_features, vector_features = model(matrix, vector)
            matrix_encodings.append(matrix_features.cpu())
            vector_encodings.append(vector_features.cpu())

    matrix_encodings = torch.cat(matrix_encodings, dim=0)
    vector_encodings = torch.cat(vector_encodings, dim=0)
    return matrix_encodings, vector_encodings

# Get encodings from model
def get_encodings(model, data_loader, device):
    matrix_encoder, vector_encoder = model.matrix_encoder, model.vector_encoder
    matrix_encodings = []
    vector_encodings = []
    with torch.no_grad():
        for matrix, vector in data_loader:
            matrix, vector = matrix.to(device), vector.to(device)
            matrix_features_encoder = model.matrix_encoder(matrix)
            vector_features_encoder = model.vector_encoder(vector)
            matrix_features, vector_features = model(matrix, vector)
            # Concatenate both outputs
            matrix_features = torch.cat((matrix_features, matrix_features_encoder), dim=-1)
            vector_features = torch.cat((vector_features, vector_features_encoder), dim=-1)
            matrix_encodings.append(matrix_features.cpu())
            vector_encodings.append(vector_features.cpu())

    matrix_encodings = torch.cat(matrix_encodings, dim=0)
    vector_encodings = torch.cat(vector_encodings, dim=0)
    return matrix_encodings, vector_encodings

# Get encodings from model
def get_encodings(model, data_loader, device):
    matrix_encodings = []
    vector_encodings = []
    with torch.no_grad():
        for matrix, vector in data_loader:
            matrix, vector = matrix.to(device), vector.to(device)
            matrix_features = model.matrix_encoder(matrix)
            vector_features = model.vector_encoder(vector)
            matrix_encodings.append(matrix_features.cpu())
            vector_encodings.append(vector_features.cpu())

    matrix_encodings = torch.cat(matrix_encodings, dim=0)
    vector_encodings = torch.cat(vector_encodings, dim=0)
    return matrix_encodings, vector_encodings

In [6]:
data_path_csv = 'pdbbind_dataset_rna/pdbbind_rna_processed_index_encoded.csv'
data_path_npy = 'pdbbind_dataset_rna/combined_distance_matrices_finetune.npy'

encoded_df = pd.read_csv(data_path_csv, keep_default_na=False)

# Prepare the data
matrix_data, combined_data = prepare_data(data_path_csv, data_path_npy)

# Create dataset
dataset = CustomDataset(matrix_data, combined_data)
data_loader = DataLoader(dataset)
# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = "best_clip_model.pth"
model = load_and_freeze_model(model_path, output_dim=768)
matrix_encodings, vector_encodings = get_encodings(model,data_loader, device)

Matrix Data Size: torch.Size([118, 621, 621])
Loaded RNA Data Size: torch.Size([118, 1280])
Loaded SMILES Data Size: torch.Size([118, 768])
Combined Data Size: torch.Size([118, 2048])


  model.load_state_dict(torch.load(model_path, map_location=device))


In [7]:
matrix_encodings.shape

torch.Size([118, 768])

In [8]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Load and freeze pretrained model
def load_and_freeze_model(model_path, output_dim=768):
    # Load pretrained encoders (MatrixEncoder and VectorEncoder) instead of using CLIPModel's output for training
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize model
    matrix_encoder = MatrixEncoder(input_channels=1, output_dim=output_dim).to(device)
    vector_encoder = VectorEncoder(input_dim=2048, output_dim=output_dim).to(device)  # Assuming input_dim is 2048 for inference
    transformer_encoder = TransformerEncoderWithPair(embed_dim=output_dim).to(device)
    model = CLIPModel(matrix_encoder, vector_encoder, transformer_encoder).to(device)

    # Load the trained model weights
    model.load_state_dict(torch.load(model_path, map_location=device))

    # Freeze model weights
    for param in model.parameters():
        param.requires_grad = False

    model.eval()
    return model



# Define fully connected neural network class
class FullyConnectedNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FullyConnectedNN, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 2, hidden_dim // 8),
            nn.BatchNorm1d(hidden_dim // 8),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 8, output_dim)
        )

    def forward(self, x):
        return self.fc(x)

# Train fully connected neural network to predict labels and record metrics
def train_fully_connected_nn(train_matrix_encodings, train_vector_encodings, y_train, val_matrix_encodings, val_vector_encodings, y_val, output_dim=768, learning_rate=3*1e-3, num_epochs=500, batch_size=256, early_stop_patience=50):
    # Combine matrix and vector encodings
    train_features = torch.cat((train_matrix_encodings, train_vector_encodings), dim=1)
    val_features = torch.cat((val_matrix_encodings, val_vector_encodings), dim=1)
    
    # Create training and validation datasets
    train_dataset = torch.utils.data.TensorDataset(train_features, torch.tensor(y_train, dtype=torch.float32))
    val_dataset = torch.utils.data.TensorDataset(val_features, torch.tensor(y_val, dtype=torch.float32))
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model, loss function, and optimizer
    input_dim = train_features.size(1)
    hidden_dim = 256
    output_dim = 1  # Assuming regression task
    model = FullyConnectedNN(input_dim, hidden_dim, output_dim).to(device)
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=50, verbose=True)

    # DataFrame to store metrics
    metrics_df = pd.DataFrame(columns=["Epoch", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR"])
    
    # Early stopping variables
    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            print(f"Training Batch Size: {features.size()}")

            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)
        train_rmse = torch.sqrt(torch.tensor(avg_train_loss)).item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train RMSE: {train_rmse:.4f}")

        # Validation phase
        model.eval()
        total_val_loss = 0
        val_outputs_list = []
        val_labels_list = []
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)
                print(f"Validation Batch Size: {features.size()}")

                # Forward pass
                outputs = model(features).squeeze()
                val_loss = criterion(outputs, labels)
                total_val_loss += val_loss.item()
                val_outputs_list.append(outputs.cpu())
                val_labels_list.append(labels.cpu())

        avg_val_loss = total_val_loss / len(val_loader)
        val_rmse = torch.sqrt(torch.tensor(avg_val_loss)).item()
        val_outputs = torch.cat(val_outputs_list, dim=0)
        val_labels = torch.cat(val_labels_list, dim=0)
        valid_pr = torch.corrcoef(torch.stack([val_outputs, val_labels]))[0, 1].item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}, Validation RMSE: {val_rmse:.4f}, Valid PR: {valid_pr:.4f}")

        # Record metrics
        metrics_df = pd.concat([metrics_df, pd.DataFrame([{
            "Epoch": epoch + 1,
            "Train Loss": avg_train_loss,
            "Train RMSE": train_rmse,
            "Valid RMSE": val_rmse,
            "Valid PR": valid_pr
        }])], ignore_index=True)

        # Learning rate decay
        scheduler.step(avg_val_loss)

        # Early stopping check
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                print("Early stopping triggered.")
                break

    # Save the metrics to a CSV file
    metrics_df.to_csv("training_metrics.csv", index=False)

    # Save the best model
    if best_model_state is not None:
        torch.save(best_model_state, "best_finetuned_fully_connected_nn.pth")



In [9]:
# Load and test the final model and record metrics
def test_fully_connected_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path, batch_size=64):
    # Combine matrix and vector encodings
    test_features = torch.cat((test_matrix_encodings, test_vector_encodings), dim=1)
    test_dataset = torch.utils.data.TensorDataset(test_features, torch.tensor(y_test, dtype=torch.float32))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the trained model
    input_dim = test_features.size(1)
    hidden_dim = 256
    output_dim = 1  # Assuming regression task
    model = FullyConnectedNN(input_dim, hidden_dim, output_dim).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    
    # Define loss function
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    total_test_loss = 0
    test_outputs_list = []
    test_labels_list = []
    
    # Testing loop
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            print(f"Testing Batch Size: {features.size()}")
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)
            total_test_loss += loss.item()
            test_outputs_list.append(outputs.cpu())
            test_labels_list.append(labels.cpu())
    
    avg_test_loss = total_test_loss / len(test_loader)
    test_rmse = torch.sqrt(torch.tensor(avg_test_loss)).item()
    test_outputs = torch.cat(test_outputs_list, dim=0)
    test_labels = torch.cat(test_labels_list, dim=0)
    test_pr = torch.corrcoef(torch.stack([test_outputs, test_labels]))[0, 1].item()
    print(f"Test Loss: {avg_test_loss:.4f}, Test RMSE: {test_rmse:.4f}, Test PR: {test_pr:.4f}")
    
    # Save test metrics to a CSV file
    test_metrics_df = pd.DataFrame([{ "Test RMSE": test_rmse, "Test PR": test_pr }])
    test_metrics_df.to_csv("test_metrics.csv", index=False)

In [105]:
model_path = "best_clip_model.pth"
model = load_and_freeze_model(model_path, output_dim=768)

# Data paths for finetuning
data_path_csv = 'pdbbind_dataset_rna/pdbbind_rna_processed_index_encoded.csv'
data_path_npy = 'pdbbind_dataset_rna/combined_distance_matrices_finetune.npy'

# Prepare data
matrix_data, combined_data = prepare_data(data_path_csv, data_path_npy)
encoded_df = pd.read_csv(data_path_csv, keep_default_na=False)
train_indices = encoded_df[encoded_df['set'] == 'train'].index
val_indices = encoded_df[encoded_df['set'] == 'valid'].index
test_indices = encoded_df[encoded_df['set'] == 'test'].index
y_train = encoded_df.loc[train_indices, 'label'].values
y_val = encoded_df.loc[val_indices, 'label'].values
y_test = encoded_df.loc[test_indices, 'label'].values

# Create train, validation, and test datasets
train_dataset = CustomDataset(matrix_data[train_indices], combined_data[train_indices])
val_dataset = CustomDataset(matrix_data[val_indices], combined_data[val_indices])
test_dataset = CustomDataset(matrix_data[test_indices], combined_data[test_indices])
train_loader = DataLoader(train_dataset)
val_loader = DataLoader(val_dataset)
test_loader = DataLoader(test_dataset)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Get encodings
train_matrix_encodings, train_vector_encodings = get_encodings(model, train_loader, device)
val_matrix_encodings, val_vector_encodings = get_encodings(model, val_loader, device)
test_matrix_encodings, test_vector_encodings = get_encodings(model, test_loader, device)

# Train fully connected neural network to predict labels
train_fully_connected_nn(train_matrix_encodings, train_vector_encodings, y_train, val_matrix_encodings, val_vector_encodings, y_val)

# Test the final model
test_fully_connected_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path="best_finetuned_fully_connected_nn.pth")

  model.load_state_dict(torch.load(model_path, map_location=device))


Matrix Data Size: torch.Size([118, 621, 621])
Loaded RNA Data Size: torch.Size([118, 1280])
Loaded SMILES Data Size: torch.Size([118, 768])
Combined Data Size: torch.Size([118, 2048])


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Training Batch Size: torch.Size([81, 1536])
Epoch [1/500], Train Loss: 37.4302, Train RMSE: 6.1180
Validation Batch Size: torch.Size([10, 1536])
Epoch [1/500], Validation Loss: 33.0325, Validation RMSE: 5.7474, Valid PR: 0.8702
Training Batch Size: torch.Size([81, 1536])
Epoch [2/500], Train Loss: 36.1020, Train RMSE: 6.0085
Validation Batch Size: torch.Size([10, 1536])
Epoch [2/500], Validation Loss: 32.6450, Validation RMSE: 5.7136, Valid PR: 0.7471
Training Batch Size: torch.Size([81, 1536])
Epoch [3/500], Train Loss: 34.3243, Train RMSE: 5.8587
Validation Batch Size: torch.Size([10, 1536])
Epoch [3/500], Validation Loss: 31.9936, Validation RMSE: 5.6563, Valid PR: 0.7819
Training Batch Size: torch.Size([81, 1536])
Epoch [4/500], Train Loss: 34.6415, Train RMSE: 5.8857
Validation Batch Size: torch.Size([10, 1536])
Epoch [4/500], Validation Loss: 30.8667, Validation RMSE: 5.5558, Valid PR: 0.7700
Training Batch Size: torch.Size([81, 1536])
Epoch [5/500], Train Loss: 34.2586, Train RM

  model.load_state_dict(torch.load(model_path, map_location=device))


In [122]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset


# Define fully connected neural network class
class FullyConnectedNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FullyConnectedNN, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 2, hidden_dim // 8),
            nn.BatchNorm1d(hidden_dim // 8),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 8, output_dim)
        )

    def forward(self, x):
        return self.fc(x)

# Train fully connected neural network to predict labels and record metrics
def train_fully_connected_nn(train_matrix_encodings, train_vector_encodings, y_train, val_matrix_encodings, val_vector_encodings, y_val, output_dim=768, learning_rate=3*1e-3, num_epochs=500, batch_size=256, early_stop_patience=10):
    # Combine matrix and vector encodings
    train_features = torch.cat((train_matrix_encodings, train_vector_encodings), dim=1)
    val_features = torch.cat((val_matrix_encodings, val_vector_encodings), dim=1)
    
    # Create training and validation datasets
    train_dataset = torch.utils.data.TensorDataset(train_features, torch.tensor(y_train, dtype=torch.float32))
    val_dataset = torch.utils.data.TensorDataset(val_features, torch.tensor(y_val, dtype=torch.float32))
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model, loss function, and optimizer
    input_dim = train_features.size(1)
    hidden_dim = 256
    output_dim = 1  # Assuming regression task
    model = FullyConnectedNN(input_dim, hidden_dim, output_dim).to(device)
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=50, verbose=True)

    # DataFrame to store metrics
    metrics_df = pd.DataFrame(columns=["Epoch", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR"])
    
    # Early stopping variables
    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)
        train_rmse = torch.sqrt(torch.tensor(avg_train_loss)).item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train RMSE: {train_rmse:.4f}")

        # Validation phase
        model.eval()
        total_val_loss = 0
        val_outputs_list = []
        val_labels_list = []
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)

                # Forward pass
                outputs = model(features).squeeze()
                val_loss = criterion(outputs, labels)
                total_val_loss += val_loss.item()
                val_outputs_list.append(outputs.cpu())
                val_labels_list.append(labels.cpu())

        avg_val_loss = total_val_loss / len(val_loader)
        val_rmse = torch.sqrt(torch.tensor(avg_val_loss)).item()
        val_outputs = torch.cat(val_outputs_list, dim=0)
        val_labels = torch.cat(val_labels_list, dim=0)
        valid_pr = torch.corrcoef(torch.stack([val_outputs, val_labels]))[0, 1].item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}, Validation RMSE: {val_rmse:.4f}, Valid PR: {valid_pr:.4f}")

        # Record metrics
        metrics_df = pd.concat([metrics_df, pd.DataFrame([{
            "Epoch": epoch + 1,
            "Train Loss": avg_train_loss,
            "Train RMSE": train_rmse,
            "Valid RMSE": val_rmse,
            "Valid PR": valid_pr
        }])], ignore_index=True)

        # Learning rate decay
        scheduler.step(avg_val_loss)

        # Early stopping check
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                print("Early stopping triggered.")
                break

    return metrics_df, best_model_state

# Load and test the final model and record metrics
def test_fully_connected_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path, batch_size=64):
    # Combine matrix and vector encodings
    test_features = torch.cat((test_matrix_encodings, test_vector_encodings), dim=1)
    test_dataset = torch.utils.data.TensorDataset(test_features, torch.tensor(y_test, dtype=torch.float32))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the trained model
    input_dim = test_features.size(1)
    hidden_dim = 256
    output_dim = 1  # Assuming regression task
    model = FullyConnectedNN(input_dim, hidden_dim, output_dim).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    
    # Define loss function
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    total_test_loss = 0
    test_outputs_list = []
    test_labels_list = []
    
    # Testing loop
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)
            total_test_loss += loss.item()
            test_outputs_list.append(outputs.cpu())
            test_labels_list.append(labels.cpu())
    
    avg_test_loss = total_test_loss / len(test_loader)
    test_rmse = torch.sqrt(torch.tensor(avg_test_loss)).item()
    test_outputs = torch.cat(test_outputs_list, dim=0)
    test_labels = torch.cat(test_labels_list, dim=0)
    test_pr = torch.corrcoef(torch.stack([test_outputs, test_labels]))[0, 1].item()
    print(f"Test Loss: {avg_test_loss:.4f}, Test RMSE: {test_rmse:.4f}, Test PR: {test_pr:.4f}")
    
    return test_rmse, test_pr

# Refactor to test three encoding methods
encoding_methods = [
    "method1",
    "method2",
    "method3"
]

metrics_summary = pd.DataFrame(columns=["Method", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR", "Test RMSE", "Test PR"])

for method in encoding_methods:
    print(f"Testing {method}...")
    
    # Define different get_encodings functions for each method
    if method == "method1":
        def get_encodings(model, data_loader, device):
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features, vector_features = model(matrix, vector)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    elif method == "method2":
        def get_encodings(model, data_loader, device):
            matrix_encoder, vector_encoder = model.matrix_encoder, model.vector_encoder
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features_encoder = model.matrix_encoder(matrix)
                    vector_features_encoder = model.vector_encoder(vector)
                    matrix_features, vector_features = model(matrix, vector)
                    # Concatenate both outputs
                    matrix_features = torch.cat((matrix_features, matrix_features_encoder), dim=-1)
                    vector_features = torch.cat((vector_features, vector_features_encoder), dim=-1)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    elif method == "method3":
        def get_encodings(model, data_loader, device):
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features = model.matrix_encoder(matrix)
                    vector_features = model.vector_encoder(vector)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    
    # Get encodings
    train_matrix_encodings, train_vector_encodings = get_encodings(model, train_loader, device)
    val_matrix_encodings, val_vector_encodings = get_encodings(model, val_loader, device)
    test_matrix_encodings, test_vector_encodings = get_encodings(model, test_loader, device)

    # Train fully connected neural network to predict labels
    metrics_df, best_model_state = train_fully_connected_nn(
        train_matrix_encodings, train_vector_encodings, y_train,
        val_matrix_encodings, val_vector_encodings, y_val
    )

    # Test the final model
    torch.save(best_model_state, f"best_finetuned_fully_connected_nn_{method}.pth")
    test_rmse, test_pr = test_fully_connected_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path=f"best_finetuned_fully_connected_nn_{method}.pth")

    # Store metrics
    final_metrics = metrics_df.iloc[-1]
    metrics_summary = pd.concat([metrics_summary, pd.DataFrame([{
        "Method": method,
        "Train Loss": final_metrics["Train Loss"],
        "Train RMSE": final_metrics["Train RMSE"],
        "Valid RMSE": final_metrics["Valid RMSE"],
        "Valid PR": final_metrics["Valid PR"],
        "Test RMSE": test_rmse,
        "Test PR": test_pr
    }])], ignore_index=True)

# Save summary metrics to CSV
metrics_summary.to_csv("encoding_methods_summary.csv", index=False)


Testing method1...


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 41.0601, Train RMSE: 6.4078
Epoch [1/500], Validation Loss: 35.2109, Validation RMSE: 5.9339, Valid PR: 0.6663
Epoch [2/500], Train Loss: 39.5215, Train RMSE: 6.2866
Epoch [2/500], Validation Loss: 35.4287, Validation RMSE: 5.9522, Valid PR: 0.7000
Epoch [3/500], Train Loss: 38.6737, Train RMSE: 6.2188
Epoch [3/500], Validation Loss: 34.7783, Validation RMSE: 5.8973, Valid PR: 0.7101
Epoch [4/500], Train Loss: 38.3718, Train RMSE: 6.1945
Epoch [4/500], Validation Loss: 33.3149, Validation RMSE: 5.7719, Valid PR: 0.7556
Epoch [5/500], Train Loss: 37.9688, Train RMSE: 6.1619
Epoch [5/500], Validation Loss: 31.6154, Validation RMSE: 5.6228, Valid PR: 0.8083
Epoch [6/500], Train Loss: 36.8164, Train RMSE: 6.0677
Epoch [6/500], Validation Loss: 29.8202, Validation RMSE: 5.4608, Valid PR: 0.8691
Epoch [7/500], Train Loss: 36.4041, Train RMSE: 6.0336
Epoch [7/500], Validation Loss: 27.9349, Validation RMSE: 5.2853, Valid PR: 0.8871
Epoch [8/500], Train Loss: 35.3551

  model.load_state_dict(torch.load(model_path, map_location=device))
  metrics_summary = pd.concat([metrics_summary, pd.DataFrame([{
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 40.8831, Train RMSE: 6.3940
Epoch [1/500], Validation Loss: 171.3224, Validation RMSE: 13.0890, Valid PR: -0.3450
Epoch [2/500], Train Loss: 40.8246, Train RMSE: 6.3894
Epoch [2/500], Validation Loss: 59.4096, Validation RMSE: 7.7078, Valid PR: -0.2351
Epoch [3/500], Train Loss: 39.8143, Train RMSE: 6.3099
Epoch [3/500], Validation Loss: 49.6483, Validation RMSE: 7.0462, Valid PR: -0.4466
Epoch [4/500], Train Loss: 38.0747, Train RMSE: 6.1705
Epoch [4/500], Validation Loss: 47.5816, Validation RMSE: 6.8979, Valid PR: -0.4675
Epoch [5/500], Train Loss: 38.4693, Train RMSE: 6.2024
Epoch [5/500], Validation Loss: 48.0030, Validation RMSE: 6.9284, Valid PR: -0.4439
Epoch [6/500], Train Loss: 36.2706, Train RMSE: 6.0225
Epoch [6/500], Validation Loss: 47.2899, Validation RMSE: 6.8768, Valid PR: -0.4304
Epoch [7/500], Train Loss: 35.6352, Train RMSE: 5.9695
Epoch [7/500], Validation Loss: 46.4422, Validation RMSE: 6.8149, Valid PR: -0.4188
Epoch [8/500], Train Loss

  model.load_state_dict(torch.load(model_path, map_location=device))


Test Loss: 8.0697, Test RMSE: 2.8407, Test PR: 0.4184
Testing method3...


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 38.3125, Train RMSE: 6.1897
Epoch [1/500], Validation Loss: 60.9750, Validation RMSE: 7.8086, Valid PR: 0.7090
Epoch [2/500], Train Loss: 37.8201, Train RMSE: 6.1498
Epoch [2/500], Validation Loss: 50.0074, Validation RMSE: 7.0716, Valid PR: 0.6209
Epoch [3/500], Train Loss: 37.2656, Train RMSE: 6.1046
Epoch [3/500], Validation Loss: 48.9294, Validation RMSE: 6.9950, Valid PR: 0.6822
Epoch [4/500], Train Loss: 35.7026, Train RMSE: 5.9752
Epoch [4/500], Validation Loss: 46.4708, Validation RMSE: 6.8169, Valid PR: 0.7468
Epoch [5/500], Train Loss: 35.2868, Train RMSE: 5.9403
Epoch [5/500], Validation Loss: 46.2620, Validation RMSE: 6.8016, Valid PR: 0.7041
Epoch [6/500], Train Loss: 34.7832, Train RMSE: 5.8977
Epoch [6/500], Validation Loss: 45.5637, Validation RMSE: 6.7501, Valid PR: 0.2788
Epoch [7/500], Train Loss: 33.8981, Train RMSE: 5.8222
Epoch [7/500], Validation Loss: 44.9575, Validation RMSE: 6.7050, Valid PR: -0.2592
Epoch [8/500], Train Loss: 33.444

  model.load_state_dict(torch.load(model_path, map_location=device))


In [123]:
metrics_summary

Unnamed: 0,Method,Train Loss,Train RMSE,Valid RMSE,Valid PR,Test RMSE,Test PR
0,method1,2.324642,1.524678,1.400668,0.501312,2.553071,-0.083575
1,method2,4.350411,2.085764,2.402722,0.056041,2.840714,0.418415
2,method3,3.139814,1.771952,1.669624,0.080565,2.144782,-0.187259


In [136]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Define fully connected neural network class
class FullyConnectedNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FullyConnectedNN, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 2, hidden_dim // 8),
            nn.BatchNorm1d(hidden_dim // 8),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 8, output_dim)
        )

    def forward(self, x):
        return self.fc(x)

# Train fully connected neural network to predict labels and record metrics
def train_fully_connected_nn(train_matrix_encodings, train_vector_encodings, y_train, val_matrix_encodings, val_vector_encodings, y_val, output_dim=768, learning_rate=3*1e-3, num_epochs=500, batch_size=256, early_stop_patience=50):
    # Combine matrix and vector encodings
    train_features = torch.cat((train_matrix_encodings, train_vector_encodings), dim=1)
    val_features = torch.cat((val_matrix_encodings, val_vector_encodings), dim=1)
    
    # Create training and validation datasets
    train_dataset = torch.utils.data.TensorDataset(train_features, torch.tensor(y_train, dtype=torch.float32))
    val_dataset = torch.utils.data.TensorDataset(val_features, torch.tensor(y_val, dtype=torch.float32))
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model, loss function, and optimizer
    input_dim = train_features.size(1)
    hidden_dim = 256
    output_dim = 1  # Assuming regression task
    model = FullyConnectedNN(input_dim, hidden_dim, output_dim).to(device)
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=50, verbose=True)

    # DataFrame to store metrics
    metrics_df = pd.DataFrame(columns=["Epoch", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR"])

    # Variables to track best model
    best_val_loss = float('inf')
    early_stop_counter = 0

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)
        train_rmse = torch.sqrt(torch.tensor(avg_train_loss)).item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train RMSE: {train_rmse:.4f}")

        # Validation phase
        model.eval()
        total_val_loss = 0
        val_outputs_list = []
        val_labels_list = []
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)

                # Forward pass
                outputs = model(features).squeeze()
                val_loss = criterion(outputs, labels)
                total_val_loss += val_loss.item()
                val_outputs_list.append(outputs.cpu())
                val_labels_list.append(labels.cpu())

        avg_val_loss = total_val_loss / len(val_loader)
        val_rmse = torch.sqrt(torch.tensor(avg_val_loss)).item()
        val_outputs = torch.cat(val_outputs_list, dim=0)
        val_labels = torch.cat(val_labels_list, dim=0)
        valid_pr = torch.corrcoef(torch.stack([val_outputs, val_labels]))[0, 1].item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}, Validation RMSE: {val_rmse:.4f}, Valid PR: {valid_pr:.4f}")

        # Record metrics
        metrics_df = pd.concat([metrics_df, pd.DataFrame([{
            "Epoch": epoch + 1,
            "Train Loss": avg_train_loss,
            "Train RMSE": train_rmse,
            "Valid RMSE": val_rmse,
            "Valid PR": valid_pr
        }])], ignore_index=True)

        # Update best model if validation loss improves
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), f"best_model_state_{method}.pth")
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        # Save the last model state at the end of each epoch
        torch.save(model.state_dict(), f"last_model_state_{method}.pth")

        # Early stopping check
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

        # Learning rate decay
        scheduler.step(avg_val_loss)

    return metrics_df

# Load and test the final model and record metrics
def test_fully_connected_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path, batch_size=64):
    # Combine matrix and vector encodings
    test_features = torch.cat((test_matrix_encodings, test_vector_encodings), dim=1)
    test_dataset = torch.utils.data.TensorDataset(test_features, torch.tensor(y_test, dtype=torch.float32))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the trained model
    input_dim = test_features.size(1)
    hidden_dim = 256
    output_dim = 1  # Assuming regression task
    model = FullyConnectedNN(input_dim, hidden_dim, output_dim).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    
    # Define loss function
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    total_test_loss = 0
    test_outputs_list = []
    test_labels_list = []
    
    # Testing loop
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)
            total_test_loss += loss.item()
            test_outputs_list.append(outputs.cpu())
            test_labels_list.append(labels.cpu())
    
    avg_test_loss = total_test_loss / len(test_loader)
    test_rmse = torch.sqrt(torch.tensor(avg_test_loss)).item()
    test_outputs = torch.cat(test_outputs_list, dim=0)
    test_labels = torch.cat(test_labels_list, dim=0)
    test_pr = torch.corrcoef(torch.stack([test_outputs, test_labels]))[0, 1].item()
    print(f"Test Loss: {avg_test_loss:.4f}, Test RMSE: {test_rmse:.4f}, Test PR: {test_pr:.4f}")
    
    return test_rmse, test_pr

# Refactor to test three encoding methods
encoding_methods = [
    "method1",
    "method2",
    "method3"
]

metrics_summary = pd.DataFrame(columns=["Method", "Epoch", "Model Type", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR", "Test RMSE", "Test PR"])

for method in encoding_methods:
    print(f"Testing {method}...")
    
    # Define different get_encodings functions for each method
    if method == "method1":
        def get_encodings(model, data_loader, device):
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features, vector_features = model(matrix, vector)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    elif method == "method2":
        def get_encodings(model, data_loader, device):
            matrix_encoder, vector_encoder = model.matrix_encoder, model.vector_encoder
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features_encoder = model.matrix_encoder(matrix)
                    vector_features_encoder = model.vector_encoder(vector)
                    matrix_features, vector_features = model(matrix, vector)
                    # Concatenate both outputs
                    matrix_features = torch.cat((matrix_features, matrix_features_encoder), dim=-1)
                    vector_features = torch.cat((vector_features, vector_features_encoder), dim=-1)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    elif method == "method3":
        def get_encodings(model, data_loader, device):
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features = model.matrix_encoder(matrix)
                    vector_features = model.vector_encoder(vector)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    
    # Get encodings
    train_matrix_encodings, train_vector_encodings = get_encodings(model, train_loader, device)
    val_matrix_encodings, val_vector_encodings = get_encodings(model, val_loader, device)
    test_matrix_encodings, test_vector_encodings = get_encodings(model, test_loader, device)

    # Train fully connected neural network to predict labels
    metrics_df = train_fully_connected_nn(
        train_matrix_encodings, train_vector_encodings, y_train,
        val_matrix_encodings, val_vector_encodings, y_val
    )

    # Test the best model
    best_test_rmse, best_test_pr = test_fully_connected_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path=f"best_model_state_{method}.pth")

    # Store metrics for best model
    best_metrics = metrics_df[metrics_df['Valid RMSE'] == metrics_df['Valid RMSE'].min()].copy()
    best_metrics["Method"] = method
    best_metrics["Model Type"] = "Best"
    best_metrics["Test RMSE"] = best_test_rmse
    best_metrics["Test PR"] = best_test_pr
    metrics_summary = pd.concat([metrics_summary, best_metrics], ignore_index=True)

    # Test the last model
    last_test_rmse, last_test_pr = test_fully_connected_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path=f"last_model_state_{method}.pth")

    # Store metrics for last model
    last_metrics = metrics_df.iloc[-1:].copy()
    last_metrics["Method"] = method
    last_metrics["Model Type"] = "Last"
    last_metrics["Test RMSE"] = last_test_rmse
    last_metrics["Test PR"] = last_test_pr
    metrics_summary = pd.concat([metrics_summary, last_metrics], ignore_index=True)

# Save summary metrics to CSV
metrics_summary.to_csv("encoding_methods_summary.csv", index=False)


Testing method1...


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 39.5877, Train RMSE: 6.2919
Epoch [1/500], Validation Loss: 33.9518, Validation RMSE: 5.8268, Valid PR: 0.5168
Epoch [2/500], Train Loss: 38.2543, Train RMSE: 6.1850
Epoch [2/500], Validation Loss: 33.2271, Validation RMSE: 5.7643, Valid PR: 0.6426
Epoch [3/500], Train Loss: 38.2742, Train RMSE: 6.1866
Epoch [3/500], Validation Loss: 32.3545, Validation RMSE: 5.6881, Valid PR: 0.7508
Epoch [4/500], Train Loss: 36.7306, Train RMSE: 6.0606
Epoch [4/500], Validation Loss: 31.2323, Validation RMSE: 5.5886, Valid PR: 0.7954
Epoch [5/500], Train Loss: 36.7382, Train RMSE: 6.0612
Epoch [5/500], Validation Loss: 30.0536, Validation RMSE: 5.4821, Valid PR: 0.8094
Epoch [6/500], Train Loss: 35.1673, Train RMSE: 5.9302
Epoch [6/500], Validation Loss: 28.9409, Validation RMSE: 5.3797, Valid PR: 0.8105
Epoch [7/500], Train Loss: 35.1256, Train RMSE: 5.9267
Epoch [7/500], Validation Loss: 27.7284, Validation RMSE: 5.2658, Valid PR: 0.8047
Epoch [8/500], Train Loss: 33.9705

  model.load_state_dict(torch.load(model_path, map_location=device))
  metrics_summary = pd.concat([metrics_summary, best_metrics], ignore_index=True)
  model.load_state_dict(torch.load(model_path, map_location=device))
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 35.4392, Train RMSE: 5.9531
Epoch [1/500], Validation Loss: 19.7730, Validation RMSE: 4.4467, Valid PR: 0.4544
Epoch [2/500], Train Loss: 34.6205, Train RMSE: 5.8839
Epoch [2/500], Validation Loss: 33.9745, Validation RMSE: 5.8288, Valid PR: 0.5993
Epoch [3/500], Train Loss: 34.4731, Train RMSE: 5.8714
Epoch [3/500], Validation Loss: 35.0736, Validation RMSE: 5.9223, Valid PR: -0.4495
Epoch [4/500], Train Loss: 33.3590, Train RMSE: 5.7757
Epoch [4/500], Validation Loss: 35.3204, Validation RMSE: 5.9431, Valid PR: -0.4797
Epoch [5/500], Train Loss: 32.7683, Train RMSE: 5.7244
Epoch [5/500], Validation Loss: 35.1689, Validation RMSE: 5.9303, Valid PR: -0.3427
Epoch [6/500], Train Loss: 31.7726, Train RMSE: 5.6367
Epoch [6/500], Validation Loss: 34.7590, Validation RMSE: 5.8957, Valid PR: -0.1830
Epoch [7/500], Train Loss: 31.3995, Train RMSE: 5.6035
Epoch [7/500], Validation Loss: 34.3630, Validation RMSE: 5.8620, Valid PR: 0.0438
Epoch [8/500], Train Loss: 30.

  model.load_state_dict(torch.load(model_path, map_location=device))
  model.load_state_dict(torch.load(model_path, map_location=device))


Test Loss: 4.5228, Test RMSE: 2.1267, Test PR: -0.1722
Testing method3...


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 38.6536, Train RMSE: 6.2172
Epoch [1/500], Validation Loss: 22.5266, Validation RMSE: 4.7462, Valid PR: 0.5467
Epoch [2/500], Train Loss: 38.1707, Train RMSE: 6.1782
Epoch [2/500], Validation Loss: 33.9028, Validation RMSE: 5.8226, Valid PR: 0.6741
Epoch [3/500], Train Loss: 37.0459, Train RMSE: 6.0865
Epoch [3/500], Validation Loss: 33.3722, Validation RMSE: 5.7769, Valid PR: -0.4898
Epoch [4/500], Train Loss: 37.0268, Train RMSE: 6.0850
Epoch [4/500], Validation Loss: 31.7164, Validation RMSE: 5.6317, Valid PR: -0.5392
Epoch [5/500], Train Loss: 35.7630, Train RMSE: 5.9802
Epoch [5/500], Validation Loss: 32.1879, Validation RMSE: 5.6734, Valid PR: -0.5460
Epoch [6/500], Train Loss: 34.9689, Train RMSE: 5.9135
Epoch [6/500], Validation Loss: 33.4453, Validation RMSE: 5.7832, Valid PR: -0.5536
Epoch [7/500], Train Loss: 34.4079, Train RMSE: 5.8658
Epoch [7/500], Validation Loss: 34.8533, Validation RMSE: 5.9037, Valid PR: -0.5352
Epoch [8/500], Train Loss: 34

  model.load_state_dict(torch.load(model_path, map_location=device))
  model.load_state_dict(torch.load(model_path, map_location=device))


In [137]:
metrics_summary

Unnamed: 0,Method,Epoch,Model Type,Train Loss,Train RMSE,Valid RMSE,Valid PR,Test RMSE,Test PR
0,method1,196,Best,2.032588,1.425689,1.081038,0.688684,2.556391,-0.210374
1,method1,246,Last,1.805907,1.343841,1.370979,0.411379,2.337085,-0.230181
2,method2,85,Best,4.26861,2.066061,1.621057,0.540302,2.681737,-0.248734
3,method2,135,Last,2.742895,1.656169,2.324046,-0.450179,2.126693,-0.172201
4,method3,130,Best,2.728034,1.651676,1.321446,0.493985,2.242416,-0.089293
5,method3,180,Last,2.377199,1.541817,1.928581,-0.040235,2.078161,-0.21829


In [138]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Define transformer-based neural network class
class TransformerNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_heads, num_layers, output_dim):
        super(TransformerNN, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim // 2, output_dim)
        )

    def forward(self, x):
        x = self.embedding(x)
        x = x.unsqueeze(1)  # Adding sequence length dimension
        x = self.transformer_encoder(x)
        x = x.squeeze(1)  # Removing sequence length dimension
        return self.fc(x)

# Train transformer-based neural network to predict labels and record metrics
def train_transformer_nn(train_matrix_encodings, train_vector_encodings, y_train, val_matrix_encodings, val_vector_encodings, y_val, hidden_dim=256, num_heads=4, num_layers=2, learning_rate=3*1e-3, num_epochs=500, batch_size=256, early_stop_patience=50):
    # Combine matrix and vector encodings
    train_features = torch.cat((train_matrix_encodings, train_vector_encodings), dim=1)
    val_features = torch.cat((val_matrix_encodings, val_vector_encodings), dim=1)
    
    # Create training and validation datasets
    train_dataset = torch.utils.data.TensorDataset(train_features, torch.tensor(y_train, dtype=torch.float32))
    val_dataset = torch.utils.data.TensorDataset(val_features, torch.tensor(y_val, dtype=torch.float32))
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model, loss function, and optimizer
    input_dim = train_features.size(1)
    output_dim = 1  # Assuming regression task
    model = TransformerNN(input_dim, hidden_dim, num_heads, num_layers, output_dim).to(device)
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=50, verbose=True)

    # DataFrame to store metrics
    metrics_df = pd.DataFrame(columns=["Epoch", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR"])

    # Variables to track best model
    best_val_loss = float('inf')
    early_stop_counter = 0

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)
        train_rmse = torch.sqrt(torch.tensor(avg_train_loss)).item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train RMSE: {train_rmse:.4f}")

        # Validation phase
        model.eval()
        total_val_loss = 0
        val_outputs_list = []
        val_labels_list = []
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)

                # Forward pass
                outputs = model(features).squeeze()
                val_loss = criterion(outputs, labels)
                total_val_loss += val_loss.item()
                val_outputs_list.append(outputs.cpu())
                val_labels_list.append(labels.cpu())

        avg_val_loss = total_val_loss / len(val_loader)
        val_rmse = torch.sqrt(torch.tensor(avg_val_loss)).item()
        val_outputs = torch.cat(val_outputs_list, dim=0)
        val_labels = torch.cat(val_labels_list, dim=0)
        valid_pr = torch.corrcoef(torch.stack([val_outputs, val_labels]))[0, 1].item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}, Validation RMSE: {val_rmse:.4f}, Valid PR: {valid_pr:.4f}")

        # Record metrics
        metrics_df = pd.concat([metrics_df, pd.DataFrame([{
            "Epoch": epoch + 1,
            "Train Loss": avg_train_loss,
            "Train RMSE": train_rmse,
            "Valid RMSE": val_rmse,
            "Valid PR": valid_pr
        }])], ignore_index=True)

        # Update best model if validation loss improves
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), f"best_model_state_{method}.pth")
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        # Save the last model state at the end of each epoch
        torch.save(model.state_dict(), f"last_model_state_{method}.pth")

        # Early stopping check
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

        # Learning rate decay
        scheduler.step(avg_val_loss)

    return metrics_df

# Load and test the final model and record metrics
def test_transformer_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path, hidden_dim=256, num_heads=4, num_layers=2, batch_size=64):
    # Combine matrix and vector encodings
    test_features = torch.cat((test_matrix_encodings, test_vector_encodings), dim=1)
    test_dataset = torch.utils.data.TensorDataset(test_features, torch.tensor(y_test, dtype=torch.float32))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the trained model
    input_dim = test_features.size(1)
    output_dim = 1  # Assuming regression task
    model = TransformerNN(input_dim, hidden_dim, num_heads, num_layers, output_dim).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    
    # Define loss function
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    total_test_loss = 0
    test_outputs_list = []
    test_labels_list = []
    
    # Testing loop
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)
            total_test_loss += loss.item()
            test_outputs_list.append(outputs.cpu())
            test_labels_list.append(labels.cpu())
    
    avg_test_loss = total_test_loss / len(test_loader)
    test_rmse = torch.sqrt(torch.tensor(avg_test_loss)).item()
    test_outputs = torch.cat(test_outputs_list, dim=0)
    test_labels = torch.cat(test_labels_list, dim=0)
    test_pr = torch.corrcoef(torch.stack([test_outputs, test_labels]))[0, 1].item()
    print(f"Test Loss: {avg_test_loss:.4f}, Test RMSE: {test_rmse:.4f}, Test PR: {test_pr:.4f}")
    
    return test_rmse, test_pr

# Refactor to test three encoding methods
encoding_methods = [
    "method1",
    "method2",
    "method3"
]

metrics_summary = pd.DataFrame(columns=["Method", "Epoch", "Model Type", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR", "Test RMSE", "Test PR"])

for method in encoding_methods:
    print(f"Testing {method}...")
    
    # Define different get_encodings functions for each method
    if method == "method1":
        def get_encodings(model, data_loader, device):
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features, vector_features = model(matrix, vector)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    elif method == "method2":
        def get_encodings(model, data_loader, device):
            matrix_encoder, vector_encoder = model.matrix_encoder, model.vector_encoder
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features_encoder = model.matrix_encoder(matrix)
                    vector_features_encoder = model.vector_encoder(vector)
                    matrix_features, vector_features = model(matrix, vector)
                    # Concatenate both outputs
                    matrix_features = torch.cat((matrix_features, matrix_features_encoder), dim=-1)
                    vector_features = torch.cat((vector_features, vector_features_encoder), dim=-1)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    elif method == "method3":
        def get_encodings(model, data_loader, device):
            matrix_encodings = []
            vector_encodings = []
            with torch.no_grad():
                for matrix, vector in data_loader:
                    matrix, vector = matrix.to(device), vector.to(device)
                    matrix_features = model.matrix_encoder(matrix)
                    vector_features = model.vector_encoder(vector)
                    matrix_encodings.append(matrix_features.cpu())
                    vector_encodings.append(vector_features.cpu())

            matrix_encodings = torch.cat(matrix_encodings, dim=0)
            vector_encodings = torch.cat(vector_encodings, dim=0)
            return matrix_encodings, vector_encodings
    
    # Get encodings
    train_matrix_encodings, train_vector_encodings = get_encodings(model, train_loader, device)
    val_matrix_encodings, val_vector_encodings = get_encodings(model, val_loader, device)
    test_matrix_encodings, test_vector_encodings = get_encodings(model, test_loader, device)

    # Train transformer-based neural network to predict labels
    metrics_df = train_transformer_nn(
        train_matrix_encodings, train_vector_encodings, y_train,
        val_matrix_encodings, val_vector_encodings, y_val
    )

    # Test the best model
    best_test_rmse, best_test_pr = test_transformer_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path=f"best_model_state_{method}.pth")

    # Store metrics for best model
    best_metrics = metrics_df[metrics_df['Valid RMSE'] == metrics_df['Valid RMSE'].min()].copy()
    best_metrics["Method"] = method
    best_metrics["Model Type"] = "Best"
    best_metrics["Test RMSE"] = best_test_rmse
    best_metrics["Test PR"] = best_test_pr
    metrics_summary = pd.concat([metrics_summary, best_metrics], ignore_index=True)

    # Test the last model
    last_test_rmse, last_test_pr = test_transformer_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path=f"last_model_state_{method}.pth")

    # Store metrics for last model
    last_metrics = metrics_df.iloc[-1:].copy()
    last_metrics["Method"] = method
    last_metrics["Model Type"] = "Last"
    last_metrics["Test RMSE"] = last_test_rmse
    last_metrics["Test PR"] = last_test_pr
    metrics_summary = pd.concat([metrics_summary, last_metrics], ignore_index=True)

# Save summary metrics to CSV
metrics_summary.to_csv("encoding_methods_summary.csv", index=False)


Testing method1...




Epoch [1/500], Train Loss: 35.5746, Train RMSE: 5.9644
Epoch [1/500], Validation Loss: 28.1192, Validation RMSE: 5.3028, Valid PR: 0.7941


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [2/500], Train Loss: 32.4889, Train RMSE: 5.6999
Epoch [2/500], Validation Loss: 23.0879, Validation RMSE: 4.8050, Valid PR: 0.8529
Epoch [3/500], Train Loss: 30.1288, Train RMSE: 5.4890
Epoch [3/500], Validation Loss: 17.3053, Validation RMSE: 4.1600, Valid PR: 0.7914
Epoch [4/500], Train Loss: 28.9649, Train RMSE: 5.3819
Epoch [4/500], Validation Loss: 15.6832, Validation RMSE: 3.9602, Valid PR: 0.7433
Epoch [5/500], Train Loss: 25.7724, Train RMSE: 5.0767
Epoch [5/500], Validation Loss: 16.7092, Validation RMSE: 4.0877, Valid PR: 0.2229
Epoch [6/500], Train Loss: 23.9211, Train RMSE: 4.8909
Epoch [6/500], Validation Loss: 10.4128, Validation RMSE: 3.2269, Valid PR: 0.6545
Epoch [7/500], Train Loss: 21.1162, Train RMSE: 4.5952
Epoch [7/500], Validation Loss: 7.2028, Validation RMSE: 2.6838, Valid PR: 0.7801
Epoch [8/500], Train Loss: 19.9463, Train RMSE: 4.4661
Epoch [8/500], Validation Loss: 8.1499, Validation RMSE: 2.8548, Valid PR: 0.1267
Epoch [9/500], Train Loss: 18.1807, 

  model.load_state_dict(torch.load(model_path, map_location=device))
  metrics_summary = pd.concat([metrics_summary, best_metrics], ignore_index=True)
  model.load_state_dict(torch.load(model_path, map_location=device))
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 36.2369, Train RMSE: 6.0197
Epoch [1/500], Validation Loss: 31.9080, Validation RMSE: 5.6487, Valid PR: 0.4991
Epoch [2/500], Train Loss: 34.2277, Train RMSE: 5.8504
Epoch [2/500], Validation Loss: 35.2793, Validation RMSE: 5.9396, Valid PR: 0.2393
Epoch [3/500], Train Loss: 32.4434, Train RMSE: 5.6959
Epoch [3/500], Validation Loss: 36.1979, Validation RMSE: 6.0165, Valid PR: 0.4465
Epoch [4/500], Train Loss: 30.5154, Train RMSE: 5.5241
Epoch [4/500], Validation Loss: 35.7537, Validation RMSE: 5.9794, Valid PR: 0.5258
Epoch [5/500], Train Loss: 28.9484, Train RMSE: 5.3804
Epoch [5/500], Validation Loss: 34.7546, Validation RMSE: 5.8953, Valid PR: 0.5614
Epoch [6/500], Train Loss: 26.6735, Train RMSE: 5.1646
Epoch [6/500], Validation Loss: 32.9445, Validation RMSE: 5.7397, Valid PR: 0.5698
Epoch [7/500], Train Loss: 25.7003, Train RMSE: 5.0695
Epoch [7/500], Validation Loss: 31.3677, Validation RMSE: 5.6007, Valid PR: 0.5627
Epoch [8/500], Train Loss: 24.0105

  model.load_state_dict(torch.load(model_path, map_location=device))
  model.load_state_dict(torch.load(model_path, map_location=device))
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 35.7927, Train RMSE: 5.9827
Epoch [1/500], Validation Loss: 27.8399, Validation RMSE: 5.2764, Valid PR: 0.5456
Epoch [2/500], Train Loss: 33.6155, Train RMSE: 5.7979
Epoch [2/500], Validation Loss: 29.0170, Validation RMSE: 5.3867, Valid PR: 0.5363
Epoch [3/500], Train Loss: 31.0377, Train RMSE: 5.5711
Epoch [3/500], Validation Loss: 30.1001, Validation RMSE: 5.4864, Valid PR: 0.5475
Epoch [4/500], Train Loss: 30.1348, Train RMSE: 5.4895
Epoch [4/500], Validation Loss: 28.6606, Validation RMSE: 5.3536, Valid PR: 0.5482
Epoch [5/500], Train Loss: 27.3367, Train RMSE: 5.2285
Epoch [5/500], Validation Loss: 27.4789, Validation RMSE: 5.2420, Valid PR: 0.5605
Epoch [6/500], Train Loss: 26.4393, Train RMSE: 5.1419
Epoch [6/500], Validation Loss: 24.7414, Validation RMSE: 4.9741, Valid PR: 0.5433
Epoch [7/500], Train Loss: 25.2227, Train RMSE: 5.0222
Epoch [7/500], Validation Loss: 21.9456, Validation RMSE: 4.6846, Valid PR: 0.6295
Epoch [8/500], Train Loss: 22.6549

  model.load_state_dict(torch.load(model_path, map_location=device))
  model.load_state_dict(torch.load(model_path, map_location=device))


In [139]:
metrics_summary

Unnamed: 0,Method,Epoch,Model Type,Train Loss,Train RMSE,Valid RMSE,Valid PR,Test RMSE,Test PR
0,method1,11,Best,14.825213,3.850353,1.397021,0.663954,2.718086,0.117585
1,method1,61,Last,2.849923,1.688172,1.873621,0.206692,1.682905,0.183645
2,method2,25,Best,7.373564,2.715431,1.476381,0.508836,1.769341,-0.145751
3,method2,75,Last,3.301495,1.817002,4.741527,0.58677,3.996764,0.051575
4,method3,26,Best,6.091993,2.468196,1.471873,0.510478,1.683729,-0.203566
5,method3,76,Last,4.126519,2.031384,6.918242,0.471184,6.13715,-0.066232


In [194]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import random

# Set random seed for reproducibility
def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Set seed before every training run
set_seed(42)

# Define transformer-based neural network class
class TransformerNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_heads, num_layers, output_dim):
        super(TransformerNN, self).__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.LayerNorm(hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim // 2, output_dim)
        )

    def forward(self, x):
        x = self.embedding(x)
        x = x.unsqueeze(1)  # Adding sequence length dimension
        x = self.transformer_encoder(x)
        x = x.squeeze(1)  # Removing sequence length dimension
        return self.fc(x)

# Train transformer-based neural network to predict labels and record metrics
def train_transformer_nn(train_matrix_encodings, train_vector_encodings, y_train, val_matrix_encodings, val_vector_encodings, y_val, hidden_dim=256, num_heads=2, num_layers=1, learning_rate=3e-4, num_epochs=500, batch_size=512, early_stop_patience=50):
    # Combine matrix and vector encodings
    train_features = torch.cat((train_matrix_encodings, train_vector_encodings), dim=1)
    val_features = torch.cat((val_matrix_encodings, val_vector_encodings), dim=1)
    
    # Create training and validation datasets
    train_dataset = torch.utils.data.TensorDataset(train_features, torch.tensor(y_train, dtype=torch.float32))
    val_dataset = torch.utils.data.TensorDataset(val_features, torch.tensor(y_val, dtype=torch.float32))
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model, loss function, and optimizer
    input_dim = train_features.size(1)
    output_dim = 1  # Assuming regression task
    model = TransformerNN(input_dim, hidden_dim, num_heads, num_layers, output_dim).to(device)
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=75, verbose=True)

    # DataFrame to store metrics
    metrics_df = pd.DataFrame(columns=["Epoch", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR"])

    # Variables to track best model
    best_val_loss = float('inf')
    early_stop_counter = 0

    # Training loop
    accumulation_steps = 4
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        for i, (features, labels) in enumerate(train_loader):
            features, labels = features.to(device), labels.to(device)
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)
            # Backward pass
            loss = loss / accumulation_steps
            loss.backward()
            # Gradient accumulation
            if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
                optimizer.step()
                optimizer.zero_grad()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)
        train_rmse = torch.sqrt(torch.tensor(avg_train_loss)).item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train RMSE: {train_rmse:.4f}")

        # Validation phase
        model.eval()
        total_val_loss = 0
        val_outputs_list = []
        val_labels_list = []
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)

                # Forward pass
                outputs = model(features).squeeze()
                val_loss = criterion(outputs, labels)
                total_val_loss += val_loss.item()
                val_outputs_list.append(outputs.cpu())
                val_labels_list.append(labels.cpu())

        avg_val_loss = total_val_loss / len(val_loader)
        val_rmse = torch.sqrt(torch.tensor(avg_val_loss)).item()
        val_outputs = torch.cat(val_outputs_list, dim=0)
        val_labels = torch.cat(val_labels_list, dim=0)
        valid_pr = torch.corrcoef(torch.stack([val_outputs, val_labels]))[0, 1].item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {avg_val_loss:.4f}, Validation RMSE: {val_rmse:.4f}, Valid PR: {valid_pr:.4f}")

        # Record metrics
        metrics_df = pd.concat([metrics_df, pd.DataFrame([{
            "Epoch": epoch + 1,
            "Train Loss": avg_train_loss,
            "Train RMSE": train_rmse,
            "Valid RMSE": val_rmse,
            "Valid PR": valid_pr
        }])], ignore_index=True)

        # Update best model if validation loss improves
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save({'model_state_dict': model.state_dict(), 'hidden_dim': hidden_dim, 'num_heads': num_heads, 'num_layers': num_layers}, f"best_model_state_{method}_rep_{rep + 1}.pth")
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        # Early stopping check
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

        # Learning rate decay
        scheduler.step(avg_val_loss)

    return metrics_df

# Load and test the final model and record metrics
def test_transformer_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path, hidden_dim=256, num_heads=4, num_layers=2, batch_size=64):
    # Combine matrix and vector encodings
    test_features = torch.cat((test_matrix_encodings, test_vector_encodings), dim=1)
    test_dataset = torch.utils.data.TensorDataset(test_features, torch.tensor(y_test, dtype=torch.float32))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the trained model
    input_dim = test_features.size(1)
    output_dim = 1  # Assuming regression task
    checkpoint = torch.load(model_path, map_location=device)
    hidden_dim = checkpoint['hidden_dim']
    num_heads = checkpoint['num_heads']
    num_layers = checkpoint['num_layers']
    model = TransformerNN(input_dim, hidden_dim, num_heads, num_layers, output_dim).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    
    # Define loss function
    criterion = nn.MSELoss() if output_dim == 1 else nn.BCEWithLogitsLoss()
    total_test_loss = 0
    test_outputs_list = []
    test_labels_list = []
    
    # Testing loop
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(features).squeeze()
            loss = criterion(outputs, labels)
            total_test_loss += loss.item()
            test_outputs_list.append(outputs.cpu())
            test_labels_list.append(labels.cpu())
    
    avg_test_loss = total_test_loss / len(test_loader)
    test_rmse = torch.sqrt(torch.tensor(avg_test_loss)).item()
    test_outputs = torch.cat(test_outputs_list, dim=0)
    test_labels = torch.cat(test_labels_list, dim=0)
    test_pr = torch.corrcoef(torch.stack([test_outputs, test_labels]))[0, 1].item()
    print(f"Test Loss: {avg_test_loss:.4f}, Test RMSE: {test_rmse:.4f}, Test PR: {test_pr:.4f}")
    
    return test_rmse, test_pr

# Refactor to test three encoding methods
encoding_methods = [
    "method1",
    "method2",
    "method3"
]

metrics_summary = pd.DataFrame(columns=["Method", "Epoch", "Model Type", "Train Loss", "Train RMSE", "Valid RMSE", "Valid PR", "Test RMSE", "Test PR"])

for method in encoding_methods:
    print(f"Testing {method}...")
    for rep in range(3):
        print(f"Replication {rep + 1} for {method}...")
        
        # Define different get_encodings functions for each method
        if method == "method1":
            def get_encodings(model, data_loader, device):
                matrix_encodings = []
                vector_encodings = []
                with torch.no_grad():
                    for matrix, vector in data_loader:
                        matrix, vector = matrix.to(device), vector.to(device)
                        matrix_features, vector_features = model(matrix, vector)
                        matrix_encodings.append(matrix_features.cpu())
                        vector_encodings.append(vector_features.cpu())

                matrix_encodings = torch.cat(matrix_encodings, dim=0)
                vector_encodings = torch.cat(vector_encodings, dim=0)
                return matrix_encodings, vector_encodings
        elif method == "method2":
            def get_encodings(model, data_loader, device):
                matrix_encoder, vector_encoder = model.matrix_encoder, model.vector_encoder
                matrix_encodings = []
                vector_encodings = []
                with torch.no_grad():
                    for matrix, vector in data_loader:
                        matrix, vector = matrix.to(device), vector.to(device)
                        matrix_features_encoder = model.matrix_encoder(matrix)
                        vector_features_encoder = model.vector_encoder(vector)
                        matrix_features, vector_features = model(matrix, vector)
                        # Concatenate both outputs
                        matrix_features = torch.cat((matrix_features, matrix_features_encoder), dim=-1)
                        vector_features = torch.cat((vector_features, vector_features_encoder), dim=-1)
                        matrix_encodings.append(matrix_features.cpu())
                        vector_encodings.append(vector_features.cpu())

                matrix_encodings = torch.cat(matrix_encodings, dim=0)
                vector_encodings = torch.cat(vector_encodings, dim=0)
                return matrix_encodings, vector_encodings
        elif method == "method3":
            def get_encodings(model, data_loader, device):
                matrix_encodings = []
                vector_encodings = []
                with torch.no_grad():
                    for matrix, vector in data_loader:
                        matrix, vector = matrix.to(device), vector.to(device)
                        matrix_features = model.matrix_encoder(matrix)
                        vector_features = model.vector_encoder(vector)
                        matrix_encodings.append(matrix_features.cpu())
                        vector_encodings.append(vector_features.cpu())

                matrix_encodings = torch.cat(matrix_encodings, dim=0)
                vector_encodings = torch.cat(vector_encodings, dim=0)
                return matrix_encodings, vector_encodings
        
        # Get encodings
        train_matrix_encodings, train_vector_encodings = get_encodings(model, train_loader, device)
        val_matrix_encodings, val_vector_encodings = get_encodings(model, val_loader, device)
        test_matrix_encodings, test_vector_encodings = get_encodings(model, test_loader, device)

        # Train transformer-based neural network to predict labels
        metrics_df = train_transformer_nn(
            train_matrix_encodings, train_vector_encodings, y_train,
            val_matrix_encodings, val_vector_encodings, y_val
        )

        # Test the best model
        best_test_rmse, best_test_pr = test_transformer_nn(test_matrix_encodings, test_vector_encodings, y_test, model_path=f"best_model_state_{method}_rep_{rep + 1}.pth")

        # Store metrics for best model
        best_metrics = metrics_df[metrics_df['Valid RMSE'] == metrics_df['Valid RMSE'].min()].copy()
        best_metrics["Method"] = method
        best_metrics["Replication"] = rep + 1
        best_metrics["Model Type"] = "Best"
        best_metrics["Test RMSE"] = best_test_rmse
        best_metrics["Test PR"] = best_test_pr
        metrics_summary = pd.concat([metrics_summary, best_metrics], ignore_index=True)

# Save summary metrics to CSV
metrics_summary.to_csv("encoding_methods_summary.csv", index=False)


Testing method1...
Replication 1 for method1...


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 7.7796, Train RMSE: 2.7892
Epoch [1/500], Validation Loss: 15.1475, Validation RMSE: 3.8920, Valid PR: -0.2985
Epoch [2/500], Train Loss: 4.0086, Train RMSE: 2.0022
Epoch [2/500], Validation Loss: 10.8663, Validation RMSE: 3.2964, Valid PR: -0.5420
Epoch [3/500], Train Loss: 2.9437, Train RMSE: 1.7157
Epoch [3/500], Validation Loss: 8.7432, Validation RMSE: 2.9569, Valid PR: -0.5994
Epoch [4/500], Train Loss: 2.3292, Train RMSE: 1.5262
Epoch [4/500], Validation Loss: 7.4690, Validation RMSE: 2.7329, Valid PR: -0.5956
Epoch [5/500], Train Loss: 2.0891, Train RMSE: 1.4454
Epoch [5/500], Validation Loss: 6.5939, Validation RMSE: 2.5679, Valid PR: -0.5663
Epoch [6/500], Train Loss: 1.9912, Train RMSE: 1.4111
Epoch [6/500], Validation Loss: 5.9226, Validation RMSE: 2.4336, Valid PR: -0.4949
Epoch [7/500], Train Loss: 1.8439, Train RMSE: 1.3579
Epoch [7/500], Validation Loss: 5.4093, Validation RMSE: 2.3258, Valid PR: -0.3970
Epoch [8/500], Train Loss: 1.7612, Trai

  checkpoint = torch.load(model_path, map_location=device)
  metrics_summary = pd.concat([metrics_summary, best_metrics], ignore_index=True)
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 10.7757, Train RMSE: 3.2826
Epoch [1/500], Validation Loss: 25.5035, Validation RMSE: 5.0501, Valid PR: -0.6463
Epoch [2/500], Train Loss: 5.8029, Train RMSE: 2.4089
Epoch [2/500], Validation Loss: 17.9131, Validation RMSE: 4.2324, Valid PR: -0.6892
Epoch [3/500], Train Loss: 4.2249, Train RMSE: 2.0555
Epoch [3/500], Validation Loss: 13.9664, Validation RMSE: 3.7372, Valid PR: -0.6262
Epoch [4/500], Train Loss: 3.6464, Train RMSE: 1.9096
Epoch [4/500], Validation Loss: 11.7866, Validation RMSE: 3.4332, Valid PR: -0.5572
Epoch [5/500], Train Loss: 3.0932, Train RMSE: 1.7588
Epoch [5/500], Validation Loss: 10.4093, Validation RMSE: 3.2264, Valid PR: -0.5385
Epoch [6/500], Train Loss: 2.7990, Train RMSE: 1.6730
Epoch [6/500], Validation Loss: 9.4211, Validation RMSE: 3.0694, Valid PR: -0.5663
Epoch [7/500], Train Loss: 2.6604, Train RMSE: 1.6311
Epoch [7/500], Validation Loss: 8.6611, Validation RMSE: 2.9430, Valid PR: -0.5584
Epoch [8/500], Train Loss: 2.4736, 

  checkpoint = torch.load(model_path, map_location=device)


Test Loss: 3.2353, Test RMSE: 1.7987, Test PR: 0.0874
Replication 3 for method1...


  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 9.2155, Train RMSE: 3.0357
Epoch [1/500], Validation Loss: 23.5362, Validation RMSE: 4.8514, Valid PR: -0.6936
Epoch [2/500], Train Loss: 5.2078, Train RMSE: 2.2821
Epoch [2/500], Validation Loss: 16.8607, Validation RMSE: 4.1062, Valid PR: -0.6458
Epoch [3/500], Train Loss: 4.0268, Train RMSE: 2.0067
Epoch [3/500], Validation Loss: 13.5433, Validation RMSE: 3.6801, Valid PR: -0.5474
Epoch [4/500], Train Loss: 3.4302, Train RMSE: 1.8521
Epoch [4/500], Validation Loss: 11.5554, Validation RMSE: 3.3993, Valid PR: -0.4526
Epoch [5/500], Train Loss: 3.1069, Train RMSE: 1.7626
Epoch [5/500], Validation Loss: 10.2936, Validation RMSE: 3.2084, Valid PR: -0.3971
Epoch [6/500], Train Loss: 2.8520, Train RMSE: 1.6888
Epoch [6/500], Validation Loss: 9.3930, Validation RMSE: 3.0648, Valid PR: -0.3574
Epoch [7/500], Train Loss: 2.7563, Train RMSE: 1.6602
Epoch [7/500], Validation Loss: 8.6926, Validation RMSE: 2.9483, Valid PR: -0.3447
Epoch [8/500], Train Loss: 2.4370, T

  checkpoint = torch.load(model_path, map_location=device)
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 7.9074, Train RMSE: 2.8120
Epoch [1/500], Validation Loss: 11.4614, Validation RMSE: 3.3855, Valid PR: -0.3847
Epoch [2/500], Train Loss: 3.4957, Train RMSE: 1.8697
Epoch [2/500], Validation Loss: 8.8228, Validation RMSE: 2.9703, Valid PR: 0.5597
Epoch [3/500], Train Loss: 2.7620, Train RMSE: 1.6619
Epoch [3/500], Validation Loss: 7.6697, Validation RMSE: 2.7694, Valid PR: 0.3425
Epoch [4/500], Train Loss: 2.4342, Train RMSE: 1.5602
Epoch [4/500], Validation Loss: 6.8866, Validation RMSE: 2.6242, Valid PR: -0.1198
Epoch [5/500], Train Loss: 2.2072, Train RMSE: 1.4857
Epoch [5/500], Validation Loss: 6.3445, Validation RMSE: 2.5188, Valid PR: -0.2312
Epoch [6/500], Train Loss: 2.1014, Train RMSE: 1.4496
Epoch [6/500], Validation Loss: 5.9652, Validation RMSE: 2.4424, Valid PR: -0.5039
Epoch [7/500], Train Loss: 1.9248, Train RMSE: 1.3874
Epoch [7/500], Validation Loss: 5.7023, Validation RMSE: 2.3879, Valid PR: -0.6052
Epoch [8/500], Train Loss: 1.9715, Train R

  checkpoint = torch.load(model_path, map_location=device)
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 10.5718, Train RMSE: 3.2514
Epoch [1/500], Validation Loss: 17.8629, Validation RMSE: 4.2265, Valid PR: 0.8980
Epoch [2/500], Train Loss: 5.0817, Train RMSE: 2.2543
Epoch [2/500], Validation Loss: 11.8746, Validation RMSE: 3.4460, Valid PR: 0.3433
Epoch [3/500], Train Loss: 3.5044, Train RMSE: 1.8720
Epoch [3/500], Validation Loss: 9.5249, Validation RMSE: 3.0862, Valid PR: -0.2880
Epoch [4/500], Train Loss: 2.8648, Train RMSE: 1.6926
Epoch [4/500], Validation Loss: 8.4119, Validation RMSE: 2.9003, Valid PR: -0.4929
Epoch [5/500], Train Loss: 2.5523, Train RMSE: 1.5976
Epoch [5/500], Validation Loss: 7.7863, Validation RMSE: 2.7904, Valid PR: -0.5541
Epoch [6/500], Train Loss: 2.3717, Train RMSE: 1.5400
Epoch [6/500], Validation Loss: 7.3659, Validation RMSE: 2.7140, Valid PR: -0.5789
Epoch [7/500], Train Loss: 2.1921, Train RMSE: 1.4806
Epoch [7/500], Validation Loss: 7.0602, Validation RMSE: 2.6571, Valid PR: -0.5947
Epoch [8/500], Train Loss: 2.2100, Train

  checkpoint = torch.load(model_path, map_location=device)
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 11.4360, Train RMSE: 3.3817
Epoch [1/500], Validation Loss: 20.0828, Validation RMSE: 4.4814, Valid PR: 0.8253
Epoch [2/500], Train Loss: 5.6826, Train RMSE: 2.3838
Epoch [2/500], Validation Loss: 14.9857, Validation RMSE: 3.8711, Valid PR: 0.2111
Epoch [3/500], Train Loss: 4.3959, Train RMSE: 2.0966
Epoch [3/500], Validation Loss: 12.6134, Validation RMSE: 3.5515, Valid PR: 0.0708
Epoch [4/500], Train Loss: 3.7374, Train RMSE: 1.9332
Epoch [4/500], Validation Loss: 11.2464, Validation RMSE: 3.3536, Valid PR: -0.0818
Epoch [5/500], Train Loss: 3.2896, Train RMSE: 1.8137
Epoch [5/500], Validation Loss: 10.3516, Validation RMSE: 3.2174, Valid PR: 0.1605
Epoch [6/500], Train Loss: 3.0816, Train RMSE: 1.7554
Epoch [6/500], Validation Loss: 9.6448, Validation RMSE: 3.1056, Valid PR: 0.3397
Epoch [7/500], Train Loss: 2.9244, Train RMSE: 1.7101
Epoch [7/500], Validation Loss: 9.1260, Validation RMSE: 3.0209, Valid PR: 0.4761
Epoch [8/500], Train Loss: 2.7768, Train 

  checkpoint = torch.load(model_path, map_location=device)
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 9.0195, Train RMSE: 3.0032
Epoch [1/500], Validation Loss: 13.9802, Validation RMSE: 3.7390, Valid PR: -0.4310
Epoch [2/500], Train Loss: 4.1304, Train RMSE: 2.0323
Epoch [2/500], Validation Loss: 9.9246, Validation RMSE: 3.1503, Valid PR: 0.1135
Epoch [3/500], Train Loss: 3.0812, Train RMSE: 1.7553
Epoch [3/500], Validation Loss: 8.5526, Validation RMSE: 2.9245, Valid PR: 0.2939
Epoch [4/500], Train Loss: 2.7680, Train RMSE: 1.6637
Epoch [4/500], Validation Loss: 7.5928, Validation RMSE: 2.7555, Valid PR: 0.4786
Epoch [5/500], Train Loss: 2.3506, Train RMSE: 1.5332
Epoch [5/500], Validation Loss: 6.8718, Validation RMSE: 2.6214, Valid PR: -0.1180
Epoch [6/500], Train Loss: 2.1902, Train RMSE: 1.4799
Epoch [6/500], Validation Loss: 6.2948, Validation RMSE: 2.5089, Valid PR: -0.5543
Epoch [7/500], Train Loss: 1.9020, Train RMSE: 1.3791
Epoch [7/500], Validation Loss: 5.8556, Validation RMSE: 2.4198, Valid PR: -0.5246
Epoch [8/500], Train Loss: 1.8673, Train RM

  checkpoint = torch.load(model_path, map_location=device)
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 9.5509, Train RMSE: 3.0905
Epoch [1/500], Validation Loss: 14.3740, Validation RMSE: 3.7913, Valid PR: 0.4244
Epoch [2/500], Train Loss: 4.2126, Train RMSE: 2.0525
Epoch [2/500], Validation Loss: 10.2709, Validation RMSE: 3.2048, Valid PR: 0.5515
Epoch [3/500], Train Loss: 3.1053, Train RMSE: 1.7622
Epoch [3/500], Validation Loss: 8.8349, Validation RMSE: 2.9724, Valid PR: 0.4356
Epoch [4/500], Train Loss: 2.8104, Train RMSE: 1.6764
Epoch [4/500], Validation Loss: 7.9462, Validation RMSE: 2.8189, Valid PR: 0.3699
Epoch [5/500], Train Loss: 2.5512, Train RMSE: 1.5972
Epoch [5/500], Validation Loss: 7.2989, Validation RMSE: 2.7016, Valid PR: 0.3711
Epoch [6/500], Train Loss: 2.3421, Train RMSE: 1.5304
Epoch [6/500], Validation Loss: 6.8148, Validation RMSE: 2.6105, Valid PR: 0.4156
Epoch [7/500], Train Loss: 2.3078, Train RMSE: 1.5191
Epoch [7/500], Validation Loss: 6.4448, Validation RMSE: 2.5387, Valid PR: 0.2997
Epoch [8/500], Train Loss: 1.9916, Train RMSE:

  checkpoint = torch.load(model_path, map_location=device)
  metrics_df = pd.concat([metrics_df, pd.DataFrame([{


Epoch [1/500], Train Loss: 11.3094, Train RMSE: 3.3629
Epoch [1/500], Validation Loss: 14.7058, Validation RMSE: 3.8348, Valid PR: 0.3587
Epoch [2/500], Train Loss: 4.4556, Train RMSE: 2.1108
Epoch [2/500], Validation Loss: 9.9291, Validation RMSE: 3.1510, Valid PR: 0.2585
Epoch [3/500], Train Loss: 3.0838, Train RMSE: 1.7561
Epoch [3/500], Validation Loss: 8.4251, Validation RMSE: 2.9026, Valid PR: 0.1513
Epoch [4/500], Train Loss: 2.5859, Train RMSE: 1.6081
Epoch [4/500], Validation Loss: 7.6484, Validation RMSE: 2.7656, Valid PR: 0.0190
Epoch [5/500], Train Loss: 2.2583, Train RMSE: 1.5028
Epoch [5/500], Validation Loss: 7.1441, Validation RMSE: 2.6729, Valid PR: 0.1646
Epoch [6/500], Train Loss: 2.3136, Train RMSE: 1.5210
Epoch [6/500], Validation Loss: 6.7650, Validation RMSE: 2.6010, Valid PR: 0.1410
Epoch [7/500], Train Loss: 2.2460, Train RMSE: 1.4987
Epoch [7/500], Validation Loss: 6.4676, Validation RMSE: 2.5431, Valid PR: -0.0795
Epoch [8/500], Train Loss: 2.0826, Train RMSE

  checkpoint = torch.load(model_path, map_location=device)


In [195]:
metrics_summary

Unnamed: 0,Method,Epoch,Model Type,Train Loss,Train RMSE,Valid RMSE,Valid PR,Test RMSE,Test PR,Replication
0,method1,198,Best,0.49953,0.706775,0.655467,0.922382,1.859185,0.122103,1.0
1,method1,266,Best,0.42736,0.653728,0.717947,0.929559,1.798706,0.087363,2.0
2,method1,259,Best,0.55069,0.742085,0.618312,0.935124,1.782357,0.12566,3.0
3,method2,254,Best,0.641414,0.800883,1.362149,0.484742,1.759069,-0.150561,1.0
4,method2,352,Best,0.623433,0.789578,1.3741,0.381237,1.858708,-0.114307,2.0
5,method2,379,Best,0.657787,0.811041,1.362216,0.407839,1.889383,-0.162566,3.0
6,method3,146,Best,0.693114,0.832534,1.480243,0.391878,1.747118,-0.114832,1.0
7,method3,268,Best,0.664402,0.815109,1.385777,0.434656,1.795099,-0.174513,2.0
8,method3,156,Best,0.742992,0.86197,1.479654,0.432712,1.753488,-0.117452,3.0
