# SafeTensors Functionality Test

This notebook tests SafeTensors functionality in flexynesis-mps. SafeTensors is a secure format for storing ML model weights that avoids the security risks of pickle-based formats.

In [1]:
# Import required libraries
import torch
import tempfile
import os
from safetensors.torch import save_file, load_file
import flexynesis

print("Testing SafeTensors functionality...")
print(f"PyTorch version: {torch.__version__}")

# Select the compute device: the MPS backend when PyTorch reports it as
# available, otherwise fall back to the CPU.
mps_available = torch.backends.mps.is_available()
device = torch.device("mps" if mps_available else "cpu")
print("Using MPS backend" if mps_available else "Using CPU backend")

Testing SafeTensors functionality...
PyTorch version: 2.8.0
Using MPS backend


In [2]:
# Build random tensors of several ranks to round-trip through SafeTensors.
# Shapes are listed in the order the tensors are generated.
tensor_shapes = {
    "weight_matrix": (256, 128),
    "bias_vector": (128,),
    "embeddings": (1000, 64),
    "conv_weight": (32, 3, 3, 3),
}
test_tensors = {
    tensor_name: torch.randn(*shape, device=device)
    for tensor_name, shape in tensor_shapes.items()
}

print("Created test tensors:")
for name in test_tensors:
    tensor = test_tensors[name]
    print(f"  {name}: {tuple(tensor.shape)} ({tensor.dtype})")

Created test tensors:
  weight_matrix: (256, 128) (torch.float32)
  bias_vector: (128,) (torch.float32)
  embeddings: (1000, 64) (torch.float32)
  conv_weight: (32, 3, 3, 3) (torch.float32)


In [3]:
# Write the test tensors to a .safetensors file inside a fresh temporary directory
demo_dir = tempfile.mkdtemp(prefix="safetensors_test_")
safetensors_path = os.path.join(demo_dir, "test_model.safetensors")

print(f"Saving tensors to: {safetensors_path}")
save_file(test_tensors, safetensors_path)

# Report whether the file exists on disk and, if it does, how large it is
if not os.path.exists(safetensors_path):
    print("ERROR: File was not created")
else:
    file_size = os.path.getsize(safetensors_path)
    print(f"File created successfully. Size: {file_size / (1024**2):.2f} MB")

Saving tensors to: /var/folders/yh/0j70fczd72jcxjytt2t_9b_m0000gn/T/safetensors_test_03wvrbpk/test_model.safetensors
File created successfully. Size: 0.37 MB


In [4]:
# Read the tensors back from the file written by the save step
print("Loading tensors from SafeTensors file...")
loaded_tensors = load_file(safetensors_path)

# List every tensor found in the file with its shape and dtype
print("Loaded tensors:")
for name in loaded_tensors:
    tensor = loaded_tensors[name]
    print(f"  {name}: {tuple(tensor.shape)} ({tensor.dtype})")

Loading tensors from SafeTensors file...
Loaded tensors:
  bias_vector: (128,) (torch.float32)
  conv_weight: (32, 3, 3, 3) (torch.float32)
  embeddings: (1000, 64) (torch.float32)
  weight_matrix: (256, 128) (torch.float32)


In [6]:
# Verify data integrity: every saved tensor must come back from the file with
# identical contents, and the file must not contain tensors that were never saved.
print("Verifying data integrity...")
all_correct = True

for name, original in test_tensors.items():
    loaded = loaded_tensors.get(name)

    # A tensor missing from the file is reported as a failure rather than
    # aborting the loop with a bare KeyError.
    if loaded is None:
        print(f"  {name}: FAILED (missing from loaded file)")
        all_correct = False
        continue

    # Move loaded tensor to same device as original for comparison
    loaded = loaded.to(original.device)

    if torch.equal(original, loaded):
        print(f"  {name}: OK")
    else:
        print(f"  {name}: FAILED")
        all_correct = False

# Tensors present in the file but never saved also mean the round trip is broken.
for name in sorted(set(loaded_tensors) - set(test_tensors)):
    print(f"  {name}: FAILED (unexpected tensor in loaded file)")
    all_correct = False

if all_correct:
    print("All tensors loaded correctly!")
else:
    print("Some tensors failed verification!")

# Fail the cell (and therefore any automated "Run All") instead of only
# printing the outcome, so a broken round trip cannot go unnoticed.
assert all_correct, "SafeTensors round trip did not reproduce the original tensors"

Verifying data integrity...
  weight_matrix: OK
  bias_vector: OK
  embeddings: OK
  conv_weight: OK
All tensors loaded correctly!


In [8]:
# Test with flexynesis model: fetch the benchmark dataset if it is not already
# present in the working directory, then import its mutation layer.
import subprocess

print("Testing SafeTensors with flexynesis model...")

# Download test data if needed
if not os.path.exists("lgggbm_tcga_pub_processed"):
    print("Downloading test dataset...")
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # error page as the archive; check=True turns a non-zero exit status into
    # CalledProcessError, so a failed download or extraction stops here rather
    # than surfacing later as a confusing error inside DataImporter.
    subprocess.run(
        ["curl", "--fail", "-O", "https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis-benchmark-datasets/lgggbm_tcga_pub_processed.tgz"],
        check=True,
    )
    subprocess.run(["tar", "-xzvf", "lgggbm_tcga_pub_processed.tgz"], check=True)

# Load data using the correct import method
data_importer = flexynesis.data.DataImporter(
    path='./lgggbm_tcga_pub_processed/', 
    data_types=['mut'], 
    concatenate=False, 
    top_percentile=5, 
    min_features=100
)
# import_data() returns a pair; only the first element (training set) is used here.
train_dataset, _ = data_importer.import_data()
print(f"Dataset loaded: {train_dataset.dat['mut'].shape}")

Testing SafeTensors with flexynesis model...





[INFO] Validating data folders...

[INFO] ----------------- Reading Data ----------------- 
[INFO] Importing ./lgggbm_tcga_pub_processed/train/mut.csv...
[INFO] Importing ./lgggbm_tcga_pub_processed/train/clin.csv...

[INFO] ----------------- Reading Data ----------------- 
[INFO] Importing ./lgggbm_tcga_pub_processed/test/mut.csv...
[INFO] Importing ./lgggbm_tcga_pub_processed/test/clin.csv...

[INFO] ----------------- Checking for problems with the input data ----------------- 

[INFO] ----------------- Processing Data (train) ----------------- 

[INFO] ----------------- Cleaning Up Data ----------------- 

[INFO] working on layer:  mut
[INFO] Number of NA values:  0
[INFO] DataFrame mut - Removed 0 features.
[INFO] DataFrame mut - Removed 0 samples (0.00%).
[INFO] Implementing feature selection using laplacian score for layer: mut with  11064 features  and  556  samples 

Calculating Laplacian scores: 100%|██████████| 11064/11064 [00:00<00:00, 17689.35it/s]
Filtering redundant features: 100%|██████████| 553/553 [00:00<00:00, 22572.41it/s]




[INFO] ----------------- Processing Data (test) ----------------- 

[INFO] ----------------- Cleaning Up Data ----------------- 

[INFO] working on layer:  mut
[INFO] Number of NA values:  0
[INFO] DataFrame mut - Removed 0 features.
[INFO] DataFrame mut - Removed 0 samples (0.00%).

[INFO] ----------------- Harmonizing Data Sets ----------------- 

[INFO] ----------------- Finished Harmonizing ----------------- 

[INFO] ----------------- Normalizing Data ----------------- 

[INFO] ----------------- Normalizing Data ----------------- 
[INFO] Training Data Stats:  {'feature_count in: mut': 553, 'sample_count': 556}
[INFO] Test Data Stats:  {'feature_count in: mut': 553, 'sample_count': 238}
[INFO] Merging Feature Logs...
[INFO] Data import successful.
Dataset loaded: torch.Size([556, 553])


In [15]:
# Build a DirectPred model on the imported dataset and fit it for one epoch
import lightning as pl
from torch.utils.data import DataLoader

# Hand-written, complete hyperparameter config
# (the learning-rate key is 'lr', not 'learning_rate')
config = dict(
    hidden_dim_factor=0.5,
    latent_dim=32,
    supervisor_hidden_dim=16,
    lr=0.001,
    weight_decay=0.01,
    dropout_rate=0.1,
)

# Device type handed to the model: 'mps' when PyTorch reports it available, else 'cpu'
device_type = 'mps' if torch.backends.mps.is_available() else 'cpu'
model = flexynesis.models.DirectPred(
    config=config,
    dataset=train_dataset,
    target_variables=['HISTOLOGICAL_DIAGNOSIS'],
    device_type=device_type,
)

# Shuffled mini-batches of 16 samples
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16)

# Single-epoch fit with checkpointing and logging switched off
trainer = pl.Trainer(logger=False, enable_checkpointing=False, max_epochs=1)
trainer.fit(model, train_dataloader)
print("Model training completed")

GPU available: True (mps), used: True


TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name     | Type          | Params | Mode 
---------------------------------------------------
0 | log_vars | ParameterDict | 1      | train
1 | encoders | ModuleList    | 162 K  | train
2 | MLPs     | ModuleDict    | 645    | train
---------------------------------------------------
162 K     Trainable params
0         Non-trainable params
162 K     Total params
0.652     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


Model training completed


In [16]:
# Test SafeTensors with trained model: save the model's state dict, load it
# back, and check that both the parameter names and the stored values survive.
model_path = os.path.join(demo_dir, "flexynesis_model.safetensors")

# Fetch the state dict once so saving, counting and comparing all use the same mapping.
original_state = model.state_dict()

print("Saving flexynesis model with SafeTensors...")
save_file(original_state, model_path)

print("Loading flexynesis model from SafeTensors...")
loaded_state = load_file(model_path)

print(f"Model parameters saved: {len(original_state)}")
print(f"Model parameters loaded: {len(loaded_state)}")

# Verify parameter names match
original_keys = set(original_state.keys())
loaded_keys = set(loaded_state.keys())
names_match = original_keys == loaded_keys

if names_match:
    print("Parameter names match: OK")
else:
    print("Parameter names mismatch: FAILED")

# Verify parameter values match. Matching names alone would not catch corrupted
# weights, so compare every tensor present on both sides; the loaded tensor is
# moved to the original's device first so the comparison is valid.
mismatched = sorted(
    name
    for name in original_keys & loaded_keys
    if not torch.equal(
        original_state[name],
        loaded_state[name].to(original_state[name].device),
    )
)

if not mismatched:
    print("Parameter values match: OK")
else:
    print(f"Parameter values mismatch: FAILED ({', '.join(mismatched)})")

# Fail the cell instead of only printing, so a broken round trip stops a Run All.
assert names_match and not mismatched, "SafeTensors round trip did not reproduce the model state"

Saving flexynesis model with SafeTensors...
Loading flexynesis model from SafeTensors...
Model parameters saved: 19
Model parameters loaded: 19
Parameter names match: OK


In [17]:
# Clean up and summary
import shutil

# Remove the temporary directory created for this test. The existence check
# keeps the cell idempotent: re-running it after the directory is already gone
# would otherwise raise FileNotFoundError.
if os.path.isdir(demo_dir):
    shutil.rmtree(demo_dir)

print("SafeTensors functionality test completed")

SafeTensors functionality test completed
