In [1]:
import ssgetpy
import json
import os

def fetch_and_store_results(filename="ss_matrix_metadata.json", limit=3000):
    """
    Fetch metadata for matrices in the SuiteSparse Matrix Collection and cache it as JSON.

    The metadata file is only (over)written after the search has succeeded, so a
    failed fetch leaves a previously saved file untouched.

    Parameters:
        filename (str): Path of the JSON file to write.
        limit (int or None): Maximum number of matrices requested from ssgetpy.search.
            The default of 3000 exceeded the collection size when this was written;
            a warning is printed if the result count reaches the limit.

    Returns:
        None. The metadata is written to `filename` as a JSON list of dicts.
    """
    print("Fetching all results...")
    results = ssgetpy.search(limit=limit)
    print(f"Total matrices found: {len(results)}")

    # The collection grows over time; hitting the cap means entries may be missing.
    if limit is not None and len(results) >= limit:
        print(f"Warning: result count reached limit={limit}; the collection may contain more matrices.")

    matrix_metadata = []
    for i, result in enumerate(results):
        print(f"Processing matrix {i + 1}/{len(results)}: {result.name}")
        total_elements = result.rows * result.cols
        # A matrix with zero rows or columns has no entries at all; report it as
        # fully sparse instead of dividing by zero.
        sparsity = 1 - (result.nnz / total_elements) if total_elements else 1.0

        matrix_metadata.append({
            "id": result.id,
            "name": result.name,
            "group": result.group,
            "rows": result.rows,
            "cols": result.cols,
            "nnz": result.nnz,
            "sparsity": sparsity,
            "kind": result.kind,
            "is2d3d": result.is2d3d,
            "isSPD": result.isspd
        })

    # Opening with "w" truncates any previous file, so no explicit delete is needed.
    if os.path.exists(filename):
        print(f"Existing metadata file '{filename}' will be overwritten.")

    with open(filename, "w") as f:
        json.dump(matrix_metadata, f, indent=4)

    print(f"Matrix metadata saved to {filename}")

if __name__ == "__main__":
    # Build the local JSON metadata cache under its default file name.
    metadata_file = "ss_matrix_metadata.json"
    fetch_and_store_results(filename=metadata_file)


Fetching all results...
Total matrices found: 2904
Processing matrix 1/2904: 1138_bus
Processing matrix 2/2904: 494_bus
Processing matrix 3/2904: 662_bus
Processing matrix 4/2904: 685_bus
Processing matrix 5/2904: abb313
Processing matrix 6/2904: arc130
Processing matrix 7/2904: ash219
Processing matrix 8/2904: ash292
Processing matrix 9/2904: ash331
Processing matrix 10/2904: ash608
Processing matrix 11/2904: ash85
Processing matrix 12/2904: ash958
Processing matrix 13/2904: bcspwr01
Processing matrix 14/2904: bcspwr02
Processing matrix 15/2904: bcspwr03
Processing matrix 16/2904: bcspwr04
Processing matrix 17/2904: bcspwr05
Processing matrix 18/2904: bcspwr06
Processing matrix 19/2904: bcspwr07
Processing matrix 20/2904: bcspwr08
Processing matrix 21/2904: bcspwr09
Processing matrix 22/2904: bcspwr10
Processing matrix 23/2904: bcsstk01
Processing matrix 24/2904: bcsstk02
Processing matrix 25/2904: bcsstk03
Processing matrix 26/2904: bcsstk04
Processing matrix 27/2904: bcsstk05
Proces

In [8]:
def filter_matrices(filename="ss_matrix_metadata.json", row_range=None, col_range=None, sparsity_range=None):
    """
    Select the metadata entries whose rows, cols and sparsity fall inside the given ranges.

    Parameters:
        filename (str): Path to the JSON metadata file.
        row_range (tuple): Inclusive (min, max) bounds on "rows"; falsy means no restriction.
        col_range (tuple): Inclusive (min, max) bounds on "cols"; falsy means no restriction.
        sparsity_range (tuple): Inclusive (min, max) bounds on "sparsity"; falsy means no restriction.

    Returns:
        list: The matching metadata dicts, in file order; an empty list if the file is missing.
    """
    metadata_missing = not os.path.exists(filename)
    if metadata_missing:
        print(f"Metadata file {filename} not found. Maybe run the fetch_and_store_results() first!")
        return []

    with open(filename, "r") as handle:
        records = json.load(handle)

    # Each clause reads "no range given, or value inside the inclusive range".
    return [
        record
        for record in records
        if (not row_range or row_range[0] <= record["rows"] <= row_range[1])
        and (not col_range or col_range[0] <= record["cols"] <= col_range[1])
        and (not sparsity_range or sparsity_range[0] <= record["sparsity"] <= sparsity_range[1])
    ]

if __name__ == "__main__":
    # Cached collection metadata to filter.
    metadata_file = "ss_matrix_metadata.json"

    # Inclusive (min, max) bounds; rows and columns share the same window.
    row_range = col_range = (10000, 50000)
    sparsity_range = (0.9, 1.0)

    filtered_matrices = filter_matrices(metadata_file, row_range, col_range, sparsity_range)

    # Summary line followed by one line per matching matrix.
    print(f"Found {len(filtered_matrices)} matrices matching the criteria")
    for matrix in filtered_matrices:
        print(f"  Name: {matrix['name']}, Rows: {matrix['rows']}, Cols: {matrix['cols']}, Sparsity: {matrix['sparsity']:.6f}")
        
    

Found 503 matrices matching the criteria
  Name: bcsstk17, Rows: 10974, Cols: 10974, Sparsity: 0.996441
  Name: bcsstk18, Rows: 11948, Cols: 11948, Sparsity: 0.998956
  Name: bcsstk25, Rows: 15439, Cols: 15439, Sparsity: 0.998942
  Name: bcsstk29, Rows: 13992, Cols: 13992, Sparsity: 0.996836
  Name: bcsstk30, Rows: 28924, Cols: 28924, Sparsity: 0.997557
  Name: bcsstk31, Rows: 35588, Cols: 35588, Sparsity: 0.999067
  Name: bcsstk32, Rows: 44609, Cols: 44609, Sparsity: 0.998988
  Name: bcsstm25, Rows: 15439, Cols: 15439, Sparsity: 0.999935
  Name: onetone1, Rows: 36057, Cols: 36057, Sparsity: 0.999742
  Name: onetone2, Rows: 36057, Cols: 36057, Sparsity: 0.999829
  Name: epb1, Rows: 14734, Cols: 14734, Sparsity: 0.999562
  Name: epb2, Rows: 25228, Cols: 25228, Sparsity: 0.999725
  Name: af23560, Rows: 23560, Cols: 23560, Sparsity: 0.999170
  Name: bcsstk35, Rows: 30237, Cols: 30237, Sparsity: 0.998414
  Name: bcsstk36, Rows: 23052, Cols: 23052, Sparsity: 0.997849
  Name: bcsstk37, Rows:

In [28]:
import ssgetpy
from IPython.display import display
import matplotlib.pyplot as plt
from scipy.io import mmread

# Names of the matrices to look up
matrix_names = ['1138_bus', 'bcsstk01', 'bcspwr01']  # Add more names as needed

# Gather the search hits for every requested name into one flat list
all_matrices = []
for name in matrix_names:
    results = ssgetpy.search(name=name)
    all_matrices += results

# Rich-display every hit, one after another
for matrix in all_matrices:
    display(matrix)


Id,Group,Name,Rows,Cols,NNZ,DType,2D/3D Discretization?,SPD?,Pattern Symmetry,Numerical Symmetry,Kind,Spy Plot
1,HB,1138_bus,1138,1138,4054,real,No,Yes,1.0,1.0,power network problem,


Id,Group,Name,Rows,Cols,NNZ,DType,2D/3D Discretization?,SPD?,Pattern Symmetry,Numerical Symmetry,Kind,Spy Plot
23,HB,bcsstk01,48,48,400,real,Yes,Yes,1.0,1.0,structural problem,


Id,Group,Name,Rows,Cols,NNZ,DType,2D/3D Discretization?,SPD?,Pattern Symmetry,Numerical Symmetry,Kind,Spy Plot
13,HB,bcspwr01,39,39,131,binary,No,No,1.0,1.0,power network problem,


In [42]:
import ssgetpy
from IPython.display import display
import matplotlib.pyplot as plt
from scipy.io import mmread

# Directory that receives the downloaded (and extracted) files
dest_dir = "../ssmatrices"

# Candidate matrix names; only the first one is used below
matrix_names = ['1138_bus', 'bcsstk01', 'bcspwr01']  # Add more names as needed

# Look up the first name and show what the search returned
matrix = ssgetpy.search(name=matrix_names[0])
display(matrix)

print("Download business")

# Download the search result in Matrix Market format and extract it
matrix.download(format='MM', destpath=dest_dir, extract=True)


Id,Group,Name,Rows,Cols,NNZ,DType,2D/3D Discretization?,SPD?,Pattern Symmetry,Numerical Symmetry,Kind,Spy Plot
1,HB,1138_bus,1138,1138,4054,real,No,Yes,1.0,1.0,power network problem,


Download business


Overall progress:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
import os
import shutil
import json
import ssgetpy

def filter_matrices(filename, row_range=None, col_range=None, sparsity_range=None):
    """
    Return the metadata entries that satisfy every supplied range.

    Parameters:
        filename (str): Path to the JSON metadata file.
        row_range (tuple): Inclusive (min, max) for the "rows" field, or None to ignore it.
        col_range (tuple): Inclusive (min, max) for the "cols" field, or None to ignore it.
        sparsity_range (tuple): Inclusive (min, max) for the "sparsity" field, or None to ignore it.

    Returns:
        list: Matching metadata dicts in file order ([] when the file does not exist).
    """
    if not os.path.exists(filename):
        print(f"Metadata file {filename} not found. Maybe run the fetch_and_store_results() first!")
        return []

    with open(filename, "r") as source:
        catalog = json.load(source)

    def _in_bounds(entry, field, bounds):
        # A missing/empty range places no restriction on the field.
        if not bounds:
            return True
        return bounds[0] <= entry[field] <= bounds[1]

    # Checked in this order; all() stops at the first failing criterion.
    criteria = (
        ("rows", row_range),
        ("cols", col_range),
        ("sparsity", sparsity_range),
    )

    selected = []
    for entry in catalog:
        if all(_in_bounds(entry, field, bounds) for field, bounds in criteria):
            selected.append(entry)
    return selected

def download_and_save_matrices(filtered_matrices, dest_dir):
    """
    Download filtered matrices and save their .mtx files directly to the destination directory.

    For every metadata entry the matrix is searched by name, downloaded in Matrix
    Market format with extraction enabled, and its .mtx file is moved out of the
    per-matrix sub-folder, which is then removed. Errors for one matrix are printed
    and do not stop the remaining downloads.

    Parameters:
        filtered_matrices (list): List of filtered matrix metadata dicts (each needs a
            "name" key; an "id" key, when present, is used to confirm the match).
        dest_dir (str): Path to the directory where the .mtx files should be saved.
    """

    for matrix_meta in filtered_matrices:
        matrix_name = matrix_meta["name"]
        print(f"Downloading matrix: {matrix_name}")
        try:
            candidates = ssgetpy.search(name=matrix_name, limit=10)  # Search for the matrix

            # Taking the first hit blindly can select a different matrix (or raise
            # IndexError on an empty result), so require an exact name match and,
            # when the metadata carries an id, an id match as well.
            wanted_id = matrix_meta.get("id")
            matrix = next(
                (
                    candidate
                    for candidate in candidates
                    if candidate.name == matrix_name
                    and (wanted_id is None or candidate.id == wanted_id)
                ),
                None,
            )
            if matrix is None:
                print(f"No exact match found for matrix: {matrix_name}")
                continue

            matrix.download(format='MM', destpath=dest_dir, extract=True)  # Download matrix

            # Find and move .mtx file
            extracted_dir = os.path.join(dest_dir, matrix_name)
            # os.listdir order is arbitrary; sort so the fallback choice is deterministic.
            mtx_files = sorted(f for f in os.listdir(extracted_dir) if f.endswith('.mtx'))

            if not mtx_files:
                print(f"No .mtx file found for matrix: {matrix_name}")
                continue

            # Prefer "<name>.mtx" over any other .mtx file found in the folder.
            expected_file = f"{matrix_name}.mtx"
            mtx_file = expected_file if expected_file in mtx_files else mtx_files[0]
            shutil.move(os.path.join(extracted_dir, mtx_file), os.path.join(dest_dir, mtx_file))

            # Remove the extracted folder and whatever else it still contains
            shutil.rmtree(extracted_dir)
            print(f"Matrix '{matrix_name}' saved as: {mtx_file}")
        except Exception as e:
            # Best effort: report the failure and continue with the next matrix.
            print(f"Error downloading matrix '{matrix_name}': {e}")

if __name__ == "__main__":
    # Locations: cached metadata in, downloaded .mtx files out
    dest_dir = "../ssmatrices"
    metadata_file = "../ssmatrices/ss_matrix_metadata.json"

    # Inclusive (min, max) bounds; rows and columns share the same window
    row_range = col_range = (10000, 50000)
    sparsity_range = (0.9, 1.0)

    # Select the matrices of interest from the cached metadata
    filtered_matrices = filter_matrices(metadata_file, row_range, col_range, sparsity_range)

    print(f"Found {len(filtered_matrices)} matrices matching the criteria")

    # Fetch every selected matrix into dest_dir
    download_and_save_matrices(filtered_matrices=filtered_matrices, dest_dir=dest_dir)


Found 503 matrices matching the criteria
Downloading matrix: bcsstk17


bcsstk17:   0%|          | 0/1915413 [00:00<?, ?B/s]

KeyboardInterrupt: 

In [8]:
import os
import shutil
import json
import ssgetpy

def filter_matrices(filename, row_range=None, col_range=None, sparsity_range=None):
    """
    Load the metadata file and keep only entries inside the requested ranges.

    Parameters:
        filename (str): Path to the JSON metadata file.
        row_range (tuple): Inclusive min/max number of rows (skipped when falsy).
        col_range (tuple): Inclusive min/max number of columns (skipped when falsy).
        sparsity_range (tuple): Inclusive min/max sparsity (skipped when falsy).

    Returns:
        list: Metadata dicts that pass every active check, in file order.
              Empty when the metadata file cannot be found.
    """
    if os.path.exists(filename):
        with open(filename, "r") as fh:
            entries = json.load(fh)
    else:
        print(f"Metadata file {filename} not found. Maybe run the fetch_and_store_results() first!")
        return []

    def _accept(item):
        # Reject on the first violated range; checks run rows -> cols -> sparsity.
        if row_range and not (row_range[0] <= item["rows"] <= row_range[1]):
            return False
        if col_range and not (col_range[0] <= item["cols"] <= col_range[1]):
            return False
        if sparsity_range and not (sparsity_range[0] <= item["sparsity"] <= sparsity_range[1]):
            return False
        return True

    return list(filter(_accept, entries))

def download_and_save_matrices(filtered_matrices, dest_dir):
    """
    Download filtered matrices and save their .mtx files directly to the destination directory, instead of under a sub-folder.

    Matrices whose "<name>.mtx" already exists in dest_dir are skipped. For the
    others the matrix is searched by name, downloaded in Matrix Market format with
    extraction enabled, and its .mtx file is moved to "<dest_dir>/<name>.mtx"; the
    per-matrix sub-folder is then removed. Errors for one matrix are printed and do
    not stop the remaining downloads.

    Parameters:
        filtered_matrices (list): List of filtered matrix metadata dicts (each needs a
            "name" key; an "id" key, when present, is used to confirm the match).
        dest_dir (str): Path to the directory where the .mtx files should be saved.
    """
    os.makedirs(dest_dir, exist_ok=True)

    for matrix_meta in filtered_matrices:
        matrix_name = matrix_meta["name"]
        mtx_file_path = os.path.join(dest_dir, f"{matrix_name}.mtx")

        # Check if the .mtx file already exists
        if os.path.exists(mtx_file_path):
            print(f"Matrix '{matrix_name}' already exists at {mtx_file_path}. Skipping download.")
            continue

        print(f"Downloading matrix: {matrix_name}")
        try:
            candidates = ssgetpy.search(name=matrix_name, limit=10)  # Search for the matrix

            # Taking the first hit blindly can select a different matrix (or raise
            # IndexError on an empty result), so require an exact name match and,
            # when the metadata carries an id, an id match as well.
            wanted_id = matrix_meta.get("id")
            matrix = next(
                (
                    candidate
                    for candidate in candidates
                    if candidate.name == matrix_name
                    and (wanted_id is None or candidate.id == wanted_id)
                ),
                None,
            )
            if matrix is None:
                print(f"No exact match found for matrix: {matrix_name}")
                continue

            matrix.download(format='MM', destpath=dest_dir, extract=True)  # Download matrix

            # Find and move .mtx file
            extracted_dir = os.path.join(dest_dir, matrix_name)
            # os.listdir order is arbitrary; sort so the fallback choice is deterministic.
            mtx_files = sorted(f for f in os.listdir(extracted_dir) if f.endswith('.mtx'))

            if not mtx_files:
                print(f"No .mtx file found for matrix: {matrix_name}")
                continue

            # Prefer "<name>.mtx": the chosen file is stored under the matrix's own
            # name, so picking an auxiliary .mtx here would mislabel it.
            expected_file = f"{matrix_name}.mtx"
            mtx_file = expected_file if expected_file in mtx_files else mtx_files[0]
            shutil.move(os.path.join(extracted_dir, mtx_file), mtx_file_path)

            # Remove the extracted folder and whatever else it still contains
            shutil.rmtree(extracted_dir)
            print(f"Matrix '{matrix_name}' saved as: {mtx_file_path}")
        except Exception as e:
            # Best effort: report the failure and continue with the next matrix.
            print(f"Error downloading matrix '{matrix_name}': {e}")

if __name__ == "__main__":
    # Locations: cached metadata in, downloaded .mtx files out
    dest_dir = "../ssmatrices"
    metadata_file = "../ssmatrices/ss_matrix_metadata.json"

    # Inclusive (min, max) bounds; rows and columns share the same window
    row_range = col_range = (10000, 50000)
    sparsity_range = (0.9, 1.0)

    # Select the matrices of interest from the cached metadata
    filtered_matrices = filter_matrices(metadata_file, row_range, col_range, sparsity_range)

    print(f"Found {len(filtered_matrices)} matrices matching the criteria")

    # Fetch every selected matrix that is not already present in dest_dir
    download_and_save_matrices(filtered_matrices=filtered_matrices, dest_dir=dest_dir)


Found 503 matrices matching the criteria
Downloading matrix: bcsstk17


bcsstk17:   0%|          | 0/1915413 [00:00<?, ?B/s]

Matrix 'bcsstk17' saved as: ../ssmatrices/bcsstk17.mtx
Downloading matrix: bcsstk18


bcsstk18:   0%|          | 0/709409 [00:00<?, ?B/s]

Matrix 'bcsstk18' saved as: ../ssmatrices/bcsstk18.mtx
Downloading matrix: bcsstk25


bcsstk25:   0%|          | 0/1147553 [00:00<?, ?B/s]

Matrix 'bcsstk25' saved as: ../ssmatrices/bcsstk25.mtx
Downloading matrix: bcsstk29


bcsstk29:   0%|          | 0/642731 [00:00<?, ?B/s]

Matrix 'bcsstk29' saved as: ../ssmatrices/bcsstk29.mtx
Downloading matrix: bcsstk30


bcsstk30:   0%|          | 0/2330997 [00:00<?, ?B/s]

Matrix 'bcsstk30' saved as: ../ssmatrices/bcsstk30.mtx
Downloading matrix: bcsstk31


bcsstk31:   0%|          | 0/1310801 [00:00<?, ?B/s]

Matrix 'bcsstk31' saved as: ../ssmatrices/bcsstk31.mtx
Downloading matrix: bcsstk32


bcsstk32:   0%|          | 0/2187664 [00:00<?, ?B/s]

Matrix 'bcsstk32' saved as: ../ssmatrices/bcsstk32.mtx
Downloading matrix: bcsstm25


bcsstm25:   0%|          | 0/119670 [00:00<?, ?B/s]

Matrix 'bcsstm25' saved as: ../ssmatrices/bcsstm25.mtx
Downloading matrix: onetone1


onetone1:   0%|          | 0/1997386 [00:00<?, ?B/s]

Matrix 'onetone1' saved as: ../ssmatrices/onetone1.mtx
Downloading matrix: onetone2


onetone2:   0%|          | 0/1303923 [00:00<?, ?B/s]

Matrix 'onetone2' saved as: ../ssmatrices/onetone2.mtx
Downloading matrix: epb1


epb1:   0%|          | 0/956972 [00:00<?, ?B/s]

Matrix 'epb1' saved as: ../ssmatrices/epb1.mtx
Downloading matrix: epb2


epb2:   0%|          | 0/1604248 [00:00<?, ?B/s]

Matrix 'epb2' saved as: ../ssmatrices/epb2.mtx
Downloading matrix: af23560


af23560:   0%|          | 0/3504689 [00:00<?, ?B/s]

Matrix 'af23560' saved as: ../ssmatrices/af23560.mtx
Downloading matrix: bcsstk35


bcsstk35:   0%|          | 0/6959360 [00:00<?, ?B/s]

Matrix 'bcsstk35' saved as: ../ssmatrices/bcsstk35.mtx
Downloading matrix: bcsstk36


bcsstk36:   0%|          | 0/5435971 [00:00<?, ?B/s]

Matrix 'bcsstk36' saved as: ../ssmatrices/bcsstk36.mtx
Downloading matrix: bcsstk37


bcsstk37:   0%|          | 0/5348044 [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
'''
Download at most max_matrices of the matrices that pass the filter.
'''


import os
import shutil
import json
import ssgetpy

def filter_matrices(filename, row_range=None, col_range=None, sparsity_range=None):
    """
    Read the matrix metadata file and return the entries within the given bounds.

    Parameters:
        filename (str): Path to the JSON metadata file.
        row_range (tuple): Inclusive (min, max) row count; no row check when falsy.
        col_range (tuple): Inclusive (min, max) column count; no column check when falsy.
        sparsity_range (tuple): Inclusive (min, max) sparsity; no sparsity check when falsy.

    Returns:
        list: Metadata dicts passing all active checks, in their original order,
              or [] if the metadata file does not exist.
    """
    if not os.path.exists(filename):
        print(f"Metadata file {filename} not found. Maybe run the fetch_and_store_results() first!")
        return []

    with open(filename, "r") as metadata_fh:
        all_entries = json.load(metadata_fh)

    # (metadata key, requested bounds) pairs, evaluated in this order.
    checks = (
        ("rows", row_range),
        ("cols", col_range),
        ("sparsity", sparsity_range),
    )

    kept = []
    for info in all_entries:
        for key, limits in checks:
            if limits and not (limits[0] <= info[key] <= limits[1]):
                break  # out of range: drop this entry
        else:
            # No check failed.
            kept.append(info)

    return kept

def download_and_save_matrices(filtered_matrices, dest_dir, max_matrices=10):
    """
    Download filtered matrices and save their .mtx files directly to the destination directory, instead of under a sub-folder.

    Matrices whose "<name>.mtx" already exists in dest_dir are skipped and do not
    count towards max_matrices; only successful downloads are counted. For each
    download the matrix is searched by name, fetched in Matrix Market format with
    extraction enabled, and its .mtx file is moved to "<dest_dir>/<name>.mtx"; the
    per-matrix sub-folder is then removed. Errors for one matrix are printed and do
    not stop the remaining downloads.

    Parameters:
        filtered_matrices (list): List of filtered matrix metadata dicts (each needs a
            "name" key; an "id" key, when present, is used to confirm the match).
        dest_dir (str): Path to the directory where the .mtx files should be saved.
        max_matrices (int or None): Maximum number of matrices to download in this
            call; None removes the cap.
    """
    os.makedirs(dest_dir, exist_ok=True)

    processed_count = 0
    for matrix_meta in filtered_matrices:
        if max_matrices is not None and processed_count >= max_matrices:
            break

        matrix_name = matrix_meta["name"]
        mtx_file_path = os.path.join(dest_dir, f"{matrix_name}.mtx")

        # Check if the .mtx file already exists
        if os.path.exists(mtx_file_path):
            print(f"Matrix '{matrix_name}' already exists at {mtx_file_path}. Skipping download.")
            continue

        print(f"Downloading matrix: {matrix_name}")
        try:
            candidates = ssgetpy.search(name=matrix_name, limit=10)  # Search for the matrix

            # Taking the first hit blindly can select a different matrix (or raise
            # IndexError on an empty result), so require an exact name match and,
            # when the metadata carries an id, an id match as well.
            wanted_id = matrix_meta.get("id")
            matrix = next(
                (
                    candidate
                    for candidate in candidates
                    if candidate.name == matrix_name
                    and (wanted_id is None or candidate.id == wanted_id)
                ),
                None,
            )
            if matrix is None:
                print(f"No exact match found for matrix: {matrix_name}")
                continue

            matrix.download(format='MM', destpath=dest_dir, extract=True)  # Download matrix

            # Find and move .mtx file
            extracted_dir = os.path.join(dest_dir, matrix_name)
            # os.listdir order is arbitrary; sort so the fallback choice is deterministic.
            mtx_files = sorted(f for f in os.listdir(extracted_dir) if f.endswith('.mtx'))

            if not mtx_files:
                print(f"No .mtx file found for matrix: {matrix_name}")
                continue

            # Prefer "<name>.mtx": the chosen file is stored under the matrix's own
            # name, so picking an auxiliary .mtx here would mislabel it.
            expected_file = f"{matrix_name}.mtx"
            mtx_file = expected_file if expected_file in mtx_files else mtx_files[0]
            shutil.move(os.path.join(extracted_dir, mtx_file), mtx_file_path)

            # Remove the extracted folder and whatever else it still contains
            shutil.rmtree(extracted_dir)
            print(f"Matrix '{matrix_name}' saved as: {mtx_file_path}")
            processed_count += 1
        except Exception as e:
            # Best effort: report the failure and continue with the next matrix.
            print(f"Error downloading matrix '{matrix_name}': {e}")

if __name__ == "__main__":
    # Locations: cached metadata in, downloaded .mtx files out
    dest_dir = "../ssmatrices"
    metadata_file = "../ssmatrices/ss_matrix_metadata.json"

    # Inclusive (min, max) bounds; rows and columns share the same window
    row_range = col_range = (10000, 50000)
    sparsity_range = (0.9, 1.0)

    # Select the matrices of interest from the cached metadata
    filtered_matrices = filter_matrices(metadata_file, row_range, col_range, sparsity_range)

    print(f"Found {len(filtered_matrices)} matrices matching the criteria")

    # Cap this run at 10 newly downloaded matrices
    download_and_save_matrices(filtered_matrices, dest_dir=dest_dir, max_matrices=10)
