# Ripley L Analysis Workflow
# 
This notebook runs the complete Ripley L analysis workflow using SPAC templates.
# 
**Location**: This notebook should be in `SCSAWORKFLOW/examples/spac_ripley_analysis/`

## Step 1: Setup Paths and Imports

In [1]:
# %%
import sys
import os
import json
from pathlib import Path

In [2]:
# Get the current notebook directory
notebook_dir = Path.cwd()
print(f"Current directory: {notebook_dir}")

# Navigate to the SCSAWORKFLOW root to import spac modules
# Go up from examples/spac_ripley_analysis to SCSAWORKFLOW
project_root = notebook_dir.parent.parent
sys.path.insert(0, str(project_root))

Current directory: /Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis


In [3]:
# Add the local src directory for any local imports
local_src = notebook_dir / "src"
if local_src.exists():
    sys.path.insert(0, str(local_src))

print(f"Project root: {project_root}")
print(f"Local src: {local_src}")

Project root: /Users/liuf9/Projects/SCSAWorkflow
Local src: /Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/src


In [4]:
# Import from the spac package
from src.spac.templates_2.ripley_l_template import run_from_json as run_ripley
from src.spac.templates_2.visualize_ripley_template import run_from_json as run_viz

print("Successfully imported SPAC templates!")

Successfully imported SPAC templates!


# Step 2: Check Input Data



In [5]:
# %%
# Check if data directory exists and list files
data_dir = notebook_dir / "data"
if data_dir.exists():
    print("Files in data directory:")
    for file in data_dir.iterdir():
        print(f"  - {file.name}")
else:
    print("Data directory not found. Creating it...")
    data_dir.mkdir(exist_ok=True)


Files in data directory:
  - transform_output.pickle
  - sample_spatial_data.h5ad


In [6]:
# Define input file
input_file = data_dir / "sample_spatial_data.h5ad"
if input_file.exists():
    print(f"\n✓ Input file found: {input_file}")
else:
    print(f"\n✗ Input file not found: {input_file}")
    print("  Please place your transform_output.h5ad file in the data directory")


✓ Input file found: /Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/data/sample_spatial_data.h5ad


## Step 3: Create Ripley L Parameters

In [7]:
# %%
# Create ripley_l_params.json in configs directory
configs_dir = notebook_dir / "configs"
configs_dir.mkdir(exist_ok=True) 

ripley_params = {
    "Upstream_Analysis": str(data_dir / "sample_spatial_data.h5ad"),
    "Radii": [0, 50, 100, 150, 200, 250, 300],
    "Annotation": "renamed_phenotypes",
    "Center_Phenotype": "B cells",
    "Neighbor_Phenotype": "CD8 T cells",
    "Stratify_By": "None",
    "Number_of_Simulations": 100,
    "Area": "None",
    "Seed": 42,
    "Spatial_Key": "spatial",
    "Edge_Correction": True,
    "Output_File": str(notebook_dir / "outputs" / "ripley_output.h5ad")
}

In [8]:
# Save to configs directory
ripley_config_path = configs_dir / "ripley_l_config.json"
with open(ripley_config_path, "w") as f:
    json.dump(ripley_params, f, indent=2)

print(f"Created Ripley L config at: {ripley_config_path}")
print("\nParameters:")
print(json.dumps(ripley_params, indent=2))

Created Ripley L config at: /Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/configs/ripley_l_config.json

Parameters:
{
  "Upstream_Analysis": "/Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/data/sample_spatial_data.h5ad",
  "Radii": [
    0,
    50,
    100,
    150,
    200,
    250,
    300
  ],
  "Annotation": "renamed_phenotypes",
  "Center_Phenotype": "B cells",
  "Neighbor_Phenotype": "CD8 T cells",
  "Stratify_By": "None",
  "Number_of_Simulations": 100,
  "Area": "None",
  "Seed": 42,
  "Spatial_Key": "spatial",
  "Edge_Correction": true,
  "Output_File": "/Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/outputs/ripley_output.h5ad"
}


In [9]:
# # %%
# # Debug input data structure
# import anndata as ad
# import pandas as pd

# input_file = data_dir / "sample_spatial_data.h5ad"

# if input_file.exists():
#     print("=== INPUT DATA INVESTIGATION ===")
#     print(f"Loading: {input_file}")
    
#     try:
#         adata = ad.read_h5ad(input_file)
        
#         print(f"\n✓ Successfully loaded AnnData object")
#         print(f"Shape: {adata.shape} (cells x genes)")
        
#         print(f"\n--- OBSERVATIONS (adata.obs) ---")
#         print(f"Columns: {list(adata.obs.columns)}")
#         print(f"Shape: {adata.obs.shape}")
        
#         # Check for the annotation column
#         annotation_col = "renamed_phenotypes"
#         if annotation_col in adata.obs.columns:
#             print(f"\n✓ Found annotation column: '{annotation_col}'")
#             print(f"Data type: {adata.obs[annotation_col].dtype}")
#             print(f"Unique values: {adata.obs[annotation_col].unique()}")
#             print(f"Value counts:")
#             print(adata.obs[annotation_col].value_counts())
#         else:
#             print(f"\n✗ Annotation column '{annotation_col}' not found")
#             print("Available columns:")
#             for col in adata.obs.columns:
#                 print(f"  - {col}: {adata.obs[col].dtype}")
        
#         print(f"\n--- SPATIAL COORDINATES (adata.obsm) ---")
#         print(f"Available obsm keys: {list(adata.obsm.keys())}")
        
#         spatial_key = "spatial"
#         if spatial_key in adata.obsm.keys():
#             print(f"\n✓ Found spatial coordinates: '{spatial_key}'")
#             print(f"Shape: {adata.obsm[spatial_key].shape}")
#             print(f"Data type: {adata.obsm[spatial_key].dtype}")
#             print(f"Coordinate range:")
#             print(f"  X: {adata.obsm[spatial_key][:, 0].min():.2f} to {adata.obsm[spatial_key][:, 0].max():.2f}")
#             print(f"  Y: {adata.obsm[spatial_key][:, 1].min():.2f} to {adata.obsm[spatial_key][:, 1].max():.2f}")
#         else:
#             print(f"\n✗ Spatial key '{spatial_key}' not found")
#             for key in adata.obsm.keys():
#                 print(f"  - {key}: {adata.obsm[key].shape}")
        
#         print(f"\n--- UNSTRUCTURED DATA (adata.uns) ---")
#         if len(adata.uns) > 0:
#             print(f"Available uns keys: {list(adata.uns.keys())}")
#             for key, value in adata.uns.items():
#                 print(f"  - {key}: {type(value)}")
#         else:
#             print("No unstructured data found")
        
#         print(f"\n--- DATA SUMMARY ---")
#         print(f"X (expression) shape: {adata.X.shape}")
#         print(f"X data type: {type(adata.X)}")
        
#         # Check for required phenotypes
#         center_pheno = "B cells"
#         neighbor_pheno = "CD8 T cells"
        
#         if annotation_col in adata.obs.columns:
#             unique_phenotypes = adata.obs[annotation_col].unique()
#             print(f"\nChecking required phenotypes:")
#             print(f"  Center phenotype '{center_pheno}': {'✓ Found' if center_pheno in unique_phenotypes else '✗ Not found'}")
#             print(f"  Neighbor phenotype '{neighbor_pheno}': {'✓ Found' if neighbor_pheno in unique_phenotypes else '✗ Not found'}")
            
#             if center_pheno in unique_phenotypes:
#                 center_count = (adata.obs[annotation_col] == center_pheno).sum()
#                 print(f"    {center_pheno} count: {center_count}")
            
#             if neighbor_pheno in unique_phenotypes:
#                 neighbor_count = (adata.obs[annotation_col] == neighbor_pheno).sum()
#                 print(f"    {neighbor_pheno} count: {neighbor_count}")
        
#     except Exception as e:
#         print(f"\n✗ Error loading file: {e}")
#         import traceback
#         traceback.print_exc()
        
# else:
#     print(f"✗ Input file not found: {input_file}")
#     print("\nFiles in data directory:")
#     if data_dir.exists():
#         for file in data_dir.iterdir():
#             print(f"  - {file.name}")
#     else:
#         print("  Data directory does not exist")

## Step 4: Run Ripley L Analysis

In [10]:
# %%
# Make sure outputs directory exists
outputs_dir = notebook_dir / "outputs"
outputs_dir.mkdir(exist_ok=True)

# Run the analysis
print("Running Ripley L analysis...")
try:
    ripley_results = run_ripley(str(ripley_config_path))
    print("Analysis complete!")
    print(f"Results saved to: {ripley_results}")
except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Make sure transform_output.h5ad is in the data directory")
except Exception as e:
    print(f"Error running analysis: {e}")

Running Ripley L analysis...
Error running analysis: expected str, bytes or os.PathLike object, not dict


## Step 5: Create Visualization Parameters

In [11]:
# %%
# Create visualization parameters
viz_params = {
    "Upstream_Analysis": str(outputs_dir / "ripley_output.h5ad"),
    "Center_Phenotype": "B cells",
    "Neighbor_Phenotype": "CD8 T cells",
    "Plot_Specific_Regions": False,
    "Regions_Label_s_": [],
    "Plot_Simulations": True,
    "Output_File": str(outputs_dir / "ripley_plots.h5ad")
}

# Save to configs directory
viz_config_path = configs_dir / "visualize_ripley_config.json"
with open(viz_config_path, "w") as f:
    json.dump(viz_params, f, indent=2)

print(f"Created visualization config at: {viz_config_path}")
print("\nParameters:")
print(json.dumps(viz_params, indent=2))

Created visualization config at: /Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/configs/visualize_ripley_config.json

Parameters:
{
  "Upstream_Analysis": "/Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/outputs/ripley_output.h5ad",
  "Center_Phenotype": "B cells",
  "Neighbor_Phenotype": "CD8 T cells",
  "Plot_Specific_Regions": false,
  "Regions_Label_s_": [],
  "Plot_Simulations": true,
  "Output_File": "/Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/outputs/ripley_plots.h5ad"
}


In [12]:
# %%
# Debug the visualization function
print("Debugging visualization function...")
print(f"Function type: {type(run_viz)}")
print(f"Config path: {viz_config_path}")
print(f"Config exists: {viz_config_path.exists()}")

# Test with a simple call to see what it returns
try:
    # Load the config to verify it's valid JSON
    with open(viz_config_path, 'r') as f:
        config_data = json.load(f)
    print("Config loaded successfully:")
    print(json.dumps(config_data, indent=2))
except Exception as e:
    print(f"Config loading error: {e}")

Debugging visualization function...
Function type: <class 'function'>
Config path: /Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/configs/visualize_ripley_config.json
Config exists: True
Config loaded successfully:
{
  "Upstream_Analysis": "/Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/outputs/ripley_output.h5ad",
  "Center_Phenotype": "B cells",
  "Neighbor_Phenotype": "CD8 T cells",
  "Plot_Specific_Regions": false,
  "Regions_Label_s_": [],
  "Plot_Simulations": true,
  "Output_File": "/Users/liuf9/Projects/SCSAWorkflow/examples/spac_ripley_analysis/outputs/ripley_plots.h5ad"
}


## Step 6: Run Visualization

In [13]:
# %%
# Run visualization
print("Creating visualization...")
try:
    viz_results = run_viz(viz_config_path)
    print("Visualization complete!")
    print(f"Results saved to: {viz_results}")

    # Check if viz_results is a dictionary or string
    if isinstance(viz_results, dict):
        print(f"Results saved to: {viz_results}")
        # Access specific files if needed
        for filename, filepath in viz_results.items():
            print(f"  {filename}: {filepath}")
    else:
        # If it's a string, just print it
        print(f"Results: {viz_results}")
    
    # The plot should display automatically
    # CSV data is saved to outputs/ripley_plots.csv
except FileNotFoundError as e:
    print(f"Error: {e}")
    print("Make sure the Ripley L analysis completed successfully first")
except Exception as e:
    print(f"Error creating visualization: {e}")
    print(f"Type of viz_results: {type(viz_results) if 'viz_results' in locals() else 'undefined'}")
    # Add debugging information
    import traceback
    print("Full traceback:")
    traceback.print_exc()



Creating visualization...
Error creating visualization: expected str, bytes or os.PathLike object, not dict
Type of viz_results: undefined
Full traceback:


Traceback (most recent call last):
  File "/var/folders/fd/krg499q52bq1bjk316swjcsxspb1wj/T/ipykernel_29007/50157902.py", line 5, in <module>
    viz_results = run_viz(viz_config_path)
  File "/Users/liuf9/Projects/SCSAWorkflow/src/spac/templates_2/visualize_ripley_template.py", line 40, in run_from_json
    adata = load_input(params)
  File "/Users/liuf9/Projects/SCSAWorkflow/src/spac/templates_2/template_utils.py", line 27, in load_input
    path = Path(file_path)
  File "/Users/liuf9/opt/anaconda3/envs/spac-3913/lib/python3.9/pathlib.py", line 1082, in __new__
    self = cls._from_parts(args, init=False)
  File "/Users/liuf9/opt/anaconda3/envs/spac-3913/lib/python3.9/pathlib.py", line 707, in _from_parts
    drv, root, parts = self._parse_args(args)
  File "/Users/liuf9/opt/anaconda3/envs/spac-3913/lib/python3.9/pathlib.py", line 691, in _parse_args
    a = os.fspath(a)
TypeError: expected str, bytes or os.PathLike object, not dict


## Step 7: Check Output Files

In [14]:
# %%
# List all output files
print("Output files created:")
if outputs_dir.exists():
    for file in outputs_dir.iterdir():
        print(f"  - {file.name} ({file.stat().st_size} bytes)")
        
# Load and display the CSV data
import pandas as pd

csv_path = outputs_dir / "ripley_plots.csv"
if csv_path.exists():
    df = pd.read_csv(csv_path)
    print(f"\nRipley L data ({len(df)} rows):")
    print(df.head(10))

Output files created:
  - .DS_Store (6148 bytes)
  - ripley_output.h5ad (4159484 bytes)
