In [1]:
import pandas as pd

from utils import load_test_data, visualize_test_region, generate_training_samples
from baseline import RandomRegionBaseline
from evaluate import Evaluator

In [7]:
# Build the training samples. `training_samples` is not referenced by any other
# cell visible in this part of the notebook.
training_samples = generate_training_samples(
    num_samples_per_slice=100,
    minimum_cell=40,
)

Seeding all randomness with seed=2024


In [None]:
# Load the test items. The cells below iterate over this collection and read
# `.test_area`, `.ground_truth` and `.adata` from each item.
all_test_items = load_test_data()

In [None]:
# Pick the first test item and print a summary of it.
#
# `i` and `test_item` are deliberately left defined at notebook scope: the
# cells below (visualisation, baseline, evaluation) read both names.
# `next(enumerate(...), default)` works for any iterable, not just sequences.
i, test_item = next(enumerate(all_test_items), (None, None))
if i is None:
    # Fail here with a clear message rather than letting a later cell hit a
    # NameError or silently reuse a stale `test_item` left in the kernel.
    raise ValueError("all_test_items is empty; there is no test area to inspect")

print(f"Test Area {i+1}:")
print(f"  Min X: {test_item.test_area.hole_min_x}")
print(f"  Max X: {test_item.test_area.hole_max_x}")
print(f"  Min Y: {test_item.test_area.hole_min_y}")
print(f"  Max Y: {test_item.test_area.hole_max_y}")
print(f"  Dominant Tissue: {test_item.test_area.dominant_tissue}")
print(f"  Number of cells in ground truth: {len(test_item.ground_truth.hole_cells)}")
print(f"  Number of cells in adata after masking: {test_item.adata.shape[0]}")
print(f"  Gene expression shape: {test_item.ground_truth.gene_expression.shape}")

# Per-tissue share of the ground truth, formatted as a percentage.
print("  Tissue Percentages in Ground Truth:")
for tissue, percentage in test_item.ground_truth.tissue_percentages.items():
    print(f"    {tissue}: {percentage:.2%}")
        

In [None]:
# Plot the selected test item's observations together with its test area.
visualize_test_region(
    pd.DataFrame(test_item.adata.obs),
    test_item.test_area,
    title=f'Test Region {i+1}',
)

In [None]:
# Run the RandomRegionBaseline on the selected test item; `fill_region()`
# returns the predicted coordinates and the predicted gene expressions.
baseline = RandomRegionBaseline(
    test_item.adata,
    test_item.test_area,
)
pred_coords, pred_gene_expressions = baseline.fill_region()

In [None]:
# Score the baseline predictions against the ground truth of the selected item.
true_coords = test_item.ground_truth.hole_cells[['center_x', 'center_y']].values
true_gene_expressions = test_item.ground_truth.gene_expression

# Expression-level metrics (uses both coordinates and expressions).
mse, f1, cosine_sim = Evaluator.evaluate_expression(
    true_coords,
    true_gene_expressions,
    pred_coords,
    pred_gene_expressions,
)
# Coordinate-only metrics.
chamfer_dist = Evaluator.chamfer_distance(true_coords, pred_coords)
emd = Evaluator.calculate_emd(true_coords, pred_coords)

# Emit the report one line at a time, header first.
report_lines = (
    "  Evaluation Metrics for Random Region Baseline:",
    f"    MSE: {mse}",
    f"    F1 Score: {f1}",
    f"    Cosine Similarity: {cosine_sim}",
    f"    Chamfer Distance: {chamfer_dist}",
    f"    EMD: {emd}",
)
for report_line in report_lines:
    print(report_line)

In [None]:
# Plot the test region again, this time passing the baseline's predicted
# coordinates via `new_coords`.
visualize_test_region(
    pd.DataFrame(test_item.adata.obs),
    test_item.test_area,
    title=f'Test Region {i+1}',
    new_coords=pred_coords,
)