In [None]:
# Standard library
import os
import pickle
import sys

# Third-party
import git
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Locate the repository root by searching upwards from the current working
# directory, then make the validation helpers importable.
REPO_DIR = git.Repo(os.getcwd(), search_parent_directories=True).working_tree_dir
_validation_dir = f"{REPO_DIR}/4_validation"
if _validation_dir not in sys.path:  # keep the cell idempotent on re-run
    sys.path.append(_validation_dir)

# Project-local import: must come AFTER the sys.path update above, otherwise it
# only succeeds when the module happens to be importable some other way.
# NOTE(review): presumably validate_functions lives in 4_validation — confirm.
from validate_functions import create_value_grid

In [None]:
# --- Single-cell tables ------------------------------------------------------
# One table per file in this directory; sorted for a deterministic order.
single_cell_dir = "/home/evos/Data/CRC/Orion/Orion_single_cell_tables"
single_cell_files = sorted(os.listdir(single_cell_dir))

# --- Single-cell metadata ----------------------------------------------------
# `__file__` is not defined inside a Jupyter kernel, so referencing it directly
# raises NameError. Fall back to the current working directory in that case.
# NOTE(review): this assumes the kernel's working directory is the notebook's
# own directory — confirm.
try:
    _this_dir = os.path.dirname(__file__)
except NameError:
    _this_dir = os.getcwd()
single_cell_metadata_dir = os.path.join(_this_dir, "..", "text.txt")
# Tab-separated table; the 'single_cell_file' and 'slide_id' columns are used
# downstream to map each single-cell table to its slide.
single_cell_metadata = pd.read_csv(single_cell_metadata_dir, sep="\t")

# --- Tile-level predictions --------------------------------------------------
# NOTE(review): the file has a .csv extension but is read as tab-separated —
# confirm the delimiter is correct.
predictions_dir = "/home/evos/Outputs/CRC/Orion/3_tile_level_quantification_gamma/test_tile_predictions_proba.csv"
predictions_all = pd.read_csv(predictions_dir, sep="\t")

# --- Analysis parameters -----------------------------------------------------
cell_type = "tumor_purity"  # prediction column passed to create_value_grid
marker = 'Pan-CK'           # single-cell table column that gets averaged per tile
tile_size = 512             # tile edge length, in adjusted-centroid coordinate units
grid_directory = f"/home/evos/Outputs/CRC/grids/fluorescence_grids_{marker}"

In [None]:
# For every single-cell table: bin the cells into the same tile grid as the
# slide's prediction grid, compute the mean marker fluorescence and the number
# of cells per tile, and pickle both grids to `grid_directory`.

all_scores = {}

# Resolution adjustment applied to the centroid coordinates; it is the same for
# every slide, so it is computed once outside the loop.
# NOTE(review): presumably a ratio of pixel sizes between the two image
# sources — confirm.
scaling_factor = 0.325 / 0.5

# Make sure the output directory exists before the first pickle is written.
os.makedirs(grid_directory, exist_ok=True)

# enumerate() drives the progress counter; previously it was never incremented
# and printed 1 for every file.
for counter, file in enumerate(single_cell_files, start=1):
    print(counter)

    single_cell_data = pd.read_csv(f"{single_cell_dir}/{file}", sep=",", index_col=0)
    # Map the table's file name to its slide id (raises IndexError if the file
    # has no entry in the metadata table).
    slide_id = single_cell_metadata.loc[single_cell_metadata['single_cell_file'] == file, 'slide_id'].iloc[0]

    predictions_slide = predictions_all[predictions_all.slide_id == slide_id]
    predictions_slide = predictions_slide.reset_index(drop=True)
    prob_grid = create_value_grid(predictions_slide, cell_type)

    # Adjust centroids for scaling factor
    single_cell_data['X_centroid_adjusted'] = single_cell_data['X_centroid'] * scaling_factor
    single_cell_data['Y_centroid_adjusted'] = single_cell_data['Y_centroid'] * scaling_factor

    # The fluorescence grids share the shape of the prediction grid
    # (rows index Y tiles, columns index X tiles).
    grid_shape_y = prob_grid.shape[0]
    grid_shape_x = prob_grid.shape[1]

    # Tile index of every cell along each axis
    single_cell_data['X_tile_index'] = (single_cell_data['X_centroid_adjusted'] // tile_size).astype(int)
    single_cell_data['Y_tile_index'] = (single_cell_data['Y_centroid_adjusted'] // tile_size).astype(int)

    # Keep only cells that fall inside the grid. The lower bounds matter: a
    # negative tile index would either make np.bincount raise or wrap into a
    # wrong tile once flattened.
    valid_cells = single_cell_data[
        (single_cell_data['X_tile_index'] >= 0) &
        (single_cell_data['X_tile_index'] < grid_shape_x) &
        (single_cell_data['Y_tile_index'] >= 0) &
        (single_cell_data['Y_tile_index'] < grid_shape_y)
    ]

    # Convert to NumPy arrays for efficient updates
    x_indices = valid_cells['X_tile_index'].to_numpy()
    y_indices = valid_cells['Y_tile_index'].to_numpy()
    fluorescence = valid_cells[marker].to_numpy()

    # Row-major flat tile index so a single bincount covers the whole grid
    n_tiles = grid_shape_x * grid_shape_y
    flat_indices = y_indices * grid_shape_x + x_indices

    # Sum fluorescence and count cells per tile
    tile_sums = np.bincount(flat_indices, weights=fluorescence, minlength=n_tiles)
    tile_counts = np.bincount(flat_indices, minlength=n_tiles)

    # Reshape back to grid dimensions
    average_fluorescence_grid = tile_sums.reshape(grid_shape_y, grid_shape_x)
    cell_count_grid = tile_counts.reshape(grid_shape_y, grid_shape_x)

    # Turn sums into means; tiles without cells stay at 0 (avoids divide-by-zero)
    occupied = cell_count_grid > 0
    average_fluorescence_grid[occupied] /= cell_count_grid[occupied]

    grids = {
        "average_fluorescence_grid": average_fluorescence_grid,
        "cell_count_grid": cell_count_grid,
    }

    # Save the grids for this slide
    with open(f"{grid_directory}/grids_{file}.pkl", 'wb') as f:
        pickle.dump(grids, f)
    