# Stride

## Preprocessing

### Importing

In [None]:
import os
import pandas as pd
import sys
from multiprocessing import Pool
import subprocess
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Wedge

### Convert tissue positions to STRIDE format

In [2]:
NUM_WORKERS = 8  # number of worker processes in the conversion Pool below

def convert_tissue_positions(input_file, output_file, sample_id=None):
    """Convert a ``tissue_positions_list.csv`` file to the STRIDE location format.

    The input is read as a header-less CSV whose first column becomes the
    index. The output is a tab-separated table with ``X_coord`` and
    ``Y_coord`` columns (plus ``Sample_ID`` when ``sample_id`` is given),
    indexed like the input.

    Args:
        input_file: Path to the header-less positions CSV.
        output_file: Path of the tab-separated file to write. Its parent
            directory is created when the path has one.
        sample_id: Optional label written to a ``Sample_ID`` column and
            used in the progress message.

    Returns:
        ``True`` on success, ``False`` if the input has too few columns or
        any error occurs (errors are printed, never raised, so one bad file
        does not abort a whole batch).
    """
    try:
        df = pd.read_csv(input_file, header=None, index_col=0)

        # Five data columns are needed after the index column is removed.
        if df.shape[1] < 5:
            print(f"[ERROR] {input_file} has {df.shape[1]} columns, expected >= 5")
            return False

        stride_df = pd.DataFrame({
            'X_coord': df.iloc[:, 4],  # pxl_col_in_fullres
            'Y_coord': df.iloc[:, 3],  # pxl_row_in_fullres
        }, index=df.index)

        if sample_id:
            stride_df['Sample_ID'] = sample_id

        # A bare filename has an empty dirname, and os.makedirs('') raises;
        # only create the directory when there is one to create.
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        stride_df.to_csv(output_file, sep='\t')

        print(f"[OK] {sample_id} → {output_file} ({len(stride_df)} spots)")
        return True

    except Exception as e:
        # Deliberate catch-all: report the failure and let the caller go on.
        print(f"[ERROR] Failed to process {input_file}: {e}")
        return False


def process_file(args):
    """Convert a single positions file; pool-friendly wrapper.

    ``args`` is an ``(input_file, output_dir)`` tuple so the function can be
    used directly with ``Pool.map``. The sample ID is the name of the
    directory two levels above the input file, and the output file is named
    ``<sample_id>_<input name with '.csv' replaced by '_stride.txt'>``.

    Returns whatever ``convert_tissue_positions`` returns.
    """
    src_path, dest_dir = args

    # <sample_id>/<subdir>/<file>: step up twice, then keep the last component.
    containing_dir = os.path.dirname(src_path)
    sample_id = os.path.basename(os.path.dirname(containing_dir))

    stride_name = os.path.basename(src_path).replace('.csv', '_stride.txt')
    target_path = os.path.join(dest_dir, f"{sample_id}_{stride_name}")

    return convert_tissue_positions(src_path, target_path, sample_id)


if __name__ == "__main__":
    # Input and output locations are fixed here. No command-line arguments
    # are read, so there is no argv usage check (the previous one demanded
    # arguments that were never used).
    root_dir = '/data/Datasets/Annotated/TLS_VISIUM/10x_Visium/'
    output_dir = '/data/tissue_locations/'

    # Collect every tissue_positions_list.csv under root_dir.
    input_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        for file in filenames:
            if file == "tissue_positions_list.csv":
                input_files.append(os.path.join(dirpath, file))
    input_files.sort()  # os.walk order is filesystem-dependent

    print(f"Found {len(input_files)} files for processing")
    os.makedirs(output_dir, exist_ok=True)

    with Pool(NUM_WORKERS) as pool:
        results = pool.map(process_file, [(f, output_dir) for f in input_files])

    # process_file returns False on failure; surface those instead of
    # silently discarding the results.
    failed = [f for f, ok in zip(input_files, results) if not ok]
    if failed:
        print(f"[ERROR] {len(failed)} of {len(input_files)} files failed:")
        for f in failed:
            print(f"  {f}")


Found 31 files for processing
[OK] RC8 → /data/tissue_locations/RC8_tissue_positions_list_stride.txt (4992 spots)[OK] RC3 → /data/tissue_locations/RC3_tissue_positions_list_stride.txt (4992 spots)[OK] RC17 → /data/tissue_locations/RC17_tissue_positions_list_stride.txt (4992 spots)[OK] RC14 → /data/tissue_locations/RC14_tissue_positions_list_stride.txt (4992 spots)[OK] LC3 → /data/tissue_locations/LC3_tissue_positions_list_stride.txt (4993 spots)[OK] KC2 → /data/tissue_locations/KC2_tissue_positions_list_stride.txt (4993 spots)





[OK] RC12 → /data/tissue_locations/RC12_tissue_positions_list_stride.txt (4992 spots)
[OK] RC13 → /data/tissue_locations/RC13_tissue_positions_list_stride.txt (4992 spots)[OK] RC15 → /data/tissue_locations/RC15_tissue_positions_list_stride.txt (4992 spots)[OK] LC4 → /data/tissue_locations/LC4_tissue_positions_list_stride.txt (4993 spots)


[OK] LC5 → /data/tissue_locations/LC5_tissue_positions_list_stride.txt (4993 spots)[OK] RC21 → /data/tissue_locations/RC

## Processing

### Deconvolution

In [None]:
data_dir = "/data/Tools/STRIDE/STRIDE/h5/"  # directory listed below to discover sample IDs
num_workers = 12  # size of the process pool that runs the STRIDE jobs

def run_stride(sample):
    """Run ``STRIDE deconvolve`` for one sample as a subprocess.

    The spatial count matrix is expected at
    ``/data/Tools/STRIDE/STRIDE/h5/<sample>_filtered_feature_bc_matrix.h5``;
    if it is missing the sample is skipped with a message. Results go to
    ``our_results/`` (relative to the current working directory) with the
    prefix ``our_results_<sample>``.

    Args:
        sample: Sample identifier used to build the input path and the
            output prefix.

    Returns:
        ``None``. Failures are printed rather than raised so that one bad
        sample does not abort a ``Pool.map`` over many samples.
    """
    print(f"Starting STRIDE for sample: {sample}")
    sc_count = "/data/Tools/STRIDE/STRIDE/pbmcref_10x.h5"
    sc_celltype = "/data/Tools/STRIDE/STRIDE/pbmcref_celltype_l1.txt"
    st_count = f"/data/Tools/STRIDE/STRIDE/h5/{sample}_filtered_feature_bc_matrix.h5"
    outdir = "our_results/"
    outprefix = f"our_results_{sample}"
    if not os.path.exists(st_count):
        print(f"ST count file not found for sample {sample}: {st_count}")
        return

    cmd = [
        "STRIDE", "deconvolve",
        "--sc-count", sc_count,
        "--sc-celltype", sc_celltype,
        "--st-count", st_count,
        "--outdir", outdir,
        "--outprefix", outprefix,
        "--normalize",
    ]

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        # STRIDE started but exited with a non-zero status.
        print(f"Error running STRIDE for {sample}: {e}")
    except OSError as e:
        # STRIDE could not be started at all (e.g. executable not on PATH).
        # Without this the exception would propagate out of the worker and
        # abort the whole pool.map call.
        print(f"Could not launch STRIDE for {sample}: {e}")
    else:
        print(f"Finished STRIDE for sample: {sample}")

if __name__ == "__main__":
    # Derive sample IDs only from the files run_stride actually reads.
    # Stripping the known suffix (instead of splitting on the first "_")
    # keeps IDs that contain underscores intact and ignores unrelated or
    # hidden files in data_dir.
    st_suffix = "_filtered_feature_bc_matrix.h5"
    samples = sorted({
        f[:-len(st_suffix)]
        for f in os.listdir(data_dir)
        if f.endswith(st_suffix) and len(f) > len(st_suffix)
    })
    print(f"Found samples: {samples}")

    with Pool(num_workers) as pool:
        pool.map(run_stride, samples)


### Plot scatterpies

In [None]:

# ==== Input files ====
frac_file = "./Result/our_results/our_results_KC1_spot_celltype_frac.txt"  # proportions (spot x celltype)
loc_file = "./Result/KC1_tissue_positions_list_stride.txt"                          # coordinates (index = barcode)

# ==== Load data ====
# Read the fraction table and coerce every cell to a number; anything that
# cannot be parsed becomes 0.0.
frac_df = (
    pd.read_csv(frac_file, sep="\t", index_col=0)
    .apply(pd.to_numeric, errors="coerce")
    .fillna(0.0)
)

# The first two columns of the location table are treated as X and Y.
loc_df = pd.read_csv(loc_file, sep="\t", index_col=0)
first_col, second_col = loc_df.columns[0], loc_df.columns[1]
loc_df = loc_df.rename(columns={first_col: 'X_coord', second_col: 'Y_coord'})
for axis_col in ('X_coord', 'Y_coord'):
    loc_df[axis_col] = pd.to_numeric(loc_df[axis_col], errors='coerce')

# ==== Match barcodes ====
# Keep only the spots present in both tables, in the same order.
common = frac_df.index.intersection(loc_df.index)
frac_df, loc_df = frac_df.loc[common], loc_df.loc[common]

# ==== Colors ====
# One tab20 colour per cell type, sampled evenly across the colormap.
celltypes = list(frac_df.columns)
colors = plt.cm.tab20(np.linspace(0, 1, num=len(celltypes)))

# ==== Draw a single pie ====
def draw_pie(ax, ratios, x, y, radius, colors):
    """Add one pie chart to ``ax``, centred at ``(x, y)``.

    ``ratios`` are normalised by their sum, and each entry is paired with
    the colour at the same position in ``colors``. Entries that are ``<= 0``
    get no wedge. Nothing is drawn when the sum of ``ratios`` is ``<= 0``.
    """
    total = np.sum(ratios)
    if total <= 0:
        return

    swept = 0  # fraction of the full circle already covered
    for share, face in zip(ratios, colors):
        if share <= 0:
            continue
        portion = share / total
        angle_from = swept * 360
        angle_to = (swept + portion) * 360
        ax.add_patch(
            Wedge((x, y), radius, angle_from, angle_to, facecolor=face, edgecolor='none')
        )
        swept += portion

# ==== Построение ====
# ==== Plotting ====
# Draw one pie per spot shared by the fraction and location tables.
fig, ax = plt.subplots(figsize=(8, 8))
for idx in common:
    x = loc_df.at[idx, 'X_coord']
    y = loc_df.at[idx, 'Y_coord']
    ratios = frac_df.loc[idx].values
    draw_pie(ax, ratios, x, y, radius=20, colors=colors)  # ← smaller radius here

ax.set_aspect('equal')
# NOTE(review): the y-axis is flipped, presumably so the plot matches image
# orientation (pixel rows growing downward) — confirm.
ax.invert_yaxis()
ax.axis('off')

# Legend
# Empty scatter artists exist only to give each cell type a legend entry.
for ct, col in zip(celltypes, colors):
    ax.scatter([], [], color=col, label=ct, s=50)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')

# Save the figure before showing it.
output_file = "scatterpie_highres.png"
plt.savefig(output_file, dpi=1200, bbox_inches='tight', pad_inches=0.1)
print(f"Сохранено: {output_file}")

plt.show()